├── .github ├── .gitignore ├── issue_template.md └── workflows │ ├── pkgdown.yaml │ ├── R-CMD-check.yaml │ ├── pr-commands.yaml │ └── rhub.yaml ├── LICENSE ├── .gitignore ├── docs ├── logo.png ├── favicon.ico ├── favicon-16x16.png ├── favicon-32x32.png ├── apple-touch-icon.png ├── reference │ ├── Rplot001.png │ └── figures │ │ └── logo.png ├── apple-touch-icon-60x60.png ├── apple-touch-icon-76x76.png ├── apple-touch-icon-120x120.png ├── apple-touch-icon-152x152.png ├── apple-touch-icon-180x180.png ├── articles │ ├── athena_s3_example.png │ ├── how_to_retry_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ ├── header-attrs-2.6 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.9 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.11 │ │ │ └── header-attrs.js │ ├── aws_s3_backend_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.6 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── getting_started_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.6 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── convert_and_save_cost_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.6 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── aws_athena_query_caching_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.6 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── changing_backend_file_parser_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.6 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ └── aws_athena_unload_files │ │ └── header-attrs-2.11 │ │ └── header-attrs.js ├── pkgdown.yml ├── link.svg ├── bootstrap-toc.css ├── docsearch.js ├── pkgdown.js ├── sitemap.xml └── bootstrap-toc.js ├── tests ├── testthat.R └── testthat │ ├── test-onload-functions.R │ ├── test-arn.R │ ├── test-region.R │ ├── test-classes.R │ ├── test-endpoint-override.R │ ├── test-retry.R │ ├── test-exist-remove.R │ ├── test-dplyr-copy_to.R │ ├── test-append-data.R │ ├── test-dbDisconnect.R │ ├── test-dplyr-compute.R │ ├── test-caching.R │ ├── test-keyboard-interrupt.R │ ├── test-athena-request.R │ ├── test-bigint.R │ ├── test-athena-ddl.R │ ├── test-work-groups.R │ ├── test-dbFetch-n.R │ ├── test-s3-upload-location.R │ ├── test-dbConvertTable.R │ ├── 
test-datatransfer-vroom.R │ ├── test-upload-file-parameters.R │ ├── test-datatransfer-datatable.R │ ├── helper.R │ ├── test-metadata.R │ └── test-view.R ├── inst └── icons │ ├── help.png │ ├── edit-sql.png │ └── athena-logo.png ├── man ├── figures │ └── logo.png ├── athena.Rd ├── dbplyr_edition.Rd ├── db_desc.Rd ├── db_connection_describe.Rd ├── AthenaPreviewObject.Rd ├── sql_translate_env.Rd ├── AthenaListObjects.Rd ├── AthenaDriver.Rd ├── backend_dbplyr_v1.Rd ├── noctua-package.Rd ├── dbListTables.Rd ├── sqlData.Rd ├── dbConvertTable.Rd ├── backend_dbplyr.Rd ├── AthenaResult.Rd ├── noctua_options.Rd ├── assume_role.Rd ├── session_token.Rd ├── sqlCreateTable.Rd ├── db_compute.Rd └── db_copy_to.Rd ├── pkgdown └── favicon │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── apple-touch-icon.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ └── apple-touch-icon-180x180.png ├── vignettes ├── athena_s3_example.png ├── changing_backend_file_parser.Rmd ├── how_to_retry.Rmd ├── aws_s3_backend.Rmd ├── aws_athena_query_caching.Rmd ├── getting_started.Rmd └── convert_and_save_cost.Rmd ├── .Rbuildignore ├── codecov.yml ├── noctua.Rproj ├── docker └── Dockerfile ├── cran-comments.md ├── R ├── noctua.R ├── zzz.R ├── sql_translate_utils.R ├── DataTypes.R ├── column_parser.R └── fetch_utils.R ├── DESCRIPTION └── NAMESPACE /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Dyfan Jones -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(noctua) 3 | 4 | test_check("noctua") 5 | -------------------------------------------------------------------------------- /inst/icons/help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/inst/icons/help.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /docs/favicon-16x16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/favicon-16x16.png -------------------------------------------------------------------------------- /docs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/favicon-32x32.png -------------------------------------------------------------------------------- /docs/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/apple-touch-icon.png -------------------------------------------------------------------------------- /inst/icons/edit-sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/inst/icons/edit-sql.png -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /inst/icons/athena-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/inst/icons/athena-logo.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /docs/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /vignettes/athena_s3_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/vignettes/athena_s3_example.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-180x180.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /docs/articles/athena_s3_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/docs/articles/athena_s3_example.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DyfanJones/noctua/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | .travis.yml 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^README\.Rmd$ 5 | ^cran-comments\.md$ 6 | ^CRAN-RELEASE$ 7 | ^docs$ 8 | ^pkgdown$ 9 | ^\.github$ 10 | ^codecov\.yml$ 11 | ^docker$ 12 | ^CRAN-SUBMISSION$ 13 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | 
      threshold: 1%
 9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 1%
13 | 
--------------------------------------------------------------------------------
/docs/articles/how_to_retry_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 | 
--------------------------------------------------------------------------------
/docs/articles/aws_s3_backend_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 | 
--------------------------------------------------------------------------------
/docs/articles/getting_started_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 | 
--------------------------------------------------------------------------------
/docs/articles/convert_and_save_cost_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 | 
--------------------------------------------------------------------------------
/docs/articles/aws_athena_query_caching_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 | 
--------------------------------------------------------------------------------
/docs/articles/changing_backend_file_parser_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 | 
--------------------------------------------------------------------------------
/man/athena.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Driver.R
 3 | \name{athena}
 4 | \alias{athena}
 5 | \title{Athena Driver}
 6 | \usage{
 7 | athena()
 8 | }
 9 | \value{
10 | \code{athena()} returns an S4 class. This class is used to activate the Athena method for \link[DBI:dbConnect]{DBI::dbConnect}.
11 | }
12 | \description{
13 | Driver for an Athena paws connection.
14 | }
15 | \seealso{
16 | \link{dbConnect}
17 | }
18 | 
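A minimal usage sketch to accompany `man/athena.Rd` above (an illustrative example, not a file in the repo): it assumes AWS credentials are already configured, e.g. via the AWS CLI, and `s3://example-bucket/queries/` is a placeholder staging location, not a real bucket.

```r
library(DBI)

# athena() supplies the driver; s3_staging_dir tells Athena where to write
# query results. Both the bucket and the query below are illustrative.
con <- dbConnect(noctua::athena(),
                 s3_staging_dir = "s3://example-bucket/queries/")
dbGetQuery(con, "SELECT 1 AS test")
dbDisconnect(con)
```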
--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
 1 | pandoc: 2.19.2
 2 | pkgdown: 2.0.7
 3 | pkgdown_sha: ~
 4 | articles:
 5 |   aws_athena_query_caching: aws_athena_query_caching.html
 6 |   aws_athena_unload: aws_athena_unload.html
 7 |   aws_s3_backend: aws_s3_backend.html
 8 |   changing_backend_file_parser: changing_backend_file_parser.html
 9 |   convert_and_save_cost: convert_and_save_cost.html
10 |   getting_started: getting_started.html
11 |   how_to_retry: how_to_retry.html
12 | last_built: 2022-12-19T16:26Z
13 | 
14 | 
--------------------------------------------------------------------------------
/tests/testthat/test-onload-functions.R:
--------------------------------------------------------------------------------
 1 | test_that("test if dbplyr major and minor versions are collected correctly", {
 2 |   skip_if_no_env()
 3 |   skip_if_package_not_avialable("dbplyr")
 4 | 
 5 |   library(dbplyr)
 6 | 
 7 |   dbplyr_major = packageVersion("dbplyr")$major
 8 |   dbplyr_minor = packageVersion("dbplyr")$minor
 9 | 
10 |   noctua:::dbplyr_version()
11 | 
12 |   expect_equal(noctua:::dbplyr_env$major, dbplyr_major)
13 |   expect_equal(noctua:::dbplyr_env$minor, dbplyr_minor)
14 | })
15 | 
--------------------------------------------------------------------------------
/noctua.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | ProjectId: d593a9c3-b4e8-4581-a58e-19d5453e79da
 3 | 
 4 | RestoreWorkspace: Default
 5 | SaveWorkspace: Default
 6 | AlwaysSaveHistory: Default
 7 | 
 8 | EnableCodeIndexing: Yes
 9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 | 
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 | 
16 | BuildType: Package
17 | PackageUseDevtools: Yes
18 | PackageInstallArgs: --no-multiarch --with-keep.source
19 | PackageCheckArgs: --as-cran
20 | PackageRoxygenize: rd,collate,namespace
21 | 
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rocker/r-base
 2 | 
 3 | RUN apt-get update
 4 | RUN apt-get install -y --no-install-recommends \
 5 |     curl \
 6 |     libcurl4-openssl-dev \
 7 |     libssl-dev \
 8 |     libxml2-dev
 9 | 
10 | # get aws cli
11 | RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
12 |     && unzip awscliv2.zip \
13 |     && ./aws/install
14 | 
15 | # Install noctua
16 | RUN Rscript -e "install.packages(c('paws', 'noctua'), repos = c(pawsr = 'https://paws-r-builds.s3.amazonaws.com/packages/latest/', CRAN = 'https://cloud.r-project.org'))"
17 | 
18 | CMD ["R"]
19 | 
--------------------------------------------------------------------------------
/docs/articles/how_to_retry_files/header-attrs-2.6/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 
--------------------------------------------------------------------------------
/docs/articles/how_to_retry_files/header-attrs-2.7/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 
--------------------------------------------------------------------------------
/docs/articles/how_to_retry_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 
--------------------------------------------------------------------------------
/man/dbplyr_edition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_integration.R
 3 | \name{dbplyr_edition}
 4 | \alias{dbplyr_edition}
 5 | \alias{dbplyr_edition.AthenaConnection}
 6 | \title{Declare which version of the dbplyr API is being called.}
 7 | \usage{
 8 | dbplyr_edition.AthenaConnection(con)
 9 | }
10 | \arguments{
11 | \item{con}{A \link{dbConnect} object, as returned by \code{dbConnect()}}
12 | }
13 | \value{
14 | Integer indicating which version of \code{dbplyr} will be used.
15 | }
16 | \description{
17 | Declare which version of the dbplyr API is being called.
18 | }
19 | 
--------------------------------------------------------------------------------
/docs/articles/aws_s3_backend_files/header-attrs-2.10/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_s3_backend_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_s3_backend_files/header-attrs-2.6/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_s3_backend_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_s3_backend_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/getting_started_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/getting_started_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/getting_started_files/header-attrs-2.6/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/getting_started_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/getting_started_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/how_to_retry_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/how_to_retry_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_unload_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/convert_and_save_cost_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/convert_and_save_cost_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/convert_and_save_cost_files/header-attrs-2.6/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/convert_and_save_cost_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/convert_and_save_cost_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_query_caching_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_query_caching_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_query_caching_files/header-attrs-2.6/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_query_caching_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_query_caching_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/changing_backend_file_parser_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/changing_backend_file_parser_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/changing_backend_file_parser_files/header-attrs-2.6/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/changing_backend_file_parser_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/changing_backend_file_parser_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 
--------------------------------------------------------------------------------
/tests/testthat/test-arn.R:
--------------------------------------------------------------------------------
 1 | context("ARN Connection")
 2 | 
 3 | # NOTE System variable format returned for Unit tests:
 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name"
 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/"
 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/"
 7 | 
 8 | test_that("Check connection to Athena using ARN", {
 9 |   skip_if_no_env()
10 |   # Test connection is using AWS CLI to set profile_name
11 |   con <- dbConnect(athena(),
12 |                    role_arn = Sys.getenv("noctua_arn"),
13 |                    duration_seconds = 1000)
14 | 
15 |   output <- dbGetQuery(con, "show Databases")
16 |   expect_equal(any(grepl("default", output)), TRUE)
17 | })
18 | 
--------------------------------------------------------------------------------
/tests/testthat/test-region.R:
--------------------------------------------------------------------------------
 1 | context("Region parsed")
 2 | 
 3 | # NOTE System variable format returned for Unit tests:
 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name"
 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/"
 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/"
 7 | 
 8 | test_that("Check if region is passed correctly in dbConnect", {
 9 |   skip_if_no_env()
10 |   con1 <- dbConnect(athena())
11 |   con2 <- dbConnect(athena(), region = "us-east-1")
12 | 
13 |   expect_equal(con1@ptr$Athena$.internal$config$region[1], con1@info$region_name)
14 |   expect_equal(con2@info$region_name, "us-east-1")
15 |   expect_equal(con2@ptr$Athena$.internal$config$region[1], "us-east-1")
16 | })
17 | 
--------------------------------------------------------------------------------
/man/db_desc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_integration.R
 3 | \name{db_desc}
 4 | \alias{db_desc}
 5 | \alias{db_desc.AthenaConnection}
 6 | \title{S3 implementation of \code{db_desc} for Athena (API version 1).}
 7 | \usage{
 8 | db_desc.AthenaConnection(x)
 9 | }
10 | \arguments{
11 | \item{x}{A \link{dbConnect} object, as returned by \code{dbConnect()}}
12 | }
13 | \value{
14 | Character variable containing metadata about the query sent to Athena. The metadata is returned in the following format:
15 | 
16 | \code{"Athena [@region/database]"}
17 | }
18 | \description{
19 | This is a backend function for dplyr to retrieve metadata about Athena queries. Users won't need to access or run this function.
20 | }
21 | 
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Submission
 2 | This release contains a couple of bug fixes.
 3 | 
 4 | ## Test environments
 5 | * local OS X install, R 4.2.0
 6 | * rhub: windows-x86_64-devel, ubuntu-gcc-release, fedora-clang-devel
 7 | 
 8 | ## R CMD check results (local)
 9 | 0 errors ✓ | 0 warnings ✓ | 0 notes ✓
10 | 
11 | ## R devtools::check_rhub() results
12 | 0 errors ✓ | 0 warnings ✓ | 0 notes ✓
13 | 
14 | **Side note:** ran devtools::check_rhub with the following environment variables:
15 | ```
16 | devtools::check_rhub(
17 |   env_vars = c(
18 |     "R_COMPILE_AND_INSTALL_PACKAGES" = "always",
19 |     "LIBARROW_BINARY" = "true",
20 |     "_R_CHECK_LENGTH_1_CONDITION_" = "abort,verbose",
21 |     "_R_CHECK_LENGTH_1_LOGIC2_" = "abort,verbose"
22 |   )
23 | )
24 | ```
25 | 
26 | ## Unit tests (using testthat) results
27 | [ FAIL 0 | WARN 0 | SKIP 0 | PASS 331 ]
28 | 
--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
[inline SVG markup stripped during extraction; pkgdown "link" icon]
--------------------------------------------------------------------------------
/tests/testthat/test-classes.R:
--------------------------------------------------------------------------------
 1 | context("classes")
 2 | 
 3 | # NOTE System variable format returned for Unit tests:
 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name"
 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/"
 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/"
 7 | 
 8 | test_that("Testing class formation", {
 9 |   skip_if_no_env()
10 |   # Test connection is using AWS CLI to set profile_name
11 |   con <- dbConnect(athena())
12 | 
13 |   res <- dbSendQuery(con, "show databases")
14 |   DBI::dbClearResult(res)
15 | 
16 |   # testing components of s4 class
17 |   expect_identical(names(attributes(con)), c("ptr", "info", "quote", "class"))
18 |   expect_identical(names(attributes(res)), c("connection", "info", "class"))
19 |   expect_s4_class(con, "AthenaConnection")
20 |   expect_s4_class(res, "AthenaResult")
21 | })
22 | 
--------------------------------------------------------------------------------
/man/db_connection_describe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_integration.R
 3 | \name{db_connection_describe}
 4 | \alias{db_connection_describe}
 5 | \alias{db_connection_describe.AthenaConnection}
 6 | \title{S3 implementation of \code{db_connection_describe} for Athena (API version 2).}
 7 | \usage{
 8 | db_connection_describe.AthenaConnection(con)
 9 | }
10 | \arguments{
11 | \item{con}{A \link{dbConnect} object, as returned by \code{dbConnect()}}
12 | }
13 | \value{
14 | Character variable containing metadata about the query sent to Athena. The metadata is returned in the following format:
15 | 
16 | \code{"Athena [@region/database]"}
17 | }
18 | \description{
19 | This is a backend function for dplyr to retrieve metadata about Athena queries. Users won't need to access or run this function.
20 | }
21 | 
--------------------------------------------------------------------------------
/tests/testthat/test-endpoint-override.R:
--------------------------------------------------------------------------------
 1 | test_that("Test set aws service endpoints", {
 2 | 
 3 |   expect_equal(set_endpoints(NULL), list())
 4 |   expect_equal(set_endpoints("dummy"), list(athena = "dummy"))
 5 |   expect_equal(set_endpoints(list(Athena = "dummy")), list(athena = "dummy"))
 6 |   expect_equal(
 7 |     set_endpoints(list(athena = "dummy.athena", s3 = "dummy.s3")),
 8 |     list(athena = "dummy.athena", s3 = "dummy.s3")
 9 |   )
10 |   expect_equal(
11 |     set_endpoints(list(athena = "dummy.athena", s3 = "dummy.s3", glue = "dummy.glue")),
12 |     list(athena = "dummy.athena", s3 = "dummy.s3", glue = "dummy.glue")
13 |   )
14 | })
15 | 
16 | test_that("Test unsupported aws services", {
17 |   expect_error(
18 |     set_endpoints(list(dummy = "dummy")),
19 |     "The named list"
20 |   )
21 |   expect_error(
22 |     set_endpoints(list()),
23 |     "endpoint_override needed to be a named list or character"
24 |   )
25 | })
26 | 
--------------------------------------------------------------------------------
/man/AthenaPreviewObject.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/View.R
 3 | \name{AthenaPreviewObject}
 4 | \alias{AthenaPreviewObject}
 5 | \title{Preview the data in an object.}
 6 | \usage{
 7 | AthenaPreviewObject(connection, rowLimit, ...)
 8 | }
 9 | \arguments{
10 | \item{connection}{A connection object, as returned by \code{dbConnect()}.}
11 | 
12 | \item{rowLimit}{The maximum number of rows to display.}
13 | 
14 | \item{...}{Parameters specifying the object.}
15 | }
16 | \value{
17 | A data frame containing the data in the object.
18 | }
19 | \description{
20 | Return the data inside an object as a data frame.
21 | }
22 | \details{
23 | The object to be previewed must be specified as one of the arguments
24 | (e.g. \code{table = "employees"}); depending on the driver and underlying
25 | data store, additional specification arguments may be required.
26 | }
27 | \keyword{internal}
28 | 
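A hedged sketch of how the internal preview helper documented above might be invoked, based solely on the signature shown (`AthenaPreviewObject` is not exported, and the table name is hypothetical):

```r
# Assumes `con` is an open AthenaConnection; "employees" is a placeholder table.
preview <- noctua:::AthenaPreviewObject(con, rowLimit = 100, table = "employees")
head(preview)
```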
--------------------------------------------------------------------------------
/tests/testthat/test-retry.R:
--------------------------------------------------------------------------------
 1 | context("Testing retry function")
 2 | 
 3 | test_that("Check if retry_api is working as intended", {
 4 |   skip_if_no_env()
 5 |   # create a function that is designed to fail a set number of times
 6 |   fail_env <- new.env()
 7 |   fail_env$i <- 1
 8 | 
 9 |   fail_function <- function(i, j){
10 |     if (i > j) i <- 1
11 |     result <- (i == j)
12 |     fail_env$i <- i + 1
13 |     if(!result) stop(i, " does not equal ", j)
14 |     return(TRUE)
15 |   }
16 | 
17 |   # this function will fail twice before succeeding
18 |   expect_true(retry_api_call(fail_function(fail_env$i, 3)))
19 | 
20 |   # stop noctua retrying and expect error
21 |   noctua_options(retry = 0)
22 |   expect_error(retry_api_call(fail_function(fail_env$i, 3)))
23 | 
24 |   expect_error(noctua_options(retry = -10))
25 |   expect_warning(noctua_options(retry_quiet = "blah"))
26 |   expect_error(noctua_options(verbose = "blah"))
27 | })
28 | 
--------------------------------------------------------------------------------
/man/sql_translate_env.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sql_translate_env.R
 3 | \name{sql_translate_env}
 4 | \alias{sql_translate_env}
 5 | \alias{sql_translation.AthenaConnection}
 6 | \alias{sql_translate_env.AthenaConnection}
 7 | \title{AWS Athena backend dbplyr versions 1 and 2}
 8 | \usage{
 9 | sql_translation.AthenaConnection(con)
10 | 
11 | sql_translate_env.AthenaConnection(con)
12 | }
13 | \arguments{
14 | \item{con}{An \code{\linkS4class{AthenaConnection}} object, produced by
15 | \code{\link[DBI:dbConnect]{DBI::dbConnect()}}}
16 | }
17 | \description{
18 | Creates an S3 implementation of \code{sql_translate_env} for the AWS Athena SQL translation environment, based on
19 | \href{https://docs.aws.amazon.com/athena/latest/ug/data-types.html}{Athena Data Types} and
20 | \href{https://docs.aws.amazon.com/athena/latest/ug/functions-operators-reference-section.html}{DML Queries, Functions, and Operators}
21 | }
22 | 
--------------------------------------------------------------------------------
/man/AthenaListObjects.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/View.R
 3 | \name{AthenaListObjects}
 4 | \alias{AthenaListObjects}
 5 | \title{List objects in a connection.}
 6 | \usage{
 7 | AthenaListObjects(connection, ...)
 8 | }
 9 | \arguments{
10 | \item{connection}{A connection object, as returned by \code{dbConnect()}.}
11 | 
12 | \item{...}{Attributes to filter by.}
13 | }
14 | \value{
15 | A data frame with \code{name} and \code{type} columns, listing the
16 | objects.
17 | }
18 | \description{
19 | Lists all of the objects in the connection, or all the objects which have
20 | specific attributes.
21 | }
22 | \details{
23 | When used without parameters, this function returns all of the objects known
24 | by the connection. Any parameters passed will filter the list to only objects
25 | which have the given attributes; for instance, passing \code{schema = "foo"}
26 | will return only objects matching the schema \code{foo}.
27 | }
28 | \keyword{internal}
29 | 
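Because `sql_translate_env` above registers a dbplyr backend for Athena, dplyr verbs can be translated to Athena SQL. A brief sketch (the staging bucket, table name, and `year` column are placeholders):

```r
library(DBI)
library(dplyr)

con <- dbConnect(noctua::athena(),
                 s3_staging_dir = "s3://example-bucket/queries/")

# dplyr verbs are lazily translated to Athena SQL; show_query() prints the
# generated SQL without executing it. "some_table" is illustrative.
tbl(con, "some_table") %>%
  filter(year == 2022) %>%
  show_query()
```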
--------------------------------------------------------------------------------
/man/AthenaDriver.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Driver.R
 3 | \docType{class}
 4 | \name{AthenaDriver}
 5 | \alias{AthenaDriver}
 6 | \alias{AthenaDriver-class}
 7 | \alias{show,AthenaDriver-method}
 8 | \alias{dbDataType,AthenaDriver,ANY-method}
 9 | \alias{dbDataType,AthenaDriver,list-method}
10 | \title{Athena Driver Methods}
11 | \usage{
12 | \S4method{show}{AthenaDriver}(object)
13 | 
14 | \S4method{dbDataType}{AthenaDriver,ANY}(dbObj, obj, ...)
15 | 
16 | \S4method{dbDataType}{AthenaDriver,list}(dbObj, obj, ...)
17 | }
18 | \arguments{
19 | \item{object}{Any R object}
20 | 
21 | \item{dbObj}{An object inheriting from \link[DBI:DBIDriver-class]{DBI::DBIDriver} or \link[DBI:DBIConnection-class]{DBI::DBIConnection}.}
22 | 
23 | \item{obj}{An R object whose SQL type we want to determine.}
24 | 
25 | \item{...}{Other arguments passed on to methods.}
26 | }
27 | \description{
28 | Implementations of pure virtual functions defined in the \code{DBI} package
29 | for AthenaDriver objects.
30 | }
31 | \keyword{internal}
32 | 
--------------------------------------------------------------------------------
/R/noctua.R:
--------------------------------------------------------------------------------
 1 | #' noctua: a DBI interface into Athena using the paws SDK
 2 | #'
 3 | #' noctua provides a seamless DBI interface into Athena using the R package
 4 | #' \href{https://github.com/paws-r/paws}{paws}.
 5 | #'
 6 | #' @section Goal of Package:
 7 | #' The goal of the \code{noctua} package is to provide a DBI-compliant interface to \href{https://aws.amazon.com/athena/}{Amazon's Athena}
 8 | #' using the \code{paws} software development kit (SDK). This allows for an efficient, easy-to-set-up connection to Athena using the \code{paws} SDK as a driver.
 9 | #'
10 | #' @section AWS Command Line Interface:
11 | #' As noctua uses \code{paws} as its backend, the \href{https://aws.amazon.com/cli/}{AWS Command Line Interface (AWS CLI)} can be used
12 | #' to remove user credentials when interacting with Athena.
13 | #'
14 | #' This allows AWS profile names to be set up so that noctua can connect to different accounts from the same machine,
15 | #' without needing to hard-code any credentials.
16 | #'
17 | #' @import paws
18 | #' @importFrom utils packageVersion head
19 | #' @importFrom stats runif
20 | #' @import data.table
21 | #' @import DBI
22 | "_PACKAGE"
23 | 
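To illustrate the "AWS Command Line Interface" section of `R/noctua.R`: a named profile set up with `aws configure --profile <name>` can be passed straight to `dbConnect()`, so no credentials live in R code ("analyst" and the bucket are placeholders).

```r
library(DBI)

# profile_name picks up credentials managed by the AWS CLI, so nothing is
# hard-coded here; both values below are illustrative.
con <- dbConnect(noctua::athena(),
                 profile_name = "analyst",
                 s3_staging_dir = "s3://example-bucket/queries/")
```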
x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/how_to_retry_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/getting_started_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/aws_athena_query_caching_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 
2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/convert_and_save_cost_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/changing_backend_file_parser_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? 
x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /man/backend_dbplyr_v1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_integration.R 3 | \name{backend_dbplyr_v1} 4 | \alias{backend_dbplyr_v1} 5 | \alias{db_explain.AthenaConnection} 6 | \alias{db_query_fields.AthenaConnection} 7 | \title{Athena S3 implementation of dbplyr backend functions (api version 1).} 8 | \usage{ 9 | db_explain.AthenaConnection(con, sql, ...) 10 | 11 | db_query_fields.AthenaConnection(con, sql, ...) 12 | } 13 | \arguments{ 14 | \item{con}{A \link{dbConnect} object, as returned by \code{dbConnect()}} 15 | 16 | \item{sql}{SQL code to be sent to AWS Athena} 17 | 18 | \item{...}{other parameters, currently not implemented} 19 | } 20 | \value{ 21 | \describe{ 22 | \item{db_explain}{Returns \href{https://docs.aws.amazon.com/athena/latest/ug/athena-explain-statement.html}{AWS Athena explain statement}} 23 | \item{db_query_fields}{Returns sql query column names} 24 | } 25 | } 26 | \description{ 27 | These functions are used to build the different types of SQL queries. 28 | The AWS Athena implementation gives extra parameters to allow access to the standard DBI Athena methods. They also 29 | utilise AWS Glue to speed up SQL query execution. 30 | } 31 | \keyword{internal} 32 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | ### Issue Description 8 | 9 | 10 | ### Reproducible Example 11 | 34 | <details>
35 | <summary>Session Info</summary> 36 | 37 | ```r 38 | devtools::session_info() 39 | #> output 40 | ``` 41 | </details>
42 | -------------------------------------------------------------------------------- /tests/testthat/test-exist-remove.R: -------------------------------------------------------------------------------- 1 | context("Exist/Remove") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | s3.location <- paste0(Sys.getenv("noctua_s3_tbl"),"removable_table/") 9 | 10 | test_that("Check a table exist and remove table",{ 11 | skip_if_no_env() 12 | # Test connection is using AWS CLI to set profile_name 13 | con <- dbConnect(athena()) 14 | 15 | if(dbExistsTable(con, "removable_table")) 16 | dbRemoveTable(con, "removable_table", confirm = TRUE) 17 | 18 | table_exist1 <- dbExistsTable(con, "removable_table") 19 | 20 | df <- data.frame(x = 1:10, y = letters[1:10], stringsAsFactors = F) 21 | 22 | dbWriteTable(con, "removable_table", df, s3.location = s3.location) 23 | 24 | table_exist2 <- dbExistsTable(con, "removable_table") 25 | 26 | dbRemoveTable(con, "removable_table", confirm = TRUE) 27 | 28 | table_exist3 <- dbExistsTable(con, "removable_table") 29 | 30 | expect_equal(table_exist1, FALSE) 31 | expect_equal(table_exist2, TRUE) 32 | expect_equal(table_exist3, FALSE) 33 | }) 34 | -------------------------------------------------------------------------------- /tests/testthat/test-dplyr-copy_to.R: -------------------------------------------------------------------------------- 1 | context("dplyr copy_to") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Check noctua s3 dplyr copy_to method",{ 9 | skip_if_no_env() 10 | skip_if_package_not_avialable("dplyr") 11 | 12 | library(dplyr) 13 | # Test connection is using AWS CLI to set profile_name 14 | con <- dbConnect(athena()) 15 | 16 | # creates Athena table and returns tbl_sql 17 | athena_mtcars <- copy_to(con, mtcars, s3_location = Sys.getenv("noctua_s3_tbl"), compress = T, overwrite = T, temporary = F) 18 | mtcars_filter <- athena_mtcars %>% filter(gear >=4) 19 | tbl_result <- is.tbl(mtcars_filter) 20 | # create another Athena table 21 | copy_to(con, mtcars_filter, temporary = F) 22 | 23 | result1 <- dbExistsTable(con, "mtcars") 24 | result2 <- dbExistsTable(con, "mtcars_filter") 25 | 26 | # clean up athena 27 | dbRemoveTable(con, "mtcars", confirm = TRUE) 28 | dbRemoveTable(con, "mtcars_filter", confirm = TRUE) 29 | 30 | expect_true(tbl_result) 31 | expect_true(result1) 32 | expect_true(result2) 33 | }) 34 | -------------------------------------------------------------------------------- /tests/testthat/test-append-data.R: -------------------------------------------------------------------------------- 1 | context("Append to Existing") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Testing if data is appended correctly", { 9 | skip_if_no_env() 10 | 11 | # Test connection is using AWS CLI to set profile_name 
12 | con <- dbConnect(athena(), 13 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 14 | 15 | DATE <- Sys.Date() 16 | dbWriteTable(con, "mtcars", mtcars, overwrite = T, compress = T, 17 | partition = c("timesTamp" = format(DATE, "%Y%m%d"))) 18 | 19 | # don't specify to send data compressed 20 | expect_warning(dbWriteTable(con, "mtcars", mtcars, append = T, file.type = "parquet", 21 | partition = c("timesTamp" = format(DATE+1, "%Y%m%d")))) 22 | 23 | dt <- dbGetQuery(con, "select timestamp, cast(count(*) as integer) as n from mtcars group by 1 order by 1") 24 | 25 | exp_dt <- data.table(timestamp = c(format(DATE, "%Y%m%d"), format(DATE+1, "%Y%m%d")), 26 | n = as.integer(c(32,32))) 27 | 28 | expect_equal(dt, exp_dt) 29 | }) -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v3 26 | 27 | - uses: r-lib/actions/setup-pandoc@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | extra-packages: any::pkgdown, local::. 36 | needs: website 37 | 38 | - name: Build site 39 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 40 | shell: Rscript {0} 41 | 42 | - name: Deploy to GitHub pages 🚀 43 | if: github.event_name != 'pull_request' 44 | uses: JamesIves/github-pages-deploy-action@v4.4.1 45 | with: 46 | clean: false 47 | branch: gh-pages 48 | folder: docs 49 | -------------------------------------------------------------------------------- /man/noctua-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/noctua.R 3 | \docType{package} 4 | \name{noctua-package} 5 | \alias{noctua} 6 | \alias{noctua-package} 7 | \title{noctua: a DBI interface into Athena using paws SDK} 8 | \description{ 9 | noctua provides a seamless DBI interface into Athena using the R package 10 | \href{https://github.com/paws-r/paws}{paws}. 11 | } 12 | \section{Goal of Package}{ 13 | 14 | The goal of the \code{noctua} package is to provide a DBI-compliant interface to \href{https://aws.amazon.com/athena/}{Amazon’s Athena} 15 | using the \code{paws} software development kit (SDK). This allows for an efficient, easy setup of a connection to Athena using the \code{paws} SDK as a driver. 16 | } 17 | 18 | \section{AWS Command Line Interface}{ 19 | 20 | As noctua is using \code{paws} as its backend, the \href{https://aws.amazon.com/cli/}{AWS Command Line Interface (AWS CLI)} can be used 21 | to remove user credentials when interacting with Athena.
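For example, once a profile has been configured through the AWS CLI, a connection can be opened without hard coding any credentials in R (the profile name below is purely illustrative; \code{profile_name} is the same connection argument used throughout this package's tests):

\preformatted{
library(DBI)

# credentials are resolved from the named AWS CLI profile
con <- dbConnect(noctua::athena(), profile_name = "my_aws_profile")
}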
22 | 23 | This allows AWS profile names to be set up so that noctua can connect to different accounts from the same machine, 24 | without needing to hard code any credentials. 25 | } 26 | 27 | \seealso{ 28 | Useful links: 29 | \itemize{ 30 | \item \url{https://dyfanjones.github.io/noctua/} 31 | \item \url{https://github.com/DyfanJones/noctua} 32 | \item Report bugs at \url{https://github.com/DyfanJones/noctua/issues} 33 | } 34 | 35 | } 36 | \author{ 37 | \strong{Maintainer}: Dyfan Jones \email{dyfan.r.jones@gmail.com} 38 | 39 | } 40 | -------------------------------------------------------------------------------- /tests/testthat/test-dbDisconnect.R: -------------------------------------------------------------------------------- 1 | context("Disconnect") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | s3.location <- paste0(Sys.getenv("noctua_s3_tbl"),"removable_table/") 9 | 10 | test_that("Check if dbDisconnect is working as intended",{ 11 | skip_if_no_env() 12 | # Test connection is using AWS CLI to set profile_name 13 | con <- dbConnect(athena(), 14 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 15 | 16 | res <- dbSendQuery(con, "select 'dummy'") 17 | 18 | dbDisconnect(con) 19 | 20 | df <- data.frame(x = 1:10, y = letters[1:10], stringsAsFactors = F) 21 | 22 | expect_equal(dbIsValid(con), FALSE) 23 | expect_equal(dbIsValid(res), FALSE) 24 | expect_error(dbGetQuery(con, "select dummy"), "Connection already closed.") 25 | expect_error(dbFetch(res), "Result already cleared.") 26 | expect_error(con_error_msg(con, "dummy message."), "dummy message.") 27 | expect_error(dbExistsTable(con, "removable_table")) 28 | expect_error(dbWriteTable(con, "removable_table", df, s3.location = s3.location)) 29 | expect_error(dbRemoveTable(con, "removable_table")) 30 | expect_error(dbSendQuery(con, "select * removable_table")) 31 | expect_error(dbExecute(con, "select * removable_table")) 32 | expect_error(dbGetQuery(con, "select * removable_table")) 33 | }) 34 | -------------------------------------------------------------------------------- /man/dbListTables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Connection.R 3 | \name{dbListTables} 4 | \alias{dbListTables} 5 | \alias{dbListTables,AthenaConnection-method} 6 | \title{List Athena Tables} 7 | \usage{ 8 | \S4method{dbListTables}{AthenaConnection}(conn, catalog = NULL, schema = NULL, ...) 9 | } 10 | \arguments{ 11 | \item{conn}{A \link[DBI:DBIConnection-class]{DBI::DBIConnection} object, 12 | as returned by \link[DBI:dbConnect]{dbConnect()}.} 13 | 14 | \item{catalog}{Athena catalog, default set to NULL to return all tables from all Athena catalogs} 15 | 16 | \item{schema}{Athena schema, default set to NULL to return all tables from all Athena schemas. 17 | Note: The use of DATABASE and SCHEMA is interchangeable within Athena.} 18 | 19 | \item{...}{Other parameters passed on to methods.} 20 | } 21 | \value{ 22 | \code{dbListTables()} returns a character vector with all the tables from Athena. 23 | } 24 | \description{ 25 | Returns the unquoted names of Athena tables accessible through this connection.
26 | } 27 | \examples{ 28 | \dontrun{ 29 | # Note: 30 | # - Requires an AWS account to run the below example. 31 | # - Different connection methods can be used; please see `noctua::dbConnect` documentation 32 | 33 | library(DBI) 34 | 35 | # Demo connection to Athena using profile name 36 | con <- dbConnect(noctua::athena()) 37 | 38 | # Return list of tables in Athena 39 | dbListTables(con) 40 | 41 | # Disconnect connection 42 | dbDisconnect(con) 43 | } 44 | } 45 | \seealso{ 46 | \code{\link[DBI]{dbListTables}} 47 | } 48 | -------------------------------------------------------------------------------- /tests/testthat/test-dplyr-compute.R: -------------------------------------------------------------------------------- 1 | context("dplyr compute") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Check noctua s3 dplyr compute method", { 9 | skip_if_no_env() 10 | skip_if_package_not_avialable("dplyr") 11 | 12 | library(dplyr) 13 | # Test connection is using AWS CLI to set profile_name 14 | con <- dbConnect(athena()) 15 | 16 | # remove test tables if they exist 17 | if (dbExistsTable(con, "compute_tbl1")) { 18 | dbRemoveTable(con, "compute_tbl1", confirm = TRUE) 19 | } 20 | if (dbExistsTable(con, "compute_tbl2")) { 21 | dbRemoveTable(con, "compute_tbl2", confirm = TRUE) 22 | } 23 | 24 | athena_tbl <- tbl(con, sql("SELECT * FROM INFORMATION_SCHEMA.TABLES")) 25 | s3_uri = file.path(Sys.getenv("noctua_s3_tbl"), "compute_tbl/", fsep = "/") 26 | 27 | athena_tbl %>% compute("compute_tbl1", s3_location = s3_uri, temporary = F) 28 | athena_tbl %>% compute("compute_tbl2", temporary = F) 29 | 30 | noctua_options(unload = T) 31 | expect_error(athena_tbl %>% compute("compute_tbl2")) 32 | noctua_options() 33 | 34 | result1 <- dbExistsTable(con, "compute_tbl1") 35 | result2 <- dbExistsTable(con, "compute_tbl2") 36 | 37 | # clean up athena 38 | dbRemoveTable(con, "compute_tbl1", confirm = TRUE) 39 | dbRemoveTable(con, "compute_tbl2", confirm = TRUE) 40 | 41 | expect_equal(result1, TRUE) 42 | expect_equal(result2, TRUE) 43 | }) 44 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: noctua 2 | Type: Package 3 | Title: Connect to 'AWS Athena' using R 'AWS SDK' 'paws' ('DBI' Interface) 4 | Version: 2.6.3 5 | Authors@R: person("Dyfan", "Jones", email="dyfan.r.jones@gmail.com", 6 | role= c("aut", "cre")) 7 | Description: Designed to be compatible with the 'R' package 'DBI' (Database Interface) 8 | when connecting to Amazon Web Service ('AWS') Athena <https://aws.amazon.com/athena/>. 9 | To do this, the 'R' 'AWS' Software Development Kit ('SDK') 'paws' <https://github.com/paws-r/paws> 10 | is used as a driver.
11 | Imports: 12 | data.table (>= 1.12.4), 13 | DBI (>= 0.7), 14 | methods, 15 | paws (>= 0.2.0), 16 | stats, 17 | utils, 18 | uuid (>= 0.1-4) 19 | Suggests: 20 | arrow, 21 | bit64, 22 | dplyr (>= 1.0.0), 23 | dbplyr (>= 2.3.3), 24 | testthat, 25 | tibble, 26 | vroom (>= 1.2.0), 27 | covr, 28 | knitr, 29 | rmarkdown, 30 | jsonify, 31 | jsonlite 32 | VignetteBuilder: 33 | knitr 34 | Depends: R (>= 3.2.0) 35 | License: MIT + file LICENSE 36 | Encoding: UTF-8 37 | RoxygenNote: 7.3.3 38 | URL: https://dyfanjones.github.io/noctua/, https://github.com/DyfanJones/noctua 39 | BugReports: https://github.com/DyfanJones/noctua/issues 40 | Collate: 41 | 'utils.R' 42 | 'dplyr_integration.R' 43 | 'noctua.R' 44 | 'Driver.R' 45 | 'Connection.R' 46 | 'DataTypes.R' 47 | 'File_Parser.R' 48 | 'Options.R' 49 | 'fetch_utils.R' 50 | 'Result.R' 51 | 'Table.R' 52 | 'View.R' 53 | 'athena_low_api.R' 54 | 'column_parser.R' 55 | 'sql_translate_utils.R' 56 | 'sql_translate_env.R' 57 | 'zzz.R' 58 | Roxygen: list(markdown = TRUE) 59 | -------------------------------------------------------------------------------- /man/sqlData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Table.R 3 | \name{sqlData} 4 | \alias{sqlData} 5 | \alias{sqlData,AthenaConnection-method} 6 | \title{Converts data frame into suitable format to be uploaded to Athena} 7 | \usage{ 8 | \S4method{sqlData}{AthenaConnection}( 9 | con, 10 | value, 11 | row.names = NA, 12 | file.type = c("tsv", "csv", "parquet", "json"), 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{con}{A database connection.} 18 | 19 | \item{value}{A data frame} 20 | 21 | \item{row.names}{Either \code{TRUE}, \code{FALSE}, \code{NA} or a string. 22 | 23 | If \code{TRUE}, always translate row names to a column called "row_names". 24 | If \code{FALSE}, never translate row names. If \code{NA}, translate 25 | rownames only if they're a character vector. 26 | 27 | A string is equivalent to \code{TRUE}, but allows you to override the 28 | default name. 29 | 30 | For backward compatibility, \code{NULL} is equivalent to \code{FALSE}.} 31 | 32 | \item{file.type}{What file type to store data.frame on s3, noctua currently supports \code{c("csv", "tsv", "parquet", "json")}. 33 | \strong{Note:} This parameter is used to format any special characters that clash with the file type separator.} 34 | 35 | \item{...}{Other arguments used by individual methods.} 36 | } 37 | \value{ 38 | \code{sqlData} returns a dataframe formatted for Athena. Currently converts \code{list} variable types into \code{character} 39 | split by \code{'|'}, similar to how \code{data.table} writes out to files. 40 | } 41 | \description{ 42 | This method converts data.frame columns into the correct format so that they can be uploaded to Athena.
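A minimal illustration of the list-column behaviour described above (hypothetical data; the exact output depends on the noctua version in use):

\preformatted{
df <- data.frame(x = 1:2)
df$y <- list(c("a", "b"), "c")

# sqlData() collapses the list column roughly as:
#   x  y
#   1  a|b
#   2  c
}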
43 | } 44 | \seealso{ 45 | \code{\link[DBI]{sqlData}} 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test-caching.R: -------------------------------------------------------------------------------- 1 | context("query id caching") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Testing if caching returns the same query id", { 9 | skip_if_no_env() 10 | # Test connection is using AWS CLI to set profile_name 11 | con <- dbConnect(athena()) 12 | 13 | query1 = "SELECT table_name FROM information_schema.tables limit 1" 14 | res1 = dbSendStatement(con, query1) 15 | dbFetch(res1) 16 | res2 = dbExecute(con, query1) 17 | 18 | noctua_options(cache_size = 10) 19 | 20 | res3 = dbSendStatement(con, query1) 21 | dbFetch(res3) 22 | res4 = dbExecute(con, query1) 23 | 24 | query2 = "SELECT table_schema, table_name FROM information_schema.tables limit 1" 25 | res5 = dbExecute(con, query2, unload = T) 26 | res6 = dbExecute(con, query2) 27 | 28 | # clear cached backend data 29 | noctua_options(clear_cache = T) 30 | 31 | # without caching the query ids should differ; with caching enabled they should match 32 | exp1 = res1@info$QueryExecutionId == res2@info$QueryExecutionId 33 | exp2 = res3@info$QueryExecutionId == res4@info$QueryExecutionId 34 | exp3 = res5@info$UnloadDir == res6@info$UnloadDir 35 | exp4 = res5@info$QueryExecutionId == res6@info$QueryExecutionId 36 | expect_false(exp1) 37 | expect_true(exp2) 38 | expect_true(exp3) 39 | expect_true(exp4) 40 | expect_error(noctua_options(cache_size = 101)) 41 | expect_error(noctua_options(cache_size = -1)) 42 | expect_true(nrow(noctua:::athena_option_env$cache_dt) == 0) 43 | }) 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /tests/testthat/test-keyboard-interrupt.R: -------------------------------------------------------------------------------- 1 | context("keyboard interrupt") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Check if Athena query has been successfully cancelled",{ 9 | skip_if_no_env() 10 | 11 | con <- dbConnect(athena(), keyboard_interrupt = T) 12 | 13 | res <- dbSendQuery(con, "SHOW TABLES IN default") 14 | query_id <- res@info[["QueryExecutionId"]] 15 | err_msg <- sprintf( 16 | "Query '%s' has been cancelled by user.", 17 | query_id) 18 | 19 | expect_error(noctua:::interrupt_athena(res), err_msg) 20 | 21 | status <- res@connection@ptr$Athena$get_query_execution( 22 | QueryExecutionId = query_id)$QueryExecution$Status$State 23 | 24 | expect_equal(status, "CANCELLED") 25 | }) 26 | 27 | test_that("Check if Athena query has not been cancelled",{ 28 | skip_if_no_env() 29 | 30 | con <- dbConnect(athena(), keyboard_interrupt = F) 31 | 32 | res <- dbSendQuery(con, "SHOW TABLES IN default") 33 | query_id <- res@info[["QueryExecutionId"]] 34 | err_msg <- sprintf( 35 | "Query '%s' has been cancelled by user but will carry on running in AWS Athena", 36 | query_id) 37 | 38 | expect_error(noctua:::interrupt_athena(res), err_msg) 39 | 40 | # give AWS Athena a chance to start
query 41 | Sys.sleep(5) 42 | 43 | status <- res@connection@ptr$Athena$get_query_execution( 44 | QueryExecutionId = query_id)$QueryExecution$Status$State 45 | 46 | expect_true(status %in% c("RUNNING", "SUCCEEDED")) 47 | 48 | # tidy up query 49 | dbClearResult(res) 50 | }) 51 | -------------------------------------------------------------------------------- /tests/testthat/test-athena-request.R: -------------------------------------------------------------------------------- 1 | context("Athena Request") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Check if Athena Request is created correctly",{ 9 | skip_if_no_env() 10 | # Test connection is using AWS CLI to set profile_name 11 | con1 <- dbConnect(athena(), 12 | profile_name = "rathena", 13 | encryption_option = "SSE_S3", 14 | kms_key = "test_key", 15 | work_group = "test_group", 16 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 17 | 18 | con2 <- dbConnect(athena(), 19 | profile_name = "rathena", 20 | encryption_option = "SSE_S3", 21 | work_group = "test_group", 22 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 23 | 24 | con3 <- dbConnect(athena(), 25 | profile_name = "rathena", 26 | work_group = "test_group", 27 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 28 | 29 | con4 <- dbConnect(athena(), 30 | profile_name = "rathena", 31 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 32 | 33 | R1 <- noctua:::ResultConfiguration(con1) 34 | R2 <- noctua:::ResultConfiguration(con2) 35 | R3 <- noctua:::ResultConfiguration(con3) 36 | R4 <- noctua:::ResultConfiguration(con4) 37 | 38 | expect_equal(R1, athena_test_req1) 39 | expect_equal(R2, athena_test_req2) 40 | expect_equal(R3, athena_test_req3) 41 | expect_equal(R4, athena_test_req4) 42 | }) -------------------------------------------------------------------------------- /man/dbConvertTable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Connection.R 3 | \name{dbConvertTable} 4 | \alias{dbConvertTable} 5 | \alias{dbConvertTable,AthenaConnection-method} 6 | \title{dbConvertTable: convert AWS S3 backend file types} 7 | \usage{ 8 | dbConvertTable(conn, obj, name, ...) 9 | 10 | \S4method{dbConvertTable}{AthenaConnection}( 11 | conn, 12 | obj, 13 | name, 14 | partition = NULL, 15 | s3.location = NULL, 16 | file.type = c("NULL", "csv", "tsv", "parquet", "json", "orc"), 17 | compress = TRUE, 18 | data = TRUE, 19 | ... 20 | ) 21 | } 22 | \arguments{ 23 | \item{conn}{A \link[DBI:DBIConnection-class]{DBI::DBIConnection} object,} 24 | 25 | \item{obj}{Athena table or \code{SQL} DML query to be converted. For \code{SQL}, the query needs to be wrapped with \code{DBI::SQL()} and 26 | follow the AWS Athena DML format \href{https://docs.aws.amazon.com/athena/latest/ug/select.html}{link}} 27 | 28 | \item{name}{Name of destination table} 29 | 30 | \item{...}{Extra parameters, currently not used} 31 | 32 | \item{partition}{Partition Athena table} 33 | 34 | \item{s3.location}{Location to store output file, must be in S3 URI format, for example ("s3://mybucket/data/").} 35 | 36 | \item{file.type}{File type for \code{name}, currently supports \code{c("NULL","csv", "tsv", "parquet", "json", "orc")}.
37 | \code{"NULL"} will let Athena set the file type for you.} 38 | 39 | \item{compress}{Compress \code{name}, currently can only compress \code{c("parquet", "orc")} (\href{https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html}{AWS Athena CTAS})} 40 | 41 | \item{data}{If \code{name} should be created with data or not.} 42 | } 43 | \value{ 44 | \code{dbConvertTable()} returns \code{TRUE}, invisibly. 45 | } 46 | \description{ 47 | Utilises AWS Athena to convert AWS S3 backend file types. It also allows the creation of more efficient file types, i.e. "parquet" and "orc", from SQL queries. 48 | } 49 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 17 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 18 | AWS_REGION: ${{ secrets.AWS_REGION }} 19 | AWS_ATHENA_S3_STAGING_DIR: ${{ secrets.AWS_ATHENA_S3_STAGING_DIR }} 20 | noctua_arn: ${{ secrets.NOCTUA_ARN }} 21 | noctua_s3_query: ${{ secrets.NOCTUA_S3_QUERY }} 22 | noctua_s3_tbl: ${{ secrets.NOCTUA_S3_TBL }} 23 | ARROW_WITH_SNAPPY: ON 24 | R_KEEP_PKG_SOURCE: yes 25 | _R_CHECK_LENGTH_1_CONDITION_: abort,verbose 26 | _R_CHECK_LENGTH_1_LOGIC2_: abort,verbose 27 | steps: 28 | - uses: actions/checkout@v3 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: | 37 | github::tidyverse/dbplyr 38 | any::rcmdcheck 39 | any::covr 40 | 41 | - uses: r-lib/actions/check-r-package@v2 42 | 43 | - name: Show testthat output 44 | if: always() 45 | run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true 46 | shell: bash 47 | 48 | - name: Upload check results 49 | if: failure() 50 | uses: actions/upload-artifact@main 51 | with: 52 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 53 | path: check 54 | 55 | - name: Test coverage 56 | run: covr::codecov() 57 | shell: Rscript {0} 58 | -------------------------------------------------------------------------------- /man/backend_dbplyr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_integration.R 3 | \name{backend_dbplyr} 4 | \alias{backend_dbplyr} 5 | \alias{sql_query_explain.AthenaConnection} 6 | \alias{sql_query_fields.AthenaConnection} 7 | \alias{sql_escape_date.AthenaConnection} 8 | \alias{sql_escape_datetime.AthenaConnection} 9 | \title{Athena S3 implementation of dbplyr backend functions} 10 | \usage{ 11 | sql_query_explain.AthenaConnection(con, sql, format = "text", type = NULL, ...) 12 | 13 | sql_query_fields.AthenaConnection(con, sql, ...)
14 | 15 | sql_escape_date.AthenaConnection(con, x) 16 | 17 | sql_escape_datetime.AthenaConnection(con, x) 18 | } 19 | \arguments{ 20 | \item{con}{A \link{dbConnect} object, as returned by \code{dbConnect()}} 21 | 22 | \item{sql}{SQL code to be sent to AWS Athena} 23 | 24 | \item{format}{returning format for explain queries, default set to \code{"text"}. Other formats can be found: \url{https://docs.aws.amazon.com/athena/latest/ug/athena-explain-statement.html}} 25 | 26 | \item{type}{return plan for explain queries, default set to \code{NULL}. Other types can be found: \url{https://docs.aws.amazon.com/athena/latest/ug/athena-explain-statement.html}} 27 | 28 | \item{...}{other parameters, currently not implemented} 29 | 30 | \item{x}{R object to be transformed into the Athena equivalent} 31 | } 32 | \value{ 33 | \describe{ 34 | \item{sql_query_explain}{Returns sql query for \href{https://docs.aws.amazon.com/athena/latest/ug/athena-explain-statement.html}{AWS Athena explain statement}} 35 | \item{sql_query_fields}{Returns sql query column names} 36 | \item{sql_escape_date}{Returns sql escaping from dates} 37 | \item{sql_escape_datetime}{Returns sql escaping from date times} 38 | } 39 | } 40 | \description{ 41 | These functions are used to build the different types of SQL queries. 42 | The AWS Athena implementation gives extra parameters to allow access to the standard DBI Athena methods. They also 43 | utilise AWS Glue to speed up SQL query execution. 44 | } 45 | -------------------------------------------------------------------------------- /tests/testthat/test-bigint.R: -------------------------------------------------------------------------------- 1 | context("bigint") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | s3.location1 <- paste0(Sys.getenv("noctua_s3_tbl"),"test_df/") 9 | s3.location2 <- Sys.getenv("noctua_s3_tbl") 10 | 11 | test_that("Testing data transfer between R and athena datatable", { 12 | skip_if_no_env() 13 | 14 | noctua_options("vroom") 15 | 16 | # default big integer as integer64 17 | con <- dbConnect(athena(), 18 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 19 | 20 | expect_equal(noctua:::athena_option_env$bigint, "I") 21 | 22 | noctua_options() 23 | expect_equal(noctua:::athena_option_env$bigint, "integer64") 24 | 25 | # big integer as integer 26 | noctua_options() 27 | con <- dbConnect(athena(), 28 | s3_staging_dir = Sys.getenv("noctua_s3_query"), 29 | bigint = "integer") 30 | 31 | expect_equal(noctua:::athena_option_env$bigint, "integer") 32 | 33 | noctua_options("vroom") 34 | expect_equal(noctua:::athena_option_env$bigint, "i") 35 | 36 | # big integer as numeric 37 | noctua_options() 38 | con <- dbConnect(athena(), 39 | s3_staging_dir = Sys.getenv("noctua_s3_query"), 40 | bigint = "numeric") 41 | 42 | expect_equal(noctua:::athena_option_env$bigint, "double") 43 | 44 | noctua_options("vroom") 45 | expect_equal(noctua:::athena_option_env$bigint, "d") 46 | 47 | # big integer as character 48 | noctua_options() 49 | con <- dbConnect(athena(), 50 | s3_staging_dir = Sys.getenv("noctua_s3_query"), 51 | bigint = "character") 52 | 53 | expect_equal(noctua:::athena_option_env$bigint, "character") 54 | 55 | noctua_options("vroom") 56 | expect_equal(noctua:::athena_option_env$bigint, "c") 57 | }) 58 | 
-------------------------------------------------------------------------------- /tests/testthat/test-athena-ddl.R: -------------------------------------------------------------------------------- 1 | context("Athena DDL") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | s3.location <- paste0(Sys.getenv("noctua_s3_tbl"),"test_df/") 9 | df <- data.frame(x = 1:10, y = letters[1:10], stringsAsFactors = F) 10 | 11 | test_that("Check if Athena DDL's are created correctly",{ 12 | skip_if_no_env() 13 | # Test connection is using AWS CLI to set profile_name 14 | con <- dbConnect(athena()) 15 | 16 | # CSV DDL 17 | expect_ddl1 <- sqlCreateTable(con, "test_df", df, s3.location = s3.location, file.type = "csv") 18 | expect_ddl2 <- sqlCreateTable(con, "test_df", df, s3.location = s3.location, file.type = "csv", compress = T) 19 | 20 | # TSV DDL 21 | expect_ddl3 <- sqlCreateTable(con, "test_df", df, s3.location = s3.location, file.type = "tsv") 22 | expect_ddl4 <- sqlCreateTable(con, "test_df", df, s3.location = s3.location, file.type = "tsv", compress = T) 23 | 24 | # Parquet DDL 25 | expect_ddl5 <- sqlCreateTable(con, "test_df", df, s3.location = s3.location, file.type = "parquet") 26 | expect_ddl6 <- sqlCreateTable(con, "test_df", df, partition = "timestamp", s3.location = s3.location, file.type = "parquet", compress = T) 27 | 28 | # JSON DDL 29 | expect_ddl7 <- sqlCreateTable(con, "test_df", df, s3.location = s3.location, file.type = "json") 30 | expect_ddl8 <- sqlCreateTable(con, "test_df", df, partition = "timestamp", s3.location = s3.location, file.type = "json", compress = T) 31 | 32 | 33 | expect_equal(expect_ddl1, tbl_ddl$tbl1) 34 | expect_equal(expect_ddl2, tbl_ddl$tbl2) 35 | expect_equal(expect_ddl3, tbl_ddl$tbl3) 36 | expect_equal(expect_ddl4, tbl_ddl$tbl4) 37 | expect_equal(expect_ddl5, tbl_ddl$tbl5) 38 | expect_equal(expect_ddl6, tbl_ddl$tbl6) 39 | expect_equal(expect_ddl7, tbl_ddl$tbl7) 40 | expect_equal(expect_ddl8, tbl_ddl$tbl8) 41 | }) -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | dbplyr_version() 3 | register_s3_method("dbplyr", "dbplyr_edition", "AthenaConnection") 4 | register_s3_method("dbplyr", "db_connection_describe", "AthenaConnection") 5 | register_s3_method("dbplyr", "sql_query_explain", "AthenaConnection") 6 | register_s3_method("dbplyr", "sql_query_fields", "AthenaConnection") 7 | register_s3_method("dbplyr", "sql_translation", "AthenaConnection") 8 | register_s3_method("dbplyr", "sql_escape_date", "AthenaConnection") 9 | register_s3_method("dbplyr", "sql_escape_datetime", "AthenaConnection") 10 | register_s3_method("dbplyr", "db_compute", "AthenaConnection") 11 | register_s3_method("dbplyr", "db_copy_to", "AthenaConnection") 12 | register_s3_method("dbplyr", "sql_table_analyze", "AthenaConnection") 13 | register_s3_method("dbplyr", "sql_query_save", "AthenaConnection") 14 | } 15 | 16 | register_s3_method <- function(pkg, generic, class, fun = NULL) { 17 | stopifnot(is.character(pkg), length(pkg) == 1) 18 | stopifnot(is.character(generic), length(generic) == 1) 19 | stopifnot(is.character(class), length(class) == 1) 20 | 21 | if (is.null(fun)) { 22 | fun <- get(paste0(generic, ".", class), envir = parent.frame()) 23 | } else { 24 | stopifnot(is.function(fun)) 25 | } 26 | 27 | if (pkg %in% 
loadedNamespaces()) { 28 | registerS3method(generic, class, fun, envir = asNamespace(pkg)) 29 | } 30 | 31 | # Always register hook in case package is later unloaded & reloaded 32 | setHook( 33 | packageEvent(pkg, "onLoad"), 34 | function(...) { 35 | registerS3method(generic, class, fun, envir = asNamespace(pkg)) 36 | } 37 | ) 38 | } 39 | 40 | dbplyr_version <- function() { 41 | if (nzchar(system.file(package = "dbplyr"))) { 42 | dbplyr_env$version <- packageVersion("dbplyr") 43 | dbplyr_env$major <- dbplyr_env$version$major 44 | dbplyr_env$minor <- dbplyr_env$version$minor 45 | } else { 46 | # default to minimum supported dbplyr version 47 | dbplyr_env$major = 1L 48 | dbplyr_env$minor = 4L 49 | } 50 | } 51 | 52 | dbplyr_env <- new.env(parent = emptyenv()) 53 | -------------------------------------------------------------------------------- /tests/testthat/test-work-groups.R: -------------------------------------------------------------------------------- 1 | context("Athena Work Groups") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Create and Delete Athena Work Groups",{ 9 | skip_if_no_env() 10 | # Test connection is using AWS CLI to set profile_name 11 | con <- dbConnect(athena()) 12 | 13 | output1 <- list_work_groups(con) 14 | work_groups1 <- sapply(output1, function(x) x$Name) 15 | 16 | create_work_group(con, "demo_work_group", description = "This is a demo work group", 17 | tags = tag_options(key= "demo_work_group", value = "demo_01")) 18 | 19 | output2 <- list_work_groups(con) 20 | work_groups2 <- sapply(output2, function(x) x$Name) 21 | 22 | meta_data1 <- get_work_group(con, "demo_work_group")$Description 23 | update_work_group(con, "demo_work_group", description = "This is a demo work group update") 24 | meta_data2 <- get_work_group(con, "demo_work_group")$Description 25 | 26 | delete_work_group(con, "demo_work_group") 27 | 28 | output3 <- list_work_groups(con) 29 | work_groups3 <- sapply(output3, function(x) x$Name) 30 | 31 | dbDisconnect(con) 32 | 33 | expect_error(create_work_group(con, "demo_work_group", description = "This is a demo work group", 34 | tags = tag_options(key= "demo_work_group", value = "demo_01"))) 35 | expect_error(delete_work_group(con, "demo_work_group")) 36 | expect_error(list_work_groups(con)) 37 | expect_error(get_work_group(con)) 38 | expect_error(update_work_group(con, "demo_work_group", 39 | description = "This is a demo work group update")) 40 | expect_equal(any(grepl("demo_work_group", output1)), FALSE) 41 | expect_equal(any(grepl("demo_work_group", output2)), TRUE) 42 | expect_equal(any(grepl("demo_work_group", output3)), FALSE) 43 | expect_equal(meta_data1, "This is a demo work group") 44 | expect_equal(meta_data2, "This is a demo work group update") 45 | }) 46 | -------------------------------------------------------------------------------- /R/sql_translate_utils.R: -------------------------------------------------------------------------------- 1 | #' @include utils.R 2 | 3 | sql_quantile <- function(x, probs){ 4 | build_sql <- pkg_method("build_sql", "dbplyr") 5 | check_probs(probs) 6 | build_sql("APPROX_PERCENTILE(",x,", ",probs,")") 7 | } 8 | 9 | sql_median <- function(){ 10 | warned <- FALSE 11 | function(x, na.rm = FALSE){ 12 | warned <<- check_na_rm(na.rm, warned) 13 | sql_quantile(x, 
0.5) 14 | } 15 | } 16 | 17 | # mimic check_na_rm from dbplyr 18 | # https://github.com/tidyverse/dbplyr/blob/master/R/translate-sql-helpers.R#L213-L225 19 | check_na_rm <- function(na.rm, warned){ 20 | if(warned || identical(na.rm, TRUE)) 21 | return(warned) 22 | warning( 23 | "Missing values are always removed in SQL.\n", "Use `", 24 | "median(x, na.rm = TRUE)` to silence this warning\n", 25 | "This warning is displayed only once per session.", 26 | call. = FALSE) 27 | return(TRUE) 28 | } 29 | 30 | # re-create check_probs from dbplyr: 31 | # https://github.com/tidyverse/dbplyr/blob/master/R/translate-sql-quantile.R#L40-L48 32 | check_probs <- function(probs) { 33 | if (!is.numeric(probs)) { 34 | stop("`probs` must be numeric", call. = FALSE) 35 | } 36 | 37 | if (length(probs) > 1) { 38 | stop("SQL translation only supports single value for `probs`.", call. = FALSE) 39 | } 40 | } 41 | 42 | # helper function to support R function paste in sql_translation_env 43 | athena_paste <- function(..., sep = " ", con) { 44 | escape <- pkg_method("escape", "dbplyr") 45 | sql <- pkg_method("sql", "dplyr") 46 | sep <- escape(sep, con = con) 47 | pieces <- vapply(list(...), escape, con = con, character(1)) 48 | sql(paste(pieces, collapse = paste0('||', sep, '||'))) 49 | } 50 | 51 | athena_regexpr <- function(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, 52 | useBytes = FALSE){ 53 | if (any(c(perl, fixed, useBytes))) { 54 | stop("`perl`, `fixed` and `useBytes` parameters are unsupported", call. = F) 55 | } 56 | build_sql <- pkg_method('build_sql', "dbplyr") 57 | if(!ignore.case){ 58 | build_sql('REGEXP_LIKE(', text,",", pattern, ')') 59 | } else { 60 | pattern <- paste0("(?i)", pattern) 61 | build_sql('REGEXP_LIKE(', text,",", pattern, ')') 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /man/AthenaResult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Result.R 3 | \docType{methods} 4 | \name{AthenaResult} 5 | \alias{AthenaResult} 6 | \alias{AthenaResult-class} 7 | \alias{dbClearResult,AthenaResult-method} 8 | \alias{dbFetch,AthenaResult-method} 9 | \alias{dbHasCompleted,AthenaResult-method} 10 | \alias{dbIsValid,AthenaResult-method} 11 | \alias{dbGetInfo,AthenaResult-method} 12 | \alias{dbColumnInfo,AthenaResult-method} 13 | \alias{dbStatistics} 14 | \alias{dbStatistics,AthenaResult-method} 15 | \alias{dbGetStatement,AthenaResult-method} 16 | \title{Athena Result Methods} 17 | \usage{ 18 | \S4method{dbClearResult}{AthenaResult}(res, ...) 19 | 20 | \S4method{dbFetch}{AthenaResult}(res, n = -1, ...) 21 | 22 | \S4method{dbHasCompleted}{AthenaResult}(res, ...) 23 | 24 | \S4method{dbIsValid}{AthenaResult}(dbObj, ...) 25 | 26 | \S4method{dbGetInfo}{AthenaResult}(dbObj, ...) 27 | 28 | \S4method{dbColumnInfo}{AthenaResult}(res, ...) 29 | 30 | dbStatistics(res, ...) 31 | 32 | \S4method{dbStatistics}{AthenaResult}(res, ...) 33 | 34 | \S4method{dbGetStatement}{AthenaResult}(res, ...) 35 | } 36 | \arguments{ 37 | \item{res}{An object inheriting from \link[DBI:DBIResult-class]{DBI::DBIResult}.} 38 | 39 | \item{...}{Other arguments passed on to methods.} 40 | 41 | \item{n}{maximum number of records to retrieve per fetch. Use \code{n = -1} 42 | or \code{n = Inf} 43 | to retrieve all pending records. 
Some implementations may recognize other 44 | special values.} 45 | 46 | \item{dbObj}{An object inheriting from \link[DBI:DBIResult-class]{DBI::DBIResult}, 47 | \link[DBI:DBIConnection-class]{DBI::DBIConnection}, or \link[DBI:DBIDriver-class]{DBI::DBIDriver}.} 48 | } 49 | \value{ 50 | \code{dbStatistics()} returns list containing Athena Statistics return from \code{paws}. 51 | } 52 | \description{ 53 | Implementations of pure virtual functions defined in the \code{DBI} package 54 | for AthenaResult objects. 55 | 56 | Returns AWS Athena Statistics from execute queries \link[DBI:dbSendQuery]{DBI::dbSendQuery} 57 | } 58 | \note{ 59 | If a user does not have permission to remove AWS S3 resource from AWS Athena output location, then an AWS warning will be returned. 60 | For example \code{AccessDenied (HTTP 403). Access Denied}. 61 | It is better use query caching or optionally prevent clear AWS S3 resource using \link{noctua_options} 62 | } 63 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /R/DataTypes.R: -------------------------------------------------------------------------------- 1 | AthenaDataType <- function(fields, ...) 
{ 2 | switch( 3 | class(fields)[1], 4 | logical = "BOOLEAN", 5 | integer = "INT", 6 | integer64 = "BIGINT", 7 | numeric = "DOUBLE", 8 | double = "DOUBLE", 9 | factor = "STRING", 10 | character = "STRING", 11 | list = "STRING", 12 | Date = "DATE", 13 | POSIXct = "TIMESTAMP", 14 | stop("Unknown class ", paste(class(fields), collapse = "/"), call. = FALSE) 15 | ) 16 | } 17 | 18 | # ========================================================================== 19 | # convert Athena types to R classes 20 | AthenaToRDataType <- function(method, data_type) UseMethod("AthenaToRDataType") 21 | 22 | #' @export 23 | AthenaToRDataType.athena_data.table <- function(method, data_type) { 24 | athena_to_r <- function(x) { 25 | switch( 26 | x, 27 | boolean = "logical", 28 | int = "integer", 29 | integer = "integer", 30 | tinyint = "integer", 31 | smallint = "integer", 32 | bigint = athena_option_env$bigint, 33 | float = "double", 34 | real = "double", 35 | decimal = "double", 36 | string = "character", 37 | varchar = "character", 38 | char = "character", 39 | date = "Date", 40 | timestamp = "POSIXct", 41 | "timestamp with time zone" = "POSIXct", 42 | array = "character", 43 | row = "character", 44 | map = "character", 45 | json = "character", 46 | ipaddress = "character", 47 | varbinary = "character", 48 | x 49 | ) 50 | } 51 | output <- vapply(data_type, athena_to_r, FUN.VALUE = character(1)) 52 | return(output) 53 | } 54 | 55 | #' @export 56 | AthenaToRDataType.athena_vroom <- function(method, data_type) { 57 | athena_to_r <- function(x) { 58 | switch( 59 | x, 60 | boolean = "l", 61 | int = "i", 62 | integer = "i", 63 | tinyint = "i", 64 | smallint = "i", 65 | bigint = athena_option_env$bigint, 66 | double = "d", 67 | float = "d", 68 | real = "d", 69 | decimal = "d", 70 | string = "c", 71 | varchar = "c", 72 | char = "c", 73 | date = "D", 74 | timestamp = "T", 75 | "timestamp with time zone" = "c", 76 | array = "c", 77 | row = "c", 78 | map = "c", 79 | json = "c", 80 | ipaddress = "c", 81 | varbinary = "c", 82 | x 83 | ) 84 | } 85 | output <- vapply(data_type, athena_to_r, FUN.VALUE = character(1)) 86 | return(output) 87 | } 88 | -------------------------------------------------------------------------------- /vignettes/changing_backend_file_parser.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Changing Backend File Parser" 3 | author: "Dyfan Jones" 4 | output: rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{Changing Backend File Parser} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\usepackage[UTF-8]{inputenc} 9 | --- 10 | 11 | # Intro 12 | `noctua` is dependent on `data.table` to read data into `R`. This is down to the amazing speed `data.table` offers when reading files into `R`. However, a new package, with equally impressive read speeds, has come onto the scene called [`vroom`](https://github.com/tidyverse/vroom). As `vroom` has been designed to only read data into `R`, similarly to `readr`, `data.table` is still used for all of the heavy lifting. However, if a user wishes to use `vroom` as the file parser, the `noctua_options` function has been created to enable this: 13 | 14 | ```r 15 | library(DBI) 16 | library(noctua) 17 | 18 | con = dbConnect(athena()) 19 | 20 | noctua_options(file_parser = c("data.table", "vroom")) 21 | ``` 22 | 23 | By setting `file_parser` to `"vroom"`, the backend will change to allow `vroom`'s file parser to be used instead of `data.table`.
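One practical difference worth knowing is that the two parsers hand results back as different classes. The sketch below illustrates this; it assumes a working Athena connection, and the exact classes may vary by `noctua` version:

```r
library(DBI)
library(noctua)

con = dbConnect(athena())

# data.table parser (the default) typically returns a data.table
noctua_options(file_parser = "data.table")
class(dbGetQuery(con, "select 1 as n"))

# vroom parser typically returns a tibble
noctua_options(file_parser = "vroom")
class(dbGetQuery(con, "select 1 as n"))
```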
24 |
25 | # Change back to `data.table`
26 |
27 | To go back to using `data.table` as the file parser it is as simple as calling the `noctua_options` function:
28 |
29 | ```r
30 | # return to using data.table as file parser
31 | noctua_options()
32 | ```
33 |
34 | # Swapping on the fly
35 |
36 | This makes it very flexible to swap between file parsers, even between query executions:
37 |
38 | ```r
39 | library(DBI)
40 | library(noctua)
41 |
42 | con = dbConnect(athena())
43 |
44 | # upload data
45 | dbWriteTable(con, "iris", iris)
46 |
47 | # use default data.table file parser
48 | df1 = dbGetQuery(con, "select * from iris")
49 |
50 | # use vroom as file parser
51 | noctua_options("vroom")
52 | df2 = dbGetQuery(con, "select * from iris")
53 |
54 | # return back to data.table file parser
55 | noctua_options()
56 | df3 = dbGetQuery(con, "select * from iris")
57 | ```
58 |
59 | # Why should you consider `vroom`?
60 |
61 | If you aren't sure whether to use `vroom` over `data.table`, I draw your attention to `vroom` boasting a whopping 1.40 GB/sec throughput.
62 |
63 | > *Statistics taken from vroom's github readme*
64 |
65 | package | version | time (sec) | speed-up | throughput
66 | ---|---|---|---|---
67 | vroom | 1.1.0 | 1.14 | 58.44 | 1.40 GB/sec
68 | data.table | 1.12.8 | 11.88 | 5.62 | 134.13 MB/sec
69 | readr | 1.3.1 | 29.02 | 2.30 | 54.92 MB/sec
70 | read.delim | 3.6.2 | 66.74 | 1.00 | 23.88 MB/sec
71 | -------------------------------------------------------------------------------- /vignettes/how_to_retry.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "How to Retry?"
3 | author: "Dyfan Jones"
4 | output: rmarkdown::html_vignette
5 | vignette: >
6 | %\VignetteIndexEntry{How to Retry?}
7 | %\VignetteEngine{knitr::rmarkdown}
8 | %\usepackage[UTF-8]{inputenc}
9 | ---
10 |
11 | With most applications that connect to an API, unnecessary exceptions can be returned to the caller in the case of transient network or service issues. To avoid this `noctua` has implemented a retry method with exponential backoff. This technique increases the reliability of the application when connecting to `AWS Athena`.
12 |
13 | # How to handle `noctua`'s retry?
14 |
15 | By default `noctua` performs a retry noisily; this means it will report the exception it has encountered and let the user know how long `noctua` will wait until it retries again. This is reported in the following format:
16 |
17 | ```r
18 | {exception message} + "Request failed. Retrying in " + {wait time} + " seconds..."
19 | ```
20 |
21 | This is to keep the user informed of what `noctua` is doing behind the scenes.
22 |
23 | ## Configure
24 |
25 | By default `noctua` retries 5 times and does it noisily. To configure this, `noctua_options` has been given 2 extra parameters `retry` and `retry_quiet`. `retry` is the number of retries `noctua` will perform. `retry_quiet` tells `noctua` whether to retry quietly or not.
26 | 27 | We can change the default retry settings so that `noctua` will retry 10 times and do it quietly: 28 | 29 | ```r 30 | noctua_options(retry = 10, retry_quiet = TRUE) 31 | ``` 32 | 33 | If you wish to create your own custom retry function just set the `retry` to 0: 34 | 35 | ```r 36 | library(DBI) 37 | library(noctua) 38 | 39 | # connection to AWS Athena 40 | con = dbConnect(athena()) 41 | 42 | # Stop noctua retrying 43 | noctua_options(retry = 0) 44 | 45 | # build your own custom retry function 46 | custom_retry = function(x){ 47 | # your custom retry method 48 | } 49 | 50 | # apply your own retry function 51 | custom_retry(dbGetQuery(con, "select ...")) 52 | ``` 53 | 54 | # Requests 55 | 56 | If you wish to increase the retry functionality of `noctua` for example the use of different backoff algorithms, please raise a ticket at [issues](https://github.com/DyfanJones/noctua/issues) or raise a pull request. 57 | 58 | # Reading material 59 | 60 | - [Exponential Backoff and Jitter](https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/) 61 | - [How to handle a "Throttling – Maximum sending rate exceeded" error](https://aws.amazon.com/blogs/messaging-and-targeting/how-to-handle-a-throttling-maximum-sending-rate-exceeded-error/) 62 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AthenaListColumns,AthenaConnection) 4 | S3method(AthenaListObjectTypes,default) 5 | S3method(AthenaListObjects,AthenaConnection) 6 | S3method(AthenaPreviewObject,AthenaConnection) 7 | S3method(AthenaToRDataType,athena_data.table) 8 | S3method(AthenaToRDataType,athena_vroom) 9 | S3method(athena_read,athena_data.table) 10 | S3method(athena_read,athena_vroom) 11 | S3method(athena_read_lines,athena_data.table) 12 | S3method(athena_read_lines,athena_vroom) 13 | S3method(resource_active,AthenaConnection) 14 | S3method(resource_active,AthenaResult) 15 | export(AthenaListObjects) 16 | export(AthenaPreviewObject) 17 | export(assume_role) 18 | export(athena) 19 | export(create_work_group) 20 | export(dbConvertTable) 21 | export(dbGetPartition) 22 | export(dbGetTables) 23 | export(dbShow) 24 | export(dbStatistics) 25 | export(db_compute.AthenaConnection) 26 | export(db_copy_to.AthenaConnection) 27 | export(dbplyr_edition.AthenaConnection) 28 | export(delete_work_group) 29 | export(get_session_token) 30 | export(get_work_group) 31 | export(list_work_groups) 32 | export(noctua_options) 33 | export(sql_escape_date.AthenaConnection) 34 | export(sql_escape_datetime.AthenaConnection) 35 | export(sql_query_save.AthenaConnection) 36 | export(sql_translate_env.AthenaConnection) 37 | export(sql_translation.AthenaConnection) 38 | export(tag_options) 39 | export(update_work_group) 40 | exportClasses(AthenaConnection) 41 | exportClasses(AthenaDriver) 42 | exportClasses(AthenaResult) 43 | exportMethods(dbBegin) 44 | exportMethods(dbClearResult) 45 | exportMethods(dbColumnInfo) 46 | exportMethods(dbCommit) 47 | exportMethods(dbConnect) 48 | exportMethods(dbConvertTable) 49 | exportMethods(dbDataType) 50 | exportMethods(dbDisconnect) 51 | exportMethods(dbExecute) 52 | exportMethods(dbExistsTable) 53 | exportMethods(dbFetch) 54 | exportMethods(dbGetInfo) 55 | exportMethods(dbGetPartition) 56 | exportMethods(dbGetQuery) 57 | exportMethods(dbGetStatement) 58 | exportMethods(dbGetTables) 59 | exportMethods(dbHasCompleted) 60 | 
exportMethods(dbIsValid) 61 | exportMethods(dbListFields) 62 | exportMethods(dbListTables) 63 | exportMethods(dbQuoteIdentifier) 64 | exportMethods(dbQuoteString) 65 | exportMethods(dbRemoveTable) 66 | exportMethods(dbRollback) 67 | exportMethods(dbSendQuery) 68 | exportMethods(dbSendStatement) 69 | exportMethods(dbShow) 70 | exportMethods(dbStatistics) 71 | exportMethods(dbWriteTable) 72 | exportMethods(show) 73 | exportMethods(sqlCreateTable) 74 | exportMethods(sqlData) 75 | import(DBI) 76 | import(data.table) 77 | import(methods) 78 | import(paws) 79 | importFrom(stats,runif) 80 | importFrom(utils,head) 81 | importFrom(utils,modifyList) 82 | importFrom(utils,packageVersion) 83 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: Commands 8 | 9 | jobs: 10 | document: 11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 12 | name: document 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: r-lib/actions/pr-fetch@v2 20 | with: 21 | repo-token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | - uses: r-lib/actions/setup-r@v2 24 | with: 25 | use-public-rspm: true 26 | 27 | - uses: r-lib/actions/setup-r-dependencies@v2 28 | with: 29 | extra-packages: any::roxygen2 30 | needs: pr-document 31 | 32 | - name: Document 33 | run: roxygen2::roxygenise() 34 | shell: Rscript {0} 35 | 36 | - name: commit 37 | run: | 38 | git config --local user.name "$GITHUB_ACTOR" 39 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 40 | git add man/\* NAMESPACE 41 | git commit -m 'Document' 42 | 43 | - uses: r-lib/actions/pr-push@v2 44 | with: 45 | repo-token: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | style: 48 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 49 | name: style 50 | runs-on: ubuntu-latest 51 | env: 52 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 53 | steps: 54 | - uses: actions/checkout@v3 55 | 56 | - uses: r-lib/actions/pr-fetch@v2 57 | with: 58 | repo-token: ${{ secrets.GITHUB_TOKEN }} 59 | 60 | - uses: r-lib/actions/setup-r@v2 61 | 62 | - name: Install dependencies 63 | run: install.packages("styler") 64 | shell: Rscript {0} 65 | 66 | - name: Style 67 | run: styler::style_pkg() 68 | shell: Rscript {0} 69 | 70 | - name: commit 71 | run: | 72 | git config --local user.name "$GITHUB_ACTOR" 73 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 74 | git add \*.R 75 | git commit -m 'Style' 76 | 77 | - uses: r-lib/actions/pr-push@v2 78 | with: 79 | repo-token: ${{ secrets.GITHUB_TOKEN }} 80 | -------------------------------------------------------------------------------- /man/noctua_options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Options.R 3 | 
\name{noctua_options}
4 | \alias{noctua_options}
5 | \title{A method to configure noctua backend options.}
6 | \usage{
7 | noctua_options(
8 | file_parser,
9 | bigint,
10 | binary,
11 | json,
12 | cache_size,
13 | clear_cache,
14 | retry,
15 | retry_quiet,
16 | unload,
17 | clear_s3_resource,
18 | verbose
19 | )
20 | }
21 | \arguments{
22 | \item{file_parser}{Method to read and write tables to Athena, currently defaults to \code{"data.table"}. The file_parser also
23 | determines the data format returned, for example \code{"data.table"} will return a \code{data.table} and \code{"vroom"} will return a \code{tibble}.}
24 |
25 | \item{bigint}{The R type that 64-bit integer types should be mapped to (default: \code{"integer64"}).
26 | Inbuilt \code{bigint} conversion types \code{c("integer64", "integer", "numeric", "character")}.}
27 |
28 | \item{binary}{The R type that \code{binary/varbinary} types should be mapped to (default \code{"raw"}).
29 | Inbuilt binary conversion types \code{c("raw", "character")}.}
30 |
31 | \item{json}{Attempts to convert AWS Athena data types \code{c(arrays, json)} using \code{jsonlite::parse_json} (default: \code{"auto"}).
32 | Inbuilt json conversion types \code{c("auto", "character")}.
33 | Custom JSON parsers can be provided by using a function with a data frame parameter.}
34 |
35 | \item{cache_size}{Number of queries to be cached. Currently only supports caching up to 100 distinct queries (default: \code{0}).}
36 |
37 | \item{clear_cache}{Clears all previously cached query metadata}
38 |
39 | \item{retry}{Maximum number of requests to attempt (default: \code{5}).}
40 |
41 | \item{retry_quiet}{This method is deprecated, please use verbose instead.}
42 |
43 | \item{unload}{Set AWS Athena unload functionality globally (default: \code{FALSE})}
44 |
45 | \item{clear_s3_resource}{Clear down \verb{AWS Athena} \verb{AWS S3} resource (\code{s3_staging_dir} location).
46 | This is useful for users that don't have the \verb{AWS IAM role} permissions to delete
47 | from \code{s3_staging_dir} (default: \code{TRUE})}
48 |
49 | \item{verbose}{Print package info messages (default: \code{TRUE})}
50 | }
51 | \value{
52 | \code{noctua_options()} returns the \code{list} of the Athena option environment invisibly.
53 | }
54 | \description{
55 | \code{noctua_options()} provides a method to change the backend. This includes changing the file parser,
56 | whether \code{noctua} should cache query ids locally, and the number of retries on a failed API call.
57 | }
58 | \examples{
59 | library(noctua)
60 |
61 | # change file parser from default data.table to vroom
62 | noctua_options("vroom")
63 |
64 | # cache queries locally
65 | noctua_options(cache_size = 5)
66 | }
67 | -------------------------------------------------------------------------------- /vignettes/aws_s3_backend.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "AWS S3 Backend"
3 | author: "Dyfan Jones"
4 | output: rmarkdown::html_vignette
5 | vignette: >
6 | %\VignetteIndexEntry{AWS S3 Backend}
7 | %\VignetteEngine{knitr::rmarkdown}
8 | %\usepackage[UTF-8]{inputenc}
9 | ---
10 |
11 | `AWS Athena` allows SQL querying to be performed on [AWS S3 buckets](https://aws.amazon.com/s3/). To gain access to this, the correct permission level needs to be enabled.
12 |
13 | # AWS S3 Structures
14 |
15 | `noctua` uploads the data into `AWS S3`, then registers the table in `AWS Athena`.
When appending data to an existing `AWS Athena` table, `noctua` adds the data to the specified `AWS S3` partition and then repairs the `AWS Athena` table.
16 |
17 | `noctua` uses the `s3.location` parameter from the `dbWriteTable` function for the `AWS S3` location. If `s3.location` isn't specified then the location is taken from the initial connection (`dbConnect`).
18 |
19 | `noctua` aligns the `s3.location` to the following `AWS S3` structure: `{s3.location}/{schema}/{table_name}/{partition}/{file}` (remember that `s3.location` has to be in [s3 uri format](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html): "s3://bucket-name/key-name"). This is to allow tables with the same name to be uploaded to different schemas.
20 |
21 | ```{r, echo = F,out.width = "100%"}
22 | knitr::include_graphics("athena_s3_example.png")
23 | ```
24 |
25 | **NOTE:** `noctua` won't duplicate the table name or schema if they have been provided in `s3.location`. For example:
26 |
27 | ```r
28 | dbWriteTable(con, "myschema.table", table,
29 | s3.location = "s3://mybucket/myschema/table",
30 | partition = c("year" = "2020"))
31 |
32 | # AWS S3 location
33 | "s3://mybucket/myschema/table/year=2020/table.tsv"
34 | ```
35 |
36 | # File Types
37 |
38 | Currently `noctua` supports the following file types `[".tsv", ".csv", ".parquet"]`. For `parquet` files, the package [arrow](https://arrow.apache.org/docs/r/) is used. This package will have to be installed before data can be sent to `AWS S3` in `parquet` format.
39 |
40 | `noctua` also supports compression when uploading data to `AWS S3`. For delimited files (`".tsv"` and `".csv"`), [gzip compression](https://en.wikipedia.org/wiki/Gzip) is used. When using gzip compression, `noctua` will split the zipped file into a maximum of 20 equal parts. This is to speed up how `AWS Athena` queries gzip compressed files ([Default Compression Method for Flat Files](https://github.com/DyfanJones/RAthena/issues/36)). [Snappy](https://en.wikipedia.org/wiki/Snappy_(compression)) compression is used for compressing `parquet` files.
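
To tie this together, both the file type and the compression are controlled through `dbWriteTable`. Below is a minimal sketch, assuming a working `AWS Athena` connection and that the table names `iris_gzip` and `iris_parquet` are free to use:

```r
library(DBI)
library(noctua)

con = dbConnect(athena())

# delimited file compressed with gzip,
# noctua splits the compressed file into parts behind the scenes
dbWriteTable(con, "iris_gzip", iris,
             file.type = "tsv",
             compress = TRUE)

# parquet file compressed with snappy (requires the arrow package)
dbWriteTable(con, "iris_parquet", iris,
             file.type = "parquet",
             compress = TRUE)
```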
41 | 42 | # Useful links 43 | 44 | * [AWS Athena performance tips](https://aws.amazon.com/blogs/big-data/top-10-performance-tuning-tips-for-amazon-athena/) 45 | -------------------------------------------------------------------------------- /tests/testthat/test-dbFetch-n.R: -------------------------------------------------------------------------------- 1 | # NOTE System variable format returned for Unit tests: 2 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 3 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 4 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 5 | 6 | test_that("fetch athena table in batch 100 data.table", { 7 | skip_if_no_env() 8 | 9 | con <- dbConnect(athena()) 10 | 11 | noctua_options() 12 | res = dbExecute(con, "select * from iris") 13 | 14 | fetch_iris = dbFetch(res, n = 100) 15 | 16 | expect_equal(dim(fetch_iris), c(100, 5)) 17 | 18 | fetch_iris = dbFetch(res, n = 100) 19 | 20 | expect_equal(dim(fetch_iris), c(50, 5)) 21 | expect_true(inherits(fetch_iris, "data.table")) 22 | 23 | dbClearResult(res) 24 | }) 25 | 26 | test_that("fetch athena table in batch 100 tibble", { 27 | skip_if_no_env() 28 | 29 | con <- dbConnect(athena()) 30 | 31 | noctua_options("vroom") 32 | 33 | res = dbExecute(con, "select * from iris") 34 | 35 | fetch_iris = dbFetch(res, n = 100) 36 | 37 | expect_equal(dim(fetch_iris), c(100, 5)) 38 | 39 | fetch_iris = dbFetch(res, n = 100) 40 | 41 | expect_equal(dim(fetch_iris), c(50, 5)) 42 | 43 | expect_true(inherits(fetch_iris, "tbl_df")) 44 | 45 | dbClearResult(res) 46 | }) 47 | 48 | test_that("fetch athena table on closed connection", { 49 | skip_if_no_env() 50 | 51 | con <- dbConnect(athena()) 52 | 53 | res = dbExecute(con, "select * from iris") 54 | 55 | fetch_iris = dbFetch(res, n = 100) 56 | 57 | expect_equal(dim(fetch_iris), c(100, 5)) 58 | dbClearResult(res) 59 | 60 | expect_error(dbFetch(res, n = 100), "Result already cleared.") 61 | }) 62 | 63 | test_that("test dbGetQuery dbplyr ident", { 64 | skip_if_no_env() 65 | skip_if_package_not_avialable("dbplyr") 66 | library(dbplyr) 67 | 68 | con <- dbConnect(athena()) 69 | 70 | noctua::noctua_options("data.table") 71 | 72 | empty_shell = dbGetQuery(con, dbplyr::ident("iris")) 73 | 74 | expect = c("sepal_length", "sepal_width", "petal_length", "petal_width", "species") 75 | 76 | expect_equal(names(empty_shell), expect) 77 | }) 78 | 79 | test_that("test if dbGetQuery statistics returns named list correctly", { 80 | skip_if_no_env() 81 | 82 | con <- dbConnect(athena()) 83 | 84 | stat_out = utils::capture.output({exp = dbGetQuery(con, "select * from iris", statistics = T)}) 85 | 86 | for (i in expected_stat_output){ 87 | expect_true(any(grepl(i, stat_out))) 88 | } 89 | }) 90 | 91 | test_that("test athena unload",{ 92 | 93 | noctua_options(unload = T) 94 | 95 | expect_true(athena_unload()) 96 | noctua_options() 97 | }) 98 | 99 | 100 | -------------------------------------------------------------------------------- /man/assume_role.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/athena_low_api.R 3 | \name{assume_role} 4 | \alias{assume_role} 5 | \title{Assume AWS ARN Role} 6 | \usage{ 7 | assume_role( 8 | profile_name = NULL, 9 | region_name = NULL, 10 | role_arn = NULL, 11 | role_session_name = sprintf("noctua-session-\%s", as.integer(Sys.time())), 12 | duration_seconds = 3600L, 13 | set_env = FALSE 14 | ) 15 | 
} 16 | \arguments{ 17 | \item{profile_name}{The name of a profile to use. If not given, then the default profile is used. 18 | To set profile name, the \href{https://aws.amazon.com/cli/}{AWS Command Line Interface} (AWS CLI) will need to be configured. 19 | To configure AWS CLI please refer to: \href{https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html}{Configuring the AWS CLI}.} 20 | 21 | \item{region_name}{Default region when creating new connections. Please refer to \href{https://docs.aws.amazon.com/general/latest/gr/rande.html}{link} for 22 | AWS region codes (region code example: Region = EU (Ireland) \code{ region_name = "eu-west-1"})} 23 | 24 | \item{role_arn}{The Amazon Resource Name (ARN) of the role to assume (such as \code{arn:aws:sts::123456789012:assumed-role/role_name/role_session_name})} 25 | 26 | \item{role_session_name}{An identifier for the assumed role session. By default \code{noctua} creates a session name \code{sprintf("noctua-session-\%s", as.integer(Sys.time()))}} 27 | 28 | \item{duration_seconds}{The duration, in seconds, of the role session. The value can range from 900 seconds (15 minutes) up to the maximum session duration setting for the role. 29 | This setting can have a value from 1 hour to 12 hours. By default duration is set to 3600 seconds (1 hour).} 30 | 31 | \item{set_env}{If set to \code{TRUE} environmental variables \code{AWS_ACCESS_KEY_ID}, \code{AWS_SECRET_ACCESS_KEY} and \code{AWS_SESSION_TOKEN} will be set.} 32 | } 33 | \value{ 34 | \code{assume_role()} returns a list containing: \code{"AccessKeyId"}, \code{"SecretAccessKey"}, \code{"SessionToken"} and \code{"Expiration"} 35 | } 36 | \description{ 37 | Returns a set of temporary security credentials that you can use to access AWS resources that you might not normally have access to (\href{https://www.paws-r-sdk.com/docs/sts_assume_role/}{link}). 38 | These temporary credentials consist of an access key ID, a secret access key, and a security token. Typically, you use AssumeRole within 39 | your account or for cross-account access. 40 | } 41 | \examples{ 42 | \dontrun{ 43 | # Note: 44 | # - Require AWS Account to run below example. 45 | 46 | library(noctua) 47 | library(DBI) 48 | 49 | # Assuming demo ARN role 50 | assume_role( 51 | profile_name = "YOUR_PROFILE_NAME", 52 | role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name", 53 | set_env = TRUE 54 | ) 55 | 56 | # Connect to Athena using ARN Role 57 | con <- dbConnect(noctua::athena()) 58 | } 59 | } 60 | \seealso{ 61 | \code{\link[=dbConnect]{dbConnect()}} 62 | } 63 | -------------------------------------------------------------------------------- /man/session_token.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/athena_low_api.R 3 | \name{session_token} 4 | \alias{session_token} 5 | \alias{get_session_token} 6 | \title{Get Session Tokens for PAWS Connection} 7 | \usage{ 8 | get_session_token( 9 | profile_name = NULL, 10 | region_name = NULL, 11 | serial_number = NULL, 12 | token_code = NULL, 13 | duration_seconds = 3600L, 14 | set_env = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{profile_name}{The name of a profile to use. If not given, then the default profile is used. 19 | To set profile name, the \href{https://aws.amazon.com/cli/}{AWS Command Line Interface} (AWS CLI) will need to be configured. 
20 | To configure AWS CLI please refer to: \href{https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html}{Configuring the AWS CLI}.} 21 | 22 | \item{region_name}{Default region when creating new connections. Please refer to \href{https://docs.aws.amazon.com/general/latest/gr/rande.html}{link} for 23 | AWS region codes (region code example: Region = EU (Ireland) \code{ region_name = "eu-west-1"})} 24 | 25 | \item{serial_number}{The identification number of the MFA device that is associated with the IAM user who is making the GetSessionToken call. 26 | Specify this value if the IAM user has a policy that requires MFA authentication. The value is either the serial number for a hardware device 27 | (such as \code{GAHT12345678}) or an Amazon Resource Name (ARN) for a virtual device (such as arn:aws:iam::123456789012:mfa/user).} 28 | 29 | \item{token_code}{The value provided by the MFA device, if MFA is required. If any policy requires the IAM user to submit an MFA code, 30 | specify this value. If MFA authentication is required, the user must provide a code when requesting a set of temporary 31 | security credentials. A user who fails to provide the code receives an "access denied" response when requesting resources 32 | that require MFA authentication.} 33 | 34 | \item{duration_seconds}{The duration, in seconds, that the credentials should remain valid. Acceptable duration for IAM user sessions range 35 | from 900 seconds (15 minutes) to 129,600 seconds (36 hours), with 3,600 seconds (1 hour) as the default.} 36 | 37 | \item{set_env}{If set to \code{TRUE} environmental variables \code{AWS_ACCESS_KEY_ID}, \code{AWS_SECRET_ACCESS_KEY} and \code{AWS_SESSION_TOKEN} will be set.} 38 | } 39 | \value{ 40 | \code{get_session_token()} returns a list containing: \code{"AccessKeyId"}, \code{"SecretAccessKey"}, \code{"SessionToken"} and \code{"Expiration"} 41 | } 42 | \description{ 43 | Returns a set of temporary credentials for an AWS account or IAM user (\href{https://www.paws-r-sdk.com/docs/sts_get_session_token/}{link}). 44 | } 45 | \examples{ 46 | \dontrun{ 47 | # Note: 48 | # - Require AWS Account to run below example. 49 | 50 | library(noctua) 51 | library(DBI) 52 | 53 | # Create Temporary Credentials duration 1 hour 54 | get_session_token("YOUR_PROFILE_NAME", 55 | serial_number = "arn:aws:iam::123456789012:mfa/user", 56 | token_code = "531602", 57 | set_env = TRUE 58 | ) 59 | 60 | # Connect to Athena using temporary credentials 61 | con <- dbConnect(athena()) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 
24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@v1 55 | - uses: r-hub/actions/platform-info@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@v1 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@v1 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - uses: r-hub/actions/checkout@v1 80 | - uses: r-hub/actions/setup-r@v1 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@v1 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@v1 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@v1 93 | with: 94 | job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /tests/testthat/test-s3-upload-location.R: -------------------------------------------------------------------------------- 1 | context("S3 upload location") 2 | 3 | test_that("Check if the S3 upload location is correctly built",{ 4 | skip_if_no_env() 5 | 6 | # Test connection is using AWS CLI to set profile_name 7 | conn <- dbConnect(noctua::athena(), 8 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 9 | 10 | # schema and name not s3 location 11 | name <- "dummy_table" 12 | s3.location <- "s3://bucket/path/to/file" 13 | partition <- c("YEAR"= 2000) 14 | s3_1 <- noctua:::s3_upload_location(conn, s3.location, name, partition) 15 | s3_2 <- noctua:::s3_upload_location(conn, s3.location, name, NULL) 16 | 17 | # schema in s3 location 18 | s3.location <- "s3://bucket/path/to/file/schema" 19 | name <- "schema.dummy_table" 20 | s3_3 <- noctua:::s3_upload_location(conn, s3.location, name, partition) 21 | s3_4 <- noctua:::s3_upload_location(conn, s3.location, name, NULL) 22 | 23 | # name in s3 location 24 | s3.location <- "s3://bucket/path/to/file/dummy_table" 25 | name <- "schema.dummy_table" 26 | s3_3 <- noctua:::s3_upload_location(conn, s3.location, name, partition) 27 | s3_4 <- noctua:::s3_upload_location(conn, s3.location, name, NULL) 28 | 29 | # schema different s3 location 30 | s3.location <- "s3://bucket/path/schema/to/file" 31 | name <- "schema.dummy_table" 32 | 
s3_5 <- noctua:::s3_upload_location(conn, s3.location, name, partition)
33 | s3_6 <- noctua:::s3_upload_location(conn, s3.location, name, NULL)
34 |
35 | # schema and table in s3 location
36 | s3.location <- "s3://bucket/path/to/file/schema/dummy_table"
37 | name <- "schema.dummy_table"
38 | s3_7 <- noctua:::s3_upload_location(conn, s3.location, name, partition)
39 | s3_8 <- noctua:::s3_upload_location(conn, s3.location, name, NULL)
40 |
41 | # s3 location for existing table (should ignore schema/name/ partition)
42 | s3.location <- "s3://bucket/path/to/file/dummy_table"
43 | name <- "schema.dummy_table"
44 | s3_9 <- noctua:::s3_upload_location(conn, s3.location, name, partition, TRUE)
45 | s3_10 <- noctua:::s3_upload_location(conn, s3.location, name, NULL, TRUE)
46 |
47 | expect_equal(s3_1, list(Bucket = "bucket", Key = "path/to/file", Schema = "default", Name = "dummy_table", Partition = "YEAR=2000"))
48 | expect_equal(s3_2, list(Bucket = "bucket", Key = "path/to/file", Schema = "default", Name = "dummy_table", Partition = NULL))
49 | expect_equal(s3_3, list(Bucket = "bucket", Key = "path/to/file/dummy_table", Schema = "schema", Name = NULL, Partition = "YEAR=2000"))
50 | expect_equal(s3_4, list(Bucket = "bucket", Key = "path/to/file/dummy_table", Schema = "schema", Name = NULL, Partition = NULL))
51 | expect_equal(s3_5, list(Bucket = "bucket", Key = "path/schema/to/file", Schema = NULL, Name = "dummy_table", Partition = "YEAR=2000"))
52 | expect_equal(s3_6, list(Bucket = "bucket", Key = "path/schema/to/file", Schema = NULL, Name = "dummy_table", Partition = NULL))
53 | expect_equal(s3_7, list(Bucket = "bucket", Key = "path/to/file/schema/dummy_table", Schema = NULL, Name = NULL, Partition = "YEAR=2000"))
54 | expect_equal(s3_8, list(Bucket = "bucket", Key = "path/to/file/schema/dummy_table", Schema = NULL, Name = NULL, Partition = NULL))
55 | expect_equal(s3_9, list(Bucket = "bucket", Key = "path/to/file/dummy_table", Schema = NULL, Name = NULL, Partition = "YEAR=2000"))
56 | expect_equal(s3_10, list(Bucket = "bucket", Key = "path/to/file/dummy_table", Schema = NULL, Name = NULL, Partition = NULL))
57 | })
58 | -------------------------------------------------------------------------------- /vignettes/aws_athena_query_caching.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "AWS Athena Query Caching"
3 | author: "Dyfan Jones"
4 | output: rmarkdown::html_vignette
5 | vignette: >
6 | %\VignetteIndexEntry{AWS Athena Query Caching}
7 | %\VignetteEngine{knitr::rmarkdown}
8 | %\usepackage[UTF-8]{inputenc}
9 | ---
10 |
11 | # Intro
12 |
13 | `noctua` now supports caching. This was originally inspired by `pyathena` to reduce the cost of using `AWS Athena`. `noctua` however has a different caching method and utilises local caching in `R` environments instead of using AWS `list_query_executions`. This is because `dbClearResult` clears `AWS Athena`'s output in `AWS S3` when caching is disabled.
14 |
15 | ## Caching benefits
16 |
17 | By caching queries the performance of repeat queries is significantly improved. This is because the query is no longer sent to `AWS Athena`. Instead the query ID of the repeated query is taken from the R environment and the result is returned from `AWS S3`.
18 |
19 | ```r
20 | library(DBI)
21 | library(noctua)
22 |
23 | con = dbConnect(athena())
24 |
25 | # Start caching queries
26 | noctua_options(cache_size = 10)
27 |
28 | # Upload Data to AWS Athena
29 | dbWriteTable(con, "iris", iris, partition = c("Partition" = "01"))
30 |
31 | # initial query to AWS Athena
32 | system.time(df1 <- dbGetQuery(con, "select * from iris"))
33 |
34 | # Info: (Data scanned: 3.63 KB)
35 | # user system elapsed
36 | # 0.105 0.004 3.397
37 |
38 | # repeat query to AWS Athena
39 | system.time(df2 <- dbGetQuery(con, "select * from iris"))
40 |
41 | # Info: (Data scanned: 3.63 KB)
42 | # user system elapsed
43 | # 0.072 0.000 0.348
44 | ```
45 |
46 | Here we can see a performance increase of roughly x10 with repeat query execution.
47 |
48 | ## Caching weakness
49 |
50 | The weakness in caching occurs when the underlying data is updated. The cache will still only retrieve the previous query ID. This means that the new updated data won't be returned while caching is enabled:
51 |
52 | ```r
53 | # Updating iris table
54 | dbWriteTable(con, "iris", iris, append = T, partition = c("Partition" = "02"))
55 |
56 | dt5 = dbGetQuery(con, "select * from iris")
57 |
58 | # Stop using cache data
59 | noctua_options()
60 |
61 | dt6 = dbGetQuery(con, "select * from iris")
62 |
63 | nrow(dt5)
64 | # 150
65 |
66 | nrow(dt6)
67 | # 300
68 | ```
69 |
70 | Sadly the cached query didn't pick up the new data from `iris`.
71 |
72 | ## Cache memory
73 |
74 | The caching method in `noctua` will remember previous query ids within each R session, even if you stop and start caching in `noctua_options`.
75 |
76 | ```r
77 | # Start caching
78 | noctua_options(cache_size = 10)
79 | res1 = dbExecute(con, "select * from iris")
80 |
81 | # Stop caching
82 | noctua_options()
83 | res2 = dbExecute(con, "select * from iris")
84 |
85 | # Start caching
86 | noctua_options(cache_size = 10)
87 | res3 = dbExecute(con, "select * from iris")
88 |
89 | # Compare Query IDs
90 | res1@info$QueryExecutionId
91 | # 9a9272f5-0632-4774-9aa9-d07f151dabc5
92 |
93 | res2@info$QueryExecutionId
94 | # be12fe0-3ec0-4595-b3e6-b3bf67efa266
95 |
96 | res3@info$QueryExecutionId
97 | # 9a9272f5-0632-4774-9aa9-d07f151dabc5
98 | ```
99 |
100 | We can see that `res1` and `res3` utilise the same QueryID, even though
101 | 102 | ## Clear down cache 103 | 104 | To clear down the cache, just set the parameter: `clear_cache` within `noctua_options` to `TRUE` 105 | 106 | ```r 107 | noctua_options(clear_cache = T) 108 | ``` 109 | -------------------------------------------------------------------------------- /tests/testthat/test-dbConvertTable.R: -------------------------------------------------------------------------------- 1 | context("Convert Table") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Check if table is converted correctly",{ 9 | skip_if_no_env() 10 | 11 | # initial sql query checks 12 | # file type NULL 13 | obj1 <- ctas_sql_with(partition = NULL, s3.location = NULL, file.type = "NULL", compress = TRUE) 14 | obj2 <- ctas_sql_with(partition = "hi", s3.location = NULL, file.type = "NULL", compress = TRUE) 15 | obj3 <- ctas_sql_with(partition = "hi", s3.location = "s3://mybucket/myfile/", file.type = "NULL", compress = TRUE) 16 | 17 | # file type csv 18 | expect_warning(ctas_sql_with(partition = NULL, s3.location = "s3://mybucket/myfile/", file.type = "csv", compress = TRUE)) 19 | obj4 <- ctas_sql_with(partition = "hi", s3.location = "s3://mybucket/myfile/", file.type = "csv", compress = FALSE) 20 | 21 | # file type tsv 22 | obj5 <- ctas_sql_with(partition = NULL, s3.location = "s3://mybucket/myfile/", file.type = "tsv", compress = FALSE) 23 | 24 | # file type parquet 25 | obj6 <- ctas_sql_with(partition = NULL, s3.location = "s3://mybucket/myfile/", file.type = "parquet", compress = TRUE) 26 | 27 | # file type json 28 | obj7 <- ctas_sql_with(partition = NULL, s3.location = "s3://mybucket/myfile/", file.type = "json", compress = FALSE) 29 | 30 | # file type orc 31 | obj8 <- ctas_sql_with(partition = NULL, s3.location = "s3://mybucket/myfile/", file.type = "orc", compress = TRUE) 32 | 33 | con <- dbConnect(athena(), 34 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 35 | 36 | # check if key tables exist or not 37 | if(!dbExistsTable(con, "iris")) dbWriteTable(con, "iris", iris) 38 | if(dbExistsTable(con, "iris_parquet")) dbRemoveTable(con, "iris_parquet", confirm = T) 39 | if(dbExistsTable(con, "iris_orc_partitioned")) dbRemoveTable(con, "iris_orc_partitioned", confirm = T) 40 | 41 | dbConvertTable(con, 42 | obj = "iris", 43 | name = "iris_parquet", 44 | file.type = "parquet") 45 | 46 | dbConvertTable(con, 47 | obj = SQL("select 48 | iris.*, 49 | date_format(current_date, '%Y%m%d') as time_stamp 50 | from iris"), 51 | name = "iris_orc_partitioned", 52 | file.type = "orc", 53 | partition = "time_stamp") 54 | 55 | obj9 <- sapply(c("iris_parquet", "iris_orc_partitioned"), dbExistsTable, conn = con) 56 | 57 | expect_equal("", obj1) 58 | expect_equal("WITH (partitioned_by = ARRAY['hi'])\n", obj2) 59 | expect_equal("WITH (external_location ='s3://mybucket/myfile/',\npartitioned_by = ARRAY['hi'])\n", obj3) 60 | expect_equal("WITH (format = 'TEXTFILE',\nfield_delimiter = ',',\nexternal_location ='s3://mybucket/myfile/',\npartitioned_by = ARRAY['hi'])\n", obj4) 61 | expect_equal("WITH (format = 'TEXTFILE',\nfield_delimiter = '\t',\nexternal_location ='s3://mybucket/myfile/')\n", obj5) 62 | expect_equal("WITH (format = 'PARQUET',\nparquet_compression = 'SNAPPY',\nexternal_location ='s3://mybucket/myfile/')\n", obj6) 63 | expect_equal("WITH (format 
= 'JSON',\nexternal_location ='s3://mybucket/myfile/')\n", obj7) 64 | expect_equal("WITH (format = 'ORC',\norc_compression = 'SNAPPY',\nexternal_location ='s3://mybucket/myfile/')\n", obj8) 65 | sapply(obj9, expect_true) 66 | }) 67 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
<li>, and enclosing <li> if in dropdown
35 | if (pos >= 0) {
36 | var menu_anchor = $(links[pos]);
37 | menu_anchor.parent().addClass("active");
38 | menu_anchor.closest("li.dropdown").addClass("active");
39 | }
40 | });
41 |
42 | function paths(pathname) {
43 | var pieces = pathname.split("/");
44 | pieces.shift(); // always starts with /
45 |
46 | var end = pieces[pieces.length - 1];
47 | if (end === "index.html" || end === "")
48 | pieces.pop();
49 | return(pieces);
50 | }
51 |
52 | // Returns -1 if not found
53 | function prefix_length(needle, haystack) {
54 | if (needle.length > haystack.length)
55 | return(-1);
56 |
57 | // Special case for length-0 haystack, since for loop won't run
58 | if (haystack.length === 0) {
59 | return(needle.length === 0 ? 0 : -1);
60 | }
61 |
62 | for (var i = 0; i < haystack.length; i++) {
63 | if (needle[i] != haystack[i])
64 | return(i);
65 | }
66 |
67 | return(haystack.length);
68 | }
69 |
70 | /* Clipboard --------------------------*/
71 |
72 | function changeTooltipMessage(element, msg) {
73 | var tooltipOriginalTitle=element.getAttribute('data-original-title');
74 | element.setAttribute('data-original-title', msg);
75 | $(element).tooltip('show');
76 | element.setAttribute('data-original-title', tooltipOriginalTitle);
77 | }
78 |
79 | if(ClipboardJS.isSupported()) {
80 | $(document).ready(function() {
81 | var copyButton = "";
82 |
83 | $("div.sourceCode").addClass("hasCopyButton");
84 |
85 | // Insert copy buttons:
86 | $(copyButton).prependTo(".hasCopyButton");
87 |
88 | // Initialize tooltips:
89 | $('.btn-copy-ex').tooltip({container: 'body'});
90 |
91 | // Initialize clipboard:
92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
93 | text: function(trigger) {
94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, "");
95 | }
96 | });
97 |
98 | clipboardBtnCopies.on('success', function(e) {
99 | changeTooltipMessage(e.trigger, 'Copied!');
100 | e.clearSelection();
101 | });
102 |
103 | clipboardBtnCopies.on('error', function(e) {
104 | changeTooltipMessage(e.trigger, 'Press Ctrl+C or Command+C to copy');
105 | });
106 | });
107 | }
108 | })(window.jQuery || window.$)
109 | -------------------------------------------------------------------------------- /tests/testthat/test-datatransfer-vroom.R: -------------------------------------------------------------------------------- 1 | context("data transfer vroom")
2 |
3 | # NOTE System variable format returned for Unit tests:
4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name"
5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/"
6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/"
7 |
8 | s3.location1 <- paste0(Sys.getenv("noctua_s3_tbl"),"test_df/")
9 | s3.location2 <- Sys.getenv("noctua_s3_tbl")
10 |
11 | test_that("Testing data transfer between R and athena vroom", {
12 | skip_if_no_env()
13 | skip_if_package_not_avialable("vroom")
14 |
15 | noctua_options("vroom")
16 | # Test connection is using AWS CLI to set profile_name
17 | con <- dbConnect(athena(),
18 | s3_staging_dir = Sys.getenv("noctua_s3_query"))
19 |
20 | df <- data.frame(w = as.POSIXct((Sys.time() -9):Sys.time(), origin = "1970-01-01", tz = "UTC"),
21 | x = 1:10,
22 | y = c(letters[1:8], c(" \\t\\t\\n 123 \" \\t\\t\\n ", ",15 \"")),
23 | z = sample(c(TRUE, FALSE), 10, replace = T),
24 | stringsAsFactors = F)
25 |
26 | # testing if bigint is transferred correctly
27 | df2 <- data.frame(var1 = sample(letters, 10, replace = T),
28 | var2 =
bit64::as.integer64(1:10), 29 | stringsAsFactors = F) 30 | if (dbExistsTable(con, "test_df")) 31 | suppressWarnings(dbRemoveTable(con, "test_df", confirm = T)) 32 | 33 | DATE <- Sys.Date() 34 | dbWriteTable(con, "test_df", df, 35 | partition = c("timesTamp" = format(DATE, "%Y%m%d")), 36 | s3.location = s3.location1 37 | ) 38 | dbWriteTable(con, "test_df2", df, 39 | overwrite = T, 40 | partition = c("year" = format(DATE, "%Y"), 41 | "month" = format(DATE, "%m"), 42 | "DAY" = format(DATE, "%d")), 43 | s3.location = s3.location2 44 | ) 45 | dbWriteTable(con, "df_bigint", df2, overwrite = T, s3.location = s3.location2) 46 | dbWriteTable(con, "mtcars2", mtcars, overwrite = T, compress = T) # mtcars used to test data.frame with row.names 47 | 48 | # if data.table is available in namespace result returned as data.table 49 | test_df <- as.data.frame(dbGetQuery(con, paste0("select w, x, y, z from test_df where timestamp ='", format(DATE, "%Y%m%d"),"'"))) 50 | test_df2 <- as.data.frame(dbGetQuery(con, paste0("select w, x, y, z from test_df2 where year = '", format(DATE, "%Y"), "' and month = '",format(DATE, "%m"), "' and day = '", format(DATE, "%d"),"'"))) 51 | test_df3 <- as.data.frame(dbGetQuery(con, "select * from df_bigint")) 52 | test_df4 <- as.data.frame(dbGetQuery(con, "select * from mtcars2")) 53 | 54 | # vroom adds attribute spec/problems. Attributes needs to be removed for unit test 55 | # attribute spec 56 | attributes(test_df)$spec <- NULL 57 | attributes(test_df2)$spec <- NULL 58 | attributes(test_df3)$spec <- NULL 59 | attributes(test_df4)$spec <- NULL 60 | 61 | # attribute problems 62 | attributes(test_df)$problems <- NULL 63 | attributes(test_df2)$problems <- NULL 64 | attributes(test_df3)$problems <- NULL 65 | attributes(test_df4)$problems <- NULL 66 | 67 | expect_equal(test_df, df) 68 | expect_equal(test_df2, df) 69 | expect_equal(test_df3,df2) 70 | expect_equal(test_df4, sqlData(con, mtcars)) 71 | }) 72 | 73 | test_that("Test unload athena query vroom",{ 74 | skip_if_no_env() 75 | skip_if_package_not_avialable("arrow") 76 | skip_if_package_not_avialable("dplyr") 77 | 78 | con <- dbConnect( 79 | athena(), 80 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 81 | 82 | noctua::noctua_options("vroom") 83 | 84 | df = dbGetQuery(con, "select 1 as n", unload = T) 85 | 86 | expect_s3_class(df, "tbl_df") 87 | expect_equal(df$n, 1) 88 | }) 89 | -------------------------------------------------------------------------------- /R/column_parser.R: -------------------------------------------------------------------------------- 1 | # Helper functions to parse the more complex data types 2 | 3 | .error_msg = "Column `%s` was unable to be converted." 4 | 5 | # Takes a string and converts it to raw 6 | hex2raw <- function(string){ 7 | split_str <- strsplit(string, split = " ", fixed = TRUE) 8 | output <- as.raw(as.hexmode(unlist(split_str))) 9 | split_raw(output, lengths(split_str)) 10 | } 11 | 12 | # split raw vector into list chunks 13 | split_raw <- function(vec, splits){ 14 | start <- cumsum(c(1, splits)) 15 | end <- start[-1]-1 16 | lapply(seq_along(splits), function(i) vec[start[i]:end[i]]) 17 | } 18 | 19 | # applying string convertion across entire data frame 20 | raw_parser <- function(output, columns){ 21 | # only convert Athena data types `varbinary` 22 | for (col in names(columns[columns %in% c("varbinary")])) { 23 | tryCatch({ 24 | set(output, j=col, value=hex2raw(output[[col]])) 25 | }, 26 | error = function(e){ 27 | warning(sprintf(.error_msg, col), call. 
= F) 28 | }) 29 | } 30 | } 31 | 32 | # split lists or vectors into list chunks 33 | split_vec <- function(vec, len, max_len = length(vec)){ 34 | start <- seq(1, max_len, len) 35 | end <- c(start[-1]-1, max_len) 36 | lapply(seq_along(start), function(i) vec[start[i]:end[i]]) 37 | } 38 | 39 | # collapse json strings into 1 json string 40 | create_json_string <- function(string){paste0("[", paste(string, collapse = ","), "]")} 41 | 42 | # chunk up json strings then collapse json chunks before parsing them 43 | json_chunks <- function(string, fun=jsonlite::parse_json, min_chunk = 10000L){ 44 | if(length(string) < min_chunk){ 45 | output <- fun(create_json_string(string)) 46 | } else { 47 | len <- max(ceiling(length(string)/20), min_chunk) 48 | split_string <- split_vec(string, len) 49 | output <- unlist( 50 | lapply(split_string, function(i) fun(create_json_string(i))), 51 | recursive = FALSE 52 | ) 53 | } 54 | return(output) 55 | } 56 | 57 | # parse json string 58 | json_parser <- function(output, columns){ 59 | # Get JSON conversion method 60 | if(identical(athena_option_env$json, "auto")){ 61 | parse_json <- pkg_method("parse_json", "jsonlite") 62 | } else if(is.function(athena_option_env$json)) { 63 | parse_json <- athena_option_env$json 64 | } else if(is.character(athena_option_env$json) && 65 | athena_option_env$json != "auto"){ 66 | stop("Unknown Json conversion method.", call. = F) 67 | } 68 | 69 | # only convert Athena data types `array` and `json` 70 | # only convert Athena data types `array` and `json` 71 | for (col in names(columns[columns %in% c("array", "json")])) { 72 | tryCatch({ 73 | set(output, 74 | j=col, 75 | value=json_chunks(output[[col]], parse_json)) 76 | }, 77 | error = function(e){ 78 | warning(sprintf(.error_msg, col), call. = F) 79 | }) 80 | } 81 | } 82 | 83 | # convert data frame variable to newline delimited JSON format 84 | # method: jsonify 85 | col_to_ndjson <- function(dt, col, batch = 1e4){ 86 | to_ndjson <- pkg_method("to_ndjson", "jsonify") 87 | splits <- split_vec(dt[[col]], batch) 88 | output <- lapply(splits, function(i) { 89 | strsplit(to_ndjson(i,unbox = T, numeric_dates = F), split = "\n")[[1]] 90 | })[[1]] 91 | return(output) 92 | } 93 | 94 | # method: jsonlite 95 | # col_to_ndjson <- function(dt, col, batch = 1e4){ 96 | # stream_out <- pkg_method("stream_out", "jsonlite") 97 | # con_raw <- rawConnection(raw(), open = "w") 98 | # stream_out(subset(dt, select = col), con_raw, verbose = F, auto_unbox = T, pagesize = batch) 99 | # con_out <- rawConnection(rawConnectionValue(con_raw)) 100 | # on.exit({ 101 | # close(con_raw) 102 | # close(con_out) 103 | # }) 104 | # return(readLines(con_out)) 105 | # } 106 | -------------------------------------------------------------------------------- /tests/testthat/test-upload-file-parameters.R: -------------------------------------------------------------------------------- 1 | context("upload file setup") 2 | 3 | library(data.table) 4 | 5 | test_that("test file parser parameter setup delimited",{ 6 | skip_if_package_not_avialable("vroom") 7 | init_args = list() 8 | 9 | # data.table parser 10 | noctua_options() 11 | arg_4 <- noctua:::update_args(file.type = "csv", init_args) 12 | arg_5 <- noctua:::update_args(file.type = "tsv", init_args) 13 | 14 | # vroom parser 15 | noctua_options(file_parser = "vroom") 16 | arg_6 <- noctua:::update_args(file.type = "csv", init_args) 17 | arg_7 <- noctua:::update_args(file.type = "tsv", init_args) 18 | 19 | expect_equal(arg_4, list(fun = data.table::fwrite, quote= FALSE, 
showProgress = FALSE, sep = ",")) 20 | expect_equal(arg_5, list(fun = data.table::fwrite, quote= FALSE, showProgress = FALSE, sep = "\t")) 21 | expect_equal(arg_6, list(fun = vroom::vroom_write, quote= "none", progress = FALSE, escape = "none", delim = ",")) 22 | expect_equal(arg_7, list(fun = vroom::vroom_write, quote= "none", progress = FALSE, escape = "none", delim = "\t")) 23 | }) 24 | 25 | test_that("test file parser parameter setup parquet",{ 26 | skip_if_package_not_avialable("arrow") 27 | 28 | init_args = list() 29 | 30 | arg_1 <- noctua:::update_args(file.type = "parquet", init_args) 31 | arg_2 <- noctua:::update_args(file.type = "parquet", init_args, compress = T) 32 | expect_equal(arg_1, list(fun = arrow::write_parquet, use_deprecated_int96_timestamps = TRUE, compression = NULL)) 33 | expect_equal(arg_2, list(fun = arrow::write_parquet, use_deprecated_int96_timestamps = TRUE, compression = "snappy")) 34 | }) 35 | 36 | test_that("test file parser parameter setup json",{ 37 | skip_if_package_not_avialable("jsonlite") 38 | init_args = list() 39 | 40 | arg_3 <- noctua:::update_args(file.type = "json", init_args) 41 | expect_equal(arg_3, list(fun = jsonlite::stream_out, verbose = FALSE)) 42 | }) 43 | 44 | default_split <- c(1, 1000001) 45 | custom_split <- seq(1, 2e6, 100000) 46 | custom_chunk <- 100000 47 | 48 | test_that("test data frame is split correctly",{ 49 | # Test connection is using AWS CLI to set profile_name 50 | value = data.table(x = 1:2e6) 51 | max_row = nrow(value) 52 | 53 | vec_1 <- noctua:::dt_split(value, Inf, "csv", T) 54 | vec_2 <- noctua:::dt_split(value, Inf, "tsv", F) 55 | vec_3 <- noctua:::dt_split(value, custom_chunk, "tsv", T) 56 | vec_4 <- noctua:::dt_split(value, custom_chunk, "csv", F) 57 | vec_5 <- noctua:::dt_split(value, Inf, "parquet", T) 58 | vec_6 <- noctua:::dt_split(value, Inf, "parquet", F) 59 | vec_7 <- noctua:::dt_split(value, custom_chunk, "parquet", T) 60 | vec_8 <- noctua:::dt_split(value, custom_chunk, "parquet", F) 61 | vec_9 <- noctua:::dt_split(value, Inf, "json", T) 62 | vec_10 <- noctua:::dt_split(value, Inf, "json", F) 63 | vec_11 <- noctua:::dt_split(value, custom_chunk, "json", T) 64 | vec_12 <- noctua:::dt_split(value, custom_chunk, "json", F) 65 | 66 | expect_equal(vec_1, list(SplitVec = default_split, MaxBatch = 1e+06, MaxRow = max_row)) 67 | expect_equal(vec_2, list(SplitVec = 1, MaxBatch = max_row, MaxRow = max_row)) 68 | expect_equal(vec_3, list(SplitVec = custom_split, MaxBatch = custom_chunk, MaxRow = max_row)) 69 | expect_equal(vec_4, list(SplitVec = custom_split, MaxBatch = custom_chunk, MaxRow = max_row)) 70 | expect_equal(vec_5, list(SplitVec = 1, MaxBatch = max_row, MaxRow = max_row)) 71 | expect_equal(vec_6, list(SplitVec = 1, MaxBatch = max_row, MaxRow = max_row)) 72 | expect_equal(vec_7, list(SplitVec = custom_split, MaxBatch = custom_chunk, MaxRow = max_row)) 73 | expect_equal(vec_8, list(SplitVec = custom_split, MaxBatch = custom_chunk, MaxRow = max_row)) 74 | expect_equal(vec_9, list(SplitVec = 1, MaxBatch = max_row, MaxRow = max_row)) 75 | expect_equal(vec_10, list(SplitVec = 1, MaxBatch = max_row, MaxRow = max_row)) 76 | expect_equal(vec_11, list(SplitVec = custom_split, MaxBatch = custom_chunk, MaxRow = max_row)) 77 | expect_equal(vec_12, list(SplitVec = custom_split, MaxBatch = custom_chunk, MaxRow = max_row)) 78 | }) 79 | -------------------------------------------------------------------------------- /man/sqlCreateTable.Rd: -------------------------------------------------------------------------------- 1 | 
% Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Table.R 3 | \name{sqlCreateTable} 4 | \alias{sqlCreateTable} 5 | \alias{sqlCreateTable,AthenaConnection-method} 6 | \title{Creates query to create a simple Athena table} 7 | \usage{ 8 | \S4method{sqlCreateTable}{AthenaConnection}( 9 | con, 10 | table, 11 | fields, 12 | field.types = NULL, 13 | partition = NULL, 14 | s3.location = NULL, 15 | file.type = c("tsv", "csv", "parquet", "json"), 16 | compress = FALSE, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{con}{A database connection.} 22 | 23 | \item{table}{The table name, passed on to \code{\link[DBI:dbQuoteIdentifier]{dbQuoteIdentifier()}}. Options are: 24 | \itemize{ 25 | \item a character string with the unquoted DBMS table name, 26 | e.g. \code{"table_name"}, 27 | \item a call to \code{\link[DBI:Id]{Id()}} with components to the fully qualified table name, 28 | e.g. \code{Id(schema = "my_schema", table = "table_name")} 29 | \item a call to \code{\link[DBI:SQL]{SQL()}} with the quoted and fully qualified table name 30 | given verbatim, e.g. \code{SQL('"my_schema"."table_name"')} 31 | }} 32 | 33 | \item{fields}{Either a character vector or a data frame. 34 | 35 | A named character vector: Names are column names, values are types. 36 | Names are escaped with \code{\link[DBI:dbQuoteIdentifier]{dbQuoteIdentifier()}}. 37 | Field types are unescaped. 38 | 39 | A data frame: field types are generated using 40 | \code{\link[DBI:dbDataType]{dbDataType()}}.} 41 | 42 | \item{field.types}{Additional field types used to override derived types.} 43 | 44 | \item{partition}{Partition Athena table (needs to be a named list or vector) for example: \code{c(var1 = "2019-20-13")}} 45 | 46 | \item{s3.location}{s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/"). 47 | By default s3.location is set s3 staging directory from \code{\linkS4class{AthenaConnection}} object.} 48 | 49 | \item{file.type}{What file type to store data.frame on s3, noctua currently supports \code{c("tsv", "csv", "parquet", "json")}. Default delimited file type is "tsv", in previous versions 50 | of \code{noctua (=< 1.4.0)} file type "csv" was used as default. The reason for the change is that columns containing \code{Array/JSON} format cannot be written to 51 | Athena due to the separating value ",". This would cause issues with AWS Athena. 52 | \strong{Note:} "parquet" format is supported by the \code{arrow} package and it will need to be installed to utilise the "parquet" format. 53 | "json" format is supported by \code{jsonlite} package and it will need to be installed to utilise the "json" format.} 54 | 55 | \item{compress}{\code{FALSE | TRUE} To determine if to compress file.type. If file type is \code{c("csv", "tsv")} then "gzip" compression is used, for file type "parquet" 56 | "snappy" compression is used. Currently \code{noctua} doesn't support compression for "json" file type.} 57 | 58 | \item{...}{Other arguments used by individual methods.} 59 | } 60 | \value{ 61 | \code{sqlCreateTable} returns data.frame's \code{DDL} in the \code{\link[DBI]{SQL}} format. 62 | } 63 | \description{ 64 | Creates an interface to compose \code{CREATE EXTERNAL TABLE}. 65 | } 66 | \examples{ 67 | \dontrun{ 68 | # Note: 69 | # - Require AWS Account to run below example. 
70 | # - Different connection methods can be used, please see `noctua::dbConnect` documentation 71 | 72 | library(DBI) 73 | 74 | # Demo connection to Athena using profile name 75 | con <- dbConnect(noctua::athena()) 76 | 77 | # Create DDL for iris data.frame 78 | sqlCreateTable(con, "iris", iris, s3.location = "s3://path/to/athena/table") 79 | 80 | # Create DDL for iris data.frame with partition 81 | sqlCreateTable(con, "iris", iris, 82 | partition = "timestamp", 83 | s3.location = "s3://path/to/athena/table" 84 | ) 85 | 86 | # Create DDL for iris data.frame with partition and file.type parquet 87 | sqlCreateTable(con, "iris", iris, 88 | partition = "timestamp", 89 | s3.location = "s3://path/to/athena/table", 90 | file.type = "parquet" 91 | ) 92 | 93 | # Disconnect from Athena 94 | dbDisconnect(con) 95 | } 96 | } 97 | \seealso{ 98 | \code{\link[DBI]{sqlCreateTable}} 99 | } 100 | -------------------------------------------------------------------------------- /tests/testthat/test-datatransfer-datatable.R: -------------------------------------------------------------------------------- 1 | context("data transfer data.table") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | s3.location1 <- paste0(Sys.getenv("noctua_s3_tbl"),"test_df/") 9 | s3.location2 <- Sys.getenv("noctua_s3_tbl") 10 | 11 | df <- data.frame(w = as.POSIXct((Sys.time()-9):Sys.time(), origin = "1970-01-01", tz = "UTC"), 12 | x = 1:10, 13 | y = c(letters[1:8], c(" \\t\\t\\n 123 \" \\t\\t\\n ", ",15 \"")), 14 | z = sample(c(TRUE, FALSE), 10, replace = T), 15 | stringsAsFactors = F) 16 | 17 | test_that("Testing data transfer between R and athena datatable", { 18 | skip_if_no_env() 19 | 20 | noctua_options() 21 | # Test connection is using AWS CLI to set profile_name 22 | con <- dbConnect(athena(), 23 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 24 | 25 | # testing if bigint is transferred correctly 26 | df2 <- data.frame(var1 = sample(letters, 10, replace = T), 27 | var2 = bit64::as.integer64(1:10), 28 | stringsAsFactors = F) 29 | 30 | DATE <- Sys.Date() 31 | dbWriteTable(con, "test_df", df, overwrite = T, partition = c("timesTamp" = format(DATE, "%Y%m%d")), s3.location = s3.location1) 32 | dbWriteTable(con, "test_df2", df, 33 | overwrite = T, 34 | partition = c("year" = format(DATE, "%Y"), 35 | "month" = format(DATE, "%m"), 36 | "DAY" = format(DATE, "%d")), 37 | s3.location = s3.location2) 38 | 39 | dbWriteTable(con, "df_bigint", df2, overwrite = T, s3.location = s3.location2) 40 | dbWriteTable(con, "mtcars2", mtcars, overwrite = T, compress = T) # mtcars used to test data.frame with row.names 41 | 42 | # if data.table is available in namespace result returned as data.table 43 | test_df <- as.data.frame(dbGetQuery(con, paste0("select w, x, y, z from test_df where timestamp ='", format(DATE, "%Y%m%d"),"'"))) 44 | test_df2 <- as.data.frame(dbGetQuery(con, paste0("select w, x, y, z from test_df2 where year = '", format(DATE, "%Y"), "' and month = '",format(DATE, "%m"), "' and day = '", format(DATE, "%d"),"'"))) 45 | test_df3 <- as.data.frame(dbGetQuery(con, "select * from df_bigint")) 46 | test_df4 <- as.data.frame(dbGetQuery(con, "select * from mtcars2")) 47 | 48 | expect_equal(test_df, df) 49 | expect_equal(test_df2, df) 50 | expect_equal(test_df3, df2) 51 | expect_equal(test_df4,
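# test_df4 is compared against sqlData(con, mtcars) rather than mtcars itself:
# sqlData() applies the same transformations (e.g. converting row.names into a
# column) that were applied when mtcars2 was written to Athena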
sqlData(con, mtcars)) 52 | }) 53 | 54 | test_that("Testing data transfer between R and athena json file", { 55 | skip_if_no_env() 56 | skip_if_package_not_avialable("jsonlite") 57 | 58 | con <- dbConnect(athena(), 59 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 60 | 61 | if(dbExistsTable(con, "test_df3")){ 62 | dbRemoveTable(con, "test_df3", confirm = T) 63 | } 64 | 65 | dbWriteTable(con, "test_df3", df, overwrite = T, file.type = "json") 66 | 67 | test_df <- as.data.frame(dbGetQuery(con, "select * from test_df3")) 68 | expect_equal(test_df, df) 69 | }) 70 | 71 | test_that("Test unload athena query data.table",{ 72 | skip_if_no_env() 73 | skip_if_package_not_avialable("arrow") 74 | 75 | con <- dbConnect( 76 | athena(), 77 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 78 | 79 | df = dbGetQuery(con, "select 1 as n", unload = T) 80 | 81 | expect_s3_class(df, "data.table") 82 | expect_equal(df$n, 1) 83 | }) 84 | 85 | test_that("Write can handle an empty data frame", { 86 | skip_if_no_env() 87 | 88 | noctua_options() 89 | # Test connection is using AWS CLI to set profile_name 90 | con <- dbConnect(athena(), 91 | s3_staging_dir = Sys.getenv("noctua_s3_query")) 92 | df <- data.frame(x = integer()) 93 | 94 | if (dbExistsTable(con, "test_df")) { 95 | dbRemoveTable(con, "test_df", confirm = T) 96 | } 97 | 98 | # can create a new table 99 | dbWriteTable(con, "test_df", df) 100 | expect_equal(as.data.frame(dbReadTable(con, "test_df")), df) 101 | 102 | dbWriteTable(con, "test_df", df, append = TRUE) 103 | expect_equal(as.data.frame(dbReadTable(con, "test_df")), df) 104 | }) 105 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /LICENSE-text.html 8 | 9 | 10 | /articles/aws_athena_query_caching.html 11 | 12 | 13 | /articles/aws_athena_unload.html 14 | 15 | 16 | /articles/aws_s3_backend.html 17 | 18 | 19 | /articles/changing_backend_file_parser.html 20 | 21 | 22 | /articles/convert_and_save_cost.html 23 | 24 | 25 | /articles/getting_started.html 26 | 27 | 28 | /articles/how_to_retry.html 29 | 30 | 31 | /articles/index.html 32 | 33 | 34 | /authors.html 35 | 36 | 37 | /index.html 38 | 39 | 40 | /issue_template.html 41 | 42 | 43 | /news/index.html 44 | 45 | 46 | /reference/AthenaConnection.html 47 | 48 | 49 | /reference/AthenaDriver.html 50 | 51 | 52 | /reference/AthenaWriteTables.html 53 | 54 | 55 | /reference/Query.html 56 | 57 | 58 | /reference/assume_role.html 59 | 60 | 61 | /reference/athena.html 62 | 63 | 64 | /reference/backend_dbplyr.html 65 | 66 | 67 | /reference/backend_dbplyr_v1.html 68 | 69 | 70 | /reference/backend_dbplyr_v2.html 71 | 72 | 73 | /reference/dbClearResult.html 74 | 75 | 76 | /reference/dbColumnInfo.html 77 | 78 | 79 | /reference/dbConnect-AthenaDriver-method.html 80 | 81 | 82 | /reference/dbConvertTable.html 83 | 84 | 85 | /reference/dbDataType.html 86 | 87 | 88 | /reference/dbDisconnect.html 89 | 90 | 91 | /reference/dbExistsTable.html 92 | 93 | 94 | /reference/dbFetch.html 95 | 96 | 97 | /reference/dbGetInfo.html 98 | 99 | 100 | /reference/dbGetPartition.html 101 | 102 | 103 | /reference/dbGetQuery.html 104 | 105 | 106 | /reference/dbGetStatement.html 107 | 108 | 109 | /reference/dbGetTables.html 110 | 111 | 112 | /reference/dbHasCompleted.html 113 | 114 | 115 | /reference/dbIsValid.html 116 | 117 | 118 | /reference/dbListFields.html 119 | 120 | 121 | /reference/dbListTables.html 122 | 123 | 124 | 
/reference/dbQuote.html 125 | 126 | 127 | /reference/dbRemoveTable.html 128 | 129 | 130 | /reference/dbShow.html 131 | 132 | 133 | /reference/dbStatistics.html 134 | 135 | 136 | /reference/db_compute.html 137 | 138 | 139 | /reference/db_connection_describe.html 140 | 141 | 142 | /reference/db_copy_to.html 143 | 144 | 145 | /reference/db_desc.html 146 | 147 | 148 | /reference/db_save_query.html 149 | 150 | 151 | /reference/dbplyr_edition.html 152 | 153 | 154 | /reference/index.html 155 | 156 | 157 | /reference/noctua-package.html 158 | 159 | 160 | /reference/noctua_options.html 161 | 162 | 163 | /reference/session_token.html 164 | 165 | 166 | /reference/sqlCreateTable.html 167 | 168 | 169 | /reference/sqlData.html 170 | 171 | 172 | /reference/sql_translate_env.html 173 | 174 | 175 | /reference/work_group.html 176 | 177 | 178 | -------------------------------------------------------------------------------- /R/fetch_utils.R: -------------------------------------------------------------------------------- 1 | #' @include utils.R 2 | 3 | .fetch_n <- function(res, result_class, n) { 4 | # assign token from AthenaResult class 5 | token <- res@info[["NextToken"]] 6 | 7 | if (length(token) == 0) n <- as.integer(n + 1) 8 | chunk <- as.integer(n) 9 | if (n > 1000L) chunk <- 1000L 10 | 11 | iterate <- 1:ceiling(n / chunk) 12 | 13 | # create empty list shell 14 | dt_list <- list() 15 | length(dt_list) <- max(iterate) 16 | 17 | for (i in iterate) { 18 | if (i == max(iterate)) chunk <- as.integer(n - (i - 1) * chunk) 19 | 20 | # get chunk with retry api call if call fails 21 | retry_api_call(result <- res@connection@ptr$Athena$get_query_results( 22 | QueryExecutionId = res@info[["QueryExecutionId"]], 23 | NextToken = token, 24 | MaxResults = chunk 25 | )) 26 | 27 | # process returned list 28 | output <- do.call(rbind, result[["ResultSet"]][["Rows"]]) 29 | suppressWarnings(staging_dt <- rbindlist(output, use.names = FALSE)) 30 | 31 | # remove colnames from first row 32 | if (i == 1 && length(token) == 0) { 33 | staging_dt <- staging_dt[-1, ] 34 | } 35 | 36 | # ensure rownames are not set 37 | rownames(staging_dt) <- NULL 38 | 39 | # added staging data.table to list 40 | dt_list[[i]] <- staging_dt 41 | 42 | # if token hasn't changed or if no more tokens are available then break loop 43 | if ((length(token) != 0 && 44 | token == result[["NextToken"]]) || 45 | length(result[["NextToken"]]) == 0) { 46 | break 47 | } else { 48 | token <- result[["NextToken"]] 49 | } 50 | } 51 | 52 | # combined all lists together 53 | dt <- rbindlist(dt_list, use.names = FALSE) 54 | 55 | # Update last token in s4 class 56 | res@info[["NextToken"]] <- result[["NextToken"]] 57 | 58 | # replace names with actual names 59 | Names <- do.call(rbind, result_class)[,"Name"] 60 | colnames(dt) <- as.character(Names) 61 | 62 | # convert data.table to tibble if using vroom as backend 63 | if (inherits(athena_option_env[["file_parser"]], "athena_vroom")) { 64 | as_tibble <- pkg_method("as_tibble", "tibble") 65 | dt <- as_tibble(dt) 66 | } 67 | 68 | return(dt) 69 | } 70 | 71 | .fetch_unload <- function(res) { 72 | result_info <- split_s3_uri(res@connection@info[["s3_staging"]]) 73 | result_info[["key"]] <- gsub("^/", "", file.path(gsub("/$", "", result_info[["key"]]), res@info[["UnloadDir"]])) 74 | 75 | all_keys <- list() 76 | token <- NULL 77 | # Get all s3 objects linked to table 78 | i <- 1 79 | while (is.null(token) || length(token) != 0) { 80 | objects <- res@connection@ptr$S3$list_objects_v2( 81 | Bucket = result_info[["bucket"]], 
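# S3 list_objects_v2() returns at most 1000 keys per response; the surrounding
# while loop feeds the previous NextContinuationToken back in as
# ContinuationToken to page through all keys under the unload prefix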
Prefix = result_info[["key"]], ContinuationToken = token 82 | ) 83 | token <- objects$NextContinuationToken 84 | all_keys[[i]] <- lapply(objects$Contents, function(x) list(Key = x[["Key"]])) 85 | i <- i + 1 86 | } 87 | all_keys <- unlist(all_keys, recursive = FALSE, use.names = FALSE) 88 | 89 | if (!requireNamespace("arrow", quietly = TRUE)) { 90 | stop("unload methods requires the `arrow` package, please install it first and try again", 91 | call. = F 92 | ) 93 | } 94 | 95 | df_list <- lapply(all_keys, function(x) { 96 | arrow::read_parquet(res@connection@ptr$S3$get_object( 97 | Bucket = result_info[["bucket"]], 98 | Key = x 99 | )$Body) 100 | }) 101 | 102 | # convert data.table to tibble if using vroom as backend 103 | if (inherits(athena_option_env[["file_parser"]], "athena_vroom")) { 104 | if (!requireNamespace("dplyr", quietly = TRUE)) { 105 | stop("`dplyr` package is required, please install it first and try again", call. = F) 106 | } 107 | combine <- function(x) dplyr::bind_rows(x) 108 | } else { 109 | combine <- function(x) rbindlist(x) 110 | } 111 | return(combine(df_list)) 112 | } 113 | 114 | .fetch_file <- function(res, result_class) { 115 | # create temp file 116 | File <- tempfile() 117 | on.exit(unlink(File)) 118 | 119 | result_info <- split_s3_uri(res@info[["OutputLocation"]]) 120 | 121 | # connect to s3 and create a bucket object 122 | # download athena output 123 | retry_api_call(res@connection@ptr$S3$download_file( 124 | Bucket = result_info[["bucket"]], Key = result_info[["key"]], Filename = File 125 | )) 126 | 127 | if (grepl("\\.csv$", result_info[["key"]])) { 128 | output <- athena_read( 129 | athena_option_env[["file_parser"]], 130 | File, 131 | result_class, 132 | res@connection 133 | ) 134 | } else { 135 | output <- athena_read_lines( 136 | athena_option_env[["file_parser"]], 137 | File, 138 | result_class, 139 | res@connection 140 | ) 141 | } 142 | return(output) 143 | } 144 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | # helper function to skip test if noctua unit test environment variables not set 2 | skip_if_no_env <- function(){ 3 | have_arn <- Sys.getenv("noctua_arn") != "" 4 | have_query <- is.s3_uri(Sys.getenv("noctua_s3_query")) 5 | have_tbl <- is.s3_uri(Sys.getenv("noctua_s3_tbl")) 6 | if (!all(have_arn, have_query, have_tbl)) 7 | skip("Environment variables are not set for testing") 8 | } 9 | 10 | # helper function to skip tests if we don't have the suggested package 11 | skip_if_package_not_avialable <- function(pkg) { 12 | if (!nzchar(system.file(package = pkg))) 13 | skip(sprintf("`%s` not available for testing", pkg)) 14 | } 15 | 16 | # expected athena ddl's 17 | # expected athena ddl's 18 | tbl_ddl <- 19 | list(tbl1 = 20 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 21 | `x` INT, 22 | `y` STRING 23 | ) 24 | ROW FORMAT DELIMITED 25 | FIELDS TERMINATED BY ',' 26 | LINES TERMINATED BY ", gsub("_","","'\\_n'"), 27 | "\nLOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/' 28 | TBLPROPERTIES (\"skip.header.line.count\"=\"1\");")), 29 | tbl2 = 30 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 31 | `x` INT, 32 | `y` STRING 33 | ) 34 | ROW FORMAT DELIMITED 35 | FIELDS TERMINATED BY ',' 36 | LINES TERMINATED BY ", gsub("_","","'\\_n'"), 37 | "\nLOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/' 38 | TBLPROPERTIES 
(\"skip.header.line.count\"=\"1\", 39 | \t\t'compressionType'='gzip');")), 40 | tbl3 = 41 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 42 | `x` INT, 43 | `y` STRING 44 | ) 45 | ROW FORMAT DELIMITED 46 | \tFIELDS TERMINATED BY ' ' 47 | \tLINES TERMINATED BY ", gsub("_","","'\\_n'")," 48 | LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/' 49 | TBLPROPERTIES (\"skip.header.line.count\"=\"1\");")), 50 | tbl4 = 51 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 52 | `x` INT, 53 | `y` STRING 54 | ) 55 | ROW FORMAT DELIMITED 56 | \tFIELDS TERMINATED BY ' ' 57 | \tLINES TERMINATED BY ", gsub("_","","'\\_n'")," 58 | LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/' 59 | TBLPROPERTIES (\"skip.header.line.count\"=\"1\", 60 | \t\t'compressionType'='gzip');")), 61 | tbl5 = 62 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 63 | `x` INT, 64 | `y` STRING 65 | ) 66 | STORED AS PARQUET 67 | LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'\n;")), 68 | tbl6 = 69 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 70 | `x` INT, 71 | `y` STRING 72 | ) 73 | PARTITIONED BY (`timestamp` STRING) 74 | STORED AS PARQUET 75 | LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/' 76 | tblproperties (\"parquet.compress\"=\"SNAPPY\");")), 77 | tbl7 = 78 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 79 | `x` INT, 80 | `y` STRING 81 | ) 82 | ROW FORMAT serde 'org.apache.hive.hcatalog.data.JsonSerDe' 83 | LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'\n")), 84 | tbl8 = 85 | DBI::SQL(paste0("CREATE EXTERNAL TABLE `AwsDataCatalog`.`default`.`test_df` ( 86 | `x` INT, 87 | `y` STRING 88 | ) 89 | PARTITIONED BY (`timestamp` STRING) 90 | ROW FORMAT serde 'org.apache.hive.hcatalog.data.JsonSerDe' 91 | LOCATION '",Sys.getenv("noctua_s3_tbl"),"test_df/default/test_df/'\n"))) 92 | 93 | 94 | # static Athena Query Request Tests 95 | athena_test_req1 <- 96 | list(OutputLocation = Sys.getenv("noctua_s3_query"), 97 | EncryptionConfiguration = list(EncryptionOption = "SSE_S3", 98 | KmsKey = "test_key")) 99 | athena_test_req2 <- 100 | list(OutputLocation = Sys.getenv("noctua_s3_query"), 101 | EncryptionConfiguration = list(EncryptionOption = "SSE_S3")) 102 | athena_test_req3 <- list(OutputLocation = Sys.getenv("noctua_s3_query")) 103 | athena_test_req4 <- list(OutputLocation = Sys.getenv("noctua_s3_query")) 104 | 105 | show_ddl <- DBI::SQL(paste0('CREATE EXTERNAL TABLE `default.test_df`(\n `w` timestamp, \n `x` int, \n `y` string, \n `z` boolean)\nPARTITIONED BY ( \n `timestamp` string)\nROW FORMAT DELIMITED \n FIELDS TERMINATED BY \'\\t\' \n LINES TERMINATED BY \'\\n\' \nSTORED AS INPUTFORMAT \n \'org.apache.hadoop.mapred.TextInputFormat\' \nOUTPUTFORMAT \n \'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\'\nLOCATION\n \'' ,Sys.getenv("noctua_s3_tbl"), 'test_df/default/test_df\'\nTBLPROPERTIES (\n \'skip.header.line.count\'=\'1\')')) 106 | 107 | expected_stat_output = c( 108 | "EngineExecutionTimeInMillis", 109 | "DataScannedInBytes", 110 | "DataManifestLocation", 111 | "TotalExecutionTimeInMillis", 112 | "QueryQueueTimeInMillis", 113 | "QueryPlanningTimeInMillis", 114 | "ServiceProcessingTimeInMillis" 115 | ) 116 | -------------------------------------------------------------------------------- /man/db_compute.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_integration.R 3 | \name{db_compute} 4 | \alias{db_compute} 5 | \alias{db_compute.AthenaConnection} 6 | \alias{sql_query_save.AthenaConnection} 7 | \title{S3 implementation of \code{db_compute} for Athena} 8 | \usage{ 9 | db_compute.AthenaConnection( 10 | con, 11 | table, 12 | sql, 13 | ..., 14 | overwrite = FALSE, 15 | temporary = FALSE, 16 | unique_indexes = list(), 17 | indexes = list(), 18 | analyze = TRUE, 19 | in_transaction = FALSE, 20 | partition = NULL, 21 | s3_location = NULL, 22 | file_type = c("csv", "tsv", "parquet"), 23 | compress = FALSE 24 | ) 25 | 26 | sql_query_save.AthenaConnection( 27 | con, 28 | sql, 29 | name, 30 | temporary = TRUE, 31 | ..., 32 | partition = NULL, 33 | s3_location = NULL, 34 | file_type = NULL, 35 | compress = FALSE 36 | ) 37 | } 38 | \arguments{ 39 | \item{con}{A \link{dbConnect} object, as returned by \code{dbConnect()}} 40 | 41 | \item{table}{Table name, if left default noctua will use the default from \code{dplyr}'s \code{compute} function.} 42 | 43 | \item{sql}{SQL code to be sent to the data} 44 | 45 | \item{...}{passes \code{noctua} table creation parameters: \code{file_type},\code{s3_location},\code{partition}.} 46 | 47 | \item{overwrite}{Allows overwriting the destination table. Cannot be \code{TRUE} if \code{append} is also \code{TRUE}.} 48 | 49 | \item{temporary}{if TRUE, will create a temporary table that is local to this connection and will be automatically deleted when the connection expires} 50 | 51 | \item{unique_indexes}{a list of character vectors. Each element of the list will create a new unique index over the specified column(s). Duplicate rows will result in failure.} 52 | 53 | \item{indexes}{a list of character vectors. Each element of the list will create a new index.} 54 | 55 | \item{analyze}{if TRUE (the default), will automatically ANALYZE the new table so that the query optimiser has useful information.} 56 | 57 | \item{in_transaction}{Should the table creation be wrapped in a transaction? This typically makes things faster, but you may want to suppress if the database doesn't support transactions, or you're wrapping in a transaction higher up (and your database doesn't support nested transactions.)} 58 | 59 | \item{partition}{Partition Athena table (needs to be a named list or vector) for example: \code{c(var1 = "2019-20-13")}} 60 | 61 | \item{s3_location}{s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/")} 62 | 63 | \item{file_type}{What file type to store data.frame on s3, noctua currently supports \code{c("tsv", "csv", "parquet")}. Default delimited file type is "tsv", in previous versions 64 | of \code{noctua (=< 1.4.0)} file type "csv" was used as default. The reason for the change is that columns containing \code{Array/JSON} format cannot be written to 65 | Athena due to the separating value ",". This would cause issues with AWS Athena. 66 | \strong{Note:} "parquet" format is supported by the \code{arrow} package and it will need to be installed to utilise the "parquet" format.} 67 | 68 | \item{compress}{\code{FALSE | TRUE} To determine if to compress file.type. If file type is \code{c("csv", "tsv")} then "gzip" compression is used, for file type "parquet" 69 | "snappy" compression is used. 
70 | \itemize{ 71 | \item{\code{file_type:} What file type to store data.frame on s3, noctua currently supports \code{c("NULL","csv", "parquet", "json")}. 72 | \code{"NULL"} will let Athena set the file_type for you.} 73 | \item{\code{s3_location:} s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/")} 74 | \item{\code{partition:} Partition Athena table, needs to be a partitioned variable from the previous table.}}} 75 | 76 | \item{name}{Table name, if left default noctua will use the default from \code{dplyr}'s \code{compute} function.} 77 | } 78 | \value{ 79 | \code{db_compute} returns the table name 80 | } 81 | \description{ 82 | This is a backend function for dplyr's \code{compute} function. Users won't be required to access and run this function. 83 | } 84 | \examples{ 85 | \dontrun{ 86 | # Note: 87 | # - Require AWS Account to run below example. 88 | # - Different connection methods can be used please see `noctua::dbConnect` documentation 89 | 90 | library(DBI) 91 | library(dplyr) 92 | 93 | # Demo connection to Athena using profile name 94 | con <- dbConnect(noctua::athena()) 95 | 96 | # Write data.frame to Athena table 97 | copy_to(con, mtcars, 98 | s3_location = "s3://mybucket/data/" 99 | ) 100 | 101 | # Write Athena table from tbl_sql 102 | athena_mtcars <- tbl(con, "mtcars") 103 | mtcars_filter <- athena_mtcars \%>\% filter(gear >= 4) 104 | 105 | # create athena with unique table name 106 | mtcars_filter \%>\% 107 | compute() 108 | 109 | # create athena with specified name and s3 location 110 | mtcars_filter \%>\% 111 | compute("mtcars_filter", 112 | s3_location = "s3://mybucket/mtcars_filter/" 113 | ) 114 | 115 | # Disconnect from Athena 116 | dbDisconnect(con) 117 | } 118 | } 119 | \seealso{ 120 | \link{AthenaWriteTables} 121 | } 122 | -------------------------------------------------------------------------------- /tests/testthat/test-metadata.R: -------------------------------------------------------------------------------- 1 | context("Athena Metadata") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | df_col_info <- data.frame( 9 | field_name = c("w","x","y", "z", "timestamp"), 10 | type = c("timestamp", "integer", "varchar", "boolean", "varchar"), 11 | stringsAsFactors = F 12 | ) 13 | con_info = c( 14 | "profile_name", 15 | "s3_staging", 16 | "db.catalog", 17 | "dbms.name", 18 | "work_group", 19 | "poll_interval", 20 | "encryption_option", 21 | "kms_key", 22 | "expiration", 23 | "keyboard_interrupt", 24 | "region_name", 25 | "paws", 26 | "noctua", 27 | "timezone", 28 | "endpoint_override" 29 | ) 30 | col_info_exp = c("w","x","y", "z", "timestamp") 31 | 32 | test_that("Returning meta data",{ 33 | skip_if_no_env() 34 | # Test connection is using AWS CLI to set profile_name 35 | con = dbConnect(athena()) 36 | 37 | res1 = dbExecute(con, "select * from test_df") 38 | res2 = dbSendStatement(con, "select * from test_df") 39 | res_out2 = dbHasCompleted(res2) 40 | res_out1 = dbHasCompleted(res1) 41 | res_info = dbGetInfo(res1) 42 | res_stat = dbStatistics(res1) 43 | column_info1 = dbColumnInfo(res1) 44 | column_info2 = dbListFields(con, "test_df") 45 | con_info_exp = names(dbGetInfo(con)) 46 | list_tbl1 = any(grepl("test_df", dbListTables(con, schema="default"))) 47 | list_tbl2 = nrow(dbGetTables(con,
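# dbGetTables() returns a data.table (with Schema/TableName columns), so the
# result can be filtered directly with data.table syntax such as
# [TableName == "test_df"], as the expectations below rely on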
schema="default")[TableName == "test_df"]) == 1 48 | list_tbl3 = nrow(dbGetTables(con)[Schema == "default" & TableName == "test_df"]) == 1 49 | list_tbl4 = any(grepl("test_df", dbListTables(con))) 50 | partition1 = grepl("timestamp", dbGetPartition(con, "test_df")[[1]]) 51 | 52 | partition2 = names(dbGetPartition(con, "test_df", .format = T)) == "timestamp" 53 | noctua_options("vroom") 54 | partition3 = names(dbGetPartition(con, "test_df", .format = T)) == "timestamp" 55 | 56 | noctua_options() 57 | db_show_ddl = gsub(", \n 'transient_lastDdlTime'.*",")", dbShow(con, "test_df")) 58 | db_info = dbGetInfo(con) 59 | 60 | name1 <- db_detect(con, "table1") 61 | name2 <- db_detect(con, "mydatabase.table1") 62 | name3 <- db_detect(con, "mycatalog.mydatabase.table1") 63 | 64 | expect_equal(dbGetStatement(res2), "select * from test_df") 65 | 66 | dbClearResult(res1) 67 | dbDisconnect(con) 68 | 69 | expect_equal(column_info1, df_col_info) 70 | expect_equal(column_info2, col_info_exp) 71 | expect_equal(con_info[order(con_info)], con_info_exp[order(con_info_exp)]) 72 | expect_true(list_tbl1) 73 | expect_true(list_tbl2) 74 | expect_true(list_tbl3) 75 | expect_true(list_tbl4) 76 | expect_true(partition1) 77 | expect_true(partition2) 78 | expect_true(partition3) 79 | expect_equal(db_show_ddl, show_ddl) 80 | expect_warning(noctua:::time_check(Sys.time() + 10)) 81 | expect_error(noctua:::pkg_method("made_up", "made_up_pkg")) 82 | expect_false(noctua:::is.s3_uri(NULL)) 83 | expect_true(is.list(db_info)) 84 | expect_error(dbGetInfo(con)) 85 | expect_true(res_out1) 86 | expect_true(inherits(res_out2, "logical")) 87 | expect_equal( 88 | sort(names(res_info)), 89 | c("OutputLocation", "Query", "QueryExecutionId", "StateChangeReason", "StatementType", 90 | "Statistics", "Status", "UnloadDir", "WorkGroup")) 91 | expect_true(is.list(res_stat)) 92 | expect_error(con_error_msg(res1, "dummy message"), "dummy message") 93 | expect_equal(name1, list("db.catalog" = "AwsDataCatalog", "dbms.name" = "default", "table" = "table1")) 94 | expect_equal(name2, list("db.catalog" = "AwsDataCatalog", "dbms.name" = "mydatabase", "table" = "table1")) 95 | expect_equal(name3, list("db.catalog" = "mycatalog", "dbms.name" = "mydatabase", "table" = "table1")) 96 | }) 97 | 98 | test_that("test connection when timezone is NULL", { 99 | skip_if_no_env() 100 | 101 | con <- dbConnect(athena(), timezone = NULL) 102 | 103 | expect_equal(con@info$timezone, "UTC") 104 | }) 105 | 106 | test_that("test endpoints", { 107 | skip_if_no_env() 108 | 109 | con1 = dbConnect(athena(), endpoint_override = "https://athena.eu-west-2.amazonaws.com/") 110 | con2 = dbConnect( 111 | athena(), 112 | region_name = "us-east-2", 113 | 114 | # Change default endpoints: 115 | # athena: "https://athena.us-east-2.amazonaws.com" 116 | # s3: "https://s3.us-east-2.amazonaws.com" 117 | # glue: "https://glue.us-east-2.amazonaws.com" 118 | 119 | endpoint_override = list( 120 | athena = "https://athena-fips.us-east-2.amazonaws.com/", 121 | s3 = "https://s3-fips.us-east-2.amazonaws.com/" 122 | ) 123 | ) 124 | 125 | expect_equal(as.character(con1@ptr$Athena$.internal$config$endpoint), "https://athena.eu-west-2.amazonaws.com/") 126 | expect_equal(as.character(con2@ptr$Athena$.internal$config$endpoint), "https://athena-fips.us-east-2.amazonaws.com/") 127 | expect_equal(as.character(con2@ptr$S3$.internal$config$endpoint), "https://s3-fips.us-east-2.amazonaws.com/") 128 | }) 129 | -------------------------------------------------------------------------------- /man/db_copy_to.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_integration.R 3 | \name{db_copy_to} 4 | \alias{db_copy_to} 5 | \alias{db_copy_to.AthenaConnection} 6 | \title{S3 implementation of \code{db_copy_to} for Athena} 7 | \usage{ 8 | db_copy_to.AthenaConnection( 9 | con, 10 | table, 11 | values, 12 | ..., 13 | partition = NULL, 14 | s3_location = NULL, 15 | file_type = c("csv", "tsv", "parquet"), 16 | compress = FALSE, 17 | max_batch = Inf, 18 | overwrite = FALSE, 19 | append = FALSE, 20 | types = NULL, 21 | temporary = TRUE, 22 | unique_indexes = NULL, 23 | indexes = NULL, 24 | analyze = TRUE, 25 | in_transaction = FALSE 26 | ) 27 | } 28 | \arguments{ 29 | \item{con}{A \link{dbConnect} object, as returned by \code{dbConnect()}} 30 | 31 | \item{table}{A character string specifying a table name. Names will be 32 | automatically quoted so you can use any sequence of characters, not 33 | just any valid bare table name.} 34 | 35 | \item{values}{A data.frame to write to the database.} 36 | 37 | \item{...}{other parameters currently not supported in noctua} 38 | 39 | \item{partition}{Partition Athena table (needs to be a named list or vector) for example: \code{c(var1 = "2019-20-13")}} 40 | 41 | \item{s3_location}{s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/")} 42 | 43 | \item{file_type}{What file type to store data.frame on s3, noctua currently supports \code{c("tsv", "csv", "parquet")}. Default delimited file type is "tsv", in previous versions 44 | of \code{noctua (=< 1.4.0)} file type "csv" was used as default. The reason for the change is that columns containing \code{Array/JSON} format cannot be written to 45 | Athena due to the separating value ",". This would cause issues with AWS Athena. 46 | \strong{Note:} "parquet" format is supported by the \code{arrow} package and it will need to be installed to utilise the "parquet" format.} 47 | 48 | \item{compress}{\code{FALSE | TRUE} To determine if to compress file.type. If file type is \code{c("csv", "tsv")} then "gzip" compression is used, for file type "parquet" 49 | "snappy" compression is used.} 50 | 51 | \item{max_batch}{Split the data frame by max number of rows i.e. 100,000 so that multiple files can be uploaded into AWS S3. By default when compression 52 | is set to \code{TRUE} and file.type is "csv" or "tsv" max.batch will split data.frame into 20 batches. This is to help the 53 | performance of AWS Athena when working with files compressed in "gzip" format. \code{max.batch} will not split the data.frame 54 | when loading file in parquet format. For more information please go to \href{https://github.com/DyfanJones/RAthena/issues/36}{link}} 55 | 56 | \item{overwrite}{Allows overwriting the destination table. Cannot be \code{TRUE} if \code{append} is also \code{TRUE}.} 57 | 58 | \item{append}{Allow appending to the destination table. Cannot be \code{TRUE} if \code{overwrite} is also \code{TRUE}. Existing Athena DDL file type will be retained 59 | and used when uploading data to AWS Athena. 
If parameter \code{file.type} doesn't match AWS Athena DDL file type a warning message will be created 60 | notifying the user and \code{noctua} will use the file type for the Athena DDL.} 61 | 62 | \item{types}{Additional field types used to override derived types.} 63 | 64 | \item{temporary}{if TRUE, will create a temporary table that is local to this connection and will be automatically deleted when the connection expires} 65 | 66 | \item{unique_indexes}{a list of character vectors. Each element of the list will create a new unique index over the specified column(s). Duplicate rows will result in failure.} 67 | 68 | \item{indexes}{a list of character vectors. Each element of the list will create a new index.} 69 | 70 | \item{analyze}{if TRUE (the default), will automatically ANALYZE the new table so that the query optimiser has useful information.} 71 | 72 | \item{in_transaction}{Should the table creation be wrapped in a transaction? This typically makes things faster, but you may want to suppress if the database doesn't support transactions, or you're wrapping in a transaction higher up (and your database doesn't support nested transactions.)} 73 | } 74 | \value{ 75 | db_copy_to returns the table name 76 | } 77 | \description{ 78 | This is an Athena method for dbplyr function \code{db_copy_to} to create an Athena table from a \code{data.frame}. 79 | } 80 | \examples{ 81 | \dontrun{ 82 | # Note: 83 | # - Require AWS Account to run below example. 84 | # - Different connection methods can be used, please see `noctua::dbConnect` documentation 85 | 86 | library(DBI) 87 | library(dplyr) 88 | 89 | # Demo connection to Athena using profile name 90 | con <- dbConnect(noctua::athena()) 91 | 92 | # List existing tables in Athena 93 | dbListTables(con) 94 | 95 | # Write data.frame to Athena table 96 | copy_to(con, mtcars, 97 | s3_location = "s3://mybucket/data/" 98 | ) 99 | 100 | # Checking if uploaded table exists in Athena 101 | dbExistsTable(con, "mtcars") 102 | 103 | # Write Athena table from tbl_sql 104 | athena_mtcars <- tbl(con, "mtcars") 105 | mtcars_filter <- athena_mtcars \%>\% filter(gear >= 4) 106 | 107 | copy_to(con, mtcars_filter) 108 | 109 | # Checking if uploaded table exists in Athena 110 | dbExistsTable(con, "mtcars_filter") 111 | 112 | # Disconnect from Athena 113 | dbDisconnect(con) 114 | } 115 | } 116 | \seealso{ 117 | \link{AthenaWriteTables} 118 | } 119 | -------------------------------------------------------------------------------- /tests/testthat/test-view.R: -------------------------------------------------------------------------------- 1 | context("rstudio viewer") 2 | 3 | # NOTE System variable format returned for Unit tests: 4 | # Sys.getenv("noctua_arn"): "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name" 5 | # Sys.getenv("noctua_s3_query"): "s3://path/to/query/bucket/" 6 | # Sys.getenv("noctua_s3_tbl"): "s3://path/to/bucket/" 7 | 8 | test_that("Check if Athena list object types is formatted correctly",{ 9 | skip_if_no_env() 10 | 11 | con <- dbConnect(athena()) 12 | 13 | output <- noctua:::AthenaListObjectTypes.default(con) 14 | 15 | expect_equal( 16 | output, 17 | list( 18 | catalog = list( 19 | contains = list( 20 | schema = list( 21 | contains = list( 22 | table = list(contains = "data"), 23 | view = list(contains = "data") 24 | ) 25 | ) 26 | ) 27 | ) 28 | ) 29 | ) 30 | }) 31 | 32 | test_that("Check if Athena list object is formatted correctly",{ 33 | skip_if_no_env() 34 | 35 | con <- dbConnect(athena()) 36 | 37 | output1 <-
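# AthenaListObjects() backs the RStudio Connections pane: it returns a
# name/type data.frame, either at the top level or, when schema= is given,
# for the tables and views inside that schema (as the expectations below check)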
noctua:::AthenaListObjects.AthenaConnection(con) 38 | output2 <- noctua:::AthenaListObjects.AthenaConnection(con, schema = "default") 39 | 40 | expect_true(inherits(output1, "data.frame")) 41 | expect_equal(names(output1), c("name", "type")) 42 | 43 | expect_true(inherits(output2, "data.frame")) 44 | expect_equal(names(output2), c("name", "type")) 45 | }) 46 | 47 | test_that("Check computer host name output type",{ 48 | skip_if_no_env() 49 | 50 | con <- dbConnect(athena()) 51 | out <- noctua:::computeHostName(con) 52 | expect_true(is.character(out)) 53 | }) 54 | 55 | test_that("Check computer display name output type",{ 56 | skip_if_no_env() 57 | 58 | con <- dbConnect(athena()) 59 | 60 | out <- noctua:::computeDisplayName(con) 61 | expect_true(is.character(out)) 62 | }) 63 | 64 | test_that("Check if Athena list column formatting",{ 65 | skip_if_no_env() 66 | 67 | con <- dbConnect(athena()) 68 | 69 | output1 <- noctua:::AthenaListColumns.AthenaConnection(con, table = "iris", catalog = "AwsDataCatalog", schema = "default") 70 | 71 | dbDisconnect(con) 72 | 73 | expect_true(inherits(output1, "data.frame")) 74 | expect_equal(names(output1), c("name", "type")) 75 | 76 | expect_null(noctua:::AthenaListColumns.AthenaConnection(con, table = "iris", catalog = "AwsDataCatalog", schema = "default")) 77 | }) 78 | 79 | test_that("Check if Athena list column formatting",{ 80 | skip_if_no_env() 81 | 82 | con <- dbConnect(athena()) 83 | 84 | output1 <- noctua:::AthenaTableTypes(con) 85 | output2 <- noctua:::AthenaTableTypes(con, schema = "default") 86 | output3 <- noctua:::AthenaTableTypes(con, schema = "default", name="iris") 87 | 88 | expect_true(inherits(output1, "character")) 89 | expect_true(inherits(output2, "character")) 90 | expect_true(inherits(output3, "character")) 91 | }) 92 | 93 | test_that("Check if AthenaDatabase formatting is correct",{ 94 | skip_if_no_env() 95 | 96 | con <- dbConnect(athena()) 97 | 98 | output1 <- noctua:::AthenaDatabase(con, "AwsDataCatalog") 99 | 100 | expect_true(inherits(output1, "character")) 101 | }) 102 | 103 | test_that("Check if AthenaDatabase formatting is correct",{ 104 | skip_if_no_env() 105 | 106 | con <- dbConnect(athena()) 107 | 108 | output1 <- noctua:::AthenaPreviewObject.AthenaConnection(con, 10, table = "iris") 109 | output2 <- noctua:::AthenaPreviewObject.AthenaConnection(con, 10, table = "iris", schema = "default") 110 | 111 | expect_true(inherits(output1, "data.frame")) 112 | expect_true(inherits(output2, "data.frame")) 113 | }) 114 | 115 | test_that("Check if AthenaPreviewObject formatting is correct",{ 116 | skip_if_no_env() 117 | 118 | con <- dbConnect(athena()) 119 | 120 | output1 <- noctua:::AthenaPreviewObject.AthenaConnection(con, 10, table = "iris") 121 | output2 <- noctua:::AthenaPreviewObject.AthenaConnection(con, 10, table = "iris", schema = "default") 122 | 123 | expect_true(inherits(output1, "data.frame")) 124 | expect_true(inherits(output2, "data.frame")) 125 | }) 126 | 127 | test_that("Check if AthenaConnectionIcon outputs correct path",{ 128 | skip_if_no_env() 129 | 130 | con <- dbConnect(athena()) 131 | 132 | output1 <- noctua:::AthenaConnectionIcon(con) 133 | 134 | expect_true(file.exists(output1)) 135 | }) 136 | 137 | test_that("Check if AthenaConnectionActions output format is correct",{ 138 | skip_if_no_env() 139 | 140 | con <- dbConnect(athena()) 141 | 142 | output1 <- noctua:::AthenaConnectionActions(con) 143 | 144 | expect_true(is.list(output1)) 145 | }) 146 | 147 | test_that("Check if on_connection_opened runs correctly",{ 148 
| skip_if_no_env() 149 | 150 | con <- dbConnect(athena()) 151 | 152 | output1 <- on_connection_opened(con) 153 | 154 | expect_null(output1) 155 | }) 156 | 157 | test_that("Check if on_connection_opened runs correctly",{ 158 | x1 <- list(TableType = "dummy", 159 | Name = "dummy") 160 | x2 <- list(Type = "dummy", 161 | Name = "dummy") 162 | x3 <- list(Name = "dummy") 163 | 164 | exp1 <- "dummy" 165 | names(exp1) <- "dummy" 166 | exp2 <- "" 167 | names(exp2) <- "dummy" 168 | 169 | expect_equal(noctua:::TblMeta(x1), exp1) 170 | expect_equal(noctua:::ColMeta(x2), exp1) 171 | expect_equal(noctua:::TblMeta(x3), exp2) 172 | expect_equal(noctua:::ColMeta(x3), exp2) 173 | }) 174 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
<li>'); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

<h1>`, then `<h2>`, etc.) that has more than one element. Defaults to 1 (for `<h1>

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /vignettes/getting_started.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Started" 3 | author: "Dyfan Jones" 4 | output: rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{Getting Started} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\usepackage[UTF-8]{inputenc} 9 | --- 10 | 11 | The `noctua` package aims to make it easier to work with data stored in [`AWS Athena`](https://aws.amazon.com/athena/). `noctua` package attempts to provide three levels of interacting with AWS Athena: 12 | 13 | * Low - level API: This provides more finer tuning of `AWS Athena` backend utilising the AWS SDK [`paws`](https://github.com/paws-r/paws). This includes configuring [`AWS Athena Work Groups`](https://aws.amazon.com/about-aws/whats-new/2019/02/athena_workgroups/) to assuming different roles within `AWS` when connecting to `AWS Athena`. 14 | * [DBI interface](https://dbi.r-dbi.org/): This is the primary goal of `noctua`, by providing a `DBI` interface to `AWS Athena`. 
Users are able to interact with `AWS Athena` utilising familiar functions and methods they have used for other databases from R. 15 | * [dplyr interface](https://dbplyr.tidyverse.org/): As `dplyr` is becoming more popular, `noctua` aims to give `dplyr` a seamless interface into `AWS Athena`. 16 | 17 | # Installing `noctua`: 18 | 19 | As `noctua` utilises the R AWS SDK `paws`, the installation of `noctua` is pretty straightforward: 20 | 21 | ```r 22 | # cran version 23 | install.packages("noctua") 24 | 25 | # Dev version 26 | remotes::install_github("dyfanjones/noctua") 27 | ``` 28 | 29 | ## Docker Example: 30 | 31 | To help users wishing to run `noctua` in a [docker](https://hub.docker.com/), a simple docker file has been created [here](https://github.com/DyfanJones/noctua/blob/master/docker/Dockerfile). To set up the docker, please refer to [link](https://repost.aws/knowledge-center/codebuild-temporary-credentials-docker). For demo purposes we will use the [example docker](https://github.com/DyfanJones/noctua/blob/master/docker/Dockerfile) and run it locally: 32 | 33 | ```console 34 | # build docker image 35 | docker build . -t noctua 36 | 37 | # start container with aws credentials passed from local 38 | docker run \ 39 | -e AWS_ACCESS_KEY_ID="$(aws configure get aws_access_key_id)" \ 40 | -e AWS_SECRET_ACCESS_KEY="$(aws configure get aws_secret_access_key)" \ 41 | -e AWS_SESSION_TOKEN="$(aws configure get aws_session_token)" \ 42 | -e AWS_DEFAULT_REGION="$(aws configure get region)" \ 43 | -it noctua 44 | ``` 45 | 46 | **NOTE:** `readr` isn't required for `noctua`, however it has been included in the docker file to improve performance when querying AWS Athena. 47 | 48 | # Usage: 49 | 50 | ## Low - Level API: 51 | ```r 52 | library(DBI) 53 | library(noctua) 54 | 55 | con <- dbConnect(athena()) 56 | 57 | # list all current work groups in AWS Athena 58 | list_work_groups(con) 59 | 60 | # Create a new work group 61 | create_work_group(con, "demo_work_group", description = "This is a demo work group", 62 | tags = tag_options(key= "demo_work_group", value = "demo_01")) 63 | ``` 64 | 65 | ## DBI: 66 | ```r 67 | library(DBI) 68 | 69 | con <- dbConnect(noctua::athena()) 70 | 71 | # Get metadata 72 | dbGetInfo(con) 73 | 74 | # $profile_name 75 | # [1] "default" 76 | # 77 | # $s3_staging 78 | # [1] ######## NOTE: Please don't share your S3 bucket to the public 79 | # 80 | # $dbms.name 81 | # [1] "default" 82 | # 83 | # $work_group 84 | # [1] "primary" 85 | # 86 | # $poll_interval 87 | # NULL 88 | # 89 | # $encryption_option 90 | # NULL 91 | # 92 | # $kms_key 93 | # NULL 94 | # 95 | # $expiration 96 | # NULL 97 | # 98 | # $region_name 99 | # [1] "eu-west-1" 100 | # 101 | # $paws 102 | # [1] "0.1.6" 103 | # 104 | # $noctua 105 | # [1] "1.5.1" 106 | 107 | # create table to AWS Athena 108 | dbWriteTable(con, "iris", iris) 109 | 110 | dbGetQuery(con, "select * from iris limit 10") 111 | # Info: (Data scanned: 860 Bytes) 112 | # sepal_length sepal_width petal_length petal_width species 113 | # 1: 5.1 3.5 1.4 0.2 setosa 114 | # 2: 4.9 3.0 1.4 0.2 setosa 115 | # 3: 4.7 3.2 1.3 0.2 setosa 116 | # 4: 4.6 3.1 1.5 0.2 setosa 117 | # 5: 5.0 3.6 1.4 0.2 setosa 118 | # 6: 5.4 3.9 1.7 0.4 setosa 119 | # 7: 4.6 3.4 1.4 0.3 setosa 120 | # 8: 5.0 3.4 1.5 0.2 setosa 121 | # 9: 4.4 2.9 1.4 0.2 setosa 122 | # 10: 4.9 3.1 1.5 0.1 setosa 123 | ``` 124 | 125 | ## dplyr: 126 | ```r 127 | library(dplyr) 128 | 129 | athena_iris <- tbl(con, "iris") 130 | 131 | athena_iris %>% 132 | select(species, sepal_length,
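# the pipeline below is translated to SQL lazily by dbplyr; the query is
# only executed by Athena when collect() is called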
sepal_width) %>% 133 | head(10) %>% 134 | collect() 135 | 136 | # Info: (Data scanned: 860 Bytes) 137 | # # A tibble: 10 x 3 138 | # species sepal_length sepal_width 139 | # 140 | # 1 setosa 5.1 3.5 141 | # 2 setosa 4.9 3 142 | # 3 setosa 4.7 3.2 143 | # 4 setosa 4.6 3.1 144 | # 5 setosa 5 3.6 145 | # 6 setosa 5.4 3.9 146 | # 7 setosa 4.6 3.4 147 | # 8 setosa 5 3.4 148 | # 9 setosa 4.4 2.9 149 | # 10 setosa 4.9 3.1 150 | ``` 151 | # Useful Links: 152 | 153 | * [SQL](https://docs.aws.amazon.com/athena/latest/ug/functions-operators-reference-section.html) 154 | * [AWS Athena performance tips](https://aws.amazon.com/blogs/big-data/top-10-performance-tuning-tips-for-amazon-athena/) 155 | * [AWS Athena User Guide](https://docs.aws.amazon.com/athena/latest/ug/athena-ug.pdf) 156 | -------------------------------------------------------------------------------- /vignettes/convert_and_save_cost.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Convert and Save Cost" 3 | author: "Dyfan Jones" 4 | output: rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{Convert and Save Cost} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\usepackage[UTF-8]{inputenc} 9 | --- 10 | 11 | # Pricing Details 12 | 13 | >You are charged for the number of bytes scanned by Amazon Athena, rounded up to the nearest megabyte, with a 10MB minimum per query. There are no charges for Data Definition Language (DDL) statements like CREATE/ALTER/DROP TABLE, statements for managing partitions, or failed queries. Cancelled queries are charged based on the amount of data scanned. 14 | > 15 | >Compressing your data allows Athena to scan less data. Converting your data to columnar formats allows Athena to selectively read only required columns to process the data. Athena supports Apache ORC and Apache Parquet. Partitioning your data also allows Athena to restrict the amount of data scanned. This leads to cost savings and improved performance. You can see the amount of data scanned per query on the Athena console. [link](https://aws.amazon.com/athena/pricing/) 16 | So it becomes more important to compress your data and convert it to the recommended file formats [Apache Parquet](https://parquet.apache.org/) or [Apache ORC](https://orc.apache.org/). 17 | 18 | **DON'T WORRY!!! `noctua` is here to help!** 19 | 20 | # `noctua`'s help 21 | 22 | For a lot of users, [Apache Parquet](https://parquet.apache.org/) or [Apache ORC](https://orc.apache.org/) are file formats that aren't well known, and as a result a lot of systems don't have the software to create these formats. `noctua` offers some assistance by firstly enabling Apache Parquet format to be uploaded through [`dbWriteTable`](https://dyfanjones.github.io/noctua/reference/AthenaWriteTables.html), using the R package [`arrow`](https://arrow.apache.org/docs/r/) to create the parquet format. 23 | 24 | If uploading Apache Parquet is not possible, or if the file format Apache ORC is preferred, then `noctua` offers another solution. `noctua` can utilise the power of AWS Athena to convert file formats for you. What this allows you to do is: 25 | 26 | * Upload data in an easier file format, for example delimited format 27 | * Convert data into Parquet or ORC using AWS Athena to save cost 28 | * Finally, insert into the final table with ETL processes 29 | 30 | ## Upload Data in delimited format 31 | 32 | Uploading data in delimited format is the easiest method.
33 | 34 | ```r 35 | library(DBI) 36 | library(noctua) 37 | 38 | con <- dbConnect(athena()) 39 | 40 | # create a temporary database to upload data into 41 | res <- dbExecute(con, "CREATE DATABASE IF NOT EXISTS temp") 42 | dbClearResult(res) 43 | 44 | iris2 <- iris 45 | iris2$time_stamp <- format(Sys.Date(), "%Y%m%d") 46 | 47 | dbWriteTable(con, "temp.iris_delim", iris2) 48 | ``` 49 | 50 | However, the delimited file format isn't the most cost effective when it comes to querying with AWS Athena. To overcome this, we can convert it using AWS Athena. 51 | 52 | ## Convert Data into Parquet or ORC 53 | 54 | Converting a table to a non-partitioned Parquet or ORC format: 55 | 56 | ```r 57 | # convert to parquet 58 | dbConvertTable(con, 59 | obj = "temp.iris_delim", 60 | name = "iris_parquet", 61 | file.type = "parquet") 62 | 63 | # convert to orc 64 | dbConvertTable(con, 65 | obj = "temp.iris_delim", 66 | name = "iris_orc", 67 | file.type = "orc") 68 | ``` 69 | 70 | **NOTE:** By default `dbConvertTable` compresses Parquet/ORC formats using `snappy` compression. 71 | 72 | `noctua` goes a step further by allowing tables to be converted with partitions. 73 | 74 | ```r 75 | # convert to parquet with partition time_stamp 76 | dbConvertTable(con, 77 | obj = "temp.iris_delim", 78 | name = "iris_parquet_partition", 79 | partition = "time_stamp", 80 | file.type = "parquet") 81 | ``` 82 | 83 | `noctua` even allows SQL queries to be converted into the desired file format: 84 | 85 | ```r 86 | dbConvertTable(con, 87 | obj = SQL("select 88 | Sepal_Length, 89 | Sepal_Width, 90 | date_format(current_date, '%Y%m%d') as time_stamp 91 | from temp.iris_delim"), 92 | name = "iris_orc_partition", 93 | partition = "time_stamp", 94 | file.type = "orc") 95 | ``` 96 | 97 | ## Insert into table for ETL processes 98 | 99 | As we have created partitioned data, we can easily insert into: 100 | 101 | ```r 102 | res <- 103 | dbExecute(con, "insert into iris_orc_partition 104 | select 105 | Sepal_Length, 106 | Sepal_Width, 107 | date_format(date_add('day', 1, current_date), '%Y%m%d') time_stamp 108 | from temp.iris_delim") 109 | dbClearResult(res) 110 | ``` 111 | What this all means is that you can create ETL processes by uploading data in a basic file format (delimited), and then converting / inserting it into the preferred file format. 112 | 113 | ## `dplyr` method 114 | 115 | The good news doesn't stop there: `noctua` integrates with `dplyr`, allowing the conversion to be done through `dplyr`. 116 | 117 | ```r 118 | library(dplyr) 119 | 120 | iris_tbl <- tbl(con, dbplyr::in_schema("temp", "iris_delim")) 121 | 122 | r_date <- format(Sys.Date(), "%Y%m%d") 123 | 124 | iris_tbl %>% 125 | select(petal_length, 126 | petal_width) %>% 127 | mutate(time_stamp = r_date) %>% 128 | compute("iris_dplyr_parquet", partition = "time_stamp", file_type = "parquet") 129 | ``` 130 | 131 | # Reading Material 132 | 133 | * [CTAS insert into ETL](https://docs.aws.amazon.com/athena/latest/ug/ctas-insert-into-etl.html) 134 | * [Considerations and Limitations for CTAS Queries](https://docs.aws.amazon.com/athena/latest/ug/ctas-considerations-limitations.html) 135 | * [Examples of CTAS Queries](https://docs.aws.amazon.com/athena/latest/ug/ctas-examples.html) 136 | --------------------------------------------------------------------------------