├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── CRAN-RELEASE ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── auth.R ├── bigQueryR.R ├── dataParseFunctions.R ├── datasets.R ├── downloadData.R ├── fastBqDownload.R ├── globals.R ├── jobs.R ├── listBigQuery.R ├── options.R ├── partition.R ├── print_methods.R ├── query.R ├── tables.R ├── uploadData.R └── utilities.R ├── README.md ├── cran-comments.md ├── drat.sh ├── inst ├── CITATION └── client.json ├── man ├── Table.Rd ├── bigQueryR.Rd ├── bqr_auth.Rd ├── bqr_copy_dataset.Rd ├── bqr_copy_table.Rd ├── bqr_create_table.Rd ├── bqr_delete_table.Rd ├── bqr_download_extract.Rd ├── bqr_download_query.Rd ├── bqr_extract_data.Rd ├── bqr_get_global_dataset.Rd ├── bqr_get_global_project.Rd ├── bqr_get_job.Rd ├── bqr_global_dataset.Rd ├── bqr_global_project.Rd ├── bqr_grant_extract_access.Rd ├── bqr_list_datasets.Rd ├── bqr_list_jobs.Rd ├── bqr_list_projects.Rd ├── bqr_list_tables.Rd ├── bqr_partition.Rd ├── bqr_patch_table.Rd ├── bqr_query.Rd ├── bqr_query_asynch.Rd ├── bqr_table_data.Rd ├── bqr_table_meta.Rd ├── bqr_upload_data.Rd ├── bqr_wait_for_job.Rd ├── parse_bqr_query.Rd └── schema_fields.Rd ├── tests ├── testthat.R └── testthat │ └── test_query.R └── vignettes ├── bigQueryR.Rmd └── bigQueryR.html /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.httr-oauth$ 4 | cran-comments\.md 5 | \.travis\.yml 6 | test_signedurls\.RData 7 | ^codecov\.yml$ 8 | ^CONTRIBUTING\.md$ 9 | ^Readme\.md$ 10 | ^auth\.json$ 11 | ^auth\.json\.enc$ 12 | ^drat\.sh$ 13 | ^revdep 14 | ^\.travis\.yml$ 15 | ^docs 16 | _staticdocs\.yml 17 | _pkgdown\.yml 18 | ^auth 19 | ^secrets\.tar 20 | ^bq\.oauth$ 21 | ^CRAN-RELEASE$ 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rproj 3 | .Rhistory 4 | .RData 5 | inst/doc 6 | *.Rproj 7 | auth.json 8 | bq.oauth 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: R 2 | sudo: false 3 | cache: packages 4 | r_packages: 5 | - covr 6 | - drat 7 | after_success: 8 | - Rscript -e 'library("covr");codecov()' 9 | - test $TRAVIS_PULL_REQUEST == "false" && test $TRAVIS_BRANCH == "master" && bash 10 | drat.sh 11 | env: 12 | global: 13 | secure: nw+dSgg7zzAGfn8RCgSR0lBE1UIRIQgnEMvFCvt6B/c9TWeNoVPApbSYzFMlWSdfztkvlhuQpdw7ueEiPLjyDb6bjwfaqaB5X3WaQoqMz+EoQw9bIoPx2FFCLNnY9n3eNtynIwr8Os7bCLTHyEH//jSSpLH82MnfvHpVSqpmiEXxNCv0sXVxSpjrnymr5mtMIaLN7iVu3nrYUQxPkWR+ShaOdySadUFEATKgzAecsuuEdnFICnHGZ35U5eDOrAVjO0nxlOruZA+kKclpQzzRfkPlJQyLioj1oqKxavg3YiDUqyb7RnsGI/QuXDNYzyfacwqA5e8c+3aiycS1wN4HPKo4zyDcaN6eWKkHncsGF83BaFIsExKjeYDUE6SiILa21LffBZ3vh0LIY4fbR5rA0NCeK4KR10YV994pQihP+g8cIWzDfz9mgtw8Izv0frKHxAjg0vrlGoSDu/XoX5e9TuiYU/n29Lh7wZJ7IkIkpMus8O78tLIW1UMUkz/+LdmucF8VA2Rg3Hn8md4eFXTly685hrxVjGs0tuFWgWytPn2Nyo94MUL1sPpKYuioN+ySiUrgV8+TNAdRKLWbpQU1W6rKTu/5RkO4Xv2cgZh4IhfiEgH2QDM1sFoywdvrVuXj5ZdXDvAMwR4iXLb8egeD/07FSiNuRjeESZ8SkpBJIQ4= 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributions to **bigQueryR** are welcome from anyone and are best sent as pull requests on [the GitHub 
repository](https://github.com/cloudyr/bigQueryR/). This page provides some instructions to potential contributors about how to add to the package. 2 | 3 | 1. Contributions can be submitted as [a pull request](https://help.github.com/articles/creating-a-pull-request/) on GitHub by forking or cloning the [repo](https://github.com/cloudyr/bigQueryR/), making changes and submitting the pull request. 4 | 5 | 2. The cloudyr project follows [a consistent style guide](http://cloudyr.github.io/styleguide/index.html) across all of its packages. Please refer to this when editing package code. 6 | 7 | 3. Pull requests should involve only one commit per substantive change. This means if you change multiple files (e.g., code and documentation), these changes should be committed together. If you don't know how to do this (e.g., you are making changes in the GitHub web interface) just submit anyway and the maintainer will clean things up. 8 | 9 | 4. All contributions must be submitted consistent with the package license ([MIT](https://opensource.org/licenses/MIT)). 10 | 11 | 5. Non-trivial contributions need to be noted in the `Authors@R` field in the [DESCRIPTION](https://github.com/cloudyr/bigQueryR/blob/master/DESCRIPTION). Just follow the format of the existing entries to add your name (and, optionally, email address). Substantial contributions should also be noted in [`inst/CITATION`](https://github.com/cloudyr/bigQueryR/blob/master/inst/CITATION). 12 | 13 | 6. The cloudyr project use royxgen code and documentation markup, so changes should be made to roxygen comments in the source code `.R` files. If changes are made, roxygen needs to be run. The easiest way to do this is a command line call to: `Rscript -e devtools::document()`. Please resolve any roxygen errors before submitting a pull request. 14 | 15 | 7. Please run `R CMD BUILD bigQueryR` and `R CMD CHECK bigQueryR_VERSION.tar.gz` before submitting the pull request to check for any errors. 16 | 17 | Some specific types of changes that you might make are: 18 | 19 | 1. Bug fixes. Great! 20 | 21 | 2. Documentation-only changes (e.g., to Rd files, README, vignettes). This is great! All contributions are welcome. 22 | 23 | 3. New functionality. This is fine, but should be discussed on [the GitHub issues page](https://github.com/cloudyr/bigQueryR/issues) before submitting a pull request. 24 | 25 | 3. Changes requiring a new package dependency should also be discussed on [the GitHub issues page](https://github.com/cloudyr/bigQueryR/issues) before submitting a pull request. 26 | 27 | 4. Message translations. These are very appreciated! The format is a pain, but if you're doing this I'm assuming you're already familiar with it. 28 | 29 | Any questions you have can be opened as GitHub issues or directed to thosjleeper (at) gmail.com. 30 | -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2019-10-04. 2 | Once it is accepted, delete this file and tag the release (commit c814f0b039). 
3 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bigQueryR 2 | Title: Interface with Google BigQuery with Shiny Compatibility 3 | Version: 0.5.0.9000 4 | Authors@R: c(person("Mark", "Edmondson",email = "r@sunholo.com", 5 | role = c("aut", "cre")), 6 | person("Hadley", "Wickham", , "hadley@rstudio.com", role = "ctb") 7 | ) 8 | Description: Interface with 'Google BigQuery', 9 | see for more information. 10 | This package uses 'googleAuthR' so is compatible with similar packages, 11 | including 'Google Cloud Storage' () for result extracts. 12 | URL: http://code.markedmondson.me/bigQueryR/ 13 | BugReports: https://github.com/cloudyr/bigQueryR/issues 14 | License: MIT + file LICENSE 15 | LazyData: TRUE 16 | Depends: 17 | R (>= 3.3) 18 | Imports: 19 | googleAuthR (>= 1.1.1), 20 | googleCloudStorageR (>= 0.2.0), 21 | jsonlite (>= 1.0), 22 | httr (>= 1.2.1), 23 | assertthat 24 | Suggests: 25 | shiny (>= 0.12.1), 26 | knitr, 27 | rmarkdown, 28 | testthat, 29 | data.table, 30 | purrr 31 | RoxygenNote: 7.1.1 32 | VignetteBuilder: knitr 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Sunholo Ltd. 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,bqr_job) 4 | export(Table) 5 | export(bq_get_global_dataset) 6 | export(bq_get_global_project) 7 | export(bq_global_dataset) 8 | export(bq_global_project) 9 | export(bqr_auth) 10 | export(bqr_copy_dataset) 11 | export(bqr_copy_table) 12 | export(bqr_create_table) 13 | export(bqr_delete_table) 14 | export(bqr_download_extract) 15 | export(bqr_download_query) 16 | export(bqr_extract_data) 17 | export(bqr_get_global_dataset) 18 | export(bqr_get_global_project) 19 | export(bqr_get_job) 20 | export(bqr_global_dataset) 21 | export(bqr_global_project) 22 | export(bqr_grant_extract_access) 23 | export(bqr_list_datasets) 24 | export(bqr_list_jobs) 25 | export(bqr_list_projects) 26 | export(bqr_list_tables) 27 | export(bqr_partition) 28 | export(bqr_patch_table) 29 | export(bqr_query) 30 | export(bqr_query_asynch) 31 | export(bqr_table_data) 32 | export(bqr_table_meta) 33 | export(bqr_upload_data) 34 | export(bqr_wait_for_job) 35 | export(schema_fields) 36 | import(assertthat) 37 | import(googleCloudStorageR) 38 | importFrom(googleAuthR,gar_api_generator) 39 | importFrom(googleAuthR,gar_api_page) 40 | importFrom(googleAuthR,gar_auth) 41 | importFrom(googleAuthR,gar_auth_service) 42 | importFrom(stats,setNames) 43 | importFrom(tools,file_ext) 44 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # bigQuery 0.5.0.9000 2 | 3 | * Add missing numeric type for BigQuery schema parsing (#65) 4 | * Align Google APIs URLs to Google Cloud Discovery docs. This enables support for Private and Restricted Google APIs configurations. 
(@husseyd, #81) 5 | - Substitute `https://bigquery.googleapis.com` for `https://www.googleapis.com` 6 | 7 | # bigQueryR 0.5.0 8 | 9 | * Support listing more than 50 datasets in `bqr_list_dataset` 10 | * Change `bqr_list_tables` to list all tables in a dataset by default 11 | * Add `bqr_copy_dataset` 12 | * Add `Table` and `bqr_update_table` 13 | * Support uploading nested lists via `toJSON` 14 | * Add writeDisposition to table loads 15 | * Allow creation of empty tables 16 | * Supporting supplying SQL via a file ending with `.sql` for `bqr_query()` 17 | * Update to new `googleAuthR>1.1.1` 18 | 19 | # bigQueryR 0.4.0 20 | 21 | * support `nullMarker`, `maxBadRecords`, `fieldDelimiter` in upload jobs 22 | * Support BigQuery type `DATE` for R class `Date` data.frame columns (BigQuery type `TIMESTAMP` still default for `POSIXct`columns) (#48) 23 | * Allow custom user schema for uploads of data.frames (#48) 24 | * Rename misnamed global functions from `bq_` prefix to `bqr_` prefix 25 | * Add `allowJaggedRows` and `allowQuotedNewlines` options to upload via `bqr_upload_data()` 26 | * `bqr_get_job` now accepts a job object as well as the jobId 27 | * Fix bug with `bqr_upload_data` where `autodetect=TRUE` didn't work with `gcs://` loads from Cloud Storage 28 | * Fix bug with `bqr_query()` that caused a 404 error sometimes. 29 | 30 | # bigQueryR 0.3.2 31 | 32 | * Move to new batch endpoint (#41) 33 | 34 | 35 | # bigQueryR 0.3.1 36 | 37 | * Fix asynch job fail if user previously `set.seed()` (#37) 38 | * skip tests on CRAN causing error 39 | * fix warning in scope check (#40) 40 | 41 | # bigQueryR 0.3.0 42 | 43 | * Add support for realtime queries, `useQueryCache = FALSE` 44 | * Add support for standard SQL (#21) 45 | * Add support for hms/timestamp class uploads (#27) 46 | * Add support for partitioned tables (#28) 47 | * Fix bug that only returned one row for single column queries (#31 - thanks Rob) 48 | * Allow loading of data from Google Cloud Storage to BigQuery for large files 49 | * no error if delete non-existent table (#26) 50 | * Add auto authentication if set environment var `BQ_AUTH_FILE` to location of auth file 51 | * Add default project if set environment var `BQ_DEFAULT_PROJECT_ID` to project-id 52 | * Add default dataset if set environment var `BQ_DEFAULT_DATASET` to dataset-id 53 | * Add auto paging through table lists in `bqr_list_tables()` (#29) 54 | * Make it clearer when jobs resulted in errors in the job print methods 55 | * Migrate to using `googleCloudStorageR` for Cloud Storage stuff 56 | * Set default authentication scope to `https://www.googleapis.com/auth/cloud-platform` 57 | * Unit tests 58 | * Upload table will now correctly report errors 59 | * More user feedback on BigQuery jobs when running 60 | * Allow upload of data.frames asynchrnously 61 | * Allow auto-detection of schema for uploads 62 | 63 | # bigQueryR 0.2.0 64 | 65 | * Download asynch queries straight to disk via googleCloudStorageR 66 | 67 | # bigQueryR 0.1.0 68 | 69 | * Added a `NEWS.md` file to track changes to the package. 
70 | * Initial release 71 | -------------------------------------------------------------------------------- /R/auth.R: -------------------------------------------------------------------------------- 1 | # check authenticated with correct scopes 2 | check_bq_auth <- function(){ 3 | cloud_scopes <- c("https://www.googleapis.com/auth/cloud-platform", 4 | "https://www.googleapis.com/auth/bigquery") 5 | 6 | if(!any(getOption("googleAuthR.scopes.selected") %in% cloud_scopes)){ 7 | stop("Scopes not adequete for Google BigQuery. Needs to be one of ", 8 | paste(cloud_scopes, collapse = " ")) 9 | googleAuthR::gar_token_info(2) 10 | 11 | } 12 | } 13 | 14 | # check authenticated with correct scopes 15 | check_gcs_auth <- function(){ 16 | cloud_scopes <- c("https://www.googleapis.com/auth/cloud-platform", 17 | "https://www.googleapis.com/auth/devstorage.full_control", 18 | "https://www.googleapis.com/auth/devstorage.read_write") 19 | 20 | if(!any(getOption("googleAuthR.scopes.selected") %in% cloud_scopes)){ 21 | stop("Not authenticated with Google Cloud Storage. Needs to be one of ", 22 | paste(cloud_scopes, collapse = " ")) 23 | current_op <- getOption("googleAuthR.verbose") 24 | options(googleAuthR.verbose = 2) 25 | googleAuthR::gar_token_info() 26 | options(googleAuthR.verbose = current_op) 27 | } 28 | } 29 | 30 | #' Authenticate this session 31 | #' 32 | #' Autheticate manually via email or service JSON file 33 | #' 34 | #' @param json_file Authentication json file you have downloaded from your Google Project 35 | #' @param token A preexisting token to authenticate with 36 | #' @param email A Google email to authenticate with 37 | #' 38 | #' If you have set the environment variable \code{BQ_AUTH_FILE} to a valid file location, 39 | #' the function will look there for authentication details. 40 | #' Otherwise it will trigger an authentication flow via Google login screen in your browser based on the email you provide. 41 | #' 42 | #' If \code{BQ_AUTH_FILE} is specified, then authentication will be called upon loading the package 43 | #' via \code{library(bigQueryR)}, 44 | #' meaning that calling this function yourself at the start of the session won't be necessary. 45 | #' 46 | #' \code{BQ_AUTH_FILE} is a GCP service account JSON ending with file extension \code{.json} 47 | #' 48 | #' @return Invisibly, the token that has been saved to the session 49 | #' @importFrom googleAuthR gar_auth gar_auth_service 50 | #' @importFrom tools file_ext 51 | #' @export 52 | #' @examples 53 | #' 54 | #' \dontrun{ 55 | #' 56 | #' # to use default package credentials (for testing) 57 | #' library(bigQueryR) 58 | #' bqr_auth("location_of_json_file.json") 59 | #' 60 | #' # or via email 61 | #' bqr_auth(email="me@work.com") 62 | #' 63 | #' # to use your own Google Cloud Project credentials 64 | #' # go to GCP console and download client credentials JSON 65 | #' # ideally set this in .Renviron file, not here but just for demonstration 66 | #' Sys.setenv("GAR_CLIENT_JSON" = "location/of/file.json") 67 | #' library(bigQueryR) 68 | #' # should now be able to log in via your own GCP project 69 | #' bqr_auth() 70 | #' 71 | #' # reauthentication 72 | #' # Once you have authenticated, set email to skip the interactive message 73 | #' bqr_auth(email = "my@email.com") 74 | #' 75 | #' # or leave unset to bring up menu on which email to auth with 76 | #' bqr_auth() 77 | #' # The bigQueryR package is requesting access to your Google account. 78 | #' # Select a pre-authorised account or enter '0' to obtain a new token. 
79 | #' # Press Esc/Ctrl + C to abort. 80 | #' #1: my@email.com 81 | #' #2: work@mybusiness.com 82 | 83 | #' # you can set authentication for many emails, then switch between them e.g. 84 | #' bqr_auth(email = "my@email.com") 85 | #' bqr_list_projects() # lists what GCP projects you have access to 86 | #' bqr_auth(email = "work@mybusiness.com") 87 | #' bqr_list_projects() # lists second set of projects 88 | #' 89 | #' 90 | #' 91 | #' } 92 | bqr_auth <- function(json_file = NULL, 93 | token = NULL, 94 | email = Sys.getenv("GARGLE_EMAIL")){ 95 | 96 | set_scopes() 97 | 98 | if(is.null(json_file)){ 99 | gar_auth(token = token, 100 | email = email, 101 | package = "bigQueryR") 102 | } else { 103 | gar_auth_service(json_file = json_file) 104 | } 105 | 106 | } 107 | 108 | set_scopes <- function(){ 109 | required_scopes <- c("https://www.googleapis.com/auth/bigquery", 110 | "https://www.googleapis.com/auth/cloud-platform") 111 | 112 | op <- getOption("googleAuthR.scopes.selected") 113 | if(is.null(op)){ 114 | options(googleAuthR.scopes.selected = "https://www.googleapis.com/auth/bigquery") 115 | } else if(!any(op %in% required_scopes)){ 116 | myMessage("Adding https://www.googleapis.com/auth/bigquery to scopes", level = 3) 117 | options(googleAuthR.scopes.selected = c(op, "https://www.googleapis.com/auth/bigquery")) 118 | } 119 | } -------------------------------------------------------------------------------- /R/bigQueryR.R: -------------------------------------------------------------------------------- 1 | #' bigQueryR 2 | #' 3 | #' Provides an interface with Google BigQuery 4 | #' 5 | #' @seealso \url{https://cloud.google.com/bigquery/docs/reference/v2/?hl=en} 6 | #' 7 | #' @docType package 8 | #' @name bigQueryR 9 | NULL 10 | 11 | 12 | -------------------------------------------------------------------------------- /R/dataParseFunctions.R: -------------------------------------------------------------------------------- 1 | #' Parse table data 2 | #' 3 | #' @keywords internal 4 | parse_bqr_query <- function(x){ 5 | 6 | converter <- list( 7 | integer = as.integer, 8 | float = as.double, 9 | boolean = as.logical, 10 | string = identity, 11 | numeric = as.numeric, 12 | timestamp = function(x) as.POSIXct(as.integer(x), origin = "1970-01-01", tz = "UTC"), 13 | date = function(x) as.Date(x, format="%Y-%m-%d") #fix for #22 if using schema DATE 14 | ) 15 | 16 | schema <- x$schema$fields 17 | ## ffs 18 | data_f <- as.data.frame(matrix(unlist(unlist(x$rows)), 19 | ncol = length(schema$name), 20 | byrow = TRUE), 21 | stringsAsFactors = FALSE) 22 | 23 | types <- tolower(schema$type) 24 | 25 | converter_funcs <- converter[types] 26 | 27 | for(i in seq_along(converter_funcs)){ 28 | data_f[,i] <- converter_funcs[[i]](data_f[, i]) 29 | } 30 | 31 | names(data_f) <- schema$name 32 | 33 | out <- data_f 34 | 35 | out <- as.data.frame(out, stringsAsFactors = FALSE) 36 | attr(out, "jobReference") <- x$jobReference 37 | attr(out, "pageToken") <- x$pageToken 38 | 39 | out 40 | 41 | 42 | } 43 | -------------------------------------------------------------------------------- /R/datasets.R: -------------------------------------------------------------------------------- 1 | #' List BigQuery datasets 2 | #' 3 | #' Each projectId can have multiple datasets. 
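#'
#' # A minimal sketch: list datasets in the default project. This assumes
#' # bqr_auth() has been run and a default project was set via
#' # bqr_global_project(); results are paged through automatically.
#' # datasets <- bqr_list_datasets()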
4 | #' 5 | #' @param projectId The BigQuery project ID 6 | #' 7 | #' @examples 8 | #' 9 | #' \dontrun{ 10 | #' library(bigQueryR) 11 | #' 12 | #' ## this will open your browser 13 | #' ## Authenticate with an email that has access to the BigQuery project you need 14 | #' bqr_auth() 15 | #' 16 | #' ## verify under a new user 17 | #' bqr_auth(new_user=TRUE) 18 | #' 19 | #' ## get projects 20 | #' projects <- bqr_list_projects() 21 | #' 22 | #' my_project <- projects[1] 23 | #' 24 | #' ## for first project, get datasets 25 | #' datasets <- bqr_list_datasets[my_project] 26 | #' 27 | #' } 28 | #' 29 | #' @family bigQuery meta functions 30 | #' @importFrom googleAuthR gar_api_generator gar_api_page 31 | #' @export 32 | bqr_list_datasets <- function(projectId = bqr_get_global_project()){ 33 | 34 | check_bq_auth() 35 | l <- gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 36 | "GET", 37 | path_args = list(projects = projectId, 38 | datasets = ""), 39 | pars_args = list(pageToken=""), 40 | data_parse_function = parse_list_datasets) 41 | pages <- gar_api_page(l, 42 | page_f = get_attr_nextpagetoken, 43 | page_method = "param", 44 | page_arg = "pageToken") 45 | 46 | Reduce(rbind, pages) 47 | 48 | } 49 | 50 | #' @import assertthat 51 | #' @noRd 52 | parse_list_datasets <- function(x){ 53 | 54 | assert_that(x$kind == "bigquery#datasetList") 55 | 56 | if(!is.null(x$datasets)) { 57 | d <- x$datasets 58 | o <- data.frame(datasetId = d$datasetReference$datasetId, 59 | id = d$id, 60 | projectId = d$datasetReference$projectId, 61 | location = d$location, 62 | stringsAsFactors = FALSE) 63 | } else { 64 | o <- data.frame() 65 | } 66 | attr(o, "nextPageToken") <- x$nextPageToken 67 | o 68 | } 69 | 70 | 71 | #' Copy datasets 72 | #' 73 | #' Uses \link{bqr_copy_table} to copy all the tables in a dataset. 74 | #' 75 | #' @param source_datasetid source datasetId 76 | #' @param destination_datasetid destination datasetId 77 | #' @param source_projectid source table's projectId 78 | #' @param destination_projectid destination table's projectId 79 | #' @param createDisposition Create table's behaviour 80 | #' @param writeDisposition Write to an existing table's behaviour 81 | #' @param destination_prefix A prefix appended to the destination tableIds 82 | #' 83 | #' @details 84 | #' 85 | #' You can not copy across dataset regions (e.g. EU to US), or copy BigQuery Views. 86 | #' 87 | #' @export 88 | #' @import assertthat 89 | #' 90 | #' @return A named list of jobs of the source datasets, with details of job started. 
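#'
#' # Illustration of destination_prefix (the table name is hypothetical): with
#' # destination_prefix = "backup_", a source table "events" is copied to
#' # "backup_events" in the destination dataset.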
91 | #' @examples 92 | #' 93 | #' \dontrun{ 94 | #' 95 | #' bqr_copy_dataset("source_dataset", "destination_dataset") 96 | #' 97 | #' } 98 | bqr_copy_dataset <- function(source_datasetid, 99 | destination_datasetid, 100 | source_projectid = bqr_get_global_project(), 101 | destination_projectid = bqr_get_global_project(), 102 | createDisposition = c("CREATE_IF_NEEDED","CREATE_NEVER"), 103 | writeDisposition = c("WRITE_TRUNCATE", "WRITE_APPEND", "WRITE_EMPTY"), 104 | destination_prefix = NULL){ 105 | 106 | createDisposition <- match.arg(createDisposition) 107 | writeDisposition <- match.arg(writeDisposition) 108 | 109 | assert_that( 110 | is.string(source_projectid), 111 | is.string(source_datasetid), 112 | is.string(destination_projectid), 113 | is.string(destination_datasetid) 114 | ) 115 | 116 | source_tables <- bqr_list_tables(source_projectid, 117 | datasetId = source_datasetid, 118 | maxResults = -1) 119 | 120 | myMessage(sprintf("Found %s tables in %s.%s to copy to %s.%s", 121 | nrow(source_tables), 122 | source_projectid, source_datasetid, 123 | destination_projectid, destination_datasetid), 124 | level = 3) 125 | 126 | mapply(bqr_copy_table, 127 | source_tableid = source_tables$tableId, 128 | destination_tableid = paste0(destination_prefix, source_tables$tableId), 129 | MoreArgs = list( 130 | source_projectid = source_projectid, 131 | source_datasetid = source_datasetid, 132 | destination_projectid = destination_projectid, 133 | destination_datasetid = destination_datasetid, 134 | createDisposition = createDisposition, 135 | writeDisposition = writeDisposition 136 | ), 137 | SIMPLIFY = FALSE) 138 | } -------------------------------------------------------------------------------- /R/downloadData.R: -------------------------------------------------------------------------------- 1 | #' Extract data asynchronously 2 | #' 3 | #' Use this instead of \link{bqr_query} for big datasets. 4 | #' Requires you to make a bucket at https://console.cloud.google.com/storage/browser 5 | #' 6 | #' @param projectId The BigQuery project ID. 7 | #' @param datasetId A datasetId within projectId. 8 | #' @param tableId ID of table you wish to extract. 9 | #' @param cloudStorageBucket URI of the bucket to extract into. 10 | #' @param filename Include a wildcard (*) if extract expected to be > 1GB. 11 | #' @param compression Compression of file. 12 | #' @param destinationFormat Format of file. 13 | #' @param fieldDelimiter fieldDelimiter of file. 14 | #' @param printHeader Whether to include header row. 
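#'
#' # Note on the wildcard: an extract split over several files is written out with
#' # 12-digit numeric suffixes in place of the "*", e.g. a (hypothetical)
#' # filename = "my-extract-*.csv" becomes "my-extract-000000000000.csv",
#' # "my-extract-000000000001.csv", and so on.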
15 | #' 16 | #' @seealso \url{https://cloud.google.com/bigquery/exporting-data-from-bigquery} 17 | #' 18 | #' 19 | #' @examples 20 | #' 21 | #' \dontrun{ 22 | #' library(bigQueryR) 23 | #' 24 | #' ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 25 | #' bqr_auth() 26 | #' 27 | #' ## make a big query 28 | #' job <- bqr_query_asynch("your_project", 29 | #' "your_dataset", 30 | #' "SELECT * FROM blah LIMIT 9999999", 31 | #' destinationTableId = "bigResultTable") 32 | #' 33 | #' ## poll the job to check its status 34 | #' ## its done when job$status$state == "DONE" 35 | #' bqr_get_job(job$jobReference$jobId, "your_project") 36 | #' 37 | #' ##once done, the query results are in "bigResultTable" 38 | #' ## extract that table to GoogleCloudStorage: 39 | #' # Create a bucket at Google Cloud Storage at 40 | #' # https://console.cloud.google.com/storage/browser 41 | #' 42 | #' job_extract <- bqr_extract_data("your_project", 43 | #' "your_dataset", 44 | #' "bigResultTable", 45 | #' "your_cloud_storage_bucket_name") 46 | #' 47 | #' ## poll the extract job to check its status 48 | #' ## its done when job$status$state == "DONE" 49 | #' bqr_get_job(job_extract$jobReference$jobId, "your_project") 50 | #' 51 | #' You should also see the extract in the Google Cloud Storage bucket 52 | #' googleCloudStorageR::gcs_list_objects("your_cloud_storage_bucket_name") 53 | #' 54 | #' ## to download via a URL and not logging in via Google Cloud Storage interface: 55 | #' ## Use an email that is Google account enabled 56 | #' ## Requires scopes: 57 | #' ## https://www.googleapis.com/auth/devstorage.full_control 58 | #' ## https://www.googleapis.com/auth/cloud-platform 59 | #' 60 | #' download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 61 | #' 62 | #' ## download_url may be multiple if the data is > 1GB 63 | #' 64 | #' } 65 | #' 66 | #' @return A Job object to be queried via \link{bqr_get_job} 67 | #' 68 | #' @family BigQuery asynch query functions 69 | #' @export 70 | bqr_extract_data <- function(projectId = bqr_get_global_project(), 71 | datasetId = bqr_get_global_dataset(), 72 | tableId, 73 | cloudStorageBucket, 74 | filename = paste0("big-query-extract-", 75 | gsub(" |:|-","", 76 | Sys.time()),"-*.csv"), 77 | compression = c("NONE","GZIP"), 78 | destinationFormat = c("CSV", 79 | "NEWLINE_DELIMITED_JSON", 80 | "AVRO"), 81 | fieldDelimiter = ",", 82 | printHeader = TRUE){ 83 | 84 | compression <- match.arg(compression) 85 | destinationFormat <- match.arg(destinationFormat) 86 | 87 | check_gcs_auth() 88 | 89 | stopifnot(inherits(projectId, "character"), 90 | inherits(datasetId, "character"), 91 | inherits(tableId, "character"), 92 | inherits(cloudStorageBucket, "character"), 93 | inherits(filename, "character"), 94 | inherits(fieldDelimiter, "character"), 95 | inherits(printHeader, "logical")) 96 | 97 | if(!grepl("^gs://",cloudStorageBucket)) 98 | cloudStorageBucket <- paste0("gs://", cloudStorageBucket) 99 | 100 | ## make job 101 | job <- 102 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 103 | "POST", 104 | path_args = list(projects = projectId, 105 | jobs = "") 106 | ) 107 | 108 | gsUri <- paste0(cloudStorageBucket, "/", filename) 109 | 110 | config <- list( 111 | jobReference = list( 112 | projectId = projectId 113 | ##jobId = idempotency() ## uuid to stop duplicate exports - breaks if set.seed (#37) 114 | ), 115 | configuration = list( 116 | extract = list( 117 | sourceTable = list( 118 | datasetId = datasetId, 119 | projectId = 
projectId, 120 | tableId = tableId 121 | ), 122 | destinationUris = list( 123 | gsUri 124 | ), 125 | printHeader = printHeader, 126 | fieldDelimiter = fieldDelimiter, 127 | destinationFormat = destinationFormat, 128 | compression = compression 129 | ) 130 | ) 131 | ) 132 | 133 | config <- rmNullObs(config) 134 | 135 | req <- job(path_arguments = list(projects = projectId), 136 | the_body = config) 137 | 138 | if(req$status_code == 200){ 139 | myMessage("Extract request successful, use bqr_wait_for_job() to know when it is ready.", 140 | level=3) 141 | out <- as.job(req$content) 142 | } else { 143 | stop("Error in extraction job") 144 | # out <- FALSE 145 | } 146 | 147 | out 148 | 149 | } 150 | 151 | #' Download extract data 152 | #' 153 | #' After extracting data via \link{bqr_extract_data} download the 154 | #' extract from the Google Storage bucket. 155 | #' 156 | #' If more than 1GB, will save multiple .csv files with prefix "N_" to filename. 157 | #' 158 | #' @param extractJob An extract job from \link{bqr_extract_data} 159 | #' @param filename Where to save the csv file. If NULL then uses objectname. 160 | #' 161 | #' @return TRUE if successfully downloaded 162 | #' @import googleCloudStorageR 163 | #' @family BigQuery asynch query functions 164 | #' @export 165 | bqr_download_extract <- function(extractJob, 166 | filename = NULL){ 167 | 168 | if(extractJob$status$state != "DONE"){ 169 | stop("Job not done") 170 | } 171 | 172 | check_gcs_auth() 173 | 174 | ## if multiple files, create the suffixs 000000000000, 000000000001, etc. 175 | file_suffix <- make_suffix(extractJob$statistics$extract$destinationUriFileCounts) 176 | 177 | ## replace filename * with suffixes 178 | uris <- gsub("\\*", "%s", extractJob$configuration$extract$destinationUris) 179 | uris <- sprintf(uris, file_suffix) 180 | 181 | ## extract bucket names and object names 182 | bucketnames <- gsub("gs://(.+)/(.+)$","\\1",uris) 183 | objectnames <- gsub("gs://(.+)/(.+)$","\\2",uris) 184 | 185 | if(!is.null(filename)){ 186 | stopifnot(inherits(filename, "character")) 187 | } else { 188 | filename <- objectnames 189 | } 190 | 191 | if(length(objectnames) > 1){ 192 | message("Multiple files to download.") 193 | filename <- paste0(as.character(1:length(objectnames),"_",filename)) 194 | } 195 | 196 | dl <- function(f_name){ 197 | googleCloudStorageR::gcs_get_object( 198 | bucket = bucketnames[[1]], 199 | object_name = f_name, 200 | saveToDisk = f_name 201 | ) 202 | } 203 | 204 | lapply(filename, dl) 205 | 206 | } 207 | 208 | #' Grant access to an extract on Google Cloud Storage 209 | #' 210 | #' To access the data created in \link{bqr_extract_data}. 211 | #' Requires the Google account email of the user. 212 | #' 213 | #' Uses \href{https://cloud.google.com/storage/docs/authentication#cookieauth}{cookie based auth}. 
214 | #' 215 | #' 216 | #' @param extractJob An extract job from \link{bqr_extract_data} 217 | #' @param email email of the user to have access 218 | #' 219 | #' @return URL(s) to download the extract that is accessible by email 220 | #' 221 | #' @examples 222 | #' 223 | #' \dontrun{ 224 | #' library(bigQueryR) 225 | #' 226 | #' ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 227 | #' bqr_auth() 228 | #' 229 | #' ## make a big query 230 | #' job <- bqr_query_asynch("your_project", 231 | #' "your_dataset", 232 | #' "SELECT * FROM blah LIMIT 9999999", 233 | #' destinationTableId = "bigResultTable") 234 | #' 235 | #' ## poll the job to check its status 236 | #' ## its done when job$status$state == "DONE" 237 | #' bqr_get_job(job$jobReference$jobId, "your_project") 238 | #' 239 | #' ##once done, the query results are in "bigResultTable" 240 | #' ## extract that table to GoogleCloudStorage: 241 | #' # Create a bucket at Google Cloud Storage at 242 | #' # https://console.cloud.google.com/storage/browser 243 | #' 244 | #' job_extract <- bqr_extract_data("your_project", 245 | #' "your_dataset", 246 | #' "bigResultTable", 247 | #' "your_cloud_storage_bucket_name") 248 | #' 249 | #' ## poll the extract job to check its status 250 | #' ## its done when job$status$state == "DONE" 251 | #' bqr_get_job(job_extract$jobReference$jobId, "your_project") 252 | #' 253 | #' ## to download via a URL and not logging in via Google Cloud Storage interface: 254 | #' ## Use an email that is Google account enabled 255 | #' ## Requires scopes: 256 | #' ## https://www.googleapis.com/auth/devstorage.full_control 257 | #' ## https://www.googleapis.com/auth/cloud-platform 258 | #' ## set via options("bigQueryR.scopes") and reauthenticate if needed 259 | #' 260 | #' download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 261 | #' 262 | #' ## download_url may be multiple if the data is > 1GB 263 | #' 264 | #' } 265 | #' 266 | #' @family BigQuery asynch query functions 267 | #' @export 268 | bqr_grant_extract_access <- function(extractJob, email){ 269 | 270 | check_gcs_auth() 271 | 272 | stopifnot(is.job(extractJob)) 273 | 274 | if(extractJob$status$state != "DONE"){ 275 | stop("Job not done") 276 | } 277 | 278 | stopifnot(inherits(email, "character")) 279 | 280 | ## if multiple files, create the suffixs 000000000000, 000000000001, etc. 
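  ## e.g. for a 3-file extract, make_suffix(3) (defined at the end of this file)
  ## yields "000000000000" "000000000001" "000000000002", which are substituted
  ## into the "*" of the destination URIs below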
281 | file_suffix <- make_suffix(extractJob$statistics$extract$destinationUriFileCounts) 282 | 283 | ## replace filename * with suffixes 284 | uris <- gsub("\\*", "%s", extractJob$configuration$extract$destinationUris) 285 | uris <- sprintf(uris, file_suffix) 286 | 287 | ## extract bucket names and object names 288 | bucketnames <- gsub("gs://(.+)/(.+)$","\\1",uris) 289 | objectnames <- gsub("gs://(.+)/(.+)$","\\2",uris) 290 | 291 | ## Update access control list of objects to accept the email 292 | 293 | # helper function with prefilled params 294 | updateAccess <- function(object){ 295 | googleCloudStorageR::gcs_update_object_acl( 296 | object_name = object, 297 | bucket = bucketnames[[1]], 298 | entity = email, 299 | entity_type = "user", 300 | role = "READER" 301 | ) 302 | } 303 | 304 | result <- vapply(objectnames, updateAccess, logical(1)) 305 | 306 | ## the download URLs 307 | downloadUri <- googleCloudStorageR::gcs_download_url(object_name = objectnames, 308 | bucket = bucketnames) 309 | 310 | if(all(result)){ 311 | out <- downloadUri 312 | } else { 313 | warning("Problem setting access") 314 | out <- NULL 315 | } 316 | 317 | out 318 | 319 | } 320 | 321 | # Helper for filenames 322 | make_suffix <- function(destinationUriFileCount){ 323 | suff <- function(x) gsub(" ","0",sprintf("%12d", as.numeric(x))) 324 | along <- 0:(as.numeric(destinationUriFileCount)-1) 325 | 326 | vapply(along, suff, "000000000000") 327 | } 328 | 329 | -------------------------------------------------------------------------------- /R/fastBqDownload.R: -------------------------------------------------------------------------------- 1 | #' Download data from BigQuery to local folder 2 | #' 3 | #' Requires you to make a bucket at https://console.cloud.google.com/storage/browser 4 | #' 5 | #' @param query The query you want to run. 6 | #' @param target_folder Target folder on your local computer. 7 | #' @param result_file_name Name of your downloaded file. 8 | #' @param refetch Boolean, whether you would like to refetch previously downloaded data. 9 | #' @param useLegacySql Boolean, whether to use Legacy SQL. Default is FALSE. 10 | #' @param clean_intermediate_results Boolean, whether to keep intermediate files on BigQuery and Google Cloud Storage. 11 | #' @param global_project_name BigQuery project name (where you would like to save your file during download). 12 | #' @param global_dataset_name BigQuery dataset name (where you would like to save your file during download). 13 | #' @param global_bucket_name Google Cloud Storage bucket name (where you would like to save your file during download). 14 | #' 15 | #' @examples 16 | #' 17 | #' \dontrun{ 18 | #' library(bigQueryR) 19 | #' 20 | #' ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 21 | #' bqr_auth() 22 | #' 23 | #' # Create a bucket at Google Cloud Storage at 24 | #' # https://console.cloud.google.com/storage/browser 25 | #' 26 | #' bqr_download_query(query = "select * from `your_project.your_dataset.your_table`") 27 | #' 28 | #' } 29 | #' 30 | #' @return a data.table. 
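#'
#' # A fuller sketch of a call (all names and paths below are placeholders):
#' # dt <- bqr_download_query(
#' #   query = "select * from `your_project.your_dataset.your_table`",
#' #   target_folder = "data",
#' #   result_file_name = "my_result",
#' #   refetch = TRUE
#' # )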
31 | #' 32 | #' @export 33 | bqr_download_query <- function(query = NULL, 34 | target_folder = "data", 35 | result_file_name = NULL, 36 | refetch = FALSE, 37 | useLegacySql = FALSE, 38 | clean_intermediate_results = TRUE, 39 | global_project_name = bqr_get_global_project(), 40 | global_dataset_name = bqr_get_global_dataset(), 41 | global_bucket_name = googleCloudStorageR::gcs_get_global_bucket() 42 | ) { 43 | invisible(sapply(c("data.table", "purrr"), assertRequirement)) 44 | 45 | if (is.null(result_file_name)) { 46 | result_file_name <- "fast_bq_download_result" 47 | } else { 48 | result_file_name <- gsub("(\\.csv$)|(\\.csv\\.gz$)", "", result_file_name) 49 | } 50 | 51 | full_result_path <- paste0(target_folder, "/", result_file_name, ".csv.gz") 52 | if (file.exists(full_result_path) & !refetch) { 53 | return(data.table::fread(paste("gunzip -c", full_result_path))) 54 | } 55 | 56 | setFastSqlDownloadOptions(global_project_name, global_dataset_name, global_bucket_name) 57 | 58 | gcp_result_name_raw <- paste0(result_file_name, "_", Sys.getenv("LOGNAME"), "_", Sys.time()) 59 | gcp_result_name <- gsub("[^[:alnum:]]+", "_", gcp_result_name_raw) 60 | 61 | object_names <- saveQueryToStorage(query, gcp_result_name, useLegacySql) 62 | 63 | tryCatch( 64 | { 65 | output_dt <- readFromStorage(object_names, target_folder) 66 | unifyLocalChunks(output_dt, object_names, result_file_name, target_folder) 67 | }, 68 | error = function(e) { 69 | message("\n\nError while saving from Storage to local. Running cleanup of Storage and BigQuery. See original error message below:\n\n") 70 | message(paste0(e, "\n\n")) 71 | }, 72 | finally = {if (clean_intermediate_results == TRUE) { 73 | cleanIntermediateResults(object_names, gcp_result_name, target_folder) 74 | } 75 | } 76 | ) 77 | 78 | output_dt 79 | } 80 | 81 | 82 | setFastSqlDownloadOptions <- function(global_project_name, global_dataset_name, global_bucket_name) { 83 | options(googleAuthR.scopes.selected = "https://www.googleapis.com/auth/cloud-platform") 84 | 85 | bigQueryR::bqr_global_project(global_project_name) 86 | bigQueryR::bqr_global_dataset(global_dataset_name) 87 | googleCloudStorageR::gcs_global_bucket(global_bucket_name) 88 | } 89 | 90 | saveQueryToStorage <- function(query, result_name, useLegacySql){ 91 | time <- Sys.time() 92 | message("Querying data and saving to BigQuery table") 93 | query_job <- bigQueryR::bqr_query_asynch( 94 | query = query, 95 | useLegacySql = useLegacySql, 96 | destinationTableId = result_name, 97 | writeDisposition = "WRITE_TRUNCATE" 98 | ) 99 | 100 | if (suppressMessages(bigQueryR::bqr_wait_for_job(query_job, wait = 2))$status$state == "DONE") { 101 | time_elapsed <- difftime(Sys.time(), time) 102 | message(paste("Querying job is finished, time elapsed:", format(time_elapsed,format = "%H:%M:%S"))) 103 | 104 | time <- Sys.time() 105 | message("Writing data to storage") 106 | extract_job <- suppressMessages(bigQueryR::bqr_extract_data( 107 | tableId = result_name, 108 | cloudStorageBucket = googleCloudStorageR::gcs_get_global_bucket(), 109 | compression = "GZIP", 110 | filename = paste0(result_name, "_*.csv.gz") 111 | )) 112 | } 113 | 114 | if (suppressMessages(bigQueryR::bqr_wait_for_job(extract_job, wait = 2))$status$state == "DONE") { 115 | time_elapsed <- difftime(Sys.time(), time) 116 | message(paste("Writing data to storage is finished, time elapsed:", format(time_elapsed,format = "%H:%M:%S"))) 117 | object_names <- grep( 118 | result_name, 119 | googleCloudStorageR::gcs_list_objects()$name, 120 | value = TRUE 121 | 
) 122 | } 123 | object_names 124 | } 125 | 126 | readFromStorage <- function(object_names, target_folder) { 127 | createFolder(target_folder) 128 | chunk_dt_list <- purrr::map(object_names, ~ { 129 | object <- . 130 | googleCloudStorageR::gcs_get_object( 131 | object_name = object, 132 | saveToDisk = paste0(target_folder, "/", object), 133 | overwrite = TRUE 134 | ) 135 | data.table::fread(paste0("gunzip -c ", target_folder, "/", object)) 136 | }) 137 | data.table::rbindlist(chunk_dt_list) 138 | } 139 | 140 | unifyLocalChunks <- function(output_dt, object_names, result_file_name, target_folder) { 141 | if (length(object_names) > 1) { 142 | data.table::fwrite(output_dt, paste0(target_folder, "/", result_file_name, ".csv")) 143 | gzipDataAtPath(paste0(target_folder, "/", result_file_name, ".csv")) 144 | } else{ 145 | file.rename( 146 | paste0(target_folder, "/", object_names[[1]]), 147 | paste0(target_folder, "/", result_file_name, ".csv.gz") 148 | ) 149 | } 150 | } 151 | 152 | cleanIntermediateResults <- function(object_names, table_id, target_folder) { 153 | purrr::walk( 154 | object_names, 155 | ~ googleCloudStorageR::gcs_delete_object(object = .x) 156 | ) 157 | bigQueryR::bqr_delete_table(tableId = table_id) 158 | if (length(object_names) > 1) { 159 | purrr::walk(paste0(target_folder, "/", object_names), file.remove) 160 | } 161 | message("The queried table on BigQuery and saved file(s) on GoogleCloudStorage have been cleaned up. 162 | If you want to keep them, use clean_intermediate_results = TRUE.") 163 | } 164 | 165 | createFolder <- function(target_folder) { 166 | if (!dir.exists(target_folder)) { 167 | dir.create(target_folder, recursive = TRUE) 168 | message(paste0(target_folder, ' folder does not exist. Creating folder.')) 169 | } 170 | } 171 | 172 | gzipDataAtPath <- function(full_result_file_name) { 173 | system(paste0("rm -f ", full_result_file_name, ".gz")) 174 | system(paste0("gzip ", full_result_file_name)) 175 | } 176 | 177 | assertRequirement <- function(package_name) { 178 | if (!requireNamespace(package_name, quietly = TRUE)) { 179 | stop(paste0(package_name, " needed for this function to work. Please install it via install.packages('", package_name, "')"), 180 | call. = FALSE) 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | ## store project name 2 | .bqr_env <- new.env(parent = emptyenv()) 3 | 4 | #' Set global project name 5 | #' 6 | #' Set a project name used for this R session 7 | #' 8 | #' @param project project name you want this session to use by default, or a project object 9 | #' 10 | #' @details 11 | #' This sets a project to a global environment value so you don't need to 12 | #' supply the project argument to other API calls. 
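#'
#' # For example (the project id is a placeholder):
#' # bqr_global_project("my-project")
#' # bqr_list_datasets() # no projectId argument needed from here on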
13 | #' 14 | #' @return The project name (invisibly) 15 | #' 16 | #' @family project functions 17 | #' @import assertthat 18 | #' @export 19 | bqr_global_project <- function(project){ 20 | 21 | assert_that(is.string(project)) 22 | 23 | .bqr_env$project <- project 24 | message("Set default project to '", project,"'") 25 | return(invisible(.bqr_env$project)) 26 | 27 | } 28 | 29 | #' @rdname bqr_global_project 30 | #' @inheritParams bqr_global_project 31 | #' @export 32 | bq_global_project <- function(project){ 33 | .Deprecated("bqr_global_project") 34 | bqr_global_project(project) 35 | } 36 | 37 | #' Get global project name 38 | #' 39 | #' project name set this session to use by default 40 | #' 41 | #' @return project name 42 | #' 43 | #' @details 44 | #' Set the project name via \link{bq_global_project} 45 | #' 46 | #' @family project functions 47 | #' @export 48 | bqr_get_global_project <- function(){ 49 | 50 | if(!exists("project", envir = .bqr_env)){ 51 | stop("Project is NULL and couldn't find global project ID name. 52 | Set it via bq_global_project") 53 | } 54 | 55 | .bqr_env$project 56 | 57 | } 58 | 59 | #' @rdname bqr_get_global_project 60 | #' @export 61 | bq_get_global_project <- function(){ 62 | .Deprecated("bqr_get_global_project") 63 | bqr_get_global_project() 64 | } 65 | 66 | #' Set global dataset name 67 | #' 68 | #' Set a dataset name used for this R session 69 | #' 70 | #' @param dataset dataset name you want this session to use by default, or a dataset object 71 | #' 72 | #' @details 73 | #' This sets a dataset to a global environment value so you don't need to 74 | #' supply the dataset argument to other API calls. 75 | #' 76 | #' @return The dataset name (invisibly) 77 | #' 78 | #' @family dataset functions 79 | #' @export 80 | #' @import assertthat 81 | bqr_global_dataset <- function(dataset){ 82 | 83 | assert_that(is.string(dataset)) 84 | 85 | .bqr_env$dataset <- dataset 86 | message("Set default dataset to '", dataset,"'") 87 | return(invisible(.bqr_env$dataset)) 88 | 89 | } 90 | 91 | #' @rdname bqr_global_dataset 92 | #' @inheritParams bqr_global_dataset 93 | #' @export 94 | bq_global_dataset <- function(dataset){ 95 | .Deprecated("bqr_global_dataset") 96 | bqr_global_dataset(dataset) 97 | } 98 | 99 | #' Get global dataset name 100 | #' 101 | #' dataset name set this session to use by default 102 | #' 103 | #' @return dataset name 104 | #' 105 | #' @details 106 | #' Set the dataset name via \link{bq_global_dataset} 107 | #' 108 | #' @family dataset functions 109 | #' @export 110 | bqr_get_global_dataset <- function(){ 111 | 112 | if(!exists("dataset", envir = .bqr_env)){ 113 | stop("dataset is NULL and couldn't find global dataset ID name. 
114 | Set it via bq_global_dataset") 115 | } 116 | 117 | .bqr_env$dataset 118 | 119 | } 120 | 121 | #' @rdname bqr_get_global_dataset 122 | #' @export 123 | bq_get_global_dataset <- function(){ 124 | .Deprecated("bqr_get_global_dataset") 125 | bqr_get_global_dataset() 126 | } -------------------------------------------------------------------------------- /R/jobs.R: -------------------------------------------------------------------------------- 1 | # As job 2 | as.job <- function(x){ 3 | stopifnot(x$kind == "bigquery#job") 4 | structure(x, class = c("bqr_job", class(x))) 5 | } 6 | 7 | # Is job 8 | is.job <- function(x){ 9 | inherits(x, "bqr_job") 10 | } 11 | 12 | # metadata only jobs 13 | call_job <- function(projectId, config){ 14 | l <- 15 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 16 | "POST", 17 | path_args = list(projects = projectId, 18 | jobs = ""), 19 | data_parse_function = function(x) x 20 | ) 21 | 22 | o <- l(the_body = config) 23 | as.job(o) 24 | } 25 | 26 | 27 | #' Wait for a bigQuery job 28 | #' 29 | #' Wait for a bigQuery job to finish. 30 | #' 31 | #' @param job A job object 32 | #' @param wait The number of seconds to wait between checks 33 | #' 34 | #' Use this function to do a loop to check progress of a job running 35 | #' 36 | #' @return After a while, a completed job 37 | #' 38 | #' @family BigQuery asynch query functions 39 | #' @export 40 | bqr_wait_for_job <- function(job, wait=5){ 41 | 42 | stopifnot(is.job(job)) 43 | 44 | status <- FALSE 45 | time <- Sys.time() 46 | 47 | while(!status){ 48 | Sys.sleep(wait) 49 | myMessage("Waiting for job: ", job$jobReference$jobId, " - Job timer: ", format(difftime(Sys.time(), 50 | time), 51 | format = "%H:%M:%S"), level = 3) 52 | 53 | job <- bqr_get_job(projectId = job$jobReference$projectId, 54 | jobId = job$jobReference$jobId, 55 | location = job$jobReference$location) 56 | 57 | if(getOption("googleAuthR.verbose") <= 2){ 58 | myMessage("job configuration:") 59 | print(job) 60 | } 61 | 62 | myMessage("Job status: ", job$status$state, level = 3) 63 | 64 | if(job$status$state == "DONE"){ 65 | status <- TRUE 66 | } else { 67 | status <- FALSE 68 | } 69 | } 70 | 71 | if(!is.null(job$status$errorResult)){ 72 | myMessage("Job failed", level = 3) 73 | warning(job$status$errorResult$message) 74 | myMessage(job$status$errorResult$message, level = 3) 75 | } 76 | 77 | job 78 | } 79 | 80 | 81 | #' Poll a jobId 82 | #' 83 | #' @param projectId projectId of job 84 | #' @param jobId jobId to poll, or a job Object 85 | #' @param location location where job is run. Required for single-region locations when jobId is not a job Object. 
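#'
#' # A job object (e.g. from bqr_query_asynch()) can also be passed directly, in
#' # which case the jobId and location are taken from it; projectId still
#' # defaults to bqr_get_global_project():
#' # bqr_get_job(job)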
86 | #' 87 | #' @return A Jobs resource 88 | #' 89 | #' @examples 90 | #' 91 | #' \dontrun{ 92 | #' library(bigQueryR) 93 | #' 94 | #' ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 95 | #' bqr_auth() 96 | #' 97 | #' ## make a big query 98 | #' job <- bqr_query_asynch("your_project", 99 | #' "your_dataset", 100 | #' "SELECT * FROM blah LIMIT 9999999", 101 | #' destinationTableId = "bigResultTable") 102 | #' 103 | #' ## poll the job to check its status 104 | #' ## its done when job$status$state == "DONE" 105 | #' bqr_get_job(job$jobReference$jobId, "your_project") 106 | #' 107 | #' ##once done, the query results are in "bigResultTable" 108 | #' ## extract that table to GoogleCloudStorage: 109 | #' # Create a bucket at Google Cloud Storage at 110 | #' # https://console.cloud.google.com/storage/browser 111 | #' 112 | #' job_extract <- bqr_extract_data("your_project", 113 | #' "your_dataset", 114 | #' "bigResultTable", 115 | #' "your_cloud_storage_bucket_name") 116 | #' 117 | #' ## poll the extract job to check its status 118 | #' ## its done when job$status$state == "DONE" 119 | #' bqr_get_job(job_extract$jobReference$jobId, "your_project") 120 | #' 121 | #' ## to download via a URL and not logging in via Google Cloud Storage interface: 122 | #' ## Use an email that is Google account enabled 123 | #' ## Requires scopes: 124 | #' ## https://www.googleapis.com/auth/devstorage.full_control 125 | #' ## https://www.googleapis.com/auth/cloud-platform 126 | #' ## set via options("bigQueryR.scopes") and reauthenticate if needed 127 | #' 128 | #' download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 129 | #' 130 | #' ## download_url may be multiple if the data is > 1GB 131 | #' 132 | #' } 133 | #' 134 | #' 135 | #' 136 | #' 137 | #' @family BigQuery asynch query functions 138 | #' @export 139 | bqr_get_job <- function(jobId = .Last.value, 140 | projectId = bqr_get_global_project(), 141 | location = NULL) { 142 | check_bq_auth() 143 | 144 | if(is.job(jobId)){ 145 | jobId <- jobId$jobReference$jobId 146 | location <- jobId$jobReference$location 147 | } 148 | stopifnot(inherits(projectId, "character"), 149 | inherits(jobId, "character")) 150 | 151 | if (!is.null(location)) { 152 | pars <- list(location = location) 153 | } else { 154 | pars <- NULL 155 | } 156 | 157 | ## make job 158 | job <- 159 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 160 | "GET", 161 | path_args = list(projects = projectId, 162 | jobs = jobId), 163 | pars_args = pars) 164 | 165 | req <- job(path_arguments = list(projects = projectId, 166 | jobs = jobId), 167 | pars_args = pars) 168 | 169 | as.job(req$content) 170 | 171 | } 172 | 173 | #' List BigQuery jobs 174 | #' 175 | #' @description 176 | #' List the BigQuery jobs for the projectId 177 | #' 178 | #' @details 179 | #' Lists all jobs that you started in the specified project. 180 | #' Job information is available for a six month period after creation. 181 | #' The job list is sorted in reverse chronological order, by job creation time. 182 | #' Requires the Can View project role, or the 183 | #' Is Owner project role if you set the allUsers property. 184 | #' 185 | #' @param projectId projectId of job 186 | #' @param allUsers Whether to display jobs owned by all users in the project. 187 | #' @param projection "full" - all job data, "minimal" excludes job configuration. 188 | #' @param stateFilter Filter for job status. 
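#'
#' @examples
#'
#' \dontrun{
#' # A minimal sketch, assuming bqr_auth() has been run and a default project
#' # was set via bqr_global_project():
#' jobs <- bqr_list_jobs(stateFilter = "done")
#'
#' # jobs are returned newest first; inspect the most recent one
#' jobs[[1]]
#' }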
189 | #' 190 | #' @return A list of jobs resources 191 | #' @export 192 | bqr_list_jobs <- function(projectId = bqr_get_global_project(), 193 | allUsers = FALSE, 194 | projection = c("full","minimal"), 195 | stateFilter = c("done","pending","running")){ 196 | check_bq_auth() 197 | stopifnot(inherits(projectId, "character"), 198 | inherits(allUsers, "logical")) 199 | 200 | projection <- match.arg(projection) 201 | stateFilter <- match.arg(stateFilter) 202 | 203 | pars <- list(allUsers = allUsers, 204 | projection = projection, 205 | stateFilter = stateFilter) 206 | 207 | options("googleAuthR.jsonlite.simplifyVector" = FALSE ) 208 | ## make job 209 | job <- 210 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 211 | "GET", 212 | path_args = list(projects = projectId, 213 | jobs = ""), 214 | pars_args = pars) 215 | req <- job(path_arguments = list(projects = projectId), 216 | pars_argumenets = pars) 217 | 218 | out <- rmNullObs(req$content) 219 | options("googleAuthR.jsonlite.simplifyVector" = TRUE ) 220 | 221 | lapply(out$jobs, as.job) 222 | 223 | } 224 | -------------------------------------------------------------------------------- /R/listBigQuery.R: -------------------------------------------------------------------------------- 1 | #' List Google Dev Console projects you have access to 2 | #' 3 | #' Example: bqr_list_projects() 4 | #' 5 | #' @return A dataframe of the projects you have access to under the authentication 6 | #' 7 | #' @examples 8 | #' 9 | #' \dontrun{ 10 | #' library(bigQueryR) 11 | #' 12 | #' ## this will open your browser 13 | #' ## Authenticate with an email that has access to the BigQuery project you need 14 | #' bqr_auth() 15 | #' 16 | #' ## verify under a new user 17 | #' bqr_auth(new_user=TRUE) 18 | #' 19 | #' ## get projects 20 | #' projects <- bqr_list_projects() 21 | #' 22 | #' } 23 | #' 24 | #' @family bigQuery meta functions 25 | #' @export 26 | bqr_list_projects <- function(){ 27 | check_bq_auth() 28 | l <- googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2/projects", 29 | "GET", 30 | data_parse_function = function(x) { 31 | d <- x$projects 32 | out <- data.frame(id = d$id, 33 | numericId = d$numericId, 34 | projectId = d$projectReference$projectId, 35 | friendlyName = d$friendlyName, 36 | stringsAsFactors = FALSE) 37 | }) 38 | l() 39 | 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /R/options.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | 3 | op <- options() 4 | op.bigQueryR <- list( 5 | ## default Google project 6 | googleAuthR.client_id = "68483650948-28g1na33slr3bt8rk7ikeog5ur19ldq6.apps.googleusercontent.com", 7 | googleAuthR.client_secret = "f0npd8zUhmqf8IqrIypBs6Cy ", 8 | googleAuthR.webapp.client_id = "68483650948-sufabj4nq9h1hjofp03hcjhk4af93080.apps.googleusercontent.com", 9 | googleAuthR.webapp.client_secret = "0tWYjliwXD32XhvDJHTl4NgN ", 10 | googleAuthR.scopes.selected = c("https://www.googleapis.com/auth/cloud-platform"), 11 | googleAuthR.batch_endpoint = "https://www.googleapis.com/batch/bigquery/v2" 12 | ) 13 | 14 | options(googleAuthR.httr_oauth_cache = "bq.oauth") 15 | 16 | toset <- !(names(op.bigQueryR) %in% names(op)) 17 | 18 | if(any(toset)) options(op.bigQueryR[toset]) 19 | 20 | invisible() 21 | 22 | } 23 | 24 | 25 | .onAttach <- function(libname, pkgname){ 26 | 27 | if(Sys.getenv("GAR_CLIENT_JSON") != ""){ 28 | googleAuthR::gar_set_client(json 
= Sys.getenv("GAR_CLIENT_JSON")) 29 | } 30 | 31 | needed <- c("https://www.googleapis.com/auth/cloud-platform", 32 | "https://www.googleapis.com/auth/bigquery") 33 | 34 | googleAuthR::gar_attach_auto_auth(needed, 35 | environment_var = "BQ_AUTH_FILE") 36 | 37 | if(Sys.getenv("BQ_DEFAULT_PROJECT_ID") != ""){ 38 | .bqr_env$project <- Sys.getenv("BQ_DEFAULT_PROJECT_ID") 39 | packageStartupMessage("Set default project to '", Sys.getenv("BQ_DEFAULT_PROJECT_ID"),"'") 40 | } 41 | 42 | if(Sys.getenv("BQ_DEFAULT_DATASET") != ""){ 43 | .bqr_env$dataset <- Sys.getenv("BQ_DEFAULT_DATASET") 44 | packageStartupMessage("Set default dataset to '", Sys.getenv("BQ_DEFAULT_DATASET"),"'") 45 | } 46 | 47 | invisible() 48 | 49 | } 50 | -------------------------------------------------------------------------------- /R/partition.R: -------------------------------------------------------------------------------- 1 | #' Convert date-sharded tables to a single partitioned table 2 | #' 3 | #' Moves the old style date-sharded tables such as \code{[TABLE_NAME]_YYYYMMDD} to the new date partitioned format. 4 | #' 5 | #' @param sharded The prefix of date-sharded tables to merge into one partitioned table 6 | #' @param partition Name of partitioned table. Will create if not present already 7 | #' @param projectId The project ID 8 | #' @param datasetId The dataset ID 9 | #' 10 | #' @examples 11 | #' 12 | #' \dontrun{ 13 | #' 14 | #' bqr_partition("ga_sessions_", "ga_partition") 15 | #' 16 | #' } 17 | #' 18 | #' @details 19 | #' 20 | #' Performs lots of copy table operations via \link{bqr_copy_table} 21 | #' 22 | #' Before partitioned tables became available, BigQuery users would often divide 23 | #' large datasets into separate tables organized by time period; usually daily tables, 24 | #' where each table represented data loaded on that particular date. 25 | #' 26 | #' Dividing a dataset into daily tables helped to reduce the amount of data scanned 27 | #' when querying a specific date range. For example, if you have a a year's worth of data 28 | #' in a single table, a query that involves the last seven days of data still requires 29 | #' a full scan of the entire table to determine which data to return. 30 | #' However, if your table is divided into daily tables, you can restrict the query to 31 | #' the seven most recent daily tables. 32 | #' 33 | #' Daily tables, however, have several disadvantages. You must manually, or programmatically, 34 | #' create the daily tables. SQL queries are often more complex because your data can be 35 | #' spread across hundreds of tables. Performance degrades as the number of referenced 36 | #' tables increases. There is also a limit of 1,000 tables that can be referenced in a 37 | #' single query. Partitioned tables have none of these disadvantages. 38 | #' 39 | #' @return A list of copy jobs for the sharded tables that will be copied to one partitioned table 40 | #' 41 | #' @seealso \href{https://cloud.google.com/bigquery/docs/creating-partitioned-tables}{Partitioned Tables Help} 42 | #' @export 43 | #' @importFrom stats setNames 44 | bqr_partition <- function(sharded, 45 | partition, 46 | projectId = bqr_get_global_project(), 47 | datasetId = bqr_get_global_dataset()){ 48 | 49 | ## check for shared tables 50 | tables <- bqr_list_tables(projectId = projectId, datasetId = datasetId) 51 | 52 | shard_tables <- tables[grepl(paste0("^",sharded), tables$tableId),] 53 | if(nrow(shard_tables) == 0){ 54 | stop("No sharded tables not found - is your tableID correct? 
Got ", sharded) 55 | } 56 | 57 | ## check for partition table, creating if not there 58 | part_table <- tables[grepl(paste0("^",partition,"$"), tables$tableId),"tableId"] 59 | if(length(part_table) == 0){ 60 | myMessage("Creating Partition Table: ", partition, level = 3) 61 | 62 | shard_schema <- bqr_query(projectId = projectId, 63 | datasetId = datasetId, 64 | query = sprintf('SELECT * FROM %s LIMIT 1', shard_tables$tableId[[1]])) 65 | 66 | part_table <- bqr_create_table(projectId = projectId, 67 | datasetId = datasetId, 68 | tableId = partition, 69 | template_data = shard_schema, 70 | timePartitioning = TRUE) 71 | } 72 | ## extract shard dates 73 | ex <- function(x) { 74 | gsub(".+?([0-9]{8}$)","\\1",x) 75 | } 76 | 77 | shard_dates <- vapply(shard_tables$tableId, ex, character(1), USE.NAMES = TRUE) 78 | 79 | ## query sharded tables, putting results in partition table 80 | part_query <- function(sdn){ 81 | 82 | myMessage("Partitioning ", sdn, level = 3) 83 | 84 | bqr_copy_table(source_projectid = projectId, 85 | source_datasetid = datasetId, 86 | source_tableid = sdn, 87 | destination_projectid = projectId, 88 | destination_datasetid = datasetId, 89 | destination_tableid = paste0(partition,"$",shard_dates[[sdn]]), 90 | writeDisposition = "WRITE_EMPTY") 91 | } 92 | 93 | result <- lapply(names(shard_dates), part_query) 94 | 95 | setNames(result, names(shard_dates)) 96 | 97 | } 98 | -------------------------------------------------------------------------------- /R/print_methods.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.bqr_job <- function(x, ...){ 3 | cat("==Google BigQuery Job==\n") 4 | cat0("JobID: ", x$jobReference$jobId) 5 | cat0("ProjectID: ", x$jobReference$projectId) 6 | cat0("Status: ", x$status$state) 7 | cat0("User: ", x$user_email) 8 | cat0("Created: ", as.character(js_to_posix(x$statistics$creationTime))) 9 | cat0("Start: ", as.character(js_to_posix(x$statistics$startTime))) 10 | cat0("End: ", as.character(js_to_posix(x$statistics$endTime))) 11 | cat("## View job configuration via job$configuration\n") 12 | 13 | cat0("## Job had error: \n", x$status$errorResult$message) 14 | if(!is.null(x$status$errors)){ 15 | print(x$status$errors$message) 16 | } 17 | 18 | 19 | } -------------------------------------------------------------------------------- /R/query.R: -------------------------------------------------------------------------------- 1 | #' Query a BigQuery Table 2 | #' 3 | #' @param projectId The BigQuery project ID 4 | #' @param datasetId A datasetId within projectId 5 | #' @param query BigQuery SQL. You can also supply a file location of your query ending with \code{.sql} 6 | #' @param maxResults Max number per page of results. Set total rows with LIMIT in your query. 7 | #' @param useLegacySql Whether the query you pass is legacy SQL or not. Default TRUE 8 | #' @param useQueryCache Whether to use the query cache. Default TRUE, set to FALSE for realtime queries. 9 | #' 10 | #' @return a data.frame. 11 | #' If there is an SQL error, a data.frame with 12 | #' additional class "bigQueryR_query_error" and the 13 | #' problem in the data.frame$message 14 | #' 15 | #' @description 16 | #' MaxResults is how many results to return per page of results, which can be less than the 17 | #' total results you have set in your query using LIMIT. Google recommends for bigger datasets 18 | #' to set maxResults = 1000, but this will use more API calls. 
19 | #' 20 | #' @seealso \href{https://cloud.google.com/bigquery/sql-reference/}{BigQuery SQL reference} 21 | #' 22 | #' @examples 23 | #' 24 | #' \dontrun{ 25 | #' 26 | #' bqr_query("big-query-r","samples", 27 | #' "SELECT COUNT(repository.url) FROM [publicdata:samples.github_nested]") 28 | #' 29 | #' } 30 | #' 31 | #' @family BigQuery query functions 32 | #' @export 33 | bqr_query <- function(projectId = bqr_get_global_project(), 34 | datasetId = bqr_get_global_dataset(), 35 | query, 36 | maxResults = 1000, 37 | useLegacySql = TRUE, 38 | useQueryCache = TRUE, 39 | dryRun = FALSE, 40 | timeoutMs = 600*1000){ 41 | check_bq_auth() 42 | 43 | if(endsWith(query, ".sql")){ 44 | query <- readChar(query, nchars = file.info(query)$size) 45 | } 46 | 47 | maxResults <- as.numeric(maxResults) 48 | if(maxResults > 100000) warning("bqr_query() is not suited to extract large amount of data from BigQuery. Consider using bqr_query_asynch() and bqr_extract_data() instead") 49 | 50 | body <- list( 51 | kind = "bigquery#queryRequest", 52 | query = query, 53 | maxResults = maxResults, 54 | useLegacySql = useLegacySql, 55 | useQueryCache = useQueryCache, 56 | defaultDataset = list( 57 | datasetId = datasetId, 58 | projectId = projectId 59 | ), 60 | timeoutMs = timeoutMs, 61 | dryRun = dryRun 62 | ) 63 | 64 | body <- rmNullObs(body) 65 | 66 | # solve 404? 67 | the_url <- sprintf("https://bigquery.googleapis.com/bigquery/v2/projects/%s/queries", projectId) 68 | 69 | if(dryRun){ 70 | q <- googleAuthR::gar_api_generator(the_url, 71 | "POST", 72 | checkTrailingSlash = FALSE) 73 | data <- try(q(the_body = body, 74 | path_arguments = list(projects = projectId))) 75 | if(!is.error(data)){ 76 | data <- data$content 77 | } 78 | 79 | }else{ 80 | q <- googleAuthR::gar_api_generator(the_url, 81 | "POST", 82 | data_parse_function = parse_bqr_query, 83 | checkTrailingSlash = FALSE) 84 | data <- try(q(the_body = body, 85 | path_arguments = list(projects = projectId))) 86 | } 87 | 88 | if(is.error(data)) { 89 | warning(error.message(data)) 90 | data <- data.frame(error = "SQL Error", message = error.message(data)) 91 | class(data) <- c(class(data), "bigQueryR_query_error") 92 | } 93 | 94 | pageToken <- attr(data, "pageToken") 95 | if(!is.null(pageToken)){ 96 | message("Paging through query results") 97 | jobId <- attr(data, "jobReference")$jobId 98 | pr <- googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 99 | "GET", 100 | path_args = list(projects = projectId, 101 | queries = jobId), 102 | pars_args = list(pageToken = pageToken), 103 | data_parse_function = parse_bqr_query) 104 | i <- 1 105 | while(!is.null(pageToken)){ 106 | message("Page #: ", i) 107 | data_page <- pr(pars_arguments = list(pageToken = pageToken)) 108 | data <- rbind(data, data_page) 109 | pageToken <- attr(data_page, "pageToken") 110 | i <- i + 1 111 | } 112 | message("All data fetched.") 113 | 114 | } 115 | 116 | data 117 | 118 | } 119 | 120 | 121 | #' BigQuery query asynchronously 122 | #' 123 | #' Use for big results > 10000 that write to their own destinationTableId. 124 | #' 125 | #' @param projectId projectId to be billed. 126 | #' @param datasetId datasetId of where query will execute. 127 | #' @param query The BigQuery query as a string. 128 | #' @param destinationTableId Id of table the results will be written to. 129 | #' @param writeDisposition Behaviour if destination table exists. See Details. 130 | #' @param useLegacySql Whether the query you pass is legacy SQL or not. 
Default TRUE 131 | #' 132 | #' @details 133 | #' 134 | #' For bigger queries, asynchronous queries save the results to another BigQuery table. 135 | #' You can check the progress of the job via \link{bqr_get_job} 136 | #' 137 | #' You may now want to download this data. 138 | #' For large datasets, this is best done via extracting the BigQuery result to Google Cloud Storage, 139 | #' then downloading the data from there. 140 | #' 141 | #' You can read how to create a bucket at Google Cloud Storage 142 | #' at \url{https://cloud.google.com/storage/docs/cloud-console} 143 | #' 144 | #' writeDisposition - behaviour if destinationTable already exists: 145 | #' \itemize{ 146 | #' \item WRITE_TRUNCATE: BigQuery overwrites the table data. 147 | #' \item WRITE_APPEND: BigQuery appends the data to the table 148 | #' \item WRITE_EMPTY: If contains data, a 'duplicate' error is returned 149 | #' } 150 | #' 151 | #' 152 | #' @return A Job object to be queried via \link{bqr_get_job} 153 | #' 154 | #' @examples 155 | #' 156 | #' \dontrun{ 157 | #' library(bigQueryR) 158 | #' 159 | #' ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 160 | #' bqr_auth() 161 | #' 162 | #' ## make a big query 163 | #' job <- bqr_query_asynch("your_project", 164 | #' "your_dataset", 165 | #' "SELECT * FROM blah LIMIT 9999999", 166 | #' destinationTableId = "bigResultTable") 167 | #' 168 | #' ## poll the job to check its status 169 | #' ## its done when job$status$state == "DONE" 170 | #' bqr_get_job(job$jobReference$jobId, "your_project") 171 | #' 172 | #' ##once done, the query results are in "bigResultTable" 173 | #' ## extract that table to GoogleCloudStorage: 174 | #' # Create a bucket at Google Cloud Storage at 175 | #' # https://console.cloud.google.com/storage/browser 176 | #' 177 | #' job_extract <- bqr_extract_data("your_project", 178 | #' "your_dataset", 179 | #' "bigResultTable", 180 | #' "your_cloud_storage_bucket_name") 181 | #' 182 | #' ## poll the extract job to check its status 183 | #' ## its done when job$status$state == "DONE" 184 | #' bqr_get_job(job_extract$jobReference$jobId, "your_project") 185 | #' 186 | #' ## to download via a URL and not logging in via Google Cloud Storage interface: 187 | #' ## Use an email that is Google account enabled 188 | #' ## Requires scopes: 189 | #' ## https://www.googleapis.com/auth/devstorage.full_control 190 | #' ## https://www.googleapis.com/auth/cloud-platform 191 | #' ## set via options("bigQueryR.scopes") and reauthenticate if needed 192 | #' 193 | #' download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 194 | #' 195 | #' ## download_url may be multiple if the data is > 1GB 196 | #' 197 | #' } 198 | #' 199 | #' 200 | #' @family BigQuery asynch query functions 201 | #' @export 202 | bqr_query_asynch <- function(projectId = bqr_get_global_project(), 203 | datasetId = bqr_get_global_dataset(), 204 | query, 205 | destinationTableId, 206 | useLegacySql = TRUE, 207 | writeDisposition = c("WRITE_EMPTY", 208 | "WRITE_TRUNCATE", 209 | "WRITE_APPEND")){ 210 | 211 | writeDisposition <- match.arg(writeDisposition) 212 | 213 | check_bq_auth() 214 | ## make job 215 | job <- 216 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 217 | "POST", 218 | path_args = list(projects = projectId, 219 | jobs = "") 220 | ) 221 | 222 | config <- list( 223 | jobReference = list( 224 | projectId = projectId 225 | ## jobId = idempotency() ## uuid to stop duplicates - breaks if set.seed() (#37) 226 | ), 227 | 
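## The list below is serialised into the BigQuery jobs.insert request body,
## roughly {"configuration": {"query": {"query": ..., "destinationTable": ...,
## "writeDisposition": ...}}} - an orientation sketch only; see the BigQuery
## REST reference for the authoritative field list.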
configuration = list( 228 | query = list( 229 | allowLargeResults = TRUE, 230 | defaultDataset = list( 231 | datasetId = datasetId, 232 | projectId = projectId 233 | ), 234 | destinationTable = list( 235 | datasetId = datasetId, 236 | projectId = projectId, 237 | tableId = destinationTableId 238 | ), 239 | query = query, 240 | useLegacySql = useLegacySql, 241 | writeDisposition = writeDisposition 242 | ) 243 | ) 244 | ) 245 | 246 | config <- rmNullObs(config) 247 | 248 | req <- job(path_arguments = list(projects = projectId), 249 | the_body = config) 250 | 251 | if(req$status_code == 200){ 252 | myMessage("Query request successful", level=2) 253 | out <- req$content 254 | } else { 255 | stop("Error in query job") 256 | # out <- FALSE 257 | } 258 | 259 | as.job(out) 260 | 261 | } 262 | -------------------------------------------------------------------------------- /R/tables.R: -------------------------------------------------------------------------------- 1 | #' Copy BigQuery table 2 | #' 3 | #' Copy a source table to another destination 4 | #' 5 | #' @param source_projectid source table's projectId 6 | #' @param source_datasetid source table's datasetId 7 | #' @param source_tableid source table's tableId 8 | #' @param destination_projectid destination table's projectId 9 | #' @param destination_datasetid destination table's datasetId 10 | #' @param destination_tableid destination table's tableId 11 | #' @param createDisposition Create table's behaviour 12 | #' @param writeDisposition Write to an existing table's behaviour 13 | #' 14 | #' @return A job object 15 | #' 16 | #' @export 17 | #' @import assertthat 18 | #' @family Table meta functions 19 | bqr_copy_table <- function(source_tableid, 20 | destination_tableid, 21 | source_projectid = bqr_get_global_project(), 22 | source_datasetid = bqr_get_global_dataset(), 23 | destination_projectid = bqr_get_global_project(), 24 | destination_datasetid = bqr_get_global_dataset(), 25 | createDisposition = c("CREATE_IF_NEEDED","CREATE_NEVER"), 26 | writeDisposition = c("WRITE_TRUNCATE", "WRITE_APPEND", "WRITE_EMPTY")){ 27 | 28 | createDisposition <- match.arg(createDisposition) 29 | writeDisposition <- match.arg(writeDisposition) 30 | 31 | assert_that( 32 | is.string(source_projectid), 33 | is.string(source_datasetid), 34 | is.string(source_tableid), 35 | is.string(destination_projectid), 36 | is.string(destination_datasetid), 37 | is.string(destination_tableid) 38 | ) 39 | 40 | config <- list( 41 | configuration = list( 42 | copy = list( 43 | createDisposition = createDisposition, 44 | sourceTable = list( 45 | projectId = source_projectid, 46 | datasetId = source_datasetid, 47 | tableId = source_tableid 48 | ), 49 | destinationTable = list( 50 | projectId = destination_projectid, 51 | datasetId = destination_datasetid, 52 | tableId = destination_tableid 53 | ), 54 | writeDisposition = writeDisposition 55 | ) 56 | ) 57 | ) 58 | 59 | myMessage(sprintf("Copying table %s.%s.%s to %s.%s.%s", 60 | source_projectid, source_datasetid, source_tableid, 61 | destination_projectid,destination_datasetid, destination_tableid), 62 | level = 3) 63 | 64 | call_job(source_projectid, config = config) 65 | } 66 | 67 | 68 | 69 | #' List BigQuery tables in a dataset 70 | #' 71 | #' @param projectId The BigQuery project ID 72 | #' @param datasetId A datasetId within projectId 73 | #' @param maxResults Number of results to return, default \code{-1} returns all results 74 | #' 75 | #' @return dataframe of tables in dataset 76 | #' 77 | #' @examples 78 | #' 79 | #' 
\dontrun{ 80 | #' bqr_list_tables("publicdata", "samples") 81 | #' } 82 | #' 83 | #' @family Table meta functions 84 | #' @import assertthat 85 | #' @importFrom googleAuthR gar_api_generator gar_api_page 86 | #' @export 87 | bqr_list_tables <- function(projectId = bqr_get_global_project(), 88 | datasetId = bqr_get_global_dataset(), 89 | maxResults = -1){ 90 | 91 | assert_that(is.string(projectId), 92 | is.string(datasetId), 93 | is.scalar(maxResults)) 94 | 95 | # support -1 for all results 96 | if(maxResults < 0){ 97 | maxResults=NULL 98 | } 99 | 100 | pars <- list(maxResults = maxResults, 101 | pageToken = "") 102 | pars <- rmNullObs(pars) 103 | 104 | 105 | check_bq_auth() 106 | l <- gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 107 | "GET", 108 | path_args = list(projects = projectId, 109 | datasets = datasetId, 110 | tables = ""), 111 | pars_args = pars, 112 | data_parse_function = parse_bqr_list_tables) 113 | 114 | pages <- gar_api_page(l, 115 | page_f = get_attr_nextpagetoken, 116 | page_method = "param", 117 | page_arg = "pageToken") 118 | 119 | Reduce(rbind, pages) 120 | 121 | } 122 | 123 | parse_bqr_list_tables <- function(x) { 124 | d <- x$tables 125 | out <- data.frame(id = d$id, 126 | projectId = d$tableReference$projectId, 127 | datasetId = d$tableReference$datasetId, 128 | tableId = d$tableReference$tableId, stringsAsFactors = FALSE) 129 | 130 | if(!is.null(x$nextPageToken)){ 131 | attr(out, "nextPageToken") <- x$nextPageToken 132 | } 133 | 134 | out 135 | 136 | 137 | } 138 | 139 | #' Get BigQuery Table meta data 140 | #' 141 | #' @param projectId The BigQuery project ID 142 | #' @param datasetId A datasetId within projectId 143 | #' @param tableId The tableId within the datasetId 144 | #' 145 | #' @return list of table metadata 146 | #' 147 | #' @examples 148 | #' 149 | #' \dontrun{ 150 | #' bqr_table_meta("publicdata", "samples", "github_nested") 151 | #' } 152 | #' 153 | #' 154 | #' @family Table meta functions 155 | #' @export 156 | bqr_table_meta <- function(projectId = bqr_get_global_project(), 157 | datasetId = bqr_get_global_dataset(), 158 | tableId){ 159 | 160 | check_bq_auth() 161 | f <- function(x){ 162 | x <- rmNullObs(x) 163 | } 164 | 165 | 166 | l <- googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 167 | "GET", 168 | path_args = list(projects = projectId, 169 | datasets = datasetId, 170 | tables = tableId), 171 | data_parse_function = f) 172 | 173 | res <- l(path_arguments = list(projects = projectId, 174 | datasets = datasetId, 175 | tables = tableId)) 176 | 177 | as.table(res) 178 | 179 | } 180 | 181 | #' Get BigQuery Table's data list 182 | #' 183 | #' @param projectId The BigQuery project ID 184 | #' @param datasetId A datasetId within projectId 185 | #' @param tableId The tableId within the datasetId 186 | #' @param maxResults Number of results to return 187 | #' 188 | #' @return data.frame of table data 189 | #' 190 | #' This won't work with nested datasets, for that use \link{bqr_query} as that flattens results. 
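#'
#' An illustrative call (project, dataset and table names are placeholders):
#' \preformatted{
#' preview <- bqr_table_data("my-project", "my_dataset", "my_table",
#'                           maxResults = 100)
#' }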
191 | #' 192 | #' @family Table meta functions 193 | #' @export 194 | bqr_table_data <- function(projectId = bqr_get_global_project(), 195 | datasetId = bqr_get_global_dataset(), 196 | tableId, 197 | maxResults = 1000){ 198 | check_bq_auth() 199 | l <- googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 200 | "GET", 201 | path_args = list(projects = projectId, 202 | datasets = datasetId, 203 | tables = tableId, 204 | data = ""), 205 | pars_args = list(maxResults = maxResults), 206 | data_parse_function = function(x) x) 207 | 208 | l(path_arguments = list(projects = projectId, 209 | datasets = datasetId, 210 | tables = tableId), 211 | pars_arguments = list(maxResults = maxResults)) 212 | 213 | } 214 | 215 | 216 | #' Create a Table 217 | #' 218 | #' @param projectId The BigQuery project ID. 219 | #' @param datasetId A datasetId within projectId. 220 | #' @param tableId Name of table you want. 221 | #' @param template_data A dataframe with the correct types of data. If \code{NULL} an empty table is made. 222 | #' @param timePartitioning Whether to create a partioned table 223 | #' @param expirationMs If a partioned table, whether to have an expiration time on the data. The default \code{0} is no expiration. 224 | #' 225 | #' @return TRUE if created, FALSE if not. 226 | #' 227 | #' @details 228 | #' 229 | #' Creates a BigQuery table. 230 | #' 231 | #' If setting \code{timePartioning} to \code{TRUE} then the table will be a 232 | #' \href{https://cloud.google.com/bigquery/docs/creating-partitioned-tables}{partioned table} 233 | #' 234 | #' If you want more advanced features for the table, create it then call \link{bqr_patch_table} with advanced configuration configured from \link{Table} 235 | #' 236 | #' @family Table meta functions 237 | #' @export 238 | bqr_create_table <- function(projectId = bqr_get_global_project(), 239 | datasetId = bqr_get_global_dataset(), 240 | tableId, 241 | template_data = NULL, 242 | timePartitioning = FALSE, 243 | expirationMs = 0L){ 244 | check_bq_auth() 245 | l <- googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 246 | "POST", 247 | path_args = list(projects = projectId, 248 | datasets = datasetId, 249 | tables = "") 250 | ) 251 | expirationMs <- as.integer(expirationMs) 252 | timeP <- NULL 253 | if(timePartitioning){ 254 | if(expirationMs == 0) expirationMs <- NULL 255 | timeP <- list(type = "DAY", expirationMs = expirationMs) 256 | } 257 | 258 | if(!is.null(template_data)){ 259 | schema <- list( 260 | fields = schema_fields(template_data) 261 | ) 262 | } else { 263 | schema <- NULL 264 | } 265 | 266 | config <- list( 267 | schema = schema, 268 | tableReference = list( 269 | projectId = projectId, 270 | datasetId = datasetId, 271 | tableId = tableId 272 | ), 273 | timePartitioning = timeP 274 | ) 275 | 276 | config <- rmNullObs(config) 277 | 278 | req <- try(l(path_arguments = list(projects = projectId, 279 | datasets = datasetId), 280 | the_body = config), silent = TRUE) 281 | 282 | if(is.error(req)){ 283 | if(grepl("Already Exists", error.message(req))){ 284 | message("Table exists: ", tableId, "Returning FALSE") 285 | out <- FALSE 286 | } else { 287 | stop(error.message(req)) 288 | } 289 | } else { 290 | message("Table created: ", tableId) 291 | out <- TRUE 292 | } 293 | 294 | out 295 | 296 | } 297 | 298 | #' Update a Table 299 | #' 300 | #' @param Table A Table object as created by \link{Table} 301 | #' 302 | #' @description 303 | #' This uses PATCH semantics to alter an existing table. 
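#' For example, a minimal sketch (the project, dataset, table and description
#' values are placeholders):
#' \preformatted{
#' tbl <- Table("my_table", projectId = "my-project", datasetId = "my_dataset",
#'              description = "Nightly sessions export")
#' bqr_patch_table(tbl)
#' }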
304 | #' You need to create the Table object first to pass in using \link{Table} 305 | #'which will be transformed to JSON 306 | #' 307 | #' @export 308 | #' @import assertthat 309 | #' @importFrom googleAuthR gar_api_generator 310 | #' @seealso \href{https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource}{Definition of tables} 311 | #' @family Table meta functions 312 | bqr_patch_table <- function(Table){ 313 | assert_that( 314 | is.table(Table) 315 | ) 316 | 317 | projectId <- Table$tableReference$projectId 318 | datasetId <- Table$tableReference$datasetId 319 | tableId <- Table$tableReference$tableId 320 | 321 | myMessage("Patching ", tableId, level = 3) 322 | 323 | the_url <- sprintf("https://bigquery.googleapis.com/bigquery/v2/projects/%s/datasets/%s/tables/%s", 324 | projectId, datasetId, tableId) 325 | 326 | call_api <- gar_api_generator(the_url, "PATCH", data_parse_function = function(x) x) 327 | 328 | res <- call_api(the_body = Table) 329 | 330 | as.table(res) 331 | 332 | } 333 | 334 | #' Delete a Table 335 | #' 336 | #' @param projectId The BigQuery project ID. 337 | #' @param datasetId A datasetId within projectId. 338 | #' @param tableId Name of table you want to delete. 339 | #' 340 | #' @return TRUE if deleted, FALSE if not. 341 | #' 342 | #' @details 343 | #' 344 | #' Deletes a BigQuery table 345 | #' 346 | #' @family Table meta functions 347 | #' @export 348 | bqr_delete_table <- function(projectId = bqr_get_global_project(), 349 | datasetId = bqr_get_global_dataset(), 350 | tableId){ 351 | check_bq_auth() 352 | l <- googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 353 | "DELETE", 354 | path_args = list(projects = projectId, 355 | datasets = datasetId, 356 | tables = tableId) 357 | ) 358 | 359 | req <- try(suppressWarnings(l(path_arguments = list(projects = projectId, 360 | datasets = datasetId, 361 | tables = tableId))), silent = TRUE) 362 | if(is.error(req)){ 363 | if(grepl("Not found", error.message(req))){ 364 | myMessage(error.message(req), level = 3) 365 | out <- FALSE 366 | } else { 367 | stop(error.message(req)) 368 | } 369 | } else { 370 | out <- TRUE 371 | } 372 | 373 | out 374 | 375 | } 376 | 377 | 378 | #' Table Object 379 | #' 380 | #' Configure table objects as documented by 381 | #' the \href{https://cloud.google.com/bigquery/docs/reference/rest/v2/tables}{Google docs for Table objects} 382 | #' 383 | #' @param tableId tableId 384 | #' @param projectId projectId 385 | #' @param datasetId datasetId 386 | #' @param clustering [Beta] Clustering specification for the table 387 | #' @param description [Optional] A user-friendly description of this table 388 | #' @param encryptionConfiguration Custom encryption configuration (e 389 | #' @param expirationTime [Optional] The time when this table expires, in milliseconds since the epoch 390 | #' @param friendlyName [Optional] A descriptive name for this table 391 | #' @param labels The labels associated with this table - a named list of key = value 392 | #' @param materializedView [Optional] Materialized view definition 393 | #' @param rangePartitioning [TrustedTester] Range partitioning specification for this table 394 | #' @param requirePartitionFilter [Beta] [Optional] If set to true, queries over this table require a partition filter that can be used for partition elimination to be specified 395 | #' @param schema [Optional] Describes the schema of this table 396 | #' @param timePartitioning Time-based partitioning specification for this table 397 | #' @param view 
[Optional] The view definition 398 | #' 399 | #' @return Table object 400 | #' 401 | #' @details 402 | #' 403 | #' A table object to be used within \link{bqr_patch_table} 404 | #' 405 | #' @family Table meta functions 406 | #' @export 407 | #' @import assertthat 408 | Table <- function(tableId, 409 | projectId = bqr_get_global_project(), 410 | datasetId = bqr_get_global_dataset(), 411 | clustering = NULL, 412 | description = NULL, 413 | encryptionConfiguration = NULL, 414 | expirationTime = NULL, 415 | friendlyName = NULL, 416 | labels = NULL, 417 | materializedView = NULL, 418 | rangePartitioning = NULL, 419 | requirePartitionFilter = NULL, 420 | schema = NULL, 421 | timePartitioning = NULL, 422 | view = NULL) { 423 | assert_that( 424 | is.string(projectId), 425 | is.string(datasetId), 426 | is.string(tableId) 427 | # is.string(friendlyName), 428 | # is.string(description), 429 | # is.list(labels), 430 | # is.list(timePartitioning) 431 | # is.flag(requirePartitionFilter) 432 | ) 433 | 434 | tt <- list( 435 | tableReference = list(projectId = projectId, 436 | datasetId = datasetId, 437 | tableId = tableId), 438 | clustering = clustering, 439 | description = description, 440 | encryptionConfiguration = encryptionConfiguration, 441 | expirationTime = expirationTime, 442 | friendlyName = friendlyName, 443 | labels = labels, 444 | materializedView = materializedView, 445 | rangePartitioning = rangePartitioning, 446 | requirePartitionFilter = NULL, 447 | schema = schema, 448 | timePartitioning = timePartitioning, 449 | view = view) 450 | 451 | tt <- rmNullObs(tt) 452 | 453 | structure(tt, class = "gar_Table") 454 | 455 | } 456 | 457 | is.table <- function(x){ 458 | inherits(x, "gar_Table") 459 | } 460 | 461 | as.table <- function(x){ 462 | structure(x, class = "gar_Table") 463 | } -------------------------------------------------------------------------------- /R/uploadData.R: -------------------------------------------------------------------------------- 1 | #' Upload data to BigQuery 2 | #' 3 | #' @param projectId The BigQuery project ID. 4 | #' @param datasetId A datasetId within projectId. 5 | #' @param tableId ID of table where data will end up. 6 | #' @param upload_data The data to upload, a data.frame object or a Google Cloud Storage URI 7 | #' @param create Whether to create a new table if necessary, or error if it already exists. 8 | #' @param writeDisposition How to add the data to a table. 9 | #' @param schema If \code{upload_data} is a Google Cloud Storage URI, supply the data schema. For \code{CSV} a helper function is available by using \link{schema_fields} on a data sample 10 | #' @param sourceFormat If \code{upload_data} is a Google Cloud Storage URI, supply the data format. Default is \code{CSV} 11 | #' @param wait If uploading a data.frame, whether to wait for it to upload before returning 12 | #' @param autodetect Experimental feature that auto-detects schema for CSV and JSON files 13 | #' @param nullMarker Specifies a string that represents a null value in a CSV file. 14 | #' For example, if you specify \code{\\N}, BigQuery interprets \code{\\N} as a null value when loading a CSV file. The default value is the empty string. 15 | #' @param maxBadRecords The maximum number of bad records that BigQuery can ignore when running the job 16 | #' @param allowJaggedRows Whether to allow rows with variable length columns 17 | #' @param allowQuotedNewlines Whether to allow datasets with quoted new lines 18 | #' @param fieldDelimiter The separator for fields in a CSV file. 
Default is comma - \code{,} 19 | #' 20 | #' @return TRUE if successful, FALSE if not. 21 | #' 22 | #' @seealso url{https://cloud.google.com/bigquery/loading-data-post-request} 23 | #' 24 | #' @details 25 | #' 26 | #' A temporary csv file is created when uploading from a local data.frame 27 | #' 28 | #' For larger file sizes up to 5TB, upload to Google Cloud Storage first via \link[googleCloudStorageR]{gcs_upload} then supply the object URI of the form \code{gs://project-name/object-name} to the \code{upload_data} argument. 29 | #' 30 | #' You also need to supply a data schema. Remember that the file should not have a header row. 31 | #' 32 | #' @examples 33 | #' 34 | #' \dontrun{ 35 | #' 36 | #' library(googleCloudStorageR) 37 | #' library(bigQueryR) 38 | #' 39 | #' gcs_global_bucket("your-project") 40 | #' 41 | #' ## custom upload function to ignore quotes and column headers 42 | #' f <- function(input, output) { 43 | #' write.table(input, sep = ",", col.names = FALSE, row.names = FALSE, 44 | #' quote = FALSE, file = output, qmethod = "double")} 45 | #' 46 | #' ## upload files to Google Cloud Storage 47 | #' gcs_upload(mtcars, name = "mtcars_test1.csv", object_function = f) 48 | #' gcs_upload(mtcars, name = "mtcars_test2.csv", object_function = f) 49 | #' 50 | #' ## create the schema of the files you just uploaded 51 | #' user_schema <- schema_fields(mtcars) 52 | #' 53 | #' ## load files from Google Cloud Storage into BigQuery 54 | #' bqr_upload_data(projectId = "your-project", 55 | #' datasetId = "test", 56 | #' tableId = "from_gcs_mtcars", 57 | #' upload_data = c("gs://your-project/mtcars_test1.csv", 58 | #' "gs://your-project/mtcars_test2.csv"), 59 | #' schema = user_schema) 60 | #' 61 | #' ## for big files, its helpful to create your schema on a small sample 62 | #' ## a quick way to do this on the command line is: 63 | #' # "head bigfile.csv > head_bigfile.csv" 64 | #' 65 | #' ## upload nested lists as JSON 66 | #' the_list <- list(list(col1 = "yes", col2 = "no", 67 | #' col3 = list(nest1 = 1, nest2 = 3), col4 = "oh"), 68 | #' list(col1 = "yes2", 69 | #' col2 = "n2o", col3 = list(nest1 = 5, nest2 = 7), 70 | #' col4 = "oh2"), 71 | #' list(col1 = "yes3", col2 = "no3", 72 | #' col3 = list(nest1 = 7, nest2 = 55), col4 = "oh3")) 73 | #' 74 | #' bqr_upload_data(datasetId = "test", 75 | #' tableId = "nested_list_json", 76 | #' upload_data = the_list, 77 | #' autodetect = TRUE) 78 | #' 79 | #' } 80 | #' 81 | #' @family bigQuery upload functions 82 | #' @export 83 | #' @import assertthat 84 | bqr_upload_data <- function(projectId = bqr_get_global_project(), 85 | datasetId = bqr_get_global_dataset(), 86 | tableId, 87 | upload_data, 88 | create = c("CREATE_IF_NEEDED", "CREATE_NEVER"), 89 | writeDisposition = c("WRITE_TRUNCATE","WRITE_APPEND","WRITE_EMPTY"), 90 | schema = NULL, 91 | sourceFormat = c("CSV", "DATASTORE_BACKUP", 92 | "NEWLINE_DELIMITED_JSON","AVRO"), 93 | wait = TRUE, 94 | autodetect = FALSE, 95 | nullMarker = NULL, 96 | maxBadRecords = NULL, 97 | allowJaggedRows = FALSE, 98 | allowQuotedNewlines = FALSE, 99 | fieldDelimiter = NULL){ 100 | 101 | 102 | assert_that(is.string(projectId), 103 | is.string(datasetId), 104 | is.string(tableId), 105 | is.flag(wait), 106 | is.flag(allowJaggedRows), 107 | is.flag(allowQuotedNewlines), 108 | is.flag(autodetect)) 109 | sourceFormat <- match.arg(sourceFormat) 110 | create <- match.arg(create) 111 | writeDisposition <- match.arg(writeDisposition) 112 | 113 | check_bq_auth() 114 | 115 | 116 | bqr_do_upload(upload_data = upload_data, 117 | 
projectId = projectId, 118 | datasetId = datasetId, 119 | tableId = tableId, 120 | create = create, 121 | writeDisposition = writeDisposition, 122 | schema = schema, 123 | sourceFormat = sourceFormat, 124 | wait = wait, 125 | autodetect = autodetect, 126 | nullMarker = nullMarker, 127 | maxBadRecords = maxBadRecords, 128 | allowJaggedRows = allowJaggedRows, 129 | allowQuotedNewlines = allowQuotedNewlines, 130 | fieldDelimiter = fieldDelimiter) 131 | 132 | 133 | } 134 | 135 | #S3 generic dispatch 136 | bqr_do_upload <- function(upload_data, 137 | projectId, 138 | datasetId, 139 | tableId, 140 | create, 141 | writeDisposition, 142 | schema, 143 | sourceFormat, 144 | wait, 145 | autodetect, 146 | nullMarker, 147 | maxBadRecords, 148 | allowJaggedRows, 149 | allowQuotedNewlines, 150 | fieldDelimiter){ 151 | 152 | UseMethod("bqr_do_upload", upload_data) 153 | 154 | } 155 | 156 | 157 | bqr_do_upload.list <- function(upload_data, 158 | projectId, 159 | datasetId, 160 | tableId, 161 | create, 162 | writeDisposition, 163 | schema, 164 | sourceFormat, # not used 165 | wait, 166 | autodetect, 167 | nullMarker, 168 | maxBadRecords, 169 | allowJaggedRows, 170 | allowQuotedNewlines, 171 | fieldDelimiter){ 172 | 173 | myMessage("Uploading local list as JSON", level = 3) 174 | 175 | # how to create schema for json? 176 | # if(!is.null(user_schema)){ 177 | # schema <- user_schema 178 | # } else { 179 | # schema <- schema_fields(upload_data) 180 | # } 181 | 182 | if(autodetect){ 183 | the_schema <- NULL 184 | } else { 185 | the_schema <- list( 186 | fields = schema 187 | ) 188 | } 189 | 190 | config <- list( 191 | configuration = list( 192 | load = list( 193 | nullMarker = nullMarker, 194 | maxBadRecords = maxBadRecords, 195 | sourceFormat = "NEWLINE_DELIMITED_JSON", 196 | createDisposition = jsonlite::unbox(create), 197 | writeDisposition = jsonlite::unbox(writeDisposition), 198 | schema = the_schema, 199 | destinationTable = list( 200 | projectId = projectId, 201 | datasetId = datasetId, 202 | tableId = tableId 203 | ), 204 | autodetect = autodetect, 205 | allowJaggedRows = allowJaggedRows, 206 | allowQuotedNewlines = allowQuotedNewlines 207 | ) 208 | ) 209 | ) 210 | 211 | config <- rmNullObs(config) 212 | 213 | the_json <- paste(lapply(upload_data, jsonlite::toJSON), 214 | sep = "\n", collapse = "\n") 215 | 216 | mp_body <- make_body(config, obj = the_json) 217 | 218 | req <- do_obj_req(mp_body, projectId = projectId, datasetId = datasetId, tableId = tableId) 219 | 220 | out <- check_req(req, wait = wait) 221 | 222 | out 223 | 224 | 225 | } 226 | 227 | # upload for local data.fram 228 | bqr_do_upload.data.frame <- function(upload_data, 229 | projectId, 230 | datasetId, 231 | tableId, 232 | create, 233 | writeDisposition, 234 | schema, 235 | sourceFormat, # not used 236 | wait, 237 | autodetect, 238 | nullMarker, 239 | maxBadRecords, 240 | allowJaggedRows, 241 | allowQuotedNewlines, 242 | fieldDelimiter){ 243 | 244 | myMessage("Uploading local data.frame", level = 3) 245 | 246 | if(is.null(fieldDelimiter)){ 247 | # default to "," 248 | fieldDelimiter <- "," 249 | } 250 | 251 | if(!is.null(schema)){ 252 | the_schema <- list( 253 | fields = schema 254 | ) 255 | } else { 256 | the_schema <- list( 257 | fields = schema_fields(upload_data) 258 | ) 259 | } 260 | 261 | if(autodetect){ 262 | warning("autodetect=TRUE is set to FALSE for data.frame uploads 263 | as the schema is inferred from the data.frame's column class") 264 | } 265 | 266 | config <- list( 267 | configuration = list( 268 | load = list( 269 | 
fieldDelimiter = fieldDelimiter, 270 | nullMarker = nullMarker, 271 | maxBadRecords = maxBadRecords, 272 | sourceFormat = "CSV", 273 | createDisposition = jsonlite::unbox(create), 274 | writeDisposition = jsonlite::unbox(writeDisposition), 275 | schema = the_schema, 276 | destinationTable = list( 277 | projectId = projectId, 278 | datasetId = datasetId, 279 | tableId = tableId 280 | ), 281 | autodetect = FALSE, 282 | allowJaggedRows = allowJaggedRows, 283 | allowQuotedNewlines = allowQuotedNewlines 284 | ) 285 | ) 286 | ) 287 | 288 | config <- rmNullObs(config) 289 | 290 | csv <- standard_csv(upload_data) 291 | 292 | mp_body <- make_body(config, obj = csv) 293 | 294 | req <- do_obj_req(mp_body, projectId = projectId, datasetId = datasetId, tableId = tableId) 295 | 296 | out <- check_req(req, wait = wait) 297 | 298 | out 299 | } 300 | 301 | do_obj_req <- function(mp_body, projectId, datasetId, tableId) { 302 | l <- 303 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/upload/bigquery/v2", 304 | "POST", 305 | path_args = list(projects = projectId, 306 | jobs = ""), 307 | pars_args = list(uploadType="multipart"), 308 | customConfig = list( 309 | httr::add_headers("Content-Type" = "multipart/related; boundary=bqr_upload"), 310 | httr::add_headers("Content-Length" = nchar(mp_body, type = "bytes")) 311 | ) 312 | ) 313 | 314 | l(path_arguments = list(projects = projectId, 315 | datasets = datasetId, 316 | tableId = tableId), 317 | the_body = mp_body) 318 | } 319 | 320 | make_body <- function(config, obj) { 321 | boundary <- "--bqr_upload" 322 | line_break <- "\r\n" 323 | mp_body_schema <- paste(boundary, 324 | "Content-Type: application/json; charset=UTF-8", 325 | line_break, 326 | jsonlite::toJSON(config, pretty=TRUE, auto_unbox = TRUE), 327 | line_break, 328 | sep = "\r\n") 329 | 330 | ## its very fussy about whitespace 331 | ## must match exactly https://cloud.google.com/bigquery/loading-data-post-request 332 | mp_body_data <- paste0(boundary, 333 | line_break, 334 | "Content-Type: application/octet-stream", 335 | line_break, 336 | line_break, 337 | obj) 338 | 339 | paste(mp_body_schema, mp_body_data, 340 | paste0(boundary, "--"), sep = "\r\n") 341 | } 342 | 343 | 344 | check_req <- function(req, wait) { 345 | if(!is.null(req$content$status$errorResult)){ 346 | stop("Error in upload job: ", req$status$errors$message) 347 | } else { 348 | myMessage("Upload job made...", level = 3) 349 | } 350 | 351 | if(req$status_code == 200){ 352 | 353 | if(req$content$kind == "bigquery#job"){ 354 | if(wait){ 355 | out <- bqr_wait_for_job(as.job(req$content)) 356 | } else { 357 | myMessage("Returning: BigQuery load of local uploaded Job object: ", 358 | req$content$jobReference$jobId, level = 3) 359 | 360 | out <- bqr_get_job(req$content$jobReference$jobId, 361 | projectId = req$content$jobReference$projectId, 362 | location = req$content$jobReference$location) 363 | } 364 | 365 | } else { 366 | stop("Upload table didn't return bqr_job object when it should have.") 367 | } 368 | 369 | } else { 370 | myMessage("Error in upload, returning FALSE", level = 3) 371 | out <- FALSE 372 | } 373 | 374 | out 375 | } 376 | 377 | # upload for gs:// character vector 378 | bqr_do_upload.character <- function(upload_data, 379 | projectId, 380 | datasetId, 381 | tableId, 382 | create, 383 | writeDisposition, 384 | schema, 385 | sourceFormat, 386 | wait, # not used 387 | autodetect, 388 | nullMarker, 389 | maxBadRecords, 390 | allowJaggedRows, 391 | allowQuotedNewlines, 392 | fieldDelimiter){ 393 | 394 | 
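# Loads one or more gs:// URIs into a BigQuery table. An illustrative call
# (bucket, object, table and schema names below are placeholders):
#   bqr_upload_data(projectId = "my-project", datasetId = "my_dataset",
#                   tableId = "my_table",
#                   upload_data = "gs://my-bucket/my_file.csv",
#                   schema = schema_fields(sample_of_data))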
myMessage("Uploading from Google Cloud Storage URI", level = 3) 395 | 396 | assert_that( 397 | all(startsWith(upload_data, "gs://")) 398 | ) 399 | 400 | if(length(upload_data) > 1){ 401 | source_uri <- upload_data 402 | } else { 403 | source_uri <- list(upload_data) 404 | } 405 | 406 | if(is.null(schema) && !autodetect){ 407 | stop("Must supply a data schema or use autodetect if loading from Google Cloud Storage", 408 | call. = FALSE) 409 | } 410 | 411 | if(!autodetect){ 412 | the_schema = list( 413 | fields = schema 414 | ) 415 | } else { 416 | the_schema <- NULL 417 | } 418 | 419 | config <- list( 420 | configuration = list( 421 | load = list( 422 | fieldDelimiter = fieldDelimiter, 423 | nullMarker = nullMarker, 424 | maxBadRecords = maxBadRecords, 425 | sourceFormat = sourceFormat, 426 | createDisposition = jsonlite::unbox(create), 427 | writeDisposition = jsonlite::unbox(writeDisposition), 428 | sourceUris = source_uri, 429 | schema = the_schema, 430 | destinationTable = list( 431 | projectId = projectId, 432 | datasetId = datasetId, 433 | tableId = tableId 434 | ), 435 | autodetect = autodetect, 436 | allowJaggedRows = allowJaggedRows, 437 | allowQuotedNewlines = allowQuotedNewlines 438 | ) 439 | ) 440 | ) 441 | 442 | config <- rmNullObs(config) 443 | 444 | l <- 445 | googleAuthR::gar_api_generator("https://bigquery.googleapis.com/bigquery/v2", 446 | "POST", 447 | path_args = list(projects = projectId, 448 | jobs = ""), 449 | data_parse_function = function(x) x 450 | ) 451 | 452 | req <- l(path_arguments = list(projects = projectId, 453 | datasets = datasetId, 454 | tableId = tableId), 455 | the_body = config) 456 | 457 | job <- as.job(req) 458 | 459 | myMessage("Returning: BigQuery load from Google Cloud Storage Job object: ", 460 | job$jobId, level = 3) 461 | 462 | job 463 | 464 | } 465 | 466 | #' Create data schema for upload to BigQuery 467 | #' 468 | #' Use this on a sample of the data you want to load from Google Cloud Storage 469 | #' 470 | #' @param data An example of the data to create a schema from 471 | #' 472 | #' @return A schema object suitable to pass within the \code{schema} argument of \link{bqr_upload_data} 473 | #' 474 | #' @details 475 | #' 476 | #' This is taken from \link[bigrquery]{insert_upload_job} 477 | #' @author Hadley Wickham \email{hadley@@rstudio.com} 478 | #' 479 | #' @export 480 | schema_fields <- function(data) { 481 | types <- vapply(data, data_type, character(1)) 482 | unname(Map(function(type, name) list(name = name, type = type), types, names(data))) 483 | } 484 | 485 | ## From bigrquery 486 | data_type <- function(x) { 487 | switch(class(x)[1], 488 | character = "STRING", 489 | logical = "BOOLEAN", 490 | numeric = "FLOAT", 491 | integer = "INTEGER", 492 | factor = "STRING", 493 | Date = "DATE", 494 | POSIXct = "TIMESTAMP", 495 | hms = "INTEGER", 496 | difftime = "INTEGER", 497 | stop("Unknown class ", paste0(class(x), collapse = "/")) 498 | ) 499 | } 500 | 501 | ## From bigrquery 502 | ## CSV load options https://cloud.google.com/bigquery/loading-data-into-bigquery#csvformat 503 | standard_csv <- function(values) { 504 | # Convert factors to strings 505 | is_factor <- vapply(values, is.factor, logical(1)) 506 | values[is_factor] <- lapply(values[is_factor], as.character) 507 | 508 | # Encode special characters in strings 509 | is_char <- vapply(values, is.character, logical(1)) 510 | values[is_char] <- lapply(values[is_char], encodeString, na.encode = FALSE) 511 | 512 | # Encode dates and times 513 | is_time <- vapply(values, function(x) 
inherits(x, "POSIXct"), logical(1)) 514 | values[is_time] <- lapply(values[is_time], as.numeric) 515 | 516 | # is_date <- vapply(values, function(x) inherits(x, "Date"), logical(1)) 517 | # values[is_date] <- lapply(values[is_date], function(x) as.numeric(as.POSIXct(x))) 518 | 519 | tmp <- tempfile(fileext = ".csv") 520 | on.exit(unlink(tmp)) 521 | 522 | conn <- file(tmp, open = "wb") 523 | utils::write.table(values, conn, sep = ",", na = "", qmethod = "double", 524 | row.names = FALSE, col.names = FALSE, eol = "\12") 525 | close(conn) 526 | 527 | # Don't read trailing nl 528 | readChar(tmp, file.info(tmp)$size - 1, useBytes = TRUE) 529 | } 530 | -------------------------------------------------------------------------------- /R/utilities.R: -------------------------------------------------------------------------------- 1 | #' Get attribute nextPageLink 2 | #' @noRd 3 | get_attr_nextpagetoken <- function(x){ 4 | attr(x, "nextPageToken") 5 | } 6 | 7 | 8 | #' if argument is NULL, no line output 9 | #' 10 | #' @keywords internal 11 | #' @noRd 12 | cat0 <- function(prefix = "", x){ 13 | if(!is.null(x)){ 14 | cat(prefix, x, "\n") 15 | } 16 | } 17 | 18 | #' Javascript time to R time 19 | #' 20 | #' @keywords internal 21 | #' @noRd 22 | js_to_posix <- function(x){ 23 | as.POSIXct(as.numeric(x) / 1000, origin = "1970-01-01") 24 | } 25 | 26 | #' taken from utils:::format.object_size 27 | #' 28 | #' @keywords internal 29 | #' @noRd 30 | format_object_size <- function (x, units = "b", ...) 31 | { 32 | units <- match.arg(units, c("b", "auto", "Kb", "Mb", "Gb", 33 | "Tb", "Pb", "B", "KB", "MB", "GB", "TB", "PB", "KiB", 34 | "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")) 35 | if (units == "auto") 36 | units <- if (x >= 1024^4) 37 | "Tb" 38 | else if (x >= 1024^3) 39 | "Gb" 40 | else if (x >= 1024^2) 41 | "Mb" 42 | else if (x >= 1024) 43 | "Kb" 44 | else "b" 45 | switch(units, b = , B = paste(x, "bytes"), Kb = , KB = paste(round(x/1024, 46 | 1L), "Kb"), Mb = , MB = paste(round(x/1024^2, 1L), "Mb"), 47 | Gb = , GB = paste(round(x/1024^3, 1L), "Gb"), Tb = , 48 | TB = paste(round(x/1024^4, 1L), "Tb"), Pb = , PB = paste(round(x/1024^5, 49 | 1L), "Pb"), KiB = paste(round(x/1024, 1L), "KiB"), 50 | MiB = paste(round(x/1024^2, 1L), "MiB"), GiB = paste(round(x/1024^3, 51 | 1L), "GiB"), TiB = paste(round(x/1024^4, 1L), "TiB"), 52 | PiB = paste(round(x/1024^5, 1L), "PiB"), EiB = paste(round(x/1024^6, 53 | 1L), "EiB"), ZiB = paste(round(x/1024^7, 1L), "ZiB"), 54 | YiB = paste(round(x/1024^8, 1L), "YiB")) 55 | } 56 | 57 | #' Timestamp to R date 58 | #' @keywords internal 59 | #' @noRd 60 | timestamp_to_r <- function(t){ 61 | as.POSIXct(t, format = "%Y-%m-%dT%H:%M:%S") 62 | } 63 | 64 | 65 | #' A helper function that tests whether an object is either NULL _or_ 66 | #' a list of NULLs 67 | #' 68 | #' @keywords internal 69 | #' @noRd 70 | is.NullOb <- function(x) is.null(x) | all(sapply(x, is.null)) 71 | 72 | #' Recursively step down into list, removing all such objects 73 | #' 74 | #' @keywords internal 75 | #' @noRd 76 | rmNullObs <- function(x) { 77 | x <- Filter(Negate(is.NullOb), x) 78 | lapply(x, function(x) if (is.list(x)) rmNullObs(x) else x) 79 | } 80 | 81 | #' Is this a try error? 
82 | #' 83 | #' Utility to test errors 84 | #' 85 | #' @param test_me an object created with try() 86 | #' 87 | #' @return Boolean 88 | #' 89 | #' @keywords internal 90 | #' @noRd 91 | is.error <- function(test_me){ 92 | inherits(test_me, "try-error") 93 | } 94 | 95 | #' Get the error message 96 | #' 97 | #' @param test_me an object that has failed is.error 98 | #' 99 | #' @return The error message 100 | #' 101 | #' @keywords internal 102 | #' @noRd 103 | error.message <- function(test_me){ 104 | if(is.error(test_me)) attr(test_me, "condition")$message 105 | } 106 | 107 | #' Customer message log level 108 | #' 109 | #' @param ... The message(s) 110 | #' @param level The severity 111 | #' 112 | #' @details 0 = everything, 1 = debug, 2=normal, 3=important 113 | #' @keywords internal 114 | #' @noRd 115 | myMessage <- function(..., level = 1){ 116 | 117 | 118 | compare_level <- getOption("googleAuthR.verbose") 119 | if(is.null(compare_level)) compare_level <- 1 120 | 121 | if(level >= compare_level){ 122 | message(Sys.time()," -- ", ...) 123 | } 124 | 125 | } 126 | 127 | 128 | 129 | #' Idempotency 130 | #' 131 | #' A random code to ensure no repeats 132 | #' 133 | #' @return A random 15 digit hash 134 | #' @keywords internal 135 | #' @noRd 136 | idempotency <- function(){ 137 | set.seed(Sys.time()) 138 | paste(sample(c(LETTERS, letters, 0:9), 15, TRUE),collapse="") 139 | } 140 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bigQueryR 2 | 3 | ## Introduction 4 | 5 | This is a package for interacting with [BigQuery](https://cloud.google.com/bigquery/) from within R. 6 | 7 | See the [bigQueryR website](http://code.markedmondson.me/bigQueryR) for examples, details and tutorials. 8 | 9 | ## Installation ## 10 | 11 | [![CRAN](http://www.r-pkg.org/badges/version/bigQueryR)](http://cran.r-project.org/package=bigQueryR) 12 | [![Build Status](https://travis-ci.org/cloudyr/bigQueryR.png?branch=master)](https://travis-ci.org/cloudyr/bigQueryR) 13 | [![codecov.io](http://codecov.io/github/cloudyr/bigQueryR/coverage.svg?branch=master)](http://codecov.io/github/cloudyr/bigQueryR?branch=master) 14 | 15 | This package is on CRAN, but to install the latest development version you can install from the cloudyr drat repository: 16 | 17 | ```R 18 | # latest stable version 19 | install.packages("bigQueryR", repos = c(getOption("repos"), "http://cloudyr.github.io/drat")) 20 | ``` 21 | 22 | Or, to pull a potentially unstable version directly from GitHub: 23 | 24 | ```R 25 | if(!require("ghit")){ 26 | install.packages("ghit") 27 | } 28 | ghit::install_github("cloudyr/bigQueryR") 29 | ``` 30 | 31 | 32 | --- 33 | [![cloudyr project logo](http://i.imgur.com/JHS98Y7.png)](https://github.com/cloudyr) 34 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local OS X install, R 3.6.1 3 | * ubuntu 12.04 (on travis-ci), R 3.6.1 4 | * Windows (on win-builder) R 3.6.1 5 | 6 | ## R CMD check results 7 | 8 | 0 errors | 0 warnings | 1 notes 9 | 10 | * Possibly mis-spelled words in DESCRIPTION: 11 | BigQuery (2:30) 12 | 13 | This is spelt correctly 14 | 15 | ## Reverse dependencies 16 | 17 | googleAnalyticsR is a dependency, that when checked had 0 errors. 
18 | 19 | --- 20 | -------------------------------------------------------------------------------- /drat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset 3 | addToDrat(){ 4 | mkdir drat; cd drat 5 | 6 | ## Set up Repo parameters 7 | git init 8 | git config user.name "leeper" 9 | git config user.email "thosjleeper@gmail.com" 10 | git config --global push.default simple 11 | 12 | ## Get drat repo 13 | git remote add upstream "https://$GH_TOKEN@github.com/cloudyr/cloudyr.github.io.git" 14 | git fetch upstream 15 | git checkout master 16 | 17 | Rscript -e "drat::insertPackage('../$PKG_TARBALL', repodir = './drat')" 18 | git add --all 19 | git commit -m "add $PKG_TARBALL (build $TRAVIS_BUILD_ID)" 20 | git push 21 | 22 | } 23 | addToDrat 24 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite package 'bigQueryR' in publications use:") 2 | 3 | year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) 4 | vers <- paste("R package version", meta$Version) 5 | 6 | citEntry(entry="Manual", 7 | title = "bigQueryR: Interface with Google BigQuery with Shiny Compatibility", 8 | author = personList(as.person("Mark Edmondson")), 9 | year = year, 10 | note = vers, 11 | textVersion = 12 | paste("Mark Edmondson (", 13 | year, 14 | "). bigQueryR: Interface with Google BigQuery with Shiny Compatibility. ", 15 | vers, ".", sep="")) 16 | -------------------------------------------------------------------------------- /inst/client.json: -------------------------------------------------------------------------------- 1 | {"installed":{"client_id":"68483650948-28g1na33slr3bt8rk7ikeog5ur19ldq6.apps.googleusercontent.com","project_id":"big-query-r","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://accounts.google.com/o/oauth2/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"f0npd8zUhmqf8IqrIypBs6Cy","redirect_uris":["urn:ietf:wg:oauth:2.0:oob","http://localhost"]}} -------------------------------------------------------------------------------- /man/Table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{Table} 4 | \alias{Table} 5 | \title{Table Object} 6 | \usage{ 7 | Table( 8 | tableId, 9 | projectId = bqr_get_global_project(), 10 | datasetId = bqr_get_global_dataset(), 11 | clustering = NULL, 12 | description = NULL, 13 | encryptionConfiguration = NULL, 14 | expirationTime = NULL, 15 | friendlyName = NULL, 16 | labels = NULL, 17 | materializedView = NULL, 18 | rangePartitioning = NULL, 19 | requirePartitionFilter = NULL, 20 | schema = NULL, 21 | timePartitioning = NULL, 22 | view = NULL 23 | ) 24 | } 25 | \arguments{ 26 | \item{tableId}{tableId} 27 | 28 | \item{projectId}{projectId} 29 | 30 | \item{datasetId}{datasetId} 31 | 32 | \item{clustering}{[Beta] Clustering specification for the table} 33 | 34 | \item{description}{[Optional] A user-friendly description of this table} 35 | 36 | \item{encryptionConfiguration}{Custom encryption configuration (e} 37 | 38 | \item{expirationTime}{[Optional] The time when this table expires, in milliseconds since the epoch} 39 | 40 | \item{friendlyName}{[Optional] A descriptive name for this table} 41 | 42 | \item{labels}{The 
labels associated with this table - a named list of key = value} 43 | 44 | \item{materializedView}{[Optional] Materialized view definition} 45 | 46 | \item{rangePartitioning}{[TrustedTester] Range partitioning specification for this table} 47 | 48 | \item{requirePartitionFilter}{[Beta] [Optional] If set to true, queries over this table require a partition filter that can be used for partition elimination to be specified} 49 | 50 | \item{schema}{[Optional] Describes the schema of this table} 51 | 52 | \item{timePartitioning}{Time-based partitioning specification for this table} 53 | 54 | \item{view}{[Optional] The view definition} 55 | } 56 | \value{ 57 | Table object 58 | } 59 | \description{ 60 | Configure table objects as documented by 61 | the \href{https://cloud.google.com/bigquery/docs/reference/rest/v2/tables}{Google docs for Table objects} 62 | } 63 | \details{ 64 | A table object to be used within \link{bqr_patch_table} 65 | } 66 | \seealso{ 67 | Other Table meta functions: 68 | \code{\link{bqr_copy_table}()}, 69 | \code{\link{bqr_create_table}()}, 70 | \code{\link{bqr_delete_table}()}, 71 | \code{\link{bqr_list_tables}()}, 72 | \code{\link{bqr_patch_table}()}, 73 | \code{\link{bqr_table_data}()}, 74 | \code{\link{bqr_table_meta}()} 75 | } 76 | \concept{Table meta functions} 77 | -------------------------------------------------------------------------------- /man/bigQueryR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bigQueryR.R 3 | \docType{package} 4 | \name{bigQueryR} 5 | \alias{bigQueryR} 6 | \title{bigQueryR} 7 | \description{ 8 | Provides an interface with Google BigQuery 9 | } 10 | \seealso{ 11 | \url{https://cloud.google.com/bigquery/docs/reference/v2/?hl=en} 12 | } 13 | -------------------------------------------------------------------------------- /man/bqr_auth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auth.R 3 | \name{bqr_auth} 4 | \alias{bqr_auth} 5 | \title{Authenticate this session} 6 | \usage{ 7 | bqr_auth(json_file = NULL, token = NULL, email = Sys.getenv("GARGLE_EMAIL")) 8 | } 9 | \arguments{ 10 | \item{json_file}{Authentication json file you have downloaded from your Google Project} 11 | 12 | \item{token}{A preexisting token to authenticate with} 13 | 14 | \item{email}{A Google email to authenticate with 15 | 16 | If you have set the environment variable \code{BQ_AUTH_FILE} to a valid file location, 17 | the function will look there for authentication details. 18 | Otherwise it will trigger an authentication flow via Google login screen in your browser based on the email you provide. 19 | 20 | If \code{BQ_AUTH_FILE} is specified, then authentication will be called upon loading the package 21 | via \code{library(bigQueryR)}, 22 | meaning that calling this function yourself at the start of the session won't be necessary. 
23 | 24 | \code{BQ_AUTH_FILE} is a GCP service account JSON ending with file extension \code{.json}} 25 | } 26 | \value{ 27 | Invisibly, the token that has been saved to the session 28 | } 29 | \description{ 30 | Autheticate manually via email or service JSON file 31 | } 32 | \examples{ 33 | 34 | \dontrun{ 35 | 36 | # to use default package credentials (for testing) 37 | library(bigQueryR) 38 | bqr_auth("location_of_json_file.json") 39 | 40 | # or via email 41 | bqr_auth(email="me@work.com") 42 | 43 | # to use your own Google Cloud Project credentials 44 | # go to GCP console and download client credentials JSON 45 | # ideally set this in .Renviron file, not here but just for demonstration 46 | Sys.setenv("GAR_CLIENT_JSON" = "location/of/file.json") 47 | library(bigQueryR) 48 | # should now be able to log in via your own GCP project 49 | bqr_auth() 50 | 51 | # reauthentication 52 | # Once you have authenticated, set email to skip the interactive message 53 | bqr_auth(email = "my@email.com") 54 | 55 | # or leave unset to bring up menu on which email to auth with 56 | bqr_auth() 57 | # The bigQueryR package is requesting access to your Google account. 58 | # Select a pre-authorised account or enter '0' to obtain a new token. 59 | # Press Esc/Ctrl + C to abort. 60 | #1: my@email.com 61 | #2: work@mybusiness.com 62 | # you can set authentication for many emails, then switch between them e.g. 63 | bqr_auth(email = "my@email.com") 64 | bqr_list_projects() # lists what GCP projects you have access to 65 | bqr_auth(email = "work@mybusiness.com") 66 | bqr_list_projects() # lists second set of projects 67 | 68 | 69 | 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /man/bqr_copy_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{bqr_copy_dataset} 4 | \alias{bqr_copy_dataset} 5 | \title{Copy datasets} 6 | \usage{ 7 | bqr_copy_dataset( 8 | source_datasetid, 9 | destination_datasetid, 10 | source_projectid = bqr_get_global_project(), 11 | destination_projectid = bqr_get_global_project(), 12 | createDisposition = c("CREATE_IF_NEEDED", "CREATE_NEVER"), 13 | writeDisposition = c("WRITE_TRUNCATE", "WRITE_APPEND", "WRITE_EMPTY"), 14 | destination_prefix = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{source_datasetid}{source datasetId} 19 | 20 | \item{destination_datasetid}{destination datasetId} 21 | 22 | \item{source_projectid}{source table's projectId} 23 | 24 | \item{destination_projectid}{destination table's projectId} 25 | 26 | \item{createDisposition}{Create table's behaviour} 27 | 28 | \item{writeDisposition}{Write to an existing table's behaviour} 29 | 30 | \item{destination_prefix}{A prefix appended to the destination tableIds} 31 | } 32 | \value{ 33 | A named list of jobs of the source datasets, with details of job started. 34 | } 35 | \description{ 36 | Uses \link{bqr_copy_table} to copy all the tables in a dataset. 37 | } 38 | \details{ 39 | You can not copy across dataset regions (e.g. EU to US), or copy BigQuery Views. 
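An illustrative call using a destination prefix (the dataset names are placeholders):

\preformatted{
bqr_copy_dataset("source_dataset", "destination_dataset",
                 destination_prefix = "backup_")
}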
40 | } 41 | \examples{ 42 | 43 | \dontrun{ 44 | 45 | bqr_copy_dataset("source_dataset", "destination_dataset") 46 | 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /man/bqr_copy_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_copy_table} 4 | \alias{bqr_copy_table} 5 | \title{Copy BigQuery table} 6 | \usage{ 7 | bqr_copy_table( 8 | source_tableid, 9 | destination_tableid, 10 | source_projectid = bqr_get_global_project(), 11 | source_datasetid = bqr_get_global_dataset(), 12 | destination_projectid = bqr_get_global_project(), 13 | destination_datasetid = bqr_get_global_dataset(), 14 | createDisposition = c("CREATE_IF_NEEDED", "CREATE_NEVER"), 15 | writeDisposition = c("WRITE_TRUNCATE", "WRITE_APPEND", "WRITE_EMPTY") 16 | ) 17 | } 18 | \arguments{ 19 | \item{source_tableid}{source table's tableId} 20 | 21 | \item{destination_tableid}{destination table's tableId} 22 | 23 | \item{source_projectid}{source table's projectId} 24 | 25 | \item{source_datasetid}{source table's datasetId} 26 | 27 | \item{destination_projectid}{destination table's projectId} 28 | 29 | \item{destination_datasetid}{destination table's datasetId} 30 | 31 | \item{createDisposition}{Create table's behaviour} 32 | 33 | \item{writeDisposition}{Write to an existing table's behaviour} 34 | } 35 | \value{ 36 | A job object 37 | } 38 | \description{ 39 | Copy a source table to another destination 40 | } 41 | \seealso{ 42 | Other Table meta functions: 43 | \code{\link{Table}()}, 44 | \code{\link{bqr_create_table}()}, 45 | \code{\link{bqr_delete_table}()}, 46 | \code{\link{bqr_list_tables}()}, 47 | \code{\link{bqr_patch_table}()}, 48 | \code{\link{bqr_table_data}()}, 49 | \code{\link{bqr_table_meta}()} 50 | } 51 | \concept{Table meta functions} 52 | -------------------------------------------------------------------------------- /man/bqr_create_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_create_table} 4 | \alias{bqr_create_table} 5 | \title{Create a Table} 6 | \usage{ 7 | bqr_create_table( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | tableId, 11 | template_data = NULL, 12 | timePartitioning = FALSE, 13 | expirationMs = 0L 14 | ) 15 | } 16 | \arguments{ 17 | \item{projectId}{The BigQuery project ID.} 18 | 19 | \item{datasetId}{A datasetId within projectId.} 20 | 21 | \item{tableId}{Name of table you want.} 22 | 23 | \item{template_data}{A dataframe with the correct types of data. If \code{NULL} an empty table is made.} 24 | 25 | \item{timePartitioning}{Whether to create a partioned table} 26 | 27 | \item{expirationMs}{If a partioned table, whether to have an expiration time on the data. The default \code{0} is no expiration.} 28 | } 29 | \value{ 30 | TRUE if created, FALSE if not. 31 | } 32 | \description{ 33 | Create a Table 34 | } 35 | \details{ 36 | Creates a BigQuery table. 
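For example, a minimal sketch creating a table from a template data.frame (the project, dataset and table IDs are illustrative placeholders):
\preformatted{
## illustrative IDs - replace with your own
bqr_create_table(projectId = "my-project",
                 datasetId = "my_dataset",
                 tableId = "new_table",
                 template_data = mtcars)
}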
37 | 38 | If setting \code{timePartioning} to \code{TRUE} then the table will be a 39 | \href{https://cloud.google.com/bigquery/docs/creating-partitioned-tables}{partioned table} 40 | 41 | If you want more advanced features for the table, create it then call \link{bqr_patch_table} with advanced configuration configured from \link{Table} 42 | } 43 | \seealso{ 44 | Other Table meta functions: 45 | \code{\link{Table}()}, 46 | \code{\link{bqr_copy_table}()}, 47 | \code{\link{bqr_delete_table}()}, 48 | \code{\link{bqr_list_tables}()}, 49 | \code{\link{bqr_patch_table}()}, 50 | \code{\link{bqr_table_data}()}, 51 | \code{\link{bqr_table_meta}()} 52 | } 53 | \concept{Table meta functions} 54 | -------------------------------------------------------------------------------- /man/bqr_delete_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_delete_table} 4 | \alias{bqr_delete_table} 5 | \title{Delete a Table} 6 | \usage{ 7 | bqr_delete_table( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | tableId 11 | ) 12 | } 13 | \arguments{ 14 | \item{projectId}{The BigQuery project ID.} 15 | 16 | \item{datasetId}{A datasetId within projectId.} 17 | 18 | \item{tableId}{Name of table you want to delete.} 19 | } 20 | \value{ 21 | TRUE if deleted, FALSE if not. 22 | } 23 | \description{ 24 | Delete a Table 25 | } 26 | \details{ 27 | Deletes a BigQuery table 28 | } 29 | \seealso{ 30 | Other Table meta functions: 31 | \code{\link{Table}()}, 32 | \code{\link{bqr_copy_table}()}, 33 | \code{\link{bqr_create_table}()}, 34 | \code{\link{bqr_list_tables}()}, 35 | \code{\link{bqr_patch_table}()}, 36 | \code{\link{bqr_table_data}()}, 37 | \code{\link{bqr_table_meta}()} 38 | } 39 | \concept{Table meta functions} 40 | -------------------------------------------------------------------------------- /man/bqr_download_extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downloadData.R 3 | \name{bqr_download_extract} 4 | \alias{bqr_download_extract} 5 | \title{Download extract data} 6 | \usage{ 7 | bqr_download_extract(extractJob, filename = NULL) 8 | } 9 | \arguments{ 10 | \item{extractJob}{An extract job from \link{bqr_extract_data}} 11 | 12 | \item{filename}{Where to save the csv file. If NULL then uses objectname.} 13 | } 14 | \value{ 15 | TRUE if successfully downloaded 16 | } 17 | \description{ 18 | After extracting data via \link{bqr_extract_data} download the 19 | extract from the Google Storage bucket. 20 | 21 | If more than 1GB, will save multiple .csv files with prefix "N_" to filename. 
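A minimal sketch of the extract-then-download flow (the table, bucket and file names are illustrative placeholders):
\preformatted{
## assumes a global project/dataset have been set; names are illustrative
job_extract <- bqr_extract_data(tableId = "big_result_table",
                                cloudStorageBucket = "my-bucket")
job_extract <- bqr_wait_for_job(job_extract)
bqr_download_extract(job_extract, filename = "big_result.csv")
}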
22 | } 23 | \seealso{ 24 | Other BigQuery asynch query functions: 25 | \code{\link{bqr_extract_data}()}, 26 | \code{\link{bqr_get_job}()}, 27 | \code{\link{bqr_grant_extract_access}()}, 28 | \code{\link{bqr_query_asynch}()}, 29 | \code{\link{bqr_wait_for_job}()} 30 | } 31 | \concept{BigQuery asynch query functions} 32 | -------------------------------------------------------------------------------- /man/bqr_download_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fastBqDownload.R 3 | \name{bqr_download_query} 4 | \alias{bqr_download_query} 5 | \title{Download data from BigQuery to local folder} 6 | \usage{ 7 | bqr_download_query( 8 | query = NULL, 9 | target_folder = "data", 10 | result_file_name = NULL, 11 | refetch = FALSE, 12 | useLegacySql = FALSE, 13 | clean_intermediate_results = TRUE, 14 | global_project_name = bqr_get_global_project(), 15 | global_dataset_name = bqr_get_global_dataset(), 16 | global_bucket_name = googleCloudStorageR::gcs_get_global_bucket() 17 | ) 18 | } 19 | \arguments{ 20 | \item{query}{The query you want to run.} 21 | 22 | \item{target_folder}{Target folder on your local computer.} 23 | 24 | \item{result_file_name}{Name of your downloaded file.} 25 | 26 | \item{refetch}{Boolean, whether you would like to refetch previously downloaded data.} 27 | 28 | \item{useLegacySql}{Boolean, whether to use Legacy SQL. Default is FALSE.} 29 | 30 | \item{clean_intermediate_results}{Boolean, whether to keep intermediate files on BigQuery and Google Cloud Storage.} 31 | 32 | \item{global_project_name}{BigQuery project name (where you would like to save your file during download).} 33 | 34 | \item{global_dataset_name}{BigQuery dataset name (where you would like to save your file during download).} 35 | 36 | \item{global_bucket_name}{Google Cloud Storage bucket name (where you would like to save your file during download).} 37 | } 38 | \value{ 39 | a data.table. 
40 | } 41 | \description{ 42 | Requires you to make a bucket at https://console.cloud.google.com/storage/browser 43 | } 44 | \examples{ 45 | 46 | \dontrun{ 47 | library(bigQueryR) 48 | 49 | ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 50 | bqr_auth() 51 | 52 | # Create a bucket at Google Cloud Storage at 53 | # https://console.cloud.google.com/storage/browser 54 | 55 | bqr_download_query(query = "select * from `your_project.your_dataset.your_table`") 56 | 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /man/bqr_extract_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downloadData.R 3 | \name{bqr_extract_data} 4 | \alias{bqr_extract_data} 5 | \title{Extract data asynchronously} 6 | \usage{ 7 | bqr_extract_data( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | tableId, 11 | cloudStorageBucket, 12 | filename = paste0("big-query-extract-", gsub(" |:|-", "", Sys.time()), "-*.csv"), 13 | compression = c("NONE", "GZIP"), 14 | destinationFormat = c("CSV", "NEWLINE_DELIMITED_JSON", "AVRO"), 15 | fieldDelimiter = ",", 16 | printHeader = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{projectId}{The BigQuery project ID.} 21 | 22 | \item{datasetId}{A datasetId within projectId.} 23 | 24 | \item{tableId}{ID of table you wish to extract.} 25 | 26 | \item{cloudStorageBucket}{URI of the bucket to extract into.} 27 | 28 | \item{filename}{Include a wildcard (*) if extract expected to be > 1GB.} 29 | 30 | \item{compression}{Compression of file.} 31 | 32 | \item{destinationFormat}{Format of file.} 33 | 34 | \item{fieldDelimiter}{fieldDelimiter of file.} 35 | 36 | \item{printHeader}{Whether to include header row.} 37 | } 38 | \value{ 39 | A Job object to be queried via \link{bqr_get_job} 40 | } 41 | \description{ 42 | Use this instead of \link{bqr_query} for big datasets. 
43 | Requires you to make a bucket at https://console.cloud.google.com/storage/browser 44 | } 45 | \examples{ 46 | 47 | \dontrun{ 48 | library(bigQueryR) 49 | 50 | ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 51 | bqr_auth() 52 | 53 | ## make a big query 54 | job <- bqr_query_asynch("your_project", 55 | "your_dataset", 56 | "SELECT * FROM blah LIMIT 9999999", 57 | destinationTableId = "bigResultTable") 58 | 59 | ## poll the job to check its status 60 | ## its done when job$status$state == "DONE" 61 | bqr_get_job(job$jobReference$jobId, "your_project") 62 | 63 | ##once done, the query results are in "bigResultTable" 64 | ## extract that table to GoogleCloudStorage: 65 | # Create a bucket at Google Cloud Storage at 66 | # https://console.cloud.google.com/storage/browser 67 | 68 | job_extract <- bqr_extract_data("your_project", 69 | "your_dataset", 70 | "bigResultTable", 71 | "your_cloud_storage_bucket_name") 72 | 73 | ## poll the extract job to check its status 74 | ## its done when job$status$state == "DONE" 75 | bqr_get_job(job_extract$jobReference$jobId, "your_project") 76 | 77 | You should also see the extract in the Google Cloud Storage bucket 78 | googleCloudStorageR::gcs_list_objects("your_cloud_storage_bucket_name") 79 | 80 | ## to download via a URL and not logging in via Google Cloud Storage interface: 81 | ## Use an email that is Google account enabled 82 | ## Requires scopes: 83 | ## https://www.googleapis.com/auth/devstorage.full_control 84 | ## https://www.googleapis.com/auth/cloud-platform 85 | 86 | download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 87 | 88 | ## download_url may be multiple if the data is > 1GB 89 | 90 | } 91 | 92 | } 93 | \seealso{ 94 | \url{https://cloud.google.com/bigquery/exporting-data-from-bigquery} 95 | 96 | Other BigQuery asynch query functions: 97 | \code{\link{bqr_download_extract}()}, 98 | \code{\link{bqr_get_job}()}, 99 | \code{\link{bqr_grant_extract_access}()}, 100 | \code{\link{bqr_query_asynch}()}, 101 | \code{\link{bqr_wait_for_job}()} 102 | } 103 | \concept{BigQuery asynch query functions} 104 | -------------------------------------------------------------------------------- /man/bqr_get_global_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/globals.R 3 | \name{bqr_get_global_dataset} 4 | \alias{bqr_get_global_dataset} 5 | \alias{bq_get_global_dataset} 6 | \title{Get global dataset name} 7 | \usage{ 8 | bqr_get_global_dataset() 9 | 10 | bq_get_global_dataset() 11 | } 12 | \value{ 13 | dataset name 14 | } 15 | \description{ 16 | dataset name set this session to use by default 17 | } 18 | \details{ 19 | Set the dataset name via \link{bq_global_dataset} 20 | } 21 | \seealso{ 22 | Other dataset functions: 23 | \code{\link{bqr_global_dataset}()} 24 | } 25 | \concept{dataset functions} 26 | -------------------------------------------------------------------------------- /man/bqr_get_global_project.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/globals.R 3 | \name{bqr_get_global_project} 4 | \alias{bqr_get_global_project} 5 | \alias{bq_get_global_project} 6 | \title{Get global project name} 7 | \usage{ 8 | bqr_get_global_project() 9 | 10 | bq_get_global_project() 11 | } 12 | \value{ 13 | project name 14 | } 15 | \description{ 16 | project name 
set this session to use by default 17 | } 18 | \details{ 19 | Set the project name via \link{bq_global_project} 20 | } 21 | \seealso{ 22 | Other project functions: 23 | \code{\link{bqr_global_project}()} 24 | } 25 | \concept{project functions} 26 | -------------------------------------------------------------------------------- /man/bqr_get_job.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/jobs.R 3 | \name{bqr_get_job} 4 | \alias{bqr_get_job} 5 | \title{Poll a jobId} 6 | \usage{ 7 | bqr_get_job( 8 | jobId = .Last.value, 9 | projectId = bqr_get_global_project(), 10 | location = NULL 11 | ) 12 | } 13 | \arguments{ 14 | \item{jobId}{jobId to poll, or a job Object} 15 | 16 | \item{projectId}{projectId of job} 17 | 18 | \item{location}{location where job is run. Required for single-region locations when jobId is not a job Object.} 19 | } 20 | \value{ 21 | A Jobs resource 22 | } 23 | \description{ 24 | Poll a jobId 25 | } 26 | \examples{ 27 | 28 | \dontrun{ 29 | library(bigQueryR) 30 | 31 | ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 32 | bqr_auth() 33 | 34 | ## make a big query 35 | job <- bqr_query_asynch("your_project", 36 | "your_dataset", 37 | "SELECT * FROM blah LIMIT 9999999", 38 | destinationTableId = "bigResultTable") 39 | 40 | ## poll the job to check its status 41 | ## its done when job$status$state == "DONE" 42 | bqr_get_job(job$jobReference$jobId, "your_project") 43 | 44 | ##once done, the query results are in "bigResultTable" 45 | ## extract that table to GoogleCloudStorage: 46 | # Create a bucket at Google Cloud Storage at 47 | # https://console.cloud.google.com/storage/browser 48 | 49 | job_extract <- bqr_extract_data("your_project", 50 | "your_dataset", 51 | "bigResultTable", 52 | "your_cloud_storage_bucket_name") 53 | 54 | ## poll the extract job to check its status 55 | ## its done when job$status$state == "DONE" 56 | bqr_get_job(job_extract$jobReference$jobId, "your_project") 57 | 58 | ## to download via a URL and not logging in via Google Cloud Storage interface: 59 | ## Use an email that is Google account enabled 60 | ## Requires scopes: 61 | ## https://www.googleapis.com/auth/devstorage.full_control 62 | ## https://www.googleapis.com/auth/cloud-platform 63 | ## set via options("bigQueryR.scopes") and reauthenticate if needed 64 | 65 | download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 66 | 67 | ## download_url may be multiple if the data is > 1GB 68 | 69 | } 70 | 71 | 72 | 73 | 74 | } 75 | \seealso{ 76 | Other BigQuery asynch query functions: 77 | \code{\link{bqr_download_extract}()}, 78 | \code{\link{bqr_extract_data}()}, 79 | \code{\link{bqr_grant_extract_access}()}, 80 | \code{\link{bqr_query_asynch}()}, 81 | \code{\link{bqr_wait_for_job}()} 82 | } 83 | \concept{BigQuery asynch query functions} 84 | -------------------------------------------------------------------------------- /man/bqr_global_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/globals.R 3 | \name{bqr_global_dataset} 4 | \alias{bqr_global_dataset} 5 | \alias{bq_global_dataset} 6 | \title{Set global dataset name} 7 | \usage{ 8 | bqr_global_dataset(dataset) 9 | 10 | bq_global_dataset(dataset) 11 | } 12 | \arguments{ 13 | \item{dataset}{dataset name you want this session to use by default, or a 
dataset object} 14 | } 15 | \value{ 16 | The dataset name (invisibly) 17 | } 18 | \description{ 19 | Set a dataset name used for this R session 20 | } 21 | \details{ 22 | This sets a dataset to a global environment value so you don't need to 23 | supply the dataset argument to other API calls. 24 | } 25 | \seealso{ 26 | Other dataset functions: 27 | \code{\link{bqr_get_global_dataset}()} 28 | } 29 | \concept{dataset functions} 30 | -------------------------------------------------------------------------------- /man/bqr_global_project.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/globals.R 3 | \name{bqr_global_project} 4 | \alias{bqr_global_project} 5 | \alias{bq_global_project} 6 | \title{Set global project name} 7 | \usage{ 8 | bqr_global_project(project) 9 | 10 | bq_global_project(project) 11 | } 12 | \arguments{ 13 | \item{project}{project name you want this session to use by default, or a project object} 14 | } 15 | \value{ 16 | The project name (invisibly) 17 | } 18 | \description{ 19 | Set a project name used for this R session 20 | } 21 | \details{ 22 | This sets a project to a global environment value so you don't need to 23 | supply the project argument to other API calls. 24 | } 25 | \seealso{ 26 | Other project functions: 27 | \code{\link{bqr_get_global_project}()} 28 | } 29 | \concept{project functions} 30 | -------------------------------------------------------------------------------- /man/bqr_grant_extract_access.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downloadData.R 3 | \name{bqr_grant_extract_access} 4 | \alias{bqr_grant_extract_access} 5 | \title{Grant access to an extract on Google Cloud Storage} 6 | \usage{ 7 | bqr_grant_extract_access(extractJob, email) 8 | } 9 | \arguments{ 10 | \item{extractJob}{An extract job from \link{bqr_extract_data}} 11 | 12 | \item{email}{email of the user to have access} 13 | } 14 | \value{ 15 | URL(s) to download the extract that is accessible by email 16 | } 17 | \description{ 18 | To access the data created in \link{bqr_extract_data}. 19 | Requires the Google account email of the user. 20 | } 21 | \details{ 22 | Uses \href{https://cloud.google.com/storage/docs/authentication#cookieauth}{cookie based auth}. 
23 | } 24 | \examples{ 25 | 26 | \dontrun{ 27 | library(bigQueryR) 28 | 29 | ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 30 | bqr_auth() 31 | 32 | ## make a big query 33 | job <- bqr_query_asynch("your_project", 34 | "your_dataset", 35 | "SELECT * FROM blah LIMIT 9999999", 36 | destinationTableId = "bigResultTable") 37 | 38 | ## poll the job to check its status 39 | ## its done when job$status$state == "DONE" 40 | bqr_get_job(job$jobReference$jobId, "your_project") 41 | 42 | ##once done, the query results are in "bigResultTable" 43 | ## extract that table to GoogleCloudStorage: 44 | # Create a bucket at Google Cloud Storage at 45 | # https://console.cloud.google.com/storage/browser 46 | 47 | job_extract <- bqr_extract_data("your_project", 48 | "your_dataset", 49 | "bigResultTable", 50 | "your_cloud_storage_bucket_name") 51 | 52 | ## poll the extract job to check its status 53 | ## its done when job$status$state == "DONE" 54 | bqr_get_job(job_extract$jobReference$jobId, "your_project") 55 | 56 | ## to download via a URL and not logging in via Google Cloud Storage interface: 57 | ## Use an email that is Google account enabled 58 | ## Requires scopes: 59 | ## https://www.googleapis.com/auth/devstorage.full_control 60 | ## https://www.googleapis.com/auth/cloud-platform 61 | ## set via options("bigQueryR.scopes") and reauthenticate if needed 62 | 63 | download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 64 | 65 | ## download_url may be multiple if the data is > 1GB 66 | 67 | } 68 | 69 | } 70 | \seealso{ 71 | Other BigQuery asynch query functions: 72 | \code{\link{bqr_download_extract}()}, 73 | \code{\link{bqr_extract_data}()}, 74 | \code{\link{bqr_get_job}()}, 75 | \code{\link{bqr_query_asynch}()}, 76 | \code{\link{bqr_wait_for_job}()} 77 | } 78 | \concept{BigQuery asynch query functions} 79 | -------------------------------------------------------------------------------- /man/bqr_list_datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{bqr_list_datasets} 4 | \alias{bqr_list_datasets} 5 | \title{List BigQuery datasets} 6 | \usage{ 7 | bqr_list_datasets(projectId = bqr_get_global_project()) 8 | } 9 | \arguments{ 10 | \item{projectId}{The BigQuery project ID} 11 | } 12 | \description{ 13 | Each projectId can have multiple datasets. 
14 | }
15 | \examples{
16 | 
17 | \dontrun{
18 | library(bigQueryR)
19 | 
20 | ## this will open your browser
21 | ## Authenticate with an email that has access to the BigQuery project you need
22 | bqr_auth()
23 | 
24 | ## verify under a new user
25 | bqr_auth(email = "me@work.com")
26 | 
27 | ## get projects
28 | projects <- bqr_list_projects()
29 | 
30 | my_project <- projects$id[1]
31 | 
32 | ## for first project, get datasets
33 | datasets <- bqr_list_datasets(my_project)
34 | 
35 | }
36 | 
37 | }
38 | \seealso{
39 | Other bigQuery meta functions:
40 | \code{\link{bqr_list_projects}()}
41 | }
42 | \concept{bigQuery meta functions}
43 | -------------------------------------------------------------------------------- /man/bqr_list_jobs.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/jobs.R
3 | \name{bqr_list_jobs}
4 | \alias{bqr_list_jobs}
5 | \title{List BigQuery jobs}
6 | \usage{
7 | bqr_list_jobs(
8 |   projectId = bqr_get_global_project(),
9 |   allUsers = FALSE,
10 |   projection = c("full", "minimal"),
11 |   stateFilter = c("done", "pending", "running")
12 | )
13 | }
14 | \arguments{
15 | \item{projectId}{projectId of job}
16 | 
17 | \item{allUsers}{Whether to display jobs owned by all users in the project.}
18 | 
19 | \item{projection}{"full" - all job data, "minimal" excludes job configuration.}
20 | 
21 | \item{stateFilter}{Filter for job status.}
22 | }
23 | \value{
24 | A list of jobs resources
25 | }
26 | \description{
27 | List the BigQuery jobs for the projectId
28 | }
29 | \details{
30 | Lists all jobs that you started in the specified project.
31 | Job information is available for a six month period after creation.
32 | The job list is sorted in reverse chronological order, by job creation time.
33 | Requires the Can View project role, or the
34 | Is Owner project role if you set the allUsers property.
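For example, a minimal sketch listing completed jobs with minimal job data (assumes a global project has been set via \link{bqr_global_project}):
\preformatted{
## assumes bqr_global_project() has been set for this session
jobs <- bqr_list_jobs(projection = "minimal", stateFilter = "done")
}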
35 | } 36 | -------------------------------------------------------------------------------- /man/bqr_list_projects.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/listBigQuery.R 3 | \name{bqr_list_projects} 4 | \alias{bqr_list_projects} 5 | \title{List Google Dev Console projects you have access to} 6 | \usage{ 7 | bqr_list_projects() 8 | } 9 | \value{ 10 | A dataframe of the projects you have access to under the authentication 11 | } 12 | \description{ 13 | Example: bqr_list_projects() 14 | } 15 | \examples{ 16 | 17 | \dontrun{ 18 | library(bigQueryR) 19 | 20 | ## this will open your browser 21 | ## Authenticate with an email that has access to the BigQuery project you need 22 | bqr_auth() 23 | 24 | ## verify under a new user 25 | bqr_auth(new_user=TRUE) 26 | 27 | ## get projects 28 | projects <- bqr_list_projects() 29 | 30 | } 31 | 32 | } 33 | \seealso{ 34 | Other bigQuery meta functions: 35 | \code{\link{bqr_list_datasets}()} 36 | } 37 | \concept{bigQuery meta functions} 38 | -------------------------------------------------------------------------------- /man/bqr_list_tables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_list_tables} 4 | \alias{bqr_list_tables} 5 | \title{List BigQuery tables in a dataset} 6 | \usage{ 7 | bqr_list_tables( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | maxResults = -1 11 | ) 12 | } 13 | \arguments{ 14 | \item{projectId}{The BigQuery project ID} 15 | 16 | \item{datasetId}{A datasetId within projectId} 17 | 18 | \item{maxResults}{Number of results to return, default \code{-1} returns all results} 19 | } 20 | \value{ 21 | dataframe of tables in dataset 22 | } 23 | \description{ 24 | List BigQuery tables in a dataset 25 | } 26 | \examples{ 27 | 28 | \dontrun{ 29 | bqr_list_tables("publicdata", "samples") 30 | } 31 | 32 | } 33 | \seealso{ 34 | Other Table meta functions: 35 | \code{\link{Table}()}, 36 | \code{\link{bqr_copy_table}()}, 37 | \code{\link{bqr_create_table}()}, 38 | \code{\link{bqr_delete_table}()}, 39 | \code{\link{bqr_patch_table}()}, 40 | \code{\link{bqr_table_data}()}, 41 | \code{\link{bqr_table_meta}()} 42 | } 43 | \concept{Table meta functions} 44 | -------------------------------------------------------------------------------- /man/bqr_partition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/partition.R 3 | \name{bqr_partition} 4 | \alias{bqr_partition} 5 | \title{Convert date-sharded tables to a single partitioned table} 6 | \usage{ 7 | bqr_partition( 8 | sharded, 9 | partition, 10 | projectId = bqr_get_global_project(), 11 | datasetId = bqr_get_global_dataset() 12 | ) 13 | } 14 | \arguments{ 15 | \item{sharded}{The prefix of date-sharded tables to merge into one partitioned table} 16 | 17 | \item{partition}{Name of partitioned table. Will create if not present already} 18 | 19 | \item{projectId}{The project ID} 20 | 21 | \item{datasetId}{The dataset ID} 22 | } 23 | \value{ 24 | A list of copy jobs for the sharded tables that will be copied to one partitioned table 25 | } 26 | \description{ 27 | Moves the old style date-sharded tables such as \code{[TABLE_NAME]_YYYYMMDD} to the new date partitioned format. 
28 | } 29 | \details{ 30 | Performs lots of copy table operations via \link{bqr_copy_table} 31 | 32 | Before partitioned tables became available, BigQuery users would often divide 33 | large datasets into separate tables organized by time period; usually daily tables, 34 | where each table represented data loaded on that particular date. 35 | 36 | Dividing a dataset into daily tables helped to reduce the amount of data scanned 37 | when querying a specific date range. For example, if you have a a year's worth of data 38 | in a single table, a query that involves the last seven days of data still requires 39 | a full scan of the entire table to determine which data to return. 40 | However, if your table is divided into daily tables, you can restrict the query to 41 | the seven most recent daily tables. 42 | 43 | Daily tables, however, have several disadvantages. You must manually, or programmatically, 44 | create the daily tables. SQL queries are often more complex because your data can be 45 | spread across hundreds of tables. Performance degrades as the number of referenced 46 | tables increases. There is also a limit of 1,000 tables that can be referenced in a 47 | single query. Partitioned tables have none of these disadvantages. 48 | } 49 | \examples{ 50 | 51 | \dontrun{ 52 | 53 | bqr_partition("ga_sessions_", "ga_partition") 54 | 55 | } 56 | 57 | } 58 | \seealso{ 59 | \href{https://cloud.google.com/bigquery/docs/creating-partitioned-tables}{Partitioned Tables Help} 60 | } 61 | -------------------------------------------------------------------------------- /man/bqr_patch_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_patch_table} 4 | \alias{bqr_patch_table} 5 | \title{Update a Table} 6 | \usage{ 7 | bqr_patch_table(Table) 8 | } 9 | \arguments{ 10 | \item{Table}{A Table object as created by \link{Table}} 11 | } 12 | \description{ 13 | This uses PATCH semantics to alter an existing table. 14 | You need to create the Table object first to pass in using \link{Table} 15 | which will be transformed to JSON 16 | } 17 | \seealso{ 18 | \href{https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource}{Definition of tables} 19 | 20 | Other Table meta functions: 21 | \code{\link{Table}()}, 22 | \code{\link{bqr_copy_table}()}, 23 | \code{\link{bqr_create_table}()}, 24 | \code{\link{bqr_delete_table}()}, 25 | \code{\link{bqr_list_tables}()}, 26 | \code{\link{bqr_table_data}()}, 27 | \code{\link{bqr_table_meta}()} 28 | } 29 | \concept{Table meta functions} 30 | -------------------------------------------------------------------------------- /man/bqr_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/query.R 3 | \name{bqr_query} 4 | \alias{bqr_query} 5 | \title{Query a BigQuery Table} 6 | \usage{ 7 | bqr_query( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | query, 11 | maxResults = 1000, 12 | useLegacySql = TRUE, 13 | useQueryCache = TRUE, 14 | dryRun = FALSE, 15 | timeoutMs = 600 * 1000 16 | ) 17 | } 18 | \arguments{ 19 | \item{projectId}{The BigQuery project ID} 20 | 21 | \item{datasetId}{A datasetId within projectId} 22 | 23 | \item{query}{BigQuery SQL. 
You can also supply a file location of your query ending with \code{.sql}} 24 | 25 | \item{maxResults}{Max number per page of results. Set total rows with LIMIT in your query.} 26 | 27 | \item{useLegacySql}{Whether the query you pass is legacy SQL or not. Default TRUE} 28 | 29 | \item{useQueryCache}{Whether to use the query cache. Default TRUE, set to FALSE for realtime queries.} 30 | } 31 | \value{ 32 | a data.frame. 33 | If there is an SQL error, a data.frame with 34 | additional class "bigQueryR_query_error" and the 35 | problem in the data.frame$message 36 | } 37 | \description{ 38 | MaxResults is how many results to return per page of results, which can be less than the 39 | total results you have set in your query using LIMIT. Google recommends for bigger datasets 40 | to set maxResults = 1000, but this will use more API calls. 41 | } 42 | \examples{ 43 | 44 | \dontrun{ 45 | 46 | bqr_query("big-query-r","samples", 47 | "SELECT COUNT(repository.url) FROM [publicdata:samples.github_nested]") 48 | 49 | } 50 | 51 | } 52 | \seealso{ 53 | \href{https://cloud.google.com/bigquery/sql-reference/}{BigQuery SQL reference} 54 | } 55 | \concept{BigQuery query functions} 56 | -------------------------------------------------------------------------------- /man/bqr_query_asynch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/query.R 3 | \name{bqr_query_asynch} 4 | \alias{bqr_query_asynch} 5 | \title{BigQuery query asynchronously} 6 | \usage{ 7 | bqr_query_asynch( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | query, 11 | destinationTableId, 12 | useLegacySql = TRUE, 13 | writeDisposition = c("WRITE_EMPTY", "WRITE_TRUNCATE", "WRITE_APPEND") 14 | ) 15 | } 16 | \arguments{ 17 | \item{projectId}{projectId to be billed.} 18 | 19 | \item{datasetId}{datasetId of where query will execute.} 20 | 21 | \item{query}{The BigQuery query as a string.} 22 | 23 | \item{destinationTableId}{Id of table the results will be written to.} 24 | 25 | \item{useLegacySql}{Whether the query you pass is legacy SQL or not. Default TRUE} 26 | 27 | \item{writeDisposition}{Behaviour if destination table exists. See Details.} 28 | } 29 | \value{ 30 | A Job object to be queried via \link{bqr_get_job} 31 | } 32 | \description{ 33 | Use for big results > 10000 that write to their own destinationTableId. 34 | } 35 | \details{ 36 | For bigger queries, asynchronous queries save the results to another BigQuery table. 37 | You can check the progress of the job via \link{bqr_get_job} 38 | 39 | You may now want to download this data. 40 | For large datasets, this is best done via extracting the BigQuery result to Google Cloud Storage, 41 | then downloading the data from there. 42 | 43 | You can read how to create a bucket at Google Cloud Storage 44 | at \url{https://cloud.google.com/storage/docs/cloud-console} 45 | 46 | writeDisposition - behaviour if destinationTable already exists: 47 | \itemize{ 48 | \item WRITE_TRUNCATE: BigQuery overwrites the table data. 
49 | \item WRITE_APPEND: BigQuery appends the data to the table 50 | \item WRITE_EMPTY: If contains data, a 'duplicate' error is returned 51 | } 52 | } 53 | \examples{ 54 | 55 | \dontrun{ 56 | library(bigQueryR) 57 | 58 | ## Auth with a project that has at least BigQuery and Google Cloud Storage scope 59 | bqr_auth() 60 | 61 | ## make a big query 62 | job <- bqr_query_asynch("your_project", 63 | "your_dataset", 64 | "SELECT * FROM blah LIMIT 9999999", 65 | destinationTableId = "bigResultTable") 66 | 67 | ## poll the job to check its status 68 | ## its done when job$status$state == "DONE" 69 | bqr_get_job(job$jobReference$jobId, "your_project") 70 | 71 | ##once done, the query results are in "bigResultTable" 72 | ## extract that table to GoogleCloudStorage: 73 | # Create a bucket at Google Cloud Storage at 74 | # https://console.cloud.google.com/storage/browser 75 | 76 | job_extract <- bqr_extract_data("your_project", 77 | "your_dataset", 78 | "bigResultTable", 79 | "your_cloud_storage_bucket_name") 80 | 81 | ## poll the extract job to check its status 82 | ## its done when job$status$state == "DONE" 83 | bqr_get_job(job_extract$jobReference$jobId, "your_project") 84 | 85 | ## to download via a URL and not logging in via Google Cloud Storage interface: 86 | ## Use an email that is Google account enabled 87 | ## Requires scopes: 88 | ## https://www.googleapis.com/auth/devstorage.full_control 89 | ## https://www.googleapis.com/auth/cloud-platform 90 | ## set via options("bigQueryR.scopes") and reauthenticate if needed 91 | 92 | download_url <- bqr_grant_extract_access(job_extract, "your@email.com") 93 | 94 | ## download_url may be multiple if the data is > 1GB 95 | 96 | } 97 | 98 | 99 | } 100 | \seealso{ 101 | Other BigQuery asynch query functions: 102 | \code{\link{bqr_download_extract}()}, 103 | \code{\link{bqr_extract_data}()}, 104 | \code{\link{bqr_get_job}()}, 105 | \code{\link{bqr_grant_extract_access}()}, 106 | \code{\link{bqr_wait_for_job}()} 107 | } 108 | \concept{BigQuery asynch query functions} 109 | -------------------------------------------------------------------------------- /man/bqr_table_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_table_data} 4 | \alias{bqr_table_data} 5 | \title{Get BigQuery Table's data list} 6 | \usage{ 7 | bqr_table_data( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | tableId, 11 | maxResults = 1000 12 | ) 13 | } 14 | \arguments{ 15 | \item{projectId}{The BigQuery project ID} 16 | 17 | \item{datasetId}{A datasetId within projectId} 18 | 19 | \item{tableId}{The tableId within the datasetId} 20 | 21 | \item{maxResults}{Number of results to return} 22 | } 23 | \value{ 24 | data.frame of table data 25 | 26 | This won't work with nested datasets, for that use \link{bqr_query} as that flattens results. 
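For a quick preview of a flat table, a minimal sketch (the tableId is an illustrative placeholder and a global project/dataset are assumed to be set):
\preformatted{
## "my_flat_table" is illustrative; globals are assumed to be set
preview <- bqr_table_data(tableId = "my_flat_table", maxResults = 100)
}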
27 | } 28 | \description{ 29 | Get BigQuery Table's data list 30 | } 31 | \seealso{ 32 | Other Table meta functions: 33 | \code{\link{Table}()}, 34 | \code{\link{bqr_copy_table}()}, 35 | \code{\link{bqr_create_table}()}, 36 | \code{\link{bqr_delete_table}()}, 37 | \code{\link{bqr_list_tables}()}, 38 | \code{\link{bqr_patch_table}()}, 39 | \code{\link{bqr_table_meta}()} 40 | } 41 | \concept{Table meta functions} 42 | -------------------------------------------------------------------------------- /man/bqr_table_meta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tables.R 3 | \name{bqr_table_meta} 4 | \alias{bqr_table_meta} 5 | \title{Get BigQuery Table meta data} 6 | \usage{ 7 | bqr_table_meta( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | tableId 11 | ) 12 | } 13 | \arguments{ 14 | \item{projectId}{The BigQuery project ID} 15 | 16 | \item{datasetId}{A datasetId within projectId} 17 | 18 | \item{tableId}{The tableId within the datasetId} 19 | } 20 | \value{ 21 | list of table metadata 22 | } 23 | \description{ 24 | Get BigQuery Table meta data 25 | } 26 | \examples{ 27 | 28 | \dontrun{ 29 | bqr_table_meta("publicdata", "samples", "github_nested") 30 | } 31 | 32 | 33 | } 34 | \seealso{ 35 | Other Table meta functions: 36 | \code{\link{Table}()}, 37 | \code{\link{bqr_copy_table}()}, 38 | \code{\link{bqr_create_table}()}, 39 | \code{\link{bqr_delete_table}()}, 40 | \code{\link{bqr_list_tables}()}, 41 | \code{\link{bqr_patch_table}()}, 42 | \code{\link{bqr_table_data}()} 43 | } 44 | \concept{Table meta functions} 45 | -------------------------------------------------------------------------------- /man/bqr_upload_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uploadData.R 3 | \name{bqr_upload_data} 4 | \alias{bqr_upload_data} 5 | \title{Upload data to BigQuery} 6 | \usage{ 7 | bqr_upload_data( 8 | projectId = bqr_get_global_project(), 9 | datasetId = bqr_get_global_dataset(), 10 | tableId, 11 | upload_data, 12 | create = c("CREATE_IF_NEEDED", "CREATE_NEVER"), 13 | writeDisposition = c("WRITE_TRUNCATE", "WRITE_APPEND", "WRITE_EMPTY"), 14 | schema = NULL, 15 | sourceFormat = c("CSV", "DATASTORE_BACKUP", "NEWLINE_DELIMITED_JSON", "AVRO"), 16 | wait = TRUE, 17 | autodetect = FALSE, 18 | nullMarker = NULL, 19 | maxBadRecords = NULL, 20 | allowJaggedRows = FALSE, 21 | allowQuotedNewlines = FALSE, 22 | fieldDelimiter = NULL 23 | ) 24 | } 25 | \arguments{ 26 | \item{projectId}{The BigQuery project ID.} 27 | 28 | \item{datasetId}{A datasetId within projectId.} 29 | 30 | \item{tableId}{ID of table where data will end up.} 31 | 32 | \item{upload_data}{The data to upload, a data.frame object or a Google Cloud Storage URI} 33 | 34 | \item{create}{Whether to create a new table if necessary, or error if it already exists.} 35 | 36 | \item{writeDisposition}{How to add the data to a table.} 37 | 38 | \item{schema}{If \code{upload_data} is a Google Cloud Storage URI, supply the data schema. For \code{CSV} a helper function is available by using \link{schema_fields} on a data sample} 39 | 40 | \item{sourceFormat}{If \code{upload_data} is a Google Cloud Storage URI, supply the data format. 
Default is \code{CSV}} 41 | 42 | \item{wait}{If uploading a data.frame, whether to wait for it to upload before returning} 43 | 44 | \item{autodetect}{Experimental feature that auto-detects schema for CSV and JSON files} 45 | 46 | \item{nullMarker}{Specifies a string that represents a null value in a CSV file. 47 | For example, if you specify \code{\\N}, BigQuery interprets \code{\\N} as a null value when loading a CSV file. The default value is the empty string.} 48 | 49 | \item{maxBadRecords}{The maximum number of bad records that BigQuery can ignore when running the job} 50 | 51 | \item{allowJaggedRows}{Whether to allow rows with variable length columns} 52 | 53 | \item{allowQuotedNewlines}{Whether to allow datasets with quoted new lines} 54 | 55 | \item{fieldDelimiter}{The separator for fields in a CSV file. Default is comma - \code{,}} 56 | } 57 | \value{ 58 | TRUE if successful, FALSE if not. 59 | } 60 | \description{ 61 | Upload data to BigQuery 62 | } 63 | \details{ 64 | A temporary csv file is created when uploading from a local data.frame 65 | 66 | For larger file sizes up to 5TB, upload to Google Cloud Storage first via \link[googleCloudStorageR]{gcs_upload} then supply the object URI of the form \code{gs://project-name/object-name} to the \code{upload_data} argument. 67 | 68 | You also need to supply a data schema. Remember that the file should not have a header row. 69 | } 70 | \examples{ 71 | 72 | \dontrun{ 73 | 74 | library(googleCloudStorageR) 75 | library(bigQueryR) 76 | 77 | gcs_global_bucket("your-project") 78 | 79 | ## custom upload function to ignore quotes and column headers 80 | f <- function(input, output) { 81 | write.table(input, sep = ",", col.names = FALSE, row.names = FALSE, 82 | quote = FALSE, file = output, qmethod = "double")} 83 | 84 | ## upload files to Google Cloud Storage 85 | gcs_upload(mtcars, name = "mtcars_test1.csv", object_function = f) 86 | gcs_upload(mtcars, name = "mtcars_test2.csv", object_function = f) 87 | 88 | ## create the schema of the files you just uploaded 89 | user_schema <- schema_fields(mtcars) 90 | 91 | ## load files from Google Cloud Storage into BigQuery 92 | bqr_upload_data(projectId = "your-project", 93 | datasetId = "test", 94 | tableId = "from_gcs_mtcars", 95 | upload_data = c("gs://your-project/mtcars_test1.csv", 96 | "gs://your-project/mtcars_test2.csv"), 97 | schema = user_schema) 98 | 99 | ## for big files, its helpful to create your schema on a small sample 100 | ## a quick way to do this on the command line is: 101 | # "head bigfile.csv > head_bigfile.csv" 102 | 103 | ## upload nested lists as JSON 104 | the_list <- list(list(col1 = "yes", col2 = "no", 105 | col3 = list(nest1 = 1, nest2 = 3), col4 = "oh"), 106 | list(col1 = "yes2", 107 | col2 = "n2o", col3 = list(nest1 = 5, nest2 = 7), 108 | col4 = "oh2"), 109 | list(col1 = "yes3", col2 = "no3", 110 | col3 = list(nest1 = 7, nest2 = 55), col4 = "oh3")) 111 | 112 | bqr_upload_data(datasetId = "test", 113 | tableId = "nested_list_json", 114 | upload_data = the_list, 115 | autodetect = TRUE) 116 | 117 | } 118 | 119 | } 120 | \seealso{ 121 | url{https://cloud.google.com/bigquery/loading-data-post-request} 122 | } 123 | \concept{bigQuery upload functions} 124 | -------------------------------------------------------------------------------- /man/bqr_wait_for_job.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/jobs.R 3 | \name{bqr_wait_for_job} 4 | 
\alias{bqr_wait_for_job} 5 | \title{Wait for a bigQuery job} 6 | \usage{ 7 | bqr_wait_for_job(job, wait = 5) 8 | } 9 | \arguments{ 10 | \item{job}{A job object} 11 | 12 | \item{wait}{The number of seconds to wait between checks 13 | 14 | Use this function to do a loop to check progress of a job running} 15 | } 16 | \value{ 17 | After a while, a completed job 18 | } 19 | \description{ 20 | Wait for a bigQuery job to finish. 21 | } 22 | \seealso{ 23 | Other BigQuery asynch query functions: 24 | \code{\link{bqr_download_extract}()}, 25 | \code{\link{bqr_extract_data}()}, 26 | \code{\link{bqr_get_job}()}, 27 | \code{\link{bqr_grant_extract_access}()}, 28 | \code{\link{bqr_query_asynch}()} 29 | } 30 | \concept{BigQuery asynch query functions} 31 | -------------------------------------------------------------------------------- /man/parse_bqr_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataParseFunctions.R 3 | \name{parse_bqr_query} 4 | \alias{parse_bqr_query} 5 | \title{Parse table data} 6 | \usage{ 7 | parse_bqr_query(x) 8 | } 9 | \description{ 10 | Parse table data 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/schema_fields.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uploadData.R 3 | \name{schema_fields} 4 | \alias{schema_fields} 5 | \title{Create data schema for upload to BigQuery} 6 | \usage{ 7 | schema_fields(data) 8 | } 9 | \arguments{ 10 | \item{data}{An example of the data to create a schema from} 11 | } 12 | \value{ 13 | A schema object suitable to pass within the \code{schema} argument of \link{bqr_upload_data} 14 | } 15 | \description{ 16 | Use this on a sample of the data you want to load from Google Cloud Storage 17 | } 18 | \details{ 19 | This is taken from \link[bigrquery]{insert_upload_job} 20 | } 21 | \author{ 22 | Hadley Wickham \email{hadley@rstudio.com} 23 | } 24 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bigQueryR) 3 | 4 | 5 | test_check("bigQueryR") 6 | -------------------------------------------------------------------------------- /tests/testthat/test_query.R: -------------------------------------------------------------------------------- 1 | library(googleAuthR) 2 | 3 | context("Authentication") 4 | 5 | options(googleAuthR.scopes.selected = "https://www.googleapis.com/auth/cloud-platform") 6 | 7 | test_that("Can authenticate", { 8 | skip_on_cran() 9 | skip_if_no_env_auth("BQ_AUTH_FILE") 10 | 11 | projects <- bqr_list_projects() 12 | expect_s3_class(projects, "data.frame") 13 | 14 | }) 15 | 16 | test_that("Set global project", { 17 | skip_on_cran() 18 | skip_if_no_env_auth("BQ_AUTH_FILE") 19 | expect_equal(bq_global_project("mark-edmondson-gde"), 20 | "mark-edmondson-gde") 21 | 22 | }) 23 | 24 | test_that("Set global dataset", { 25 | skip_on_cran() 26 | skip_if_no_env_auth("BQ_AUTH_FILE") 27 | expect_equal(bq_global_dataset("test2"), 28 | "test2") 29 | 30 | }) 31 | 32 | test_data <- data.frame(Name = c("Season","Test"), 33 | Date = as.Date(c("2010-06-30","2010-06-30")), 34 | ID = c(1,2), 35 | stringsAsFactors = FALSE) 36 | 37 | context("Uploads") 38 | 39 | 
test_that("Can upload test set",{ 40 | skip_on_cran() 41 | skip_if_no_env_auth("BQ_AUTH_FILE") 42 | ## canøt query against this too quickly if creating at same runtime 43 | out <- bqr_upload_data(tableId = "test2", upload_data = test_data) 44 | 45 | expect_equal(out$status$state, "DONE") 46 | 47 | }) 48 | 49 | test_that("Can upload via Google Cloud Storage",{ 50 | skip_on_cran() 51 | skip_if_no_env_auth("BQ_AUTH_FILE") 52 | 53 | library(googleCloudStorageR) 54 | gcs_global_bucket("bigqueryr-tests") 55 | 56 | f <- function(input, output) { 57 | write.table(input, sep = ",", 58 | col.names = FALSE, 59 | row.names = FALSE, 60 | quote = FALSE, 61 | file = output, 62 | qmethod = "double") 63 | } 64 | gcs_upload(mtcars, name = "mtcars_test3.csv", object_function = f) 65 | gcs_upload(mtcars, name = "mtcars_test4.csv", object_function = f) 66 | 67 | user_schema <- schema_fields(mtcars) 68 | bqr_upload_data(datasetId = "test", 69 | tableId = "from_gcs_mtcars", 70 | upload_data = c("gs://bigqueryr-tests/mtcars_test3.csv","gs://bigqueryr-tests/mtcars_test4.csv"), 71 | schema = user_schema) 72 | }) 73 | 74 | test_that("Can upload nested JSON",{ 75 | skip_on_cran() 76 | skip_if_no_env_auth("BQ_AUTH_FILE") 77 | 78 | the_list <- list(list(col1 = "yes", col2 = "no", col3 = list(nest1 = 1, nest2 = 3), col4 = "oh"), 79 | list(col1 = "yes2", col2 = "n2o", col3 = list(nest1 = 5, nest2 = 7), col4 = "oh2"), 80 | list(col1 = "yes3", col2 = "no3", col3 = list(nest1 = 7, nest2 = 55), col4 = "oh3")) 81 | bqr_upload_data(datasetId = "test", 82 | tableId = "nested_list_json", 83 | upload_data = the_list, autodetect = TRUE) 84 | }) 85 | 86 | context("List tables") 87 | 88 | test_that("Can list tables", { 89 | skip_on_cran() 90 | skip_if_no_env_auth("BQ_AUTH_FILE") 91 | 92 | result <- bqr_list_tables() 93 | expect_true("test1" %in% result$tableId) 94 | 95 | }) 96 | 97 | context("Query") 98 | 99 | test_that("Can query test set", { 100 | skip_on_cran() 101 | skip_if_no_env_auth("BQ_AUTH_FILE") 102 | result <- bqr_query(query = "SELECT * FROM test1") 103 | 104 | expect_equal(result$Name, test_data$Name) 105 | expect_equal(as.Date(result$Date), test_data$Date) 106 | expect_equal(result$ID, test_data$ID) 107 | 108 | }) 109 | 110 | test_that("Single query bug", { 111 | skip_on_cran() 112 | skip_if_no_env_auth("BQ_AUTH_FILE") 113 | 114 | result <- bqr_query(query = "SELECT repository.url FROM [publicdata:samples.github_nested] LIMIT 10") 115 | 116 | ## should be 10, not 1 117 | expect_equal(nrow(result), 10) 118 | 119 | }) 120 | 121 | test_that("Async query", { 122 | skip_on_cran() 123 | skip_if_no_env_auth("BQ_AUTH_FILE") 124 | 125 | job <- bqr_query_asynch(query = "SELECT * FROM test1", 126 | destinationTableId = "test3", 127 | writeDisposition = "WRITE_TRUNCATE") 128 | 129 | expect_equal(job$kind, "bigquery#job") 130 | 131 | job <- bqr_wait_for_job(job) 132 | expect_equal(job$status$state, "DONE") 133 | expect_null(job$status$errorResult) 134 | 135 | }) 136 | 137 | context("Downloading extracts") 138 | 139 | test_that("Extract data to Google Cloud Storage, and download", { 140 | skip_on_cran() 141 | skip_if_no_env_auth("BQ_AUTH_FILE") 142 | 143 | gcs_global_bucket("bigqueryr-tests") 144 | job_extract <- bqr_extract_data(tableId = "test3", 145 | cloudStorageBucket = gcs_get_global_bucket()) 146 | 147 | expect_equal(job_extract$kind, "bigquery#job") 148 | expect_null(job_extract$status$errorResult) 149 | 150 | job <- bqr_wait_for_job(job_extract) 151 | 152 | expect_equal(job$status$state, "DONE") 153 | 154 | urls <- 
bqr_grant_extract_access(job, email = "m@sunholo.com")
155 |   expect_true(grepl("https://storage.cloud.google.com/bigqueryr-tests/big-query-extract", urls))
156 | 
157 |   extract <- bqr_download_extract(job)
158 |   file_name <- basename(job$configuration$extract$destinationUri)
159 | 
160 |   downloaded <- list.files(pattern = gsub("-\\*\\.csv","", file_name))
161 |   expect_true(file.exists(downloaded))
162 |   unlink(downloaded)
163 | })
164 | 
165 | context("Tables")
166 | 
167 | test_that("Create a table", {
168 |   skip_on_cran()
169 |   skip_if_no_env_auth("BQ_AUTH_FILE")
170 | 
171 |   table <- bqr_create_table(tableId = "created_table", template_data = mtcars)
172 | 
173 |   expect_true(table)
174 | 
175 | })
176 | 
177 | test_that("Get meta data of table", {
178 |   skip_on_cran()
179 |   skip_if_no_env_auth("BQ_AUTH_FILE")
180 | 
181 |   meta <- bqr_table_meta(tableId = "created_table")
182 | 
183 |   expect_equal(meta$kind, "bigquery#table")
184 | 
185 | })
186 | 
187 | test_that("Get data of table", {
188 |   skip_on_cran()
189 |   skip_if_no_env_auth("BQ_AUTH_FILE")
190 | 
191 |   meta <- bqr_table_data(tableId = "created_table")
192 | 
193 |   expect_equal(meta$kind, "bigquery#tableDataList")
194 | 
195 | })
196 | 
197 | test_that("Delete a table", {
198 |   skip_on_cran()
199 |   skip_if_no_env_auth("BQ_AUTH_FILE")
200 | 
201 |   table <- bqr_delete_table(tableId = "created_table")
202 | 
203 |   expect_true(table)
204 | 
205 | })
206 | -------------------------------------------------------------------------------- /vignettes/bigQueryR.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "bigQueryR"
3 | author: "Mark Edmondson"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 |   %\VignetteIndexEntry{bigQueryR}
8 |   %\VignetteEngine{knitr::rmarkdown}
9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ## Introduction
13 | 
14 | This is a package for interacting with [BigQuery](https://cloud.google.com/bigquery/) from within R.
15 | 
16 | You may want instead to use [bigrquery](https://github.com/hadley/bigrquery), which is more developed and integrates with `dplyr` etc. Some functions from `bigrquery` are used in this package.
17 | 
18 | ### Why this package then?
19 | 
20 | This package is here as it uses [googleAuthR](https://github.com/MarkEdmondson1234/googleAuthR) as a backend, so it has Shiny support and compatibility with other googleAuthR-dependent packages.
21 | 
22 | It also has support for data extracts to Google Cloud Storage, meaning you can download data and make the download URL available to a user via their Google email. If you run a normal query with over 100,000 results it tends to hang and error.
23 | 
24 | ## Authentication
25 | 
26 | Authentication is as used in other `googleAuthR` libraries:
27 | 
28 | ```r
29 | library(bigQueryR)
30 | 
31 | ## this will open your browser
32 | ## Authenticate with an email that has access to the BigQuery project you need
33 | bqr_auth()
34 | 
35 | ## or authenticate under a different email
36 | bqr_auth(email="me@work.com")
37 | ```
38 | 
39 | If you are authenticating under several APIs via `googleAuthR` then use `gar_auth()` instead with the appropriate scopes set.
40 | 
41 | You can also use service-to-service JSON files and multi-user authentication under Shiny; see the `googleAuthR` readme for details.
42 | 
43 | ## Listing BigQuery meta data
44 | 
45 | Various functions for listing what is in your BigQuery account.
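Most of these functions default to a global project and dataset, which you can set once per session so you don't have to pass them to every call (a short sketch; the names are illustrative):

```r
library(bigQueryR)

## illustrative names - replace with your own project and dataset
bqr_global_project("my-project")
bqr_global_dataset("my_dataset")

## listing and query functions will now use these defaults
```

You can also work through the project/dataset/table hierarchy explicitly: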
46 | 
47 | ```r
48 | library(bigQueryR)
49 | 
50 | ## this will open your browser
51 | ## Authenticate with an email that has access to the BigQuery project you need
52 | bqr_auth()
53 | 
54 | ## get projects
55 | projects <- bqr_list_projects()
56 | 
57 | my_project <- projects$id[1]
58 | 
59 | ## for first project, get datasets
60 | datasets <- bqr_list_datasets(my_project)
61 | 
62 | my_dataset <- datasets$datasetId[1]
63 | ## list tables
64 | my_tables <- bqr_list_tables(my_project, my_dataset)
65 | 
66 | ## get metadata for table
67 | meta_table <- bqr_table_meta(my_project, my_dataset, my_tables$tableId[1])
68 | 
69 | ```
70 | 
71 | ## Simple Queries
72 | 
73 | You can pass in queries that have results under ~100,000 rows using this command:
74 | 
75 | ```r
76 | bqr_query(my_project, "samples",
77 |           "SELECT COUNT(repository.url) FROM [publicdata:samples.github_nested]")
78 | 
79 | ```
80 | 
81 | More than that, and the API starts to hang and you are limited by your download bandwidth.
82 | 
83 | ## Asynchronous Queries
84 | 
85 | For bigger queries, asynchronous queries save the results to another BigQuery table. You can check the progress of the job via `bqr_get_job`.
86 | 
87 | ```r
88 | library(bigQueryR)
89 | 
90 | ## Auth with a project that has at least BigQuery and Google Cloud Storage scope
91 | bqr_auth()
92 | 
93 | ## make a big query
94 | job <- bqr_query_asynch("your_project",
95 |                         "your_dataset",
96 |                         "SELECT * FROM blah LIMIT 9999999",
97 |                         destinationTableId = "bigResultTable")
98 | 
99 | ## poll the job to check its status
100 | ## its done when job$status$state == "DONE"
101 | bqr_get_job(job$jobReference$jobId, "your_project")
102 | 
103 | ##once done, the query results are in "bigResultTable"
104 | ```
105 | You may now want to download this data. For large datasets, this is best done via extracting the BigQuery result to Google Cloud Storage, then downloading the data from there.
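The `bqr_download_query()` helper wraps this extract-and-download flow in a single call (a minimal sketch; the table reference is illustrative, and it assumes a Google Cloud Storage bucket has already been set via `googleCloudStorageR`):

```r
## assumes e.g. googleCloudStorageR::gcs_global_bucket("my-bucket") has been set
## the table reference below is illustrative
results <- bqr_download_query(
  query = "SELECT * FROM `my_project.my_dataset.my_table`",
  useLegacySql = FALSE
)
```

Alternatively, you can run the extract and download steps yourself, as below.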
106 | 
107 | You can create a bucket at Google Cloud Storage see https://cloud.google.com/storage/docs/cloud-console, or you can use [library(googleCloudStorageR)](https://github.com/cloudyr/googleCloudStorageR)
108 | 
109 | Once created, you can extract your data via the below:
110 | 
111 | ```r
112 | ## Create the data extract from BigQuery to Cloud Storage
113 | job_extract <- bqr_extract_data("your_project",
114 |                                 "your_dataset",
115 |                                 "bigResultTable",
116 |                                 "your_cloud_storage_bucket_name")
117 | 
118 | ## poll the extract job to check its status
119 | ## its done when job$status$state == "DONE"
120 | bqr_get_job(job_extract$jobReference$jobId, "your_project")
121 | 
122 | ## to download via a URL and not logging in via Google Cloud Storage interface:
123 | ## Use an email that is Google account enabled
124 | ## Requires scopes:
125 | ## https://www.googleapis.com/auth/devstorage.full_control
126 | ## https://www.googleapis.com/auth/cloud-platform
127 | ## set via options("bigQueryR.scopes") and reauthenticate if needed
128 | 
129 | download_url <- bqr_grant_extract_access(job_extract, "your@email.com")
130 | 
131 | ## download_url may be multiple if the data is > 1GB
132 | > [1] "https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000000.csv"
133 | > [2] "https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000001.csv"
134 | > [3] "https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000002.csv"
135 | 
136 | ```
137 | --------------------------------------------------------------------------------
