├── tests ├── testthat.R └── testthat │ ├── test-url-availability.R │ ├── test-rba-functions.R │ ├── test-abs-cat-functions.R │ └── test-abs-api-functions.R ├── data ├── abs_cachelist.rda ├── rba_cachelist.rda ├── abs_cat_cachelist.rda └── aus_state_codes.rda ├── .gitignore ├── R ├── z-release-questions.R ├── zzz.R ├── raustats.R ├── z-debugging-code.R ├── httr-utilities.R ├── data.R ├── date-utilities.R ├── z-unused-functions.R ├── rba-functions.R ├── abs-api-functions.R ├── abs-cat-functions.R └── abs-series-abbreviation.R ├── data-raw ├── Australian-States-Territories.csv ├── build-data.R └── ABS-TSS-Catalogue-Numbers.csv ├── .Rbuildignore ├── man ├── excel2Date.Rd ├── last_day.Rd ├── raustats_ua.Rd ├── rba_table_cache.Rd ├── rba_urls.Rd ├── abs_call_api.Rd ├── fin_year.Rd ├── abs_cachelist.Rd ├── aus_state_codes.Rd ├── abs_api_urls.Rd ├── abs_filetypes.Rd ├── abs_api_call.Rd ├── raustats.Rd ├── abs_local_filename.Rd ├── abs_urls.Rd ├── rba_read_tss.Rd ├── abs_cat_cachelist.Rd ├── abs_datasets.Rd ├── quarter2Date.Rd ├── abs_cat_unzip.Rd ├── raustats_check_url_available.Rd ├── rba_cachelist.Rd ├── abs_cat_download.Rd ├── abs_metadata.Rd ├── rba_file_download.Rd ├── abs_dimensions.Rd ├── abs_cat_releases.Rd ├── rba_stats.Rd ├── rba_search.Rd ├── abs_read_tss.Rd ├── abs_cat_stats.Rd ├── abs_search.Rd ├── abs_cat_tables.Rd └── abs_stats.Rd ├── DESCRIPTION ├── NAMESPACE ├── cran-comments.md ├── NEWS.md ├── TODO ├── README.md └── README.Rmd /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(raustats) 3 | 4 | test_check("raustats") 5 | -------------------------------------------------------------------------------- /data/abs_cachelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/abs_cachelist.rda -------------------------------------------------------------------------------- /data/rba_cachelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/rba_cachelist.rda -------------------------------------------------------------------------------- /data/abs_cat_cachelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/abs_cat_cachelist.rda -------------------------------------------------------------------------------- /data/aus_state_codes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/aus_state_codes.rda -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | *~ 6 | R-example-code/* 7 | *discards* 8 | /README.html 9 | doc 10 | raustats_presentation.* 11 | -------------------------------------------------------------------------------- /R/z-release-questions.R: -------------------------------------------------------------------------------- 1 | ## Package checking and other miscellaneous functions 2 | 3 | release_questions <- function() { 4 | c( 5 | "Have you updated static package data (data-raw/build-data.R)?" 
6 | ) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /data-raw/Australian-States-Territories.csv: -------------------------------------------------------------------------------- 1 | state_code,state_abb,state_name 2 | 0,Aus,Australia 3 | 1,NSW,New South Wales 4 | 2,Vic,Victoria 5 | 3,Qld,Queensland 6 | 4,SA,South Australia 7 | 5,WA,Western Australia 8 | 6,Tas,Tasmania 9 | 7,NT,Northern Territory 10 | 8,ACT,Australian Capital Territory 11 | 9,OT,Other Territories 12 | 13 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(libname, pkgname) { 2 | packageStartupMessage("Attaching package: 'raustats'"); 3 | } 4 | 5 | ## .onLoad <- function(libname, pkgname) 6 | ## { 7 | ## } 8 | 9 | ## .onDetach <- function(libname, pkgname) 10 | ## { 11 | ## } 12 | 13 | ## .Last <- function() 14 | ## { 15 | ## } 16 | 17 | ## .First <- function() 18 | ## { 19 | ## } 20 | 21 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | # Rbuildignore 2 | # Ignore git repository 3 | ^.git/$ 4 | ^.gitignore$ 5 | ^.Rhistory$ 6 | ^Meta$ 7 | ^doc$ 8 | data-raw 9 | ^EXTRA-CODE$ 10 | ^README$ 11 | ^README.Rmd$ 12 | ^README.html$ 13 | ^CHANGES$ 14 | ^TODO$ 15 | ^R-example-code/$ 16 | abs-series-abbreviation.R 17 | z-debugging-code.R 18 | z-release-questions.R 19 | z-unused-functions.R 20 | raustats_presentation.Rmd 21 | raustats_presentation.pptx 22 | cran-comments.md 23 | -------------------------------------------------------------------------------- /man/excel2Date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{excel2Date} 4 | \alias{excel2Date} 5 | \title{Convert Excel numeric date to R Date object} 6 | \usage{ 7 | excel2Date(x) 8 | } 9 | \arguments{ 10 | \item{x}{Excel-based date numeric object} 11 | } 12 | \value{ 13 | Date object 14 | } 15 | \description{ 16 | Function to convert Excel numeric date to R Date object 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/last_day.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{last_day} 4 | \alias{last_day} 5 | \title{Set Date object to the last day of the month} 6 | \usage{ 7 | last_day(date) 8 | } 9 | \arguments{ 10 | \item{date}{date object} 11 | } 12 | \value{ 13 | Date object 14 | } 15 | \description{ 16 | Function to change the date of a Date object to the last day of the month 17 | } 18 | \author{ 19 | David Mitchell 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /R/raustats.R: -------------------------------------------------------------------------------- 1 | #' raustats: An R package for accessing data and statistics from the ABS and RBA websites 2 | #' 3 | #' The raustats package provides structured access to all data and statistics 4 | #' available from the Australian Bureau of Statistics and Reserve Bank of Australia 5 | #' website, as well as draft access to the ABS.Stat - Beta data catalogue API. 
6 | #' 7 | #' To learn more about the raustats package, start with the vignettes: 8 | #' \code{browseVignettes(package = "raustats")} 9 | #' 10 | #' @docType package 11 | #' @name raustats 12 | NULL 13 | -------------------------------------------------------------------------------- /man/raustats_ua.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/httr-utilities.R 3 | \name{raustats_ua} 4 | \alias{raustats_ua} 5 | \title{raustats package user agent} 6 | \usage{ 7 | raustats_ua() 8 | } 9 | \value{ 10 | a list with a base url and a url section for formatting the JSON API calls 11 | } 12 | \description{ 13 | This function specifies the package user agent, and is used inside 14 | GET/POST function calls 15 | } 16 | \author{ 17 | David Mitchell 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/rba_table_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_table_cache} 4 | \alias{rba_table_cache} 5 | \title{Return list of RBA tables} 6 | \usage{ 7 | rba_table_cache() 8 | } 9 | \value{ 10 | data frame in long format 11 | } 12 | \description{ 13 | Function to return an updated list of data tables available from the RBA website. 14 | } 15 | \examples{ 16 | \donttest{ 17 | rba_cachelist <- rba_table_cache(); 18 | } 19 | } 20 | \author{ 21 | David Mitchell 22 | } 23 | -------------------------------------------------------------------------------- /man/rba_urls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_urls} 4 | \alias{rba_urls} 5 | \title{RBA base URL and data paths} 6 | \usage{ 7 | rba_urls() 8 | } 9 | \value{ 10 | list of RBA base URL and data paths 11 | } 12 | \description{ 13 | This function returns a list of URLs and data paths used to construct RBA data 14 | access calls. It is used in other functions in this package and need not be called directly. 
15 | } 16 | \author{ 17 | David Mitchell 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/abs_call_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_call_api} 4 | \alias{abs_call_api} 5 | \title{Submit API call to ABS.Stat} 6 | \usage{ 7 | abs_call_api(url) 8 | } 9 | \arguments{ 10 | \item{url}{Character string specifying a valid ABS.Stat API call 11 | URL.} 12 | } 13 | \value{ 14 | data frame in long format 15 | } 16 | \description{ 17 | This function submits the specified API call to ABS.Stat. 18 | } 19 | \author{ 20 | David Mitchell 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/fin_year.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{fin_year} 4 | \alias{fin_year} 5 | \title{Create financial year date object} 6 | \usage{ 7 | fin_year(date, ending = "Jun") 8 | } 9 | \arguments{ 10 | \item{date}{date object} 11 | 12 | \item{ending}{character string abbreviation or number denoting the ending month of the financial year} 13 | } 14 | \value{ 15 | Date object 16 | } 17 | \description{ 18 | Function to create a financial year date object 19 | } 20 | \author{ 21 | David Mitchell 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/abs_cachelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{abs_cachelist} 5 | \alias{abs_cachelist} 6 | \title{Datasets available through the ABS API} 7 | \format{A data frame containing three columns: 8 | \itemize{ 9 | \item \code{id} ABS dataset identifier. 10 | \item \code{agencyID} Source agency identifier (ABS). 11 | \item \code{name} ABS dataset name. 12 | }} 13 | \usage{ 14 | abs_cachelist 15 | } 16 | \description{ 17 | This data set provides a list of all datasets, and the associated metadata, 18 | available through the ABS API. 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/aus_state_codes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{aus_state_codes} 5 | \alias{aus_state_codes} 6 | \title{Table of Australian state and territory codes} 7 | \format{A data frame containing three columns: 8 | \itemize{ 9 | \item \code{state_code} One-digit state code. 10 | \item \code{state_abb} State/territory abbreviation. 11 | \item \code{state_name} State/territory name. 
12 | }} 13 | \usage{ 14 | aus_state_codes 15 | } 16 | \description{ 17 | A list of Australian state and territory codes (including code 0 -- Australia) 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/abs_api_urls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_api_urls} 4 | \alias{abs_api_urls} 5 | \title{ABS URL addresses and paths used in ABS.Stat API calls} 6 | \usage{ 7 | abs_api_urls() 8 | } 9 | \value{ 10 | a list with a base url and a url section for formatting the JSON API calls 11 | } 12 | \description{ 13 | This function returns a list of URLs and data paths used to construct ABS.Stat 14 | API calls. It is used in other functions in this package and need not be called directly. 15 | } 16 | \author{ 17 | David Mitchell 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /data-raw/build-data.R: -------------------------------------------------------------------------------- 1 | ## Build datasets 2 | devtools::load_all("."); 3 | rba_cachelist <- rba_table_cache(); 4 | abs_cat_cachelist <- read.csv(here::here("data-raw", "ABS-TSS-Catalogue-Numbers.csv")); 5 | abs_cachelist <- abs_datasets(); 6 | aus_state_codes <- read.csv(here::here("data-raw", "Australian-States-Territories.csv")); 7 | 8 | ## Write data set files 9 | usethis::use_data(rba_cachelist, overwrite=TRUE); 10 | usethis::use_data(abs_cat_cachelist, overwrite=TRUE); 11 | usethis::use_data(abs_cachelist, overwrite=TRUE); 12 | usethis::use_data(aus_state_codes, overwrite=TRUE); 13 | 14 | ## ---------------------------------- EOF ------------------------------------- 15 | -------------------------------------------------------------------------------- /man/abs_filetypes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_filetypes} 4 | \alias{abs_filetypes} 5 | \title{Valid ABS file types} 6 | \usage{ 7 | abs_filetypes() 8 | } 9 | \value{ 10 | a vector containing a list of valid ABS file types. 11 | } 12 | \description{ 13 | This function returns a vector of the valid ABS file types used in constructing ABS Catalogue 14 | data access calls. It is used in other functions in this package and need not be called 15 | directly. 
16 | } 17 | \author{ 18 | David Mitchell 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/abs_api_call.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_api_call} 4 | \alias{abs_api_call} 5 | \title{Create ABS.Stat API URL call} 6 | \usage{ 7 | abs_api_call(path, args) 8 | } 9 | \arguments{ 10 | \item{path}{Character string specifying the ABS.Stat API data path to 11 | call.} 12 | 13 | \item{args}{Named list of arguments to supply to the call.} 14 | } 15 | \value{ 16 | data frame in long format 17 | } 18 | \description{ 19 | This function creates the ABS.Stat API call URL. 20 | } 21 | \author{ 22 | David Mitchell 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/raustats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/raustats.R 3 | \docType{package} 4 | \name{raustats} 5 | \alias{raustats} 6 | \title{raustats: An R package for accessing data and statistics from the ABS and RBA websites} 7 | \description{ 8 | The raustats package provides structured access to all data and statistics 9 | available from the Australian Bureau of Statistics and Reserve Bank of Australia 10 | website, as well as draft access to the ABS.Stat - Beta data catalogue API. 11 | } 12 | \details{ 13 | To learn more about the raustats package, start with the vignettes: 14 | \code{browseVignettes(package = "raustats")} 15 | } 16 | -------------------------------------------------------------------------------- /man/abs_local_filename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_local_filename} 4 | \alias{abs_local_filename} 5 | \title{Create local file names for storing downloaded ABS data files} 6 | \usage{ 7 | abs_local_filename(url) 8 | } 9 | \arguments{ 10 | \item{url}{Character vector specifying one or more ABS data URLs.} 11 | } 12 | \value{ 13 | Returns local file names (character vector) in which downloaded files will be saved. 14 | } 15 | \description{ 16 | Function to create local file names from web-based file names. 17 | } 18 | \author{ 19 | David Mitchell 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/abs_urls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_urls} 4 | \alias{abs_urls} 5 | \title{ABS URL addresses and paths used in accessing ABS Catalogue data calls} 6 | \usage{ 7 | abs_urls() 8 | } 9 | \value{ 10 | a list with a base url and a url section for formatting ABS Catalogue statistics calls 11 | } 12 | \description{ 13 | This function returns a list of URLs and data paths used to construct ABS Catalogue 14 | data access calls. It is used in other functions in this package and need not be called 15 | directly. 
16 | } 17 | \author{ 18 | David Mitchell 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/rba_read_tss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_read_tss} 4 | \alias{rba_read_tss} 5 | \title{Read RBA statistical time series spreadsheet} 6 | \usage{ 7 | rba_read_tss(files) 8 | } 9 | \arguments{ 10 | \item{files}{Names of one or more RBA data files} 11 | } 12 | \value{ 13 | data frame in long format 14 | } 15 | \description{ 16 | Function to extract data from a specified RBA time series spreadsheet. 17 | } 18 | \examples{ 19 | \donttest{ 20 | rba_urls <- rba_search(pattern = "Liabilities and Assets")$url 21 | rba_files <- sapply(rba_urls, rba_file_download) 22 | data <- rba_read_tss(rba_files); 23 | } 24 | } 25 | \author{ 26 | David Mitchell 27 | } 28 | -------------------------------------------------------------------------------- /man/abs_cat_cachelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{abs_cat_cachelist} 5 | \alias{abs_cat_cachelist} 6 | \title{List of ABS catalogue tables} 7 | \format{A data frame containing five columns: 8 | \itemize{ 9 | \item \code{publication_title} ABS publication title. 10 | \item \code{catalogue_no} ABS catalogue number. 11 | \item \code{abs_url} ABS URL. 12 | \item \code{last_updated} Publication last updated. 13 | \item \code{type} Publication type -- one of either 'time series', 'panel' or 'summary'. 14 | }} 15 | \usage{ 16 | abs_cat_cachelist 17 | } 18 | \description{ 19 | This data set provides a list of the most common ABS catalogue tables. 20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /man/abs_datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_datasets} 4 | \alias{abs_datasets} 5 | \title{Download ABS.Stat datasets} 6 | \usage{ 7 | abs_datasets(lang = "en", include_notes = FALSE) 8 | } 9 | \arguments{ 10 | \item{lang}{Preferred language (default 'en' - English).} 11 | 12 | \item{include_notes}{Include ABS annotation information for each series.} 13 | } 14 | \value{ 15 | data frame in long format 16 | } 17 | \description{ 18 | This function returns a list of all datasets available from ABS.Stat. 19 | } 20 | \examples{ 21 | \donttest{ 22 | datasets <- abs_datasets() 23 | datasets <- abs_datasets(include_notes=TRUE) 24 | } 25 | } 26 | \author{ 27 | David Mitchell 28 | } 29 | -------------------------------------------------------------------------------- /man/quarter2Date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{quarter2Date} 4 | \alias{quarter2Date} 5 | \title{Convert dates formatted as year-quarter to date objects} 6 | \usage{ 7 | quarter2Date(x, base.month = "Mar", format = "\%Y-Q\%q") 8 | } 9 | \arguments{ 10 | \item{x}{Year-quarter date format} 11 | 12 | \item{base.month}{Specifies base month for first quarter. 
Can be a scalar: 1,2,3 or character 13 | object: Jan, Feb, Mar.} 14 | 15 | \item{format}{The input date format. Default is "\%Y-Q\%q".} 16 | } 17 | \value{ 18 | This function returns a Date format object. 19 | } 20 | \description{ 21 | Function to convert dates formatted as year-quarter to date-format objects 22 | } 23 | \author{ 24 | David Mitchell 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/abs_cat_unzip.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_unzip} 4 | \alias{abs_cat_unzip} 5 | \title{Uncompress locally-stored ABS Catalogue data file archives} 6 | \usage{ 7 | abs_cat_unzip(files, exdir) 8 | } 9 | \arguments{ 10 | \item{files}{One or more local zip files.} 11 | 12 | \item{exdir}{Target directory for extracted archive files. Directory is created if it doesn't 13 | exist. If missing, creates a new subdirectory in \code{tempdir()} using the respective zip 14 | files (specified in \code{files}).} 15 | } 16 | \value{ 17 | Returns a character vector listing the names of all files extracted. 18 | } 19 | \description{ 20 | Function to uncompress locally-stored ABS Catalogue data file archives. 21 | } 22 | \author{ 23 | David Mitchell 24 | } 25 | -------------------------------------------------------------------------------- /man/raustats_check_url_available.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/httr-utilities.R 3 | \name{raustats_check_url_available} 4 | \alias{raustats_check_url_available} 5 | \title{Check specified ABS/RBA URL available} 6 | \usage{ 7 | raustats_check_url_available(url) 8 | } 9 | \arguments{ 10 | \item{url}{The base URL to check.} 11 | } 12 | \value{ 13 | \code{TRUE} if the API is available, otherwise \code{stop()} is called. 14 | } 15 | \description{ 16 | Function to ensure URL calls fail gracefully with an informative message if the 17 | resource is not available (and not give a check warning nor error). 18 | } 19 | \note{ 20 | Based on code in \code{opensensmapR} (\url{https://github.com/sensebox/opensensmapR/blob/f69cf62b2771d5b6ed605c04b7ddd618f5a272c2/R/api.R}{\code{api.R}}). 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/rba_cachelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{rba_cachelist} 5 | \alias{rba_cachelist} 6 | \title{Cached list of statistical tables provided by the RBA} 7 | \format{A data frame containing four columns: 8 | \itemize{ 9 | \item \code{table_code} RBA table code. 10 | \item \code{table_name} RBA table name. 11 | \item \code{table_type} One of either current statistical tables, historical data or discontinued data. 12 | \item \code{url} RBA URL. 13 | }} 14 | \usage{ 15 | rba_cachelist 16 | } 17 | \description{ 18 | This data is a cached result of the \code{\link{rba_table_cache}} function. By 19 | default functions \code{\link{rba_search}} and \code{\link{rba_stats}} use this data if the 20 | \code{update_cache} parameter is \code{FALSE}. 
21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/abs_cat_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_download} 4 | \alias{abs_cat_download} 5 | \title{Function to download files from the ABS website and store locally} 6 | \usage{ 7 | abs_cat_download(data_url, exdir = tempdir()) 8 | } 9 | \arguments{ 10 | \item{data_url}{Character vector specifying an ABS data URLs.} 11 | 12 | \item{exdir}{Target directory for downloaded files (defaults to \code{tempdir()}). Directory is 13 | created if it doesn't exist.} 14 | } 15 | \value{ 16 | Downloads data from the ABS website and returns a character vector listing the location 17 | where files are saved. 18 | } 19 | \description{ 20 | Downloads specified ABS catalogue data files from the ABS website, using a valid ABS 21 | data table URL. 22 | } 23 | \author{ 24 | David Mitchell 25 | } 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: raustats 2 | Type: Package 3 | Title: Access Data and Statistics from the ABS and RBA Websites 4 | Description: Functions for downloading Australian economic statistics 5 | from the Australian Bureau of Statistics (ABS) (see ) and 6 | Reserve Bank of Australia (RBA) (see ) websites. 7 | Version: 0.15.0 8 | Date: 2019-12-20 9 | Authors@R: c( 10 | person("David", "Mitchell", email = "david.pk.mitchell@gmail.com", role = c("aut", "cre")) 11 | ) 12 | Maintainer: David Mitchell 13 | URL: https://github.com/mitcda/raustats 14 | License: GPL-3 15 | Depends: R (>= 3.3.0), readxl 16 | Imports: stats, dplyr, httr, jsonlite, rvest, tidyr, xml2, lubridate 17 | Suggests: repo, knitr, rmarkdown, testthat, ggplot2 18 | LazyData: TRUE 19 | RoxygenNote: 7.0.2 20 | VignetteBuilder: knitr 21 | NeedsCompilation: no 22 | Encoding: UTF-8 23 | -------------------------------------------------------------------------------- /man/abs_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_metadata} 4 | \alias{abs_metadata} 5 | \title{Download dataset metadata from the ABS API} 6 | \usage{ 7 | abs_metadata(id, lang = "en") 8 | } 9 | \arguments{ 10 | \item{id}{ABS dataset ID.} 11 | 12 | \item{lang}{Preferred language (default 'en' - English).} 13 | } 14 | \value{ 15 | data frame in long format 16 | } 17 | \description{ 18 | This function queries and returns all metadata associated with a specified dataset 19 | from ABS.Stat. 
20 | } 21 | \examples{ 22 | \donttest{ 23 | datasets <- abs_datasets(); 24 | x <- abs_metadata("CPI"); 25 | x <- abs_metadata(grep("cpi", datasets$id, ignore.case=TRUE, value=TRUE)); 26 | names(x) 27 | y <- abs_metadata(datasets$id[1]); 28 | names(y) 29 | } 30 | } 31 | \author{ 32 | David Mitchell 33 | } 34 | -------------------------------------------------------------------------------- /R/z-debugging-code.R: -------------------------------------------------------------------------------- 1 | #' # Debugging code 2 | #' 3 | #' ## Debugging `abs_stats` function 4 | #' 5 | 6 | #' ### Testing empty returns 7 | ## Regional Statistics by ASGS 2016 8 | ## - Economy and Industry 9 | ## + Number of businesses 10 | ## library(magrittr) 11 | ## abs_id <- abs_search("regional statistics") %>% 12 | ## filter(grepl("regional\\s*statistics.*asgs\\s*2016", name, ignore.case=TRUE)); 13 | ## abs_meta <- abs_metadata(abs_id$id); 14 | ## abs_fltr <- abs_search("^total\\s*number.+business\\s*entries", dataset=abs_id$id, code_only=TRUE); 15 | 16 | ## ## Debugging settings 17 | ## dataset <- "ABS_REGIONAL_ASGS2016" 18 | ## filter <- list(MEASURE="CABEE_6", 19 | ## # MEASURE="CABEE_10", 20 | ## REGIONTYPE="STE", 21 | ## ASGS_2016=1:8); 22 | ## start_date <- 2011 23 | ## end_date <- 2018 24 | ## dimensionAtObservation <- "AllDimensions" 25 | ## detail <- "Full" 26 | ## enforce_api_limits <- TRUE 27 | 28 | -------------------------------------------------------------------------------- /man/rba_file_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_file_download} 4 | \alias{rba_file_download} 5 | \title{Function to download statistics files from the RBA website and store locally} 6 | \usage{ 7 | rba_file_download(data_url, exdir = tempdir(), update_cache = TRUE) 8 | } 9 | \arguments{ 10 | \item{data_url}{Character vector specifying an RBA data set URL.} 11 | 12 | \item{exdir}{Target directory for downloaded files (defaults to \code{tempdir()}). Directory is 13 | created if it doesn't exist.} 14 | 15 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 16 | RBA datasets, if TRUE, update the list of available datasets.} 17 | } 18 | \value{ 19 | Downloads data from the RBA website and returns a character vector listing the location 20 | where files are saved. 21 | } 22 | \description{ 23 | This function downloads one or more RBA data files from the specified URLs and 24 | saves a local copy of each. 25 | } 26 | \author{ 27 | David Mitchell 28 | } 29 | -------------------------------------------------------------------------------- /man/abs_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_dimensions} 4 | \alias{abs_dimensions} 5 | \title{Return available dimensions of ABS series} 6 | \usage{ 7 | abs_dimensions(dataset, update_cache = FALSE) 8 | } 9 | \arguments{ 10 | \item{dataset}{Character vector of dataset codes. 
These codes correspond to the 11 | \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 12 | \code{abs_cachelist}, or the result of \code{abs_indicators}.} 13 | 14 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 15 | ABS.Stat datasets, if TRUE, update the list of available datasets.} 16 | } 17 | \value{ 18 | a data frame with available dataset dimensions. 19 | } 20 | \description{ 21 | This function returns the available dimensions for a specified ABS API dataset. 22 | } 23 | \examples{ 24 | \donttest{ 25 | ## CPI - Consumer Price Index 26 | x <- abs_dimensions("CPI"); 27 | str(x) 28 | ## LF - Labour Force 29 | x <- abs_dimensions("LF"); 30 | str(x) 31 | } 32 | } 33 | \author{ 34 | David Mitchell 35 | } 36 | -------------------------------------------------------------------------------- /man/abs_cat_releases.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_releases} 4 | \alias{abs_cat_releases} 5 | \title{Return ABS catalogue table releases} 6 | \usage{ 7 | abs_cat_releases(cat_no, include_urls = FALSE) 8 | } 9 | \arguments{ 10 | \item{cat_no}{ABS catalogue numbers.} 11 | 12 | \item{include_urls}{Include full path URL to specified ABS catalogue releases. Default (FALSE) 13 | does not include release URLs.} 14 | } 15 | \value{ 16 | Returns a data frame listing available ABS catalogue releases. 17 | } 18 | \description{ 19 | Return list of all releases available for specified ABS catalogue number. 20 | } 21 | \examples{ 22 | \donttest{ 23 | ## List all available quarterly National Accounts releases 24 | ana_releases <- abs_cat_releases("5206.0"); 25 | ana_release_urls <- abs_cat_releases("5206.0", include_urls=TRUE); 26 | 27 | ## List all available CPI releases 28 | cpi_releases <- abs_cat_releases("6401.0"); 29 | cpi_release_urls <- abs_cat_releases("6401.0", include_urls=TRUE); 30 | } 31 | } 32 | \author{ 33 | David Mitchell 34 | } 35 | -------------------------------------------------------------------------------- /man/rba_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_stats} 4 | \alias{rba_stats} 5 | \title{Return data for a specified RBA time series} 6 | \usage{ 7 | rba_stats(table_no, pattern, url, update_cache = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{table_no}{Character vector specifying one or more RBA table numbers to download.} 11 | 12 | \item{pattern}{Character string or regular expression to be matched.} 13 | 14 | \item{url}{Valid URL for RBA dataset (Excel format only).} 15 | 16 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 17 | RBA datasets, if TRUE, update the list of available datasets.} 18 | 19 | \item{...}{Other arguments to \code{\link{rba_search}}, e.g. \code{series_type = "statistical_tables"}.} 20 | } 21 | \value{ 22 | data frame in long format 23 | } 24 | \description{ 25 | Function to download and return specified RBA time series data. 
26 | } 27 | \examples{ 28 | \donttest{ 29 | ## Example - Selecting by table_no 30 | x <- rba_stats("A1"); 31 | 32 | ## Example - Selecting by pattern 33 | x <- rba_stats(pattern="Liabilities and Assets"); 34 | } 35 | } 36 | \author{ 37 | David Mitchell 38 | } 39 | -------------------------------------------------------------------------------- /man/rba_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_search} 4 | \alias{rba_search} 5 | \title{Return list of data tables from RBA website} 6 | \usage{ 7 | rba_search( 8 | pattern, 9 | fields = c("table_no", "table_name"), 10 | series_type = "statistical tables", 11 | ignore.case = TRUE, 12 | update_cache = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{pattern}{Character string or regular expression to be matched.} 17 | 18 | \item{fields}{Character vector of column names through which to search. By default, the function 19 | searches 'table_no' and 'table_name'.} 20 | 21 | \item{series_type}{Character vector specifying one or more of 'statistical tables', 'historical data' or 22 | 'discontinued data'. By default, \code{series_type = 'statistical tables'}.} 23 | 24 | \item{ignore.case}{Case-sensitive pattern match or not.} 25 | 26 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 27 | RBA tables (\code{rba_cachelist}), if TRUE, update the list of available datasets.} 28 | } 29 | \value{ 30 | data frame in long format 31 | } 32 | \description{ 33 | Function to return a list of all RBA data tables. 34 | } 35 | \examples{ 36 | rba_datasets <- rba_search(pattern = "Liabilities and Assets"); 37 | } 38 | \author{ 39 | David Mitchell 40 | } 41 | -------------------------------------------------------------------------------- /man/abs_read_tss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_read_tss} 4 | \alias{abs_read_tss} 5 | \alias{abs_read_tss_} 6 | \title{Extract data from an ABS time series data file} 7 | \usage{ 8 | abs_read_tss(files, type = "tss", na.rm = TRUE) 9 | 10 | abs_read_tss_(file, type = "tss", na.rm = na.rm) 11 | } 12 | \arguments{ 13 | \item{files}{Names of one or more ABS data files} 14 | 15 | \item{type}{One of either 'tss' -- ABS Time Series Spreadsheet (the default) or 'css' -- Data 16 | Cube.} 17 | 18 | \item{na.rm}{logical. If \code{TRUE} (default), remove observations containing missing values.} 19 | } 20 | \value{ 21 | data frame in long format 22 | } 23 | \description{ 24 | This function extracts time series data from ABS data files. 25 | 26 | This is the internal function that extracts time series data from ABS data files. 
27 | } 28 | \examples{ 29 | \donttest{ 30 | ## Read specified ABS Excel time series files 31 | tables <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 32 | downloaded_tables <- abs_cat_download(tables$path_zip, exdir=tempdir()) 33 | extracted_files <- abs_cat_unzip(downloaded_tables) 34 | x <- abs_read_tss(extracted_files); 35 | } 36 | } 37 | \author{ 38 | David Mitchell 39 | 40 | David Mitchell 41 | } 42 | \keyword{internal} 43 | -------------------------------------------------------------------------------- /R/httr-utilities.R: -------------------------------------------------------------------------------- 1 | ## httr settings 2 | #' @name raustats_ua 3 | #' @title raustats package user agent 4 | #' @description This function specifies the package user agent, and is used inside 5 | #' GET/POST function calls 6 | #' @importFrom httr user_agent 7 | #' @return a list with a base url and a url section for formatting the JSON API calls 8 | #' @author David Mitchell 9 | #' @keywords internal 10 | raustats_ua <- function() 11 | user_agent("http://github.com/mitcda/raustats") 12 | 13 | 14 | ## Check if the specified ABS/RBA URL is available 15 | #' @name raustats_check_url_available 16 | #' @title Check specified ABS/RBA URL available 17 | #' @description Function to ensure URL calls fail gracefully with an informative message if the 18 | #' resource is not available (and not give a check warning nor error). 19 | #' @importFrom httr GET status_code 20 | #' @param url The base URL to check. 21 | #' @return \code{TRUE} if the API is available, otherwise \code{stop()} is called. 22 | #' @note Based on code in \code{opensensmapR} (\url{https://github.com/sensebox/opensensmapR/blob/f69cf62b2771d5b6ed605c04b7ddd618f5a272c2/R/api.R}{\code{api.R}}). 23 | #' @keywords internal 24 | raustats_check_url_available <- function(url) { 25 | code = FALSE 26 | try({ code = status_code(GET(url, raustats_ua())) }, silent = TRUE) 27 | 28 | if (code == 200) 29 | return(NULL) 30 | 31 | if (code != FALSE) { 32 | errtext = sprintf("The API at %s is currently not available. (HTTP code %s)", url, code) 33 | stop(paste(errtext, collapse='\n'), call. 
= FALSE) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(abs_cat_download) 4 | export(abs_cat_releases) 5 | export(abs_cat_stats) 6 | export(abs_cat_tables) 7 | export(abs_cat_unzip) 8 | export(abs_datasets) 9 | export(abs_dimensions) 10 | export(abs_metadata) 11 | export(abs_read_tss) 12 | export(abs_search) 13 | export(abs_stats) 14 | export(rba_file_download) 15 | export(rba_read_tss) 16 | export(rba_search) 17 | export(rba_stats) 18 | export(rba_table_cache) 19 | importFrom(dplyr,bind_rows) 20 | importFrom(dplyr,case_when) 21 | importFrom(dplyr,left_join) 22 | importFrom(httr,GET) 23 | importFrom(httr,content) 24 | importFrom(httr,http_error) 25 | importFrom(httr,http_status) 26 | importFrom(httr,http_type) 27 | importFrom(httr,progress) 28 | importFrom(httr,status_code) 29 | importFrom(httr,user_agent) 30 | importFrom(httr,write_disk) 31 | importFrom(jsonlite,fromJSON) 32 | importFrom(lubridate,ceiling_date) 33 | importFrom(lubridate,days) 34 | importFrom(readxl,excel_sheets) 35 | importFrom(readxl,read_excel) 36 | importFrom(rvest,follow_link) 37 | importFrom(rvest,html_attr) 38 | importFrom(rvest,html_nodes) 39 | importFrom(rvest,html_session) 40 | importFrom(rvest,html_table) 41 | importFrom(rvest,html_text) 42 | importFrom(rvest,jump_to) 43 | importFrom(stats,complete.cases) 44 | importFrom(stats,setNames) 45 | importFrom(tidyr,gather) 46 | importFrom(utils,unzip) 47 | importFrom(utils,zip) 48 | importFrom(xml2,as_list) 49 | importFrom(xml2,read_html) 50 | importFrom(xml2,read_xml) 51 | importFrom(xml2,xml_attr) 52 | importFrom(xml2,xml_attrs) 53 | importFrom(xml2,xml_child) 54 | importFrom(xml2,xml_children) 55 | importFrom(xml2,xml_find_all) 56 | importFrom(xml2,xml_length) 57 | importFrom(xml2,xml_name) 58 | importFrom(xml2,xml_ns_strip) 59 | importFrom(xml2,xml_parent) 60 | importFrom(xml2,xml_text) 61 | -------------------------------------------------------------------------------- /man/abs_cat_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_stats} 4 | \alias{abs_cat_stats} 5 | \title{Get ABS catalogue series data} 6 | \usage{ 7 | abs_cat_stats( 8 | cat_no, 9 | tables = "All", 10 | releases = "Latest", 11 | types = "tss", 12 | na.rm = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{cat_no}{Character vector specifying one or more ABS collections or catalogue numbers to 17 | download.} 18 | 19 | \item{tables}{A character vector of regular expressions denoting tables to download. The default 20 | ('All') downloads all time series spreadsheet tables for each specified catalogue. Use a list 21 | to specify different table sets for each specified ABS catalogue number.} 22 | 23 | \item{releases}{Date or character string object specifying the month and year denoting which 24 | release to download. Default is "Latest", which downloads the latest available data. 
See 25 | examples for further details.} 26 | 27 | \item{types}{One of either 'tss' -- ABS time series spreadsheet (the default) or 'css' -- ABS 28 | data cube (cross-section spreadsheet).} 29 | 30 | \item{na.rm}{logical (default: \code{TRUE}) - remove observations containing missing values.} 31 | } 32 | \value{ 33 | data frame in long format 34 | } 35 | \description{ 36 | This function downloads ABS catalogue series statistics, by ABS catalogue number. 37 | } 38 | \examples{ 39 | \donttest{ 40 | ## Download quarterly Australian National Accounts, Tables 1 & 2 41 | ana_q <- abs_cat_stats("5206.0", tables=c("Table 1\\\\W+", "Table 2\\\\W+")); 42 | 43 | ## Download December 2017 Australian National Accounts, Table 1 44 | ana_q_2017q4 <- abs_cat_stats("5206.0", tables="Table 1\\\\W+", release="Dec 2017"); 45 | } 46 | } 47 | \author{ 48 | David Mitchell 49 | } 50 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # Release version 0.15.0 2 | 3 | ## New submission 4 | 5 | ### Release summary 6 | 7 | This is a minor release 8 | 9 | 10 | ### Test environments 11 | * local Debian Linux 10, R 3.6.1 and R-devel 12 | * win-builder (release and devel) 13 | 14 | 15 | ### R CMD check results 16 | There were no ERRORs or WARNINGs. 17 | 18 | There was 1 NOTE: 19 | 20 | * checking CRAN incoming feasibility ... NOTE 21 | Maintainer: ‘David Mitchell ’ 22 | 23 | New submission 24 | 25 | Package was archived on CRAN 26 | 27 | CRAN repository db overrides: 28 | X-CRAN-Comment: Archived on 2019-12-19 for policy violation. 29 | 30 | On Internet access. 31 | 32 | - Added functionality to ensure URL calls fail gracefully with an 33 | informative message if the resource is not available. 34 | 35 | 36 | ### Downstream dependencies 37 | There are currently no downstream dependencies for this package. 38 | 39 | 40 | 41 | # Initial submission - version 0.1.0 42 | 43 | ## New submission 44 | 45 | ### Resubmission 46 | 47 | This is a resubmission. In this version I have: 48 | 49 | * Added links to the websites described in the Description text: 50 | and . 51 | 52 | * Replaced all instances of \dontrun{} with \donttest{} in Rd-files. 53 | 54 | * Removed examples for all unexported functions. 55 | 56 | 57 | ### Test environments 58 | * local Debian Linux 4.18, R 3.5.2 and R-devel 59 | * win-builder (release and devel) 60 | 61 | 62 | ### R CMD check results 63 | There were no ERRORs or WARNINGs. 64 | 65 | There was 1 NOTE: 66 | 67 | * checking CRAN incoming feasibility ... NOTE 68 | Maintainer: ‘David Mitchell ’ 69 | 70 | New submission 71 | 72 | 73 | ### Downstream dependencies 74 | There are currently no downstream dependencies for this package. 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /man/abs_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_search} 4 | \alias{abs_search} 5 | \title{Search dataset information from the ABS.Stat API} 6 | \usage{ 7 | abs_search( 8 | pattern, 9 | dataset = NULL, 10 | ignore.case = TRUE, 11 | code_only = FALSE, 12 | update_cache = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{pattern}{Character string or regular expression to be matched.} 17 | 18 | \item{dataset}{Character vector of ABS.Stat dataset codes. 
These codes correspond to the 19 | \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 20 | \code{abs_cachelist}, or the result of \code{abs_indicators}. If NULL (default), the function 21 | undertakes a dataset mode search. If not NULL, the function searches all dimensions of the specified 22 | dataset.} 23 | 24 | \item{ignore.case}{Case-sensitive pattern match or not.} 25 | 26 | \item{code_only}{If FALSE (default), all columns/fields are returned. If TRUE, only the dataset 27 | identifier or indicator code is returned.} 28 | 29 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 30 | ABS.Stat datasets, if TRUE, update the list of available datasets.} 31 | } 32 | \value{ 33 | A data frame with datasets and data items that match the search pattern. 34 | } 35 | \description{ 36 | This function finds datasets, or dimensions within a specified dataset, that match a specified 37 | regular expression and returns matching results. 38 | } 39 | \note{ 40 | With acknowledgements to the \code{wb_search} function. 41 | } 42 | \examples{ 43 | ## ABS dataset search 44 | x <- abs_search(pattern = "consumer price index") 45 | x <- abs_search(pattern = "census") 46 | x <- abs_search(pattern = "labour force") 47 | 48 | ## ABS indicator search 49 | x <- abs_search(pattern = "all groups", dataset="CPI") 50 | x <- abs_search(pattern = c("all groups", "capital cities"), dataset="CPI") 51 | 52 | } 53 | \author{ 54 | David Mitchell 55 | } 56 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # raustats 0.15.0 2 | --------------------------------------------------------------------- 3 | 4 | ## Changes: 5 | 6 | * `abs_cat_stats` includes new argument `na.rm` to provide option to remove rows 7 | with `NA` values. 8 | 9 | * `abs_read_tss` includes new argument `na.rm` to provide option to remove rows 10 | with `NA` values. 11 | 12 | * `abs_cat_download` now includes PDF files in set of downloadable ABS catalogue 13 | file types. 14 | 15 | * `abs_cat_releases` is a new function that returns the set of all available 16 | releases for a specified ABS catalogue number. 17 | 18 | * `abs_cat_tables` includes internal changes that specify separate columns for 19 | Excel, Zip and PDF resource URLs. 20 | 21 | * `abs_stats` includes new option `return_json` which enables return of data in 22 | raw JSON format. 23 | 24 | * `rba_search` (and by extension `rba_stats`) now includes new option 25 | `series_type` which enables the user to list only current *statistical tables* 26 | (the default), *historical data* or *discontinued data*. 27 | 28 | 29 | ## Bug fixes: 30 | 31 | * `abs_cat_stats` now avoids multiple file downloads and applies `abs_cat_unzip` 32 | only to compressed files. 33 | 34 | * `abs_cat_tables` includes revisions that correct errors thrown by 35 | `abs_cat_tables` and `abs_cat_stats` for some ABS catalogue numbers 36 | (e.g. 8731.0 and 3105.0.65.001). 37 | 38 | * `abs_stats` now gracefully handles zero-length (empty) returns. 39 | 40 | * `rba_stats` now downloads only current *statistical tables* by 41 | default. Previously, `rba_stats` would attempt to read all tables meeting 42 | search criteria, and fail in cases involving a mix of *statistical tables*, 43 | *historical data* and/or *discontinued data*. (Reported by David Stephan.) 
44 | 45 | * Added functionality to ensure URL calls fail gracefully with an informative 46 | message if the resource is not available. 47 | 48 | 49 | 50 | # raustats 0.1.0 51 | --------------------------------------------------------------------- 52 | 53 | * Initial package release 54 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | ### Data sets 2 | 3 | #' @name rba_cachelist 4 | #' @title Cached list of statistical tables provided by the RBA 5 | #' @description This data is a cached result of the \code{\link{rba_table_cache}} function. By 6 | #' default functions \code{\link{rba_search}} and \code{\link{rba_stats}} use this data if the 7 | #' \code{update_cache} parameter is \code{FALSE}. 8 | #' 9 | #' @format A data frame containing four columns: 10 | #' \itemize{ 11 | #' \item \code{table_code} RBA table code. 12 | #' \item \code{table_name} RBA table name. 13 | #' \item \code{table_type} One of either current statistical tables, historical data or discontinued data. 14 | #' \item \code{url} RBA URL. 15 | #' } 16 | "rba_cachelist" 17 | 18 | 19 | #' @name abs_cat_cachelist 20 | #' @title List of ABS catalogue tables 21 | #' @description This data set provides a list of the most common ABS catalogue tables. 22 | #' 23 | #' @format A data frame containing five columns: 24 | #' \itemize{ 25 | #' \item \code{publication_title} ABS publication title. 26 | #' \item \code{catalogue_no} ABS catalogue number. 27 | #' \item \code{abs_url} ABS URL. 28 | #' \item \code{last_updated} Publication last updated. 29 | #' \item \code{type} Publication type -- one of either 'time series', 'panel' or 'summary'. 30 | #' } 31 | "abs_cat_cachelist" 32 | 33 | 34 | #' @name abs_cachelist 35 | #' @title Datasets available through the ABS API 36 | #' @description This data set provides a list of all datasets, and the associated metadata, 37 | #' available through the ABS API. 38 | #' @format A data frame containing three columns: 39 | #' \itemize{ 40 | #' \item \code{id} ABS dataset identifier. 41 | #' \item \code{agencyID} Source agency identifier (ABS). 42 | #' \item \code{name} ABS dataset name. 43 | #' } 44 | "abs_cachelist" 45 | 46 | 47 | #' @name aus_state_codes 48 | #' @title Table of Australian state and territory codes 49 | #' @description A list of Australian state and territory codes (including code 0 -- Australia) 50 | #' 51 | #' @format A data frame containing three columns: 52 | #' \itemize{ 53 | #' \item \code{state_code} One-digit state code. 54 | #' \item \code{state_abb} State/territory abbreviation. 55 | #' \item \code{state_name} State/territory name. 56 | #' } 57 | "aus_state_codes" 58 | 59 | -------------------------------------------------------------------------------- /man/abs_cat_tables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_tables} 4 | \alias{abs_cat_tables} 5 | \title{Return ABS catalogue tables} 6 | \usage{ 7 | abs_cat_tables( 8 | cat_no, 9 | releases = "Latest", 10 | types = c("tss", "css"), 11 | include_urls = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{cat_no}{ABS catalogue numbers.} 16 | 17 | \item{releases}{Date or character string object specifying the month and year denoting which 18 | release to download. Default is "Latest", which downloads the latest available data. 
See 19 | examples for further details.} 20 | 21 | \item{types}{ABS publication types to return. Permissible options include one or more of: 'tss' 22 | -- ABS Time Series Spreadsheets, 'css' -- ABS Data Cubes and 'pub' -- ABS Publications. The 23 | default returns all Time Series Spreadsheets and Data Cubes.} 24 | 25 | \item{include_urls}{Include full URLs to returned ABS data files. Default (FALSE) does not 26 | include data file URLs.} 27 | } 28 | \value{ 29 | Returns a data frame listing the data collection tables and URLs for Excel (column: 30 | \code{path_xls}) and, if available, Zip (column: \code{path_zip}) files. 31 | } 32 | \description{ 33 | Return list of data tables available from specified ABS catalogue number. 34 | } 35 | \examples{ 36 | \donttest{ 37 | ## List latest available quarterly National Accounts tables 38 | ana_tables <- abs_cat_tables("5206.0", releases="Latest"); 39 | ana_tables_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 40 | 41 | ## List latest available CPI Time Series Spreadsheet tables only 42 | cpi_tables <- abs_cat_tables("6401.0", releases="Latest", types="tss"); 43 | cpi_tables_url <- abs_cat_tables("6401.0", releases="Latest", types="tss", include_urls=TRUE); 44 | 45 | ## List latest available ASGS Volume 3 Data Cubes 46 | asgs_vol3_tables <- abs_cat_tables("1270.0.55.003", releases="Latest", types="css"); 47 | asgs_vol3_tables_url <- abs_cat_tables("1270.0.55.003", releases="Latest", 48 | types="css", include_urls=TRUE); 49 | 50 | ## List latest available ANZSIC publications (PDF) files 51 | anzsic_2006 <- abs_cat_tables("1292.0", releases="Latest", types="pub", include_urls=TRUE); 52 | } 53 | } 54 | \author{ 55 | David Mitchell 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/test-url-availability.R: -------------------------------------------------------------------------------- 1 | context("raustats URL checking functions") 2 | 3 | test_that("raustats_check_url_available returns true results", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | ## Check valid ABS Catalogue URLs 10 | expect_null(raustats_check_url_available(abs_urls()$base_url)) 11 | expect_null(raustats_check_url_available(file.path(abs_urls()$base_url, 12 | abs_urls()$ausstats_path, 13 | mf_path = "mf"))); 14 | expect_null(raustats_check_url_available(file.path(abs_urls()$base_url, 15 | abs_urls()$ausstats_path, 16 | abs_urls()$mf_path, 17 | "5206.0"))); 18 | 19 | ## Check valid ABS API URLs 20 | expect_null(raustats_check_url_available(file.path(abs_api_urls()$base_url))); 21 | 22 | ## Check valid RBA paths 23 | expect_null(raustats_check_url_available(rba_urls()$base_url)); 24 | expect_null(raustats_check_url_available(file.path(rba_urls()$base_url, 25 | rba_urls()$stats_path))); 26 | expect_null(raustats_check_url_available(file.path(rba_urls()$base_url, 27 | rba_urls()$stats_path, 28 | rba_urls()$tables_path))); 29 | }) 30 | 31 | 32 | test_that("raustats_check_url_available fails gracefully", 33 | { 34 | skip_on_cran() 35 | skip_on_travis() 36 | skip_on_appveyor() 37 | 38 | ## Test mis-specified ABS Catalogue URL 39 | expect_error(raustats_check_url_available(file.path(abs_urls()$base_url, 40 | abs_urls()$ausstats_path))); 41 | 42 | ## Test mis-specified ABS API URL 43 | expect_error(raustats_check_url_available(file.path(abs_api_urls()$base_url, 44 | abs_api_urls()$datastr_path, 45 | abs_api_urls()$sdmx_json_path))); 46 | 47 | ## Test mis-specified RBA URLs 
expect_error(raustats_check_url_available(file.path(rba_urls()$base_url, 49 | rba_urls()$stats_path, 50 | "Table_1"))); 51 | }) 52 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | -*- mode: markdown -*- 2 | 3 | # TO DO - Feature requests 4 | 5 | * Enable `releases="July 2016"` to work for the latest releases in 6 | `abs_cat_tables` and `abs_cat_stats`. 7 | 8 | 9 | ## Completed functions 10 | 11 | * Add argument: `format = c("table", "raw")` argument to `abs_stats` function to 12 | provide option to return results as raw JSON 13 | 14 | * Add functionality to check for zero-length data sets returned by `abs_stats`, 15 | to avoid failing on bind_rows step. [COMPLETED 2019-12-20] 16 | 17 | * The ABS Engineering Construction tables seem to require that the 18 | `sub(regex_table_name, ...)` has `ignore.case = TRUE` (to capture "TABLE") - 19 | [COMPLETED 2018-09-15] 20 | 21 | * Add function to list all available releases for a specified catalogue 22 | number. [COMPLETED 2019-12-20 - `abs_cat_releases`] 23 | 24 | * Fix path variables in `abs_cat_tables` function. 25 | 26 | * Fix `rba_stats` function - error on loading tables D2 & G3, among possible 27 | others. (Reported by David Stephan ) - [COMPLETED 2019-12-20] 28 | 29 | 30 | ## ABS API functions 31 | 32 | * `abs_stats function` - Add `simplify` argument to `abs_stats` function to 33 | provide option to return raw sdmx-json format output 34 | 35 | * Simplify `abs_cachetable` to include only a list of datasets and call 36 | abs_metadata each time detailed dataset information is required. 37 | 38 | * Wrap all url calls inside a `try`/`catch` 39 | - Packages which use Internet resources should fail gracefully with an informative message 40 | if the resource is not available 41 | 42 | * Test all ABS catalogue functions 43 | 44 | * `abs_cat_stats` 45 | - Insert code to handle Date class `releases` arguments, e.g. releases=as.Date("2017-12-01"). 46 | - Add `header_start` and `header_rows` argument to `abs_cat_stats` function. 47 | - Add `return_urls` functionality to `abs_cat_stats` function - [DONE 2018-11-01] 48 | 49 | * `abs_cat_tables` 50 | - Add code to handle varying number of paths returned by `abs_cat_tables` 51 | - Remove Released 'dd/mm/yyyy' columns from final results. 52 | - Modify `abs_cat_tables` function to list tables for specified catalogue numbers. [DONE 2018-11-05] 53 | - Generalise `abs_cat_tables` function to handle non-time series results with only one column of URLs. [DONE 2018-11-06] 54 | 55 | * `abs_cat_unzip` 56 | - Added exdir argument - [DONE 2018-11-01] 57 | 58 | * `abs_cat_download` 59 | - Added exdir argument - [DONE 2018-11-01] 60 | 61 | * Candidate additional arguments for `abs_stats` 62 | #' `@param include_lastUpdated = FALSE` A character vector of regular expressions denoting 63 | #' tables to download. The default ('All') downloads all time series spreadsheet tables for each 64 | #' specified catalogue. Use a list to specify different table sets for each specified ABS 65 | #' catalogue number. 
66 | 67 | 68 | * Check warning messages issued by `rba_stats(url = ...)`: 69 | #' Warning messages: 70 | #' 1: In eval(substitute(list(...)), `_data`, parent.frame()) : 71 | #' NAs introduced by coercion 72 | -------------------------------------------------------------------------------- /data-raw/ABS-TSS-Catalogue-Numbers.csv: -------------------------------------------------------------------------------- 1 | publication_title,catalogue_no,abs_url,publication_url,last_updated,type 2 | Australian Demographic Statistics,"3101.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3101.0,NA,time series 3 | Overseas Arrivals and Departures,"3401.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3401.0,NA,time series 4 | Australian System of National Accounts,"5204.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5204.0,NA,time series 5 | "National Accounts: National Income, Expenditure and Product","5206.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5206.0,NA,time series 6 | National Accounts: Finance and Wealth,"5232.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5232.0,NA,time series 7 | Balance of Payments and International Investment,"5302.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5302.0,NA,time series 8 | International Trade in Goods and Services,"5368.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5368.0,NA,time series 9 | Labour Force,"6202.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6202.0,NA,time series 10 | Average Weekly Earnings,"6302.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6302.0,NA,time series 11 | Employee Earnings and Hours,"6306.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6306.0,NA,time series 12 | Wage Price Indexes,"6345.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6345.0,NA,time series 13 | Consumer Price Index,"6401.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6401.0,NA,time series 14 | Producer Price Indexes,"6427.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6427.0,NA,time series 15 | Retail Trade,"8501.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/8501.0,NA,time series 16 | Engineering Construction,"8762.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/8762.0,NA,time series 17 | Australian Historical Population Statistics,"3105.0.65.001",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3105.0.65.001,NA,panel 18 | "Population Projections, Australia","3222.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3222.0,NA,panel 19 | "Regional Population Growth, Australia","3218.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3218.0,NA,panel 20 | "Regional Population Growth, Australia","3236.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3218.0,NA,panel 21 | "Value of Principal Agricultural Commodities Produced, Australia","7501.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7501.0,NA,summary 22 | "Value of Agricultural Commodities Produced, Australia","7503.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7503.0,NA,summary 23 | "Principal Agricultural Commodities, Australia, Preliminary","7111.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7111.0,NA,summary 24 | "Agricultural Commodities, 
Australia","7121.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7121.0,NA,summary 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # raustats: An R package for accessing ABS and RBA statistics 5 | 6 | An R package for downloading Australian economic statistics from the 7 | Australian Bureau of Statistics (ABS) and Reserve Bank of Australia 8 | (RBA) websites. 9 | 10 | ## Installation 11 | 12 | You can install the released version of raustats from 13 | [CRAN](https://cran.r-project.org) with: 14 | 15 | ``` r 16 | install.packages("raustats") 17 | ``` 18 | 19 | or the latest development version from github with: 20 | 21 | ``` r 22 | devtools::install_github("mitcda/raustats") 23 | ``` 24 | 25 | ## How to use raustats 26 | 27 | To learn more about the raustats package, start with the vignettes: 28 | 29 | ``` r 30 | browseVignettes(package = "raustats") 31 | ``` 32 | 33 | ## Introduction 34 | 35 | The [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) is 36 | Australia’s national statistical agency, providing trusted official statistics 37 | on a wide range of economic, social, population and environmental matters of 38 | importance to Australia. Key ABS statistical collections include: 39 | 40 | - Australian National Accounts 41 | - International Trade 42 | - Consumer Price Index (CPI) 43 | - Labour Force 44 | - Population trends 45 | 46 | The [Reserve Bank of Australia (RBA)](https://www.rba.gov.au/) is Australia’s 47 | central bank. In addition to its legislative responsibilities, it collects and 48 | publishes statistics on money, credit, the Australian banking systems and other 49 | relevant economic metrics. Key RBA statistics include: 50 | 51 | - Banking system assets and liabilities 52 | - Money and credit statistics 53 | - Household and business finances 54 | - Interest rates 55 | - Exchange rates 56 | - Inflation and inflation expectations. 57 | 58 | The ABS and RBA make their statistics primarily available through Excel and/or 59 | CSV spreadsheets. 60 | 61 | This package provides functions to search and download data and statistics from 62 | the [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) and [Reserve 63 | Bank of Australia (RBA)](https://www.rba.gov.au/) websites, as well as draft 64 | access to the [ABS.Stat](http://stat.data.abs.gov.au/) - Beta data catalogue 65 | API. 66 | 67 | ## Examples 68 | 69 | ### Downloading ABS Catalogue Statistics 70 | 71 | ABS catalogue statistics may be downloaded, by catalogue number, using the 72 | `abs_cat_stats()` function. The following example downloads all Consumer Price 73 | Index (CPI) data series (ABS Catalogue no. 6401.0). 74 | 75 | ``` r 76 | cpi_all <- abs_cat_stats("6401.0") 77 | ``` 78 | 79 | To download only the latest statistics reported in Table 1 (ABS groups Tables 1 80 | and 2), simply provide a regular expression to the `tables` argument: 81 | 82 | ``` r 83 | cpi <- abs_cat_stats("6401.0", tables="Table.+1") 84 | ``` 85 | 86 | The package also provides functions to ABS statistics via the 87 | [ABS.Stat](http://stat.data.abs.gov.au/) Beta API. See the package help and 88 | vignettes for examples. 89 | 90 | ### Downloading RBA data 91 | 92 | RBA data series may be downloaded by table number, using the `rba_stats()` 93 | function. The following example downloads Table A1 - Liabilities and Assets of 94 | the RBA. 
95 | 96 | ``` r 97 | rba_bs <- rba_stats("A1") 98 | ``` 99 | -------------------------------------------------------------------------------- /tests/testthat/test-rba-functions.R: -------------------------------------------------------------------------------- 1 | context("RBA functions") 2 | 3 | test_that("rba_stats_url returns valid URL", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | expect_type(rba_urls(), "list"); 10 | expect_s3_class(rvest::html_session(rba_urls()$base_url), "session"); 11 | }) 12 | 13 | 14 | test_that("rba_table_cache returns data.frame class object", 15 | { 16 | skip_on_cran() 17 | skip_on_travis() 18 | skip_on_appveyor() 19 | 20 | expect_s3_class(rba_table_cache(), "data.frame"); 21 | }) 22 | 23 | 24 | test_that("rba_search fails well", 25 | { 26 | skip_on_cran() 27 | skip_on_travis() 28 | skip_on_appveyor() 29 | 30 | expect_error(rba_search()) 31 | }) 32 | 33 | 34 | test_that("rba_search returns valid results", 35 | { 36 | skip_on_cran() 37 | skip_on_travis() 38 | skip_on_appveyor() 39 | 40 | expect_s3_class(rba_search(pattern = "Liabilities and Assets"), "data.frame"); 41 | expect_s3_class(rba_search(pattern = "Consumer Prices"), "data.frame"); 42 | expect_s3_class(rba_search(pattern = "Population"), "data.frame"); 43 | }) 44 | 45 | 46 | test_that("rba_search returns valid results", 47 | { 48 | skip_on_cran() 49 | skip_on_travis() 50 | skip_on_appveyor() 51 | 52 | expect_s3_class(rba_search("G3"), "data.frame"); 53 | expect_s3_class(rba_search("G3", series_type="discontinued data", update_cache=TRUE), 54 | "data.frame"); 55 | }) 56 | 57 | 58 | test_that("rba_file_download returns valid data.frame", 59 | { 60 | skip_on_cran() 61 | skip_on_travis() 62 | skip_on_appveyor() 63 | 64 | downloaded_tables <- rba_file_download("https://www.rba.gov.au/statistics/tables/xls/d01hist.xls") 65 | expect_type(downloaded_tables, "character"); 66 | expect_match(downloaded_tables, "\\w+\\.xlsx*$"); 67 | expect_true(all(file.exists(downloaded_tables))); 68 | 69 | }) 70 | 71 | 72 | test_that("rba_read_tss returns valid data.frame", 73 | { 74 | skip_on_cran() 75 | skip_on_travis() 76 | skip_on_appveyor() 77 | 78 | rba_urls <- rba_search(pattern = "Liabilities and Assets")$url 79 | rba_files <- sapply(rba_urls, rba_file_download); 80 | expect_s3_class(rba_read_tss(rba_files), "data.frame"); 81 | }) 82 | 83 | 84 | test_that("rba_stats returns valid data.frame", 85 | { 86 | skip_on_cran() 87 | skip_on_travis() 88 | skip_on_appveyor() 89 | 90 | ## Test 'table_no' option function call 91 | expect_s3_class(rba_stats("A1"), "data.frame"); 92 | expect_s3_class(rba_stats(table_no="A1"), "data.frame"); 93 | ## Test 'pattern' option function call 94 | expect_s3_class(rba_stats(pattern="Liabilities and Assets"), "data.frame"); 95 | ## Test 'url' option function call 96 | url <- "https://www.rba.gov.au/statistics/tables/xls/d01hist.xls"; 97 | expect_false(httr::http_error(url)); 98 | expect_s3_class(rba_stats(url=url), "data.frame"); 99 | }) 100 | 101 | 102 | test_that("More tests that rba_stats returns valid data.frame", 103 | { 104 | skip_on_cran() 105 | skip_on_travis() 106 | skip_on_appveyor() 107 | 108 | ## Specific table tests: 109 | ## 1) Table G1 110 | expect_s3_class(rba_stats(table_no = "G1"), "data.frame"); 111 | ## 2) Table D2 112 | expect_s3_class(rba_stats(table_no = "D2"), "data.frame"); 113 | ## 3) Table G3 114 | expect_s3_class(rba_stats(table_no = "G3"), "data.frame"); 115 | }) 116 | 
-------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r setup, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | fig.path = "man/figures/README-", 10 | out.width = "100%" 11 | ) 12 | ``` 13 | 14 | 15 | 16 | # raustats: An R package for accessing ABS and RBA statistics 17 | 18 | An R package for downloading Australian economic statistics from the Australian 19 | Bureau of Statistics (ABS) and Reserve Bank of Australia (RBA) websites. 20 | 21 | 22 | ## Installation 23 | 24 | You can install the released version of `raustats` from [CRAN](https://CRAN.R-project.org) with: 25 | 26 | ```{r eval=FALSE} 27 | install.packages("raustats") 28 | ``` 29 | or the latest development version from github with: 30 | 31 | ```{r, eval = FALSE} 32 | remotes::install_github("mitcda/raustats") 33 | ``` 34 | 35 | ## How to use raustats 36 | 37 | To learn more about the raustats package, start with the vignettes: 38 | ```{r, eval = FALSE} 39 | browseVignettes(package = "raustats") 40 | ``` 41 | 42 | 43 | ## Introduction 44 | 45 | The [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) is 46 | Australia’s national statistical agency, providing trusted official statistics 47 | on a wide range of economic, social, population and environmental matters of 48 | importance to Australia. Key ABS statistical collections include: 49 | 50 | * Australian National Accounts 51 | * International Trade 52 | * Consumer Price Index (CPI) 53 | * Labour Force 54 | * Population trends 55 | 56 | 57 | The [Reserve Bank of Australia (RBA)](https://www.rba.gov.au/) is Australia's 58 | central bank. In addition to its legislative responsibilities, it collects and 59 | publishes statistics on money, credit, the Australian banking systems and other 60 | relevant economic metrics. Key RBA statistics include: 61 | 62 | * Banking system assets and liabilities 63 | * Money and credit statistics 64 | * Household and business finances 65 | * Interest rates 66 | * Exchange rates 67 | * Inflation and inflation expectations. 68 | 69 | The ABS and RBA make their statistics primarily available through Excel and/or 70 | CSV spreadsheets. 71 | 72 | This package provides functions to search and download data and statistics from 73 | the [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) and [Reserve 74 | Bank of Australia (RBA)](https://www.rba.gov.au/) websites, as well as draft 75 | access to the [ABS.Stat](http://stat.data.abs.gov.au/) - Beta data catalogue 76 | API. 77 | 78 | 79 | ## Examples 80 | 81 | ### Downloading ABS Catalogue Statistics 82 | 83 | ABS catalogue statistics may be downloaded, by catalogue number, using the 84 | `abs_cat_stats()` function. The following example downloads all Consumer Price Index 85 | (CPI) data series (ABS Catalogue no. 6401.0). 86 | 87 | ```{r eval=FALSE} 88 | cpi_all <- abs_cat_stats("6401.0") 89 | ``` 90 | 91 | To download only the latest statistics reported in Table 1 (ABS groups Tables 1 and 92 | 2), simply provide a regular expression to the `tables` argument: 93 | 94 | ```{r eval=FALSE} 95 | cpi <- abs_cat_stats("6401.0", tables="Table.+1") 96 | ``` 97 | 98 | The package also provides functions to access ABS statistics via the 99 | [ABS.Stat](http://stat.data.abs.gov.au/) Beta API. See the package help and 100 | vignettes for examples. 
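For illustration, a minimal sketch of an ABS.Stat API query. It simply mirrors the example filter shown on the `abs_stats()` help page (the CPI dataset code and the dimension values are taken from that example; use `abs_dimensions("CPI")` to confirm the current codes):

```{r eval=FALSE}
## Find ABS.Stat datasets matching a keyword
abs_search("consumer price index")

## List the dimensions of the CPI dataset
abs_dimensions("CPI")

## Download CPI series using the example filter from ?abs_stats
cpi_api <- abs_stats(dataset = "CPI",
                     filter = list(MEASURE = 1, REGION = c(1:8, 50),
                                   INDEX = 10001, TSEST = 10, FREQUENCY = "Q"))
```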
101 | 102 | 103 | ### Downloading RBA data 104 | 105 | RBA data series may be downloaded by table number, using the `rba_stats()` 106 | function. The following example downloads Table A1 - Liabilities and Assets of 107 | the RBA. 108 | 109 | ```{r eval=FALSE} 110 | rba_bs <- rba_stats("A1") 111 | ``` 112 | -------------------------------------------------------------------------------- /R/date-utilities.R: -------------------------------------------------------------------------------- 1 | ### Function: excel2Date 2 | #' @name excel2Date 3 | #' @title Convert Excel numeric date to R Date object 4 | #' @description Function to convert Excel numeric date to R Date object 5 | #' @param x Excel-based date numeric object 6 | #' @return Date object 7 | ## #' @examples 8 | ## #' \donttest{ 9 | ## #' raustats:::excel2Date(43445); 10 | ## #' } 11 | #' @keywords internal 12 | excel2Date <- function(x) { 13 | as.Date(x, origin="1899-12-30"); 14 | } 15 | 16 | ### Function: quarter2Date 17 | #' @name quarter2Date 18 | #' @title Convert dates formatted as year-quarter to dates objects 19 | #' @description Function to convert dates formatted as year-quarter to date-format objects 20 | #' @param x Year-quarter date format 21 | #' @param base.month Specifies base month for first quarter. Can be a scalar: 1,2,3 or character 22 | #' object: Jan, Feb, Mar. 23 | #' @param format The input date format. Default is "\%Y-Q\%q". 24 | #' @return This function returns a Date format object. 25 | #' @author David Mitchell 26 | ## #' @examples 27 | ## #' \donttest{ 28 | ## #' x <- c("1960-Q1","1960-Q2","1960-Q3","1960-Q4","1961-Q1","1961-Q2"); 29 | ## #' quarter2Date(x); 30 | ## #' quarter2Date(x, base.month="Jan"); 31 | ## #' } 32 | #' @keywords internal 33 | quarter2Date <- function(x, base.month="Mar", format="%Y-Q%q") 34 | { 35 | ## Check format 36 | if (!grepl("%Y", format) & !grepl("%q", format)) 37 | stop("Format should contain year (%Y) and quarter (%q) regular expressions.") 38 | format <- sub("(%q)", "(\\\\d)", 39 | sub("(%Y)", "(\\\\d{4})", format)); 40 | Year <- as.integer(sub(format,"\\1", x)); 41 | Qtr <- as.integer(sub(format,"\\2", x)); 42 | ## Re-encode month 43 | Mth <- if (base.month == 1 | base.month == "Jan") { 44 | Qtr * 3 - 2; 45 | } else if (base.month == 2 | base.month == "Feb") { 46 | Qtr * 3 - 1; 47 | } else if (base.month == 3 | base.month == "Mar") { 48 | Qtr * 3; 49 | } else { 50 | stop(paste("base.month should be either a scalar = 1,2 or 3", 51 | "or a character object = \"Jan\", \"Feb\" or \"Mar\".")); 52 | } 53 | z <- as.Date(paste(Year, month.abb[Mth], "01", sep="-"), format="%Y-%b-%d"); 54 | return(z); 55 | } 56 | 57 | 58 | ### Function: last_day 59 | #' @name last_day 60 | #' @title Set Date object to the last day of the month 61 | #' @description Function to change the date of a Date object to the last day of the month 62 | #' @importFrom lubridate ceiling_date days 63 | #' @param date date object 64 | #' @return Date object 65 | #' @author David Mitchell 66 | ## #' @examples 67 | ## #' \donttest{ 68 | ## #' date <- seq.Date(as.Date("2005-06-01"), length=36, by="month"); 69 | ## #' last_day(date) 70 | ## #' } 71 | #' @keywords internal 72 | last_day <- function(date) 73 | ceiling_date(date, "month") - days(1); 74 | 75 | 76 | ### Function: fin_year 77 | #' @name fin_year 78 | #' @title Create financial year date object 79 | #' @description Function to create a financial year date object 80 | #' @param date date object 81 | #' @param ending character string abbreviation or number 
denoting ending month of the financial year 82 | #' @return Date object 83 | #' @author David Mitchell 84 | ## #' @examples 85 | ## #' \donttest{ 86 | ## #' x <- seq.Date(as.Date("2005-06-01"), length=36, by="month"); 87 | ## #' fin_year(x) 88 | ## #' } 89 | #' @keywords internal 90 | fin_year <- function(date, ending="Jun") 91 | { 92 | if (is.character(ending)) { 93 | if (!substr(ending,1,3) %in% month.abb) 94 | stop(sprintf("Invalid month supplied to ending: %s", ending)) 95 | ending <- match(ending, month.abb); 96 | } else { 97 | if (!ending %in% 1:12) 98 | stop(sprintf("Invalid month supplied: %d - should be in 1:12", ending)); 99 | } 100 | 101 | Year <- as.integer(format(date, "%Y")); 102 | Month <- as.integer(format(date, "%m")); 103 | Year <- ifelse(Month > ending, Year + 1, Year); 104 | z <- as.Date(paste(Year, month.abb[ending], "01", sep="-"), format="%Y-%b-%d"); 105 | return(z); 106 | } 107 | -------------------------------------------------------------------------------- /man/abs_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_stats} 4 | \alias{abs_stats} 5 | \title{Download data from the ABS API} 6 | \usage{ 7 | abs_stats( 8 | dataset, 9 | filter, 10 | start_date, 11 | end_date, 12 | lang = c("en", "fr"), 13 | dimensionAtObservation = c("AllDimensions", "TimeDimension", "MeasureDimension"), 14 | detail = c("Full", "DataOnly", "SeriesKeysOnly", "NoData"), 15 | return_json = FALSE, 16 | return_url = FALSE, 17 | enforce_api_limits = TRUE, 18 | update_cache = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{dataset}{Character vector of ABS.Stat dataset codes. These codes correspond to the 23 | \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 24 | \code{abs_cachelist}, or the result of \code{abs_indicators}.} 25 | 26 | \item{filter}{A list that contains filter of dimensions available in the specified \code{series} 27 | to use in the API call. If NULL, no filter is set and the query tries to return all dimensions 28 | of the dataset. Valid dimensions to include in the list supplied to filter include: MEASURE, 29 | REGION, INDEX, TSEST and FREQUENCY.} 30 | 31 | \item{start_date}{Numeric or character. If numeric it must be in %Y form (i.e. four digit 32 | year). For data at the sub-annual granularity the API supports a format as follows: Monthly 33 | data -- '2016-M01', Quarterly data -- '2016-Q1', Semi-annual data -- '2016-B2', Financial year 34 | data -- '2016-17'.} 35 | 36 | \item{end_date}{Numeric or character (refer to \code{startdate}).} 37 | 38 | \item{lang}{Language in which to return the results. If \code{lang} is unspecified, english is 39 | the default. ## @param remove_na If \code{TRUE}, remove blank or NA observations. If 40 | \code{FALSE}, no blank or NA ## values are removed from the return. ## @param include_unit If 41 | \code{TRUE}, the column unit is not removed from the return. If ## \code{FALSE}, this column is 42 | removed. ## @param include_obsStatus If \code{TRUE}, the column obsStatus is not removed from 43 | the return. If ## \code{FALSE}, this column is removed.} 44 | 45 | \item{dimensionAtObservation}{The identifier of the dimension to be attached at the observation 46 | level. The default order is: 'AllDimensions', 'TimeDimension' and 'MeasureDimension'. 
47 | AllDimensions results in a flat list of observations without any grouping.} 48 | 49 | \item{detail}{This argument specifies the desired amount of information to be returned. Possible 50 | values are: 51 | 52 | \itemize{ 53 | \item Full: all data and documentation, including annotations (default) 54 | \item DataOnly: attributes – and therefore groups – will be excluded 55 | \item SeriesKeysOnly: only the series elements and the dimensions that make up the series keys 56 | \item NoData: returns the groups and series, including attributes and annotations, without observations (all values = NA) 57 | }} 58 | 59 | \item{return_json}{Logical. Default is \code{FALSE}. If \code{TRUE}, the function returns the 60 | result in raw sdmx-json.} 61 | 62 | \item{return_url}{Default is \code{FALSE}. If \code{TRUE}, the function returns the generated 63 | request URL and does not submit the request.} 64 | 65 | \item{enforce_api_limits}{If \code{TRUE} (the default), the function enforces the ABS.Stat 66 | RESTful API limits and will not submit the query if the URL string length exceeds 1000 67 | characters or the query would return more than 1 million records. If \code{FALSE}, the function 68 | submits the API call regardless and attempts to return the results.} 69 | 70 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 71 | ABS.Stat datasets, if TRUE, update the list of available datasets.} 72 | } 73 | \value{ 74 | Returns a data frame of the selected series from the specified ABS dataset. 75 | } 76 | \description{ 77 | This function queries and returns data for a specified ABS dataset from the ABS API. 78 | } 79 | \note{ 80 | The data query submitted by this function uses the ABS RESTful API based on the SDMX-JSON 81 | standard. It has a maximum allowable character limit of 1000 characters allowed in the data 82 | URL. 83 | 84 | Further limitations known at this time include: 85 | \itemize{ 86 | \item Only anonymous queries are supported, there is no authentication 87 | \item Each response is limited to no more than 1 million observations 88 | \item Errors are not returned in the JSON format but HTTP status codes and messages are 89 | set according to the Web Services Guidelines 90 | \item The lastNObservations parameter is not supported 91 | \item Observations follow the time series (or import-specific) order even if 92 | \code{dimensionAtObservation=AllDimensions} is used. 93 | } 94 | } 95 | \examples{ 96 | \donttest{ 97 | x <- abs_stats(dataset="CPI", filter="all", return_url=TRUE); 98 | x <- abs_stats(dataset="CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 99 | INDEX=10001, TSEST=10, FREQUENCY="Q")); 100 | x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, 101 | INDEX=10001, TSEST=10, FREQUENCY="Q")); 102 | x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, INDEX=10001, 103 | TSEST=10, FREQUENCY="Q"), return_url=TRUE); 104 | } 105 | } 106 | \author{ 107 | David Mitchell 108 | } 109 | -------------------------------------------------------------------------------- /R/z-unused-functions.R: -------------------------------------------------------------------------------- 1 | #' @name abs_cache 2 | #' @title Download updated list of datasets and dimensions information from the ABS API 3 | #' @description TBC 4 | #' @param lang Language in which to return the results. If \code{lang} is unspecified, English 5 | #' ('en') is the default. 6 | #' @param progress Report download progress. Arguments accepts integer, logical or NULL. 
Set 7 | #' \code{progress} to \code{NULL} (default) to disable progress 8 | #' reporting. Otherwise set progress equal to integer value frequency. 9 | #' 10 | #' @return A list of available ABS data series each comprising a list of available data dimensions, 11 | #' typically containing: 12 | #' \itemize{ 13 | #' \item \code{MEASURE}: Measurement units (e.g. Persons, $ million, Index, Percentage change, etc.) 14 | #' \item \code{REGION}: Australian region name 15 | #' \item \code{INDEX}: Data item code and description 16 | #' \item \code{TSEST}: Time series estimate type (e.g. Original, Seasonally Adjusted, etc.) 17 | #' \item \code{FREQUENCY}: Available data frequency (Monthly, Quarterly, Annual) 18 | #' \item \code{TIME}: Available observation period index 19 | #' \item \code{OBS_STATUS}: Observation status notes code and description 20 | #' (e.g. 'r' - revised, 'q' - not available, 'u' - not applicable) 21 | #' \item \code{TIME_FORMAT}: Available time format (e.g. Annual, Quarterly, Monthly, Daily). 22 | #' } 23 | #' 24 | #' @note Saving the results of this function and using it as the cache parameter in \code{abs_stats} 25 | #' and \code{abs_search} replaces the default cached version \code{abs_cachelist} that comes with 26 | #' the package. Note, however, that this function can take a long time to extract metadata for all 27 | #' ABS datasets (e.g. approximately 20 minutes for 400 data sets), so use sparingly. For this 28 | #' reason, we also recommend specifying a progress update using the \code{progress} argument 29 | #' (default: 10). 30 | 31 | #' Not all data returns have support for languages other than english. If the specific 32 | #' return does not support your requested language by default it will return NA. The options for 33 | #' \code{lang} on the ABS API are presently: 34 | #' \itemize{ 35 | #' \item en: English 36 | #' \item fr: French 37 | #' } 38 | #' 39 | #' @export 40 | #' @author David Mitchell 41 | #' @examples 42 | #' \donttest{ 43 | #' z <- abs_cache(lang='en', progress=5) 44 | #' } 45 | abs_cache <- function(lang="en", progress=10) 46 | { 47 | x <- abs_datasets(lang=lang) 48 | if ( !is.null(progress) ) { 49 | t0 <- proc.time(); 50 | i_report <- unique(c(seq(progress, nrow(x), by=progress), nrow(x))); 51 | } 52 | z <- lapply(seq_len(nrow(x)), 53 | function(i) { 54 | ## Download metadata 55 | y <- abs_metadata(x$id[i], lang=lang); 56 | ## Add dataset id & name information as attributes 57 | attr(y, "dataset") <- x$id[i]; 58 | attr(y, "agency") <- x$agencyID[i]; 59 | attr(y, "dataset_desc") <- x$name[i]; 60 | ## Report progress 61 | if (!is.null(progress)) 62 | if (i %in% i_report) 63 | cat(sprintf("Retrieved metadata for %d (of %d) datasets. Total time: %.2f", 64 | i, nrow(x), (proc.time() - t0)["elapsed"]), "\n"); 65 | return(y) 66 | }); 67 | names(z) <- x$id; 68 | return(z); 69 | } 70 | 71 | 72 | #' @name abs_cachelist2table 73 | #' @title Converts an abs_cachelist to abs_cachetable 74 | #' @description This function converts an \code{abs_cachelist} to an \code{abs_cachetable} suitable 75 | #' for use with \code{\link{abs_search}}. 76 | #' @importFrom stats setNames 77 | #' @param cache An existing cachelist of available ABS datasets created by \code{abs_cachelist}. If 78 | #' \code{NULL}, uses the stored package cachelist. 79 | #' 80 | #' @return A table containing three columns: 81 | #' \itemize{ 82 | #' \item \code{dataset}: ABS API dataset identifier. 83 | #' \item \code{dataset_description}: ABS API dataset description. 
84 | #' \item \code{measure}: ABS API dataset measure identifier. 85 | #' \item \code{measure_description}: ABS API dataset measure description 86 | #' } 87 | #' 88 | #' @author David Mitchell 89 | #' @note This is an internal library function and is not exported. 90 | #' @examples 91 | #' \donttest{ 92 | #' abs_ct <- abs_cachelist2table(raustats::abs_cachelist) 93 | #' } 94 | abs_cachelist2table <- function(cache) 95 | { 96 | if (missing(cache)) 97 | cache <- raustats::abs_cachelist; 98 | cache_table <- 99 | suppressWarnings(lapply(cache, 100 | function(x) { 101 | names(x) <- attr(x, "concept"); 102 | y <- setNames( 103 | data.frame(attr(x, "dataset"), 104 | attr(x, "dataset_desc")##, 105 | ## if(is.null(x$MEASURE$Code)) "" else x$MEASURE$Code, 106 | ## if(is.null(x$MEASURE$Description)) "" else x$MEASURE$Description, 107 | ## if(is.null(x$INDEX$Code)) "" else x$INDEX$Code, 108 | ## if(is.null(x$INDEX$Description)) "" else x$INDEX$Description 109 | ), 110 | c("dataset","dataset_description"##, 111 | ## "measure","measure_description", 112 | ## "index","index_description" 113 | )); 114 | return(y) 115 | }) 116 | ); 117 | cache_table <- do.call(rbind, cache_table); 118 | row.names(cache_table) <- seq_len(nrow(cache_table)) 119 | return(cache_table); 120 | } 121 | -------------------------------------------------------------------------------- /tests/testthat/test-abs-cat-functions.R: -------------------------------------------------------------------------------- 1 | context("ABS Catalogue functions") 2 | 3 | test_that("abs_ausstats_url returns valid URL", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | expect_type(abs_urls()$base_url, "character"); 10 | expect_type(abs_urls()$ausstats_path, "character"); 11 | expect_type(abs_urls()$downloads_regex, "character"); 12 | expect_type(abs_urls()$releases_regex, "character"); 13 | }) 14 | 15 | 16 | test_that("abs_cat_tables fails well", 17 | { 18 | skip_on_cran() 19 | skip_on_travis() 20 | skip_on_appveyor() 21 | 22 | invalid_cat_no <- "5205.0" 23 | expect_error(abs_cat_tables(invalid_cat_no)); 24 | }) 25 | 26 | 27 | test_that("abs_cat_tables returns a valid data.frame", 28 | { 29 | skip_on_cran() 30 | skip_on_travis() 31 | skip_on_appveyor() 32 | 33 | ## ABS Catalogue tables - 5206.0 34 | abs_tables_5206 <- abs_cat_tables("5206.0") 35 | expect_s3_class(abs_tables_5206, "data.frame"); 36 | 37 | ## ABS Catalogue tables - 5206.0, with URLs 38 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 39 | expect_s3_class(abs_tables_5206_url, "data.frame"); 40 | 41 | ## ABS Catalogue tables - 6401.0, types="tss" 42 | abs_tables_6401 <- abs_cat_tables("6401.0", releases="Latest", types="tss"); 43 | expect_s3_class(abs_tables_6401, "data.frame"); 44 | 45 | ## ABS Catalogue tables - 1270.0.55.003, types="css" 46 | abs_tables_1270.0.55.003 <- abs_cat_tables("1270.0.55.003", releases="Latest", types="css"); 47 | expect_s3_class(abs_tables_1270.0.55.003, "data.frame"); 48 | 49 | ## ABS Catalogue tables - 1292, types="pub" 50 | abs_tables_1292 <- abs_cat_tables("1292.0", releases="Latest", types="pub", include_urls=TRUE); 51 | expect_s3_class(abs_tables_1292, "data.frame"); 52 | 53 | ## ABS Catalogue tables - 8731 54 | abs_tables_8731 <- abs_cat_tables("8731.0", releases="Latest", include_urls=TRUE); 55 | expect_s3_class(abs_tables_8731, "data.frame"); 56 | }) 57 | 58 | 59 | test_that("abs_cat_releases fails well", 60 | { 61 | skip_on_cran() 62 | skip_on_travis() 63 | skip_on_appveyor() 64 | 65 | 
## Check error on invalid ABS Cat. no. 66 | bad_url <- "Invalid_Cat_no" 67 | expect_error(abs_cat_releases(bad_url)); 68 | ## No ABS Cat. no. 69 | expect_error(abs_cat_releases()); 70 | }) 71 | 72 | 73 | test_that("abs_cat_releases returns a valid data.frame", 74 | { 75 | skip_on_cran() 76 | skip_on_travis() 77 | skip_on_appveyor() 78 | 79 | ## ABS Catalogue releases - 5206.0 80 | abs_release_5206 <- abs_cat_releases("5206.0"); 81 | expect_s3_class(abs_release_5206, "data.frame"); 82 | 83 | ## ABS Catalogue releases - 5206.0, with URLs 84 | abs_release_5206_url <- abs_cat_releases("5206.0", include_urls=TRUE); 85 | expect_s3_class(abs_release_5206_url, "data.frame"); 86 | 87 | ## ABS Catalogue tables - 6401.0 88 | abs_release_6401 <- abs_cat_releases("6401.0"); 89 | expect_s3_class(abs_release_6401, "data.frame"); 90 | 91 | ## ABS Catalogue tables - 6401.0, with URLs 92 | abs_release_6401_url <- abs_cat_releases("6401.0", include_urls=TRUE); 93 | expect_s3_class(abs_release_6401_url, "data.frame"); 94 | }) 95 | 96 | 97 | 98 | test_that("abs_local_filename created valid file name", 99 | { 100 | skip_on_cran() 101 | skip_on_travis() 102 | skip_on_appveyor() 103 | 104 | test_all <- "http://www.abs.gov.au/ausstats/meisubs.NSF/log?openagent&all_time_series_workbooks.zip&5206.0&Time%20Series%20Spreadsheet&23EA5772544F27BECA2582FE001507D1&0&Jun%202018&05.09.2018&Latest" 105 | expect_match(abs_local_filename(test_all), "^\\w+\\.(zip|xlsx*)$"); 106 | 107 | test_table_xls <- "http://www.abs.gov.au/ausstats/meisubs.NSF/log?openagent&5206001_key_aggregates.xls&5206.0&Time%20Series%20Spreadsheet&C1145211D5AF80E5CA2582FE0014F063&0&Jun%202018&05.09.2018&Latest" 108 | expect_match(abs_local_filename(test_table_xls), "^\\w+\\.(zip|xlsx*)$"); 109 | 110 | test_table_zip <- "http://www.abs.gov.au/ausstats/meisubs.NSF/log?openagent&5206001_key_aggregates.zip&5206.0&Time%20Series%20Spreadsheet&C1145211D5AF80E5CA2582FE0014F063&0&Jun%202018&05.09.2018&Latest" 111 | expect_match(abs_local_filename(test_table_zip), "^\\w+\\.(zip|xlsx*)$"); 112 | }) 113 | 114 | 115 | test_that("abs_cat_download downloads specified table files", 116 | { 117 | skip_on_cran() 118 | skip_on_travis() 119 | skip_on_appveyor() 120 | 121 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 122 | downloaded_tables <- abs_cat_download(head(abs_tables_5206_url$path_xls, 3), exdir=tempdir()); 123 | expect_type(downloaded_tables, "character"); 124 | expect_match(downloaded_tables, "\\w+\\.(zip|xlsx*)$"); 125 | expect_true(all(file.exists(downloaded_tables))) 126 | }) 127 | 128 | 129 | test_that("abs_cat_unzip extracts from valid filenames", 130 | { 131 | skip_on_cran() 132 | skip_on_travis() 133 | skip_on_appveyor() 134 | 135 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 136 | downloaded_tables <- abs_cat_download(abs_tables_5206_url$path_zip %>% .[!is.na(.)], 137 | exdir=tempdir()); 138 | extracted_files <- abs_cat_unzip(downloaded_tables); 139 | expect_type(extracted_files, "character"); 140 | expect_match(extracted_files, "\\w+\\.xlsx*$"); 141 | expect_true(all(file.exists(extracted_files))); 142 | }) 143 | 144 | 145 | test_that("abs_read_tss returns valid data.frame", 146 | { 147 | skip_on_cran() 148 | skip_on_travis() 149 | skip_on_appveyor() 150 | 151 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 152 | downloaded_tables <- abs_cat_download(abs_tables_5206_url$path_zip %>% .[!is.na(.)], 153 | exdir=tempdir()); 154 | 
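## Unzip the downloaded archives before reading the extracted Time Series Spreadsheets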
extracted_files <- abs_cat_unzip(downloaded_tables) 155 | expect_s3_class(abs_read_tss(extracted_files[1]), "data.frame"); ## Extract one file 156 | expect_s3_class(abs_read_tss(extracted_files), "data.frame"); ## Extract multiple files 157 | }) 158 | 159 | 160 | test_that("abs_cat_stats tss call returns valid data frame", 161 | { 162 | skip_on_cran() 163 | skip_on_travis() 164 | skip_on_appveyor() 165 | 166 | ## ABS Catalogue no. 5206.0 167 | expect_s3_class(abs_cat_stats("5206.0", tables="Table 1\\W+"), "data.frame"); 168 | expect_s3_class(abs_cat_stats("5206.0", tables=c("Table 1\\W+", "Table 2\\W+")), "data.frame"); 169 | ## ABS Catalogue no. 6401.0 170 | expect_s3_class(abs_cat_stats("6401.0", tables="CPI.+All Groups"), "data.frame"); 171 | expect_s3_class(abs_cat_stats("6401.0", tables="CPI.+All Groups", releases="Dec 2017"), "data.frame"); 172 | ## ABS Catalogue no. 8731.0 173 | expect_s3_class(abs_cat_stats("8731.0", tables=c("TABLE 01\\W+", "TABLE 02\\W+")), "data.frame"); 174 | }) 175 | -------------------------------------------------------------------------------- /tests/testthat/test-abs-api-functions.R: -------------------------------------------------------------------------------- 1 | context("ABS API functions") 2 | 3 | test_that("abs_api_call creates proper url", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | expect_match(abs_api_call(path=abs_api_urls()$datastr_path, args="all"), 10 | "http:\\/\\/stat\\.data\\.abs\\.gov\\.au\\/.+\\/all"); 11 | expect_false(httr::http_error(abs_api_call(path=abs_api_urls()$datastr_path, args="all"))); 12 | }) 13 | 14 | 15 | ## test_that("abs_api_call returns error if url is invalid", 16 | ## { 17 | ## skip_on_cran() 18 | ## skip_on_travis() 19 | ## skip_on_appveyor() 20 | 21 | ## expect_true(httr::http_error(sub("\\.au", "", 22 | ## abs_api_call(path=abs_api_urls()$datastr_path, args="all")))); 23 | ## expect_true(httr::http_error(sub("Structure", "", 24 | ## abs_api_call(path=abs_api_urls()$datastr_path, args="all")))); 25 | ## }) 26 | 27 | 28 | test_that("abs_call_api creates xml_document", 29 | { 30 | skip_on_cran() 31 | skip_on_travis() 32 | skip_on_appveyor() 33 | 34 | url <- abs_api_call(path=abs_api_urls()$datastr_path, args="all"); 35 | expect_s3_class(abs_call_api(url), "xml_document"); 36 | expect_s3_class(abs_call_api(url), "xml_node"); 37 | }) 38 | 39 | 40 | test_that("abs_datasets returns object of class data.frame with specified names", 41 | { 42 | skip_on_cran() 43 | skip_on_travis() 44 | skip_on_appveyor() 45 | 46 | x <- abs_datasets(include_notes=TRUE) 47 | expect_s3_class(x, "data.frame"); 48 | expect_named(x, c("agencyID", "id", "name", "notes"), ignore.order=TRUE) 49 | }) 50 | 51 | 52 | test_that("abs_metadata returns object of class list with specified names", 53 | { 54 | skip_on_cran() 55 | skip_on_travis() 56 | skip_on_appveyor() 57 | 58 | x <- abs_metadata("CPI"); 59 | expect_type(x, "list"); 60 | expect_named(x, c("CL_CPI_MEASURE","CL_CPI_REGION","CL_CPI_INDEX","CL_CPI_TSEST", 61 | "CL_CPI_FREQUENCY","CL_CPI_TIME","CL_CPI_OBS_STATUS","CL_CPI_TIME_FORMAT"), 62 | ignore.order=TRUE); 63 | }) 64 | 65 | 66 | ## test_that("abs_cache returns object of class list with specified names", 67 | ## { 68 | ## skip_on_cran() 69 | ## skip_on_travis() 70 | ## skip_on_appveyor() 71 | 72 | ## skip("abs_cache() test skipped -- takes long time to download all ABS series.") 73 | ## abs_cachelist <- abs_cache(progress=5) 74 | ## expect_type(abs_cachelist, "list"); 75 | ## }) 76 | 77 | ## 
test_that("abs_cachelist returns object of class table with specified names", 78 | ## { 79 | ## skip_on_cran() 80 | ## skip_on_travis() 81 | ## skip_on_appveyor() 82 | 83 | ## abs_ct <- abs_cachelist2table(raustats::abs_cachelist) 84 | ## expect_s3_class(abs_ct, "data.frame"); 85 | ## expect_named(abs_ct, c("dataset","dataset_description"), ignore.order=TRUE, ignore.case=TRUE); 86 | ## }) 87 | 88 | test_that("abs_dimensions returns named data frame", 89 | { 90 | skip_on_cran() 91 | skip_on_travis() 92 | skip_on_appveyor() 93 | 94 | abs_dim <- abs_dimensions("CPI") 95 | expect_s3_class(abs_dim, "data.frame"); 96 | expect_named(abs_dim, c("name","type"), ignore.order=TRUE, ignore.case=TRUE); 97 | }) 98 | 99 | test_that("abs_search returns a list with specified names", 100 | { 101 | skip_on_cran() 102 | skip_on_travis() 103 | skip_on_appveyor() 104 | 105 | abs_dataset_search <- abs_search("consumer price index") 106 | expect_s3_class(abs_dataset_search, "data.frame"); 107 | expect_named(abs_dataset_search, c("id", "agencyID", "name"), 108 | ignore.order=TRUE, ignore.case=TRUE); 109 | 110 | abs_indicator_search <- abs_search("all groups", dataset="CPI") 111 | expect_type(abs_indicator_search, "list"); 112 | expect_named(abs_indicator_search[[1]], c("code","description"), 113 | ignore.order=TRUE, ignore.case=TRUE); 114 | }) 115 | 116 | test_that("abs_stats fails well", 117 | { 118 | skip_on_cran() 119 | skip_on_travis() 120 | skip_on_appveyor() 121 | 122 | ## library(testthat); 123 | expect_error(abs_stats()); ## No dataset provided 124 | expect_error(abs_stats("INVALID_ID")); ## Non-existent dataset 125 | expect_error(abs_stats("CPI")); ## No filter supplied 126 | expect_error(abs_stats("CPI", filter="invalid_filter")); ## Invalid filter value 127 | expect_error(abs_stats("CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 128 | INDEX=10001, TSEST=10, FREQUENCY="Q"), 129 | start_date=2008, end_date=2006)); 130 | 131 | ## Test that calls returning no observations fail cleanly 132 | expect_error(abs_stats("ABS_REGIONAL_ASGS2016", 133 | filter=list(MEASURE="CABEE_6", 134 | REGIONTYPE="STE", 135 | ASGS_2016=1:8), 136 | start_date=2008, end_date=2006)); 137 | }) 138 | 139 | test_that("abs_stats returns valid URL", 140 | { 141 | skip_on_cran() 142 | skip_on_travis() 143 | skip_on_appveyor() 144 | 145 | expect_match(abs_stats("CPI", filter="all", return_url=TRUE), 146 | "^http:\\/\\/stat.data.abs.gov.au\\/SDMX-JSON\\/data\\/CPI"); 147 | }) 148 | 149 | test_that("abs_stats returns raw JSON object", 150 | { 151 | skip_on_cran() 152 | skip_on_travis() 153 | skip_on_appveyor() 154 | 155 | ## Test specific filter and start/end dates 156 | expect_type(abs_stats("CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 157 | INDEX=10001, TSEST=10, FREQUENCY="Q"), 158 | start_date="2008-Q3", end_date="2018-Q2", return_json=TRUE), 159 | "character"); 160 | }) 161 | 162 | 163 | test_that("abs_stats returns valid data frame", 164 | { 165 | skip_on_cran() 166 | skip_on_travis() 167 | skip_on_appveyor() 168 | 169 | ## Test ERP Quarterly data extraction 170 | expect_s3_class(abs_stats("ERP_QUARTERLY", 171 | filter = list(MEASURE = 1, ## Estimated Resident Population 172 | SEX_ABS = 3, ## Persons 173 | AGE = "TT")), ## All ages 174 | "data.frame"); 175 | 176 | ## Test specific filter and start/end dates 177 | expect_s3_class(abs_stats("CPI", 178 | filter=list(MEASURE=1, REGION=c(1:8,50), 179 | INDEX=10001, TSEST=10, FREQUENCY="Q"), 180 | start_date="2008-Q3", end_date="2018-Q2"), 181 | "data.frame"); 182 | ## Test incomplete 
filter set 183 | partial_flt <- list(REGION=c(1:8,50), INDEX=10001, TSEST=10, FREQUENCY="Q"); 184 | expect_message(abs_stats("CPI", filter=partial_flt, 185 | start_date="2008-Q3", end_date="2018-Q2")); 186 | expect_s3_class(suppressWarnings(abs_stats("CPI", filter=partial_flt, 187 | start_date="2008-Q3", end_date="2018-Q2")), 188 | "data.frame"); 189 | ## Test function returns character string 190 | expect_message(abs_stats("CPI", filter=partial_flt, 191 | start_date="2008-Q3", end_date="2018-Q2", return_url=TRUE)); 192 | expect_type(suppressWarnings(abs_stats("CPI", filter=partial_flt, 193 | start_date="2008-Q3", end_date="2018-Q2", return_url=TRUE)), 194 | "character"); 195 | }) 196 | -------------------------------------------------------------------------------- /R/rba-functions.R: -------------------------------------------------------------------------------- 1 | ### Function: rba_urls 2 | #' @name rba_urls 3 | #' @title RBA base URL and data paths 4 | #' @description This function returns a list of URLs and data paths used to construct RBA data 5 | #' access calls. It is used in other functions in this package and need not be called directly. 6 | #' @return list of RBA base URL and data paths 7 | #' @author David Mitchell 8 | #' @keywords internal 9 | rba_urls <- function() 10 | list(base_url = "https://www.rba.gov.au", 11 | stats_path = "statistics", 12 | tables_path = "tables"); 13 | 14 | 15 | ### Function: rba_table_cache 16 | #' @name rba_table_cache 17 | #' @title Return list of RBA tables 18 | #' @description Function to return an updated list of data tables available from the RBA website. 19 | #' @importFrom rvest html_session jump_to html_attr html_text html_nodes 20 | #' @return data frame in long format 21 | #' @export 22 | #' @author David Mitchell 23 | #' @examples 24 | #' \donttest{ 25 | #' rba_cachelist <- rba_table_cache(); 26 | #' } 27 | rba_table_cache <- function() 28 | { 29 | ## Avoid 'No visible binding for global variables' note 30 | { table_name <- NULL } 31 | ## Create RBA URL and open session 32 | url <- file.path(rba_urls()$base_url, rba_urls()$stats_path); 33 | ## Check url available 34 | raustats_check_url_available(url); 35 | s <- html_session(url); 36 | ## Get statistical data paths 37 | .paths <- html_nodes(s, "a"); 38 | path_statistical_data <- unique(html_attr(.paths, "href")[grepl("^statistical tables$", 39 | html_text(.paths), ignore.case=TRUE)]); 40 | path_historical_data <- unique(html_attr(.paths, "href")[grepl("^historical data$", 41 | html_text(.paths), ignore.case=TRUE)]); 42 | path_discontinued_data <- unique(html_attr(.paths, "href")[grepl("^discontinued data$", 43 | html_text(.paths), ignore.case=TRUE)]); 44 | ## 45 | ## Get list of current data tables 46 | rs <- jump_to(s, path_statistical_data); 47 | .paths <- html_nodes(rs, "a"); 48 | statistical_tables <- data.frame(table_type = "statistical tables", 49 | table = html_text(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)]), 50 | url = paste0(sub("/$", "", rba_urls()$base_url), 51 | html_attr(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)], 52 | "href"))); 53 | ## Include only Excel spreadsheet tables 54 | statistical_tables <- statistical_tables[grepl("\\.xls(x*)$", statistical_tables$url, ignore.case=TRUE),]; 55 | ## 56 | ## Get list of historical data tables 57 | rs <- jump_to(s, path_historical_data); 58 | .paths <- html_nodes(rs, "a"); 59 | historical_tables <- data.frame(table_type = "historical data", 60 | table = html_text(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)]), 
61 | url = paste0(sub("/$", "", rba_urls()$base_url), 62 | html_attr(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)], 63 | "href"))); 64 | ## Exclude: i) Occasional Paper 10 65 | historical_tables <- historical_tables[!grepl("Occasional Paper.+10", historical_tables$table, 66 | ignore.case=TRUE),]; 67 | ## and ii) Survey of consumers use of payments 68 | historical_tables <- historical_tables[!grepl("survey.+of.+consumers.+use", historical_tables$url, 69 | ignore.case=TRUE),]; 70 | ## 71 | ## Get list of discontinued data tables 72 | rs <- jump_to(s, path_discontinued_data); 73 | .paths <- html_nodes(rs, "a"); 74 | discontinued_tables <- data.frame(table_type = "discontinued data", 75 | table = html_text(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)]), 76 | url = paste0(sub("/$", "", rba_urls()$base_url), 77 | html_attr(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)], 78 | "href"))); 79 | z <- rbind(statistical_tables, 80 | historical_tables, 81 | discontinued_tables); 82 | z <- transform(z, 83 | table_name = sub("(.+)\\s(-|\u2013|\u2014)\\s(\\w\\d+(\\.\\d+)*)$", "\\1", table), 84 | table_no = sub("(.+)\\s(-|\u2013|\u2014)\\s(\\w\\d+(\\.\\d+)*)$", "\\3", table)); 85 | ## Replace en-dash/em-dash with hyphen (Regular expressions: en-dash - \u2013, and em-dash - \u2014 86 | z <- transform(z, 87 | table_name = gsub("\\s+"," ", gsub("(\u2013|\u2014)","-", table_name))); 88 | ## Re-order columns and return 89 | z <- z[,c("table_no", "table_name", "table_type", "url")]; 90 | return(z); 91 | } 92 | 93 | 94 | ### Function: rba_search 95 | #' @name rba_search 96 | #' @title Return list of data tables from RBA website 97 | #' @description Function to return a list of all RBA data tables. 98 | #' @param pattern Character string or regular expression to be matched 99 | #' @param fields Character vector of column names through which to search. By default, the function 100 | #' searches 'table_no' and 'table_name'. 101 | #' @param series_type Character vector specifying one or more one of 'statistical tables', 'historical data' or 102 | #' 'discontinued data'. By default, \code{series_type = 'statistical tables'}. 103 | #' @param ignore.case Case senstive pattern match or not. 104 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 105 | #' RBA tables (\code{rba_cachelist}), if TRUE, update the list of available datasets. 
106 | #' @return data frame in long format 107 | #' @author David Mitchell 108 | #' @export 109 | #' @examples 110 | #' rba_datasets <- rba_search(pattern = "Liabilities and Assets"); 111 | rba_search <- function(pattern, fields = c("table_no", "table_name"), 112 | series_type = "statistical tables", 113 | ignore.case = TRUE, update_cache = FALSE) 114 | { 115 | if (missing(pattern)) 116 | stop("No pattern supplied") 117 | if (update_cache) { 118 | rba_cache <- rba_table_cache(); 119 | } else { 120 | rba_cache <- raustats::rba_cachelist; 121 | } 122 | if (any(!fields %in% names(rba_cache))) 123 | stop(sprintf("Field names: %s not in cache", fields[!fields %in% names(rba_cache)])) 124 | ## Return list of matching RBA datasets 125 | match_index <- sapply(fields, 126 | function(field) 127 | grep(pattern, rba_cache[, field], ignore.case=ignore.case)); 128 | match_index <- sort(unique(unlist(match_index))); 129 | z <- rba_cache[match_index,]; 130 | ## Filter RBA data sets to specified series type(s) 131 | if (!any(series_type %in% c("statistical tables", "historical data", "discontinued data"))) 132 | stop(sprintf("Invalid series type(s): %s", 133 | paste(series_type[!series_type %in% 134 | c("statistical tables", "historical data", "discontinued data")], 135 | collapse=", "))) 136 | ## START - New code 137 | ## -- UP TO HERE -- 138 | ## if (any(!tolower(series_type) %in% c("statistical tables",'historical data','discontinued data'))) 139 | ## stop("series_type argument invalid!") 140 | z <- z[grepl(paste(series_type, collapse="|"), z$table_type, ignore.case=ignore.case),] 141 | ## series_index <- grep(paste(series_type, collapse="|"), 142 | ## z$table_type, ignore.case=ignore.case); 143 | ## ## series_index <- sort(unique(unlist(series_index))); 144 | ## z <- z[series_index,]; 145 | ## END - New code 146 | return(z); 147 | } 148 | 149 | 150 | #' @name rba_stats 151 | #' @title Return data for a specified RBA time series 152 | #' @description Function to download and return specified RBA time series data. 153 | #' @param table_no Character vector specifying one or more RBA table numbers to download. 154 | #' @param pattern Character string or regular expression to be matched. 155 | #' @param url Valid URL for RBA dataset (Excel format only). 156 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 157 | #' RBA datasets, if TRUE, update the list of available datasets. 158 | #' @param ... Other arguments to \code{\link{rba_search}}, e.g. \code{series_type = "statistical_tables"}. 159 | #' @return data frame in long format 160 | #' @export 161 | #' @author David Mitchell 162 | #' @examples 163 | #' \donttest{ 164 | #' ## Example - Selecting by table_no 165 | #' x <- rba_stats("A1"); 166 | #' 167 | #' ## Example - Selecting by pattern 168 | #' x <- rba_stats(pattern="Liabilities and Assets"); 169 | #' } 170 | rba_stats <- function(table_no, pattern, url, update_cache=FALSE, ...) 
171 | { 172 | if (FALSE) { 173 | table_no = "D2" 174 | rba_g1 <- rba_stats(table_no = "G1") 175 | rba_d2 <- rba_stats(table_no = "D2") 176 | } 177 | ## Deprecate: series_type 178 | if (missing(table_no) & missing(pattern) & missing(url)) 179 | stop("One of either table_no, pattern or url must be specified.") 180 | if (!missing(table_no) & !missing(pattern)) 181 | warning("Both table_no and pattern supplied, using table_no.") 182 | if (!missing(table_no) & !missing(url)) 183 | warning("Both table_no and url supplied, using table_no.") 184 | if (!missing(pattern) & !missing(url)) 185 | warning("Both pattern and url supplied, using pattern.") 186 | ## Update RBA table list 187 | if (update_cache) { 188 | rba_cache <- rba_table_cache(); 189 | } else { 190 | rba_cache <- raustats::rba_cachelist; 191 | } 192 | 193 | ## TO DO: Add table_type attribute to vector 'urls' 194 | if (!missing(table_no)) { 195 | if (!table_no %in% rba_cache$table_no) 196 | stop("table_no not valid RBA table code") 197 | ## Changed here 198 | urls <- rba_search(pattern=table_no, update_cache=update_cache, ...)$url 199 | ## urls <- as.character(table$url[which(table_no == rba_cache$table_no)]); 200 | } 201 | 202 | if (!missing(pattern)) 203 | urls <- as.character(rba_search(pattern, update_cache=update_cache, ...)$url) 204 | 205 | if (!missing(url)) { 206 | if (!any(url %in% rba_cache$url)) 207 | stop(sprintf("Following urls invalid: %s", 208 | paste(rba_cache$url[!url %in% rba_cache$url], collapse=", "))); 209 | urls <- as.character(url) 210 | } 211 | ## Download RBA statistical data 212 | ## Internet resource checking undertaken in 'rba_file_download' function. 213 | z <- lapply(urls, rba_file_download); 214 | ## Read data 215 | data <- lapply(z, rba_read_tss); 216 | data <- do.call(rbind, data); 217 | rownames(data) <- seq_len(nrow(data)); 218 | return(data); 219 | } 220 | 221 | 222 | #' @name rba_file_download 223 | #' @title Function to download statistics files from the RBA website and store locally 224 | #' @description This function downloads one or more RBA data files at the specified by URLs and 225 | #' saves a local copy. 226 | #' @importFrom httr GET http_type http_error progress status_code write_disk 227 | #' @param data_url Character vector specifying an RBA data set URL. 228 | #' @param exdir Target directory for downloaded files (defaults to \code{tempdir()}). Directory is 229 | #' created if it doesn't exist. 230 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 231 | #' RBA datasets, if TRUE, update the list of available datasets. 232 | #' @return Downloads data from the ABS website and returns a character vector listing the location 233 | #' where files are saved. 
234 | #' @author David Mitchell 235 | #' @export 236 | rba_file_download <- function(data_url, exdir=tempdir(), update_cache=TRUE) 237 | { 238 | ## DEBUGGING CODE 239 | ## if (FALSE) { 240 | ## exdir <- tempdir() 241 | ## data_url <- head(rba_table_cache()$url, 1); 242 | ## xx <- rba_file_download(rba_url); 243 | ## } 244 | if (!dir.exists(exdir)) dir.create(exdir) 245 | data_url <- as.character(data_url) 246 | local_filename <- basename(data_url); 247 | 248 | ## Update RBA table list 249 | if (update_cache) { 250 | rba_cache <- rba_table_cache(); 251 | } else { 252 | rba_cache <- raustats::rba_cachelist; 253 | } 254 | 255 | ## Check if url is not valid RBA data URL 256 | if (!data_url %in% rba_cache$url) 257 | stop(sprintf("Invalid RBA url: %s", data_url)); 258 | ## -- Download files -- 259 | cat(sprintf("Downloading: %s", local_filename)); 260 | ## Error check URL call 261 | raustats_check_url_available(data_url); 262 | resp <- GET(data_url, write_disk(file.path(exdir, local_filename), overwrite=TRUE), 263 | raustats_ua(), progress()); 264 | ## http_type(resp) 265 | ## File download validation code based on: 266 | ## https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html 267 | ## if (http_error(resp)) { 268 | ## stop( 269 | ## sprintf( 270 | ## "RBA data file request failed (Error code: %s)\nInvalid URL: %s", 271 | ## status_code(resp), 272 | ## data_url 273 | ## ), 274 | ## call. = FALSE 275 | ## ) 276 | ## } 277 | 278 | ## RBA website returns: content-type: application/octet-stream 279 | ## if (!http_type(resp) %in% c("text/csv", "application/vnd.ms-excel")) { 280 | ## stop("RBA file request did not return an Excel or CSV file", call. = FALSE) 281 | ## } 282 | 283 | ## Return results 284 | return(file.path(exdir, local_filename)); 285 | } 286 | 287 | 288 | ### Function: rba_read_tss 289 | #' @name rba_read_tss 290 | #' @title Read RBA statistical time series spreadsheet 291 | #' @description Functions to extract data from a specified RBA time series spreadsheet. 292 | #' @importFrom readxl read_excel excel_sheets 293 | #' @importFrom dplyr left_join 294 | #' @importFrom tidyr gather 295 | #' @importFrom stats complete.cases 296 | #' @param files Names of one or more ABS data file 297 | #' @return data frame in long format 298 | #' @author David Mitchell 299 | #' @export 300 | #' @examples 301 | #' \donttest{ 302 | #' rba_urls <- rba_search(pattern = "Liabilities and Assets")$url 303 | #' rba_files <- sapply(rba_urls, rba_file_download) 304 | #' data <- rba_read_tss(rba_files); 305 | #' } 306 | rba_read_tss <- function(files) 307 | { 308 | x <- lapply(files, 309 | function(file) 310 | rba_read_tss_(file) 311 | ) 312 | z <- do.call(rbind, x); 313 | return(z) 314 | } 315 | 316 | 317 | rba_read_tss_ <- function(file) 318 | { 319 | ## Debugging text 320 | if (FALSE) { 321 | rba_cache <- rba_table_cache(); 322 | rba_cache %>% write.csv(file.path(tempdir(), "rba_cache.csv")) 323 | table_no = "G3" 324 | urls <- as.character(rba_cache$url[which(table_no == rba_cache$table_no)]); 325 | file <- lapply(urls, rba_file_download)[[1]]; 326 | sheet_names <- excel_sheets(file)[grepl("data|series breaks", excel_sheets(file), ignore.case=TRUE)]; 327 | } 328 | 329 | ## Avoid 'No visible binding for global variables' note 330 | { series_id <- value <- NULL } 331 | sheet_names <- excel_sheets(file); 332 | ## CONSIDER: writing message for data sets containing series breaks, e.g. 
333 | ## if (any(grepl("series.+breaks", sheet_names, ignore.case=TRUE))) 334 | ## cat(sprintf("Note RBA data file %s contains Series Breaks. (See: %s for details).\n", 335 | ## basename(file), file)); 336 | ## CONSIDER: option for incorporating series breaks. 337 | 338 | ## TO DO 339 | ## 1. Require method to import historical and supplementary RBA data tables 340 | ## Check validity 341 | ## if (!all(c("notes", "data") %in% tolower(sheet_names))) 342 | ## stop(sprintf("File: %s is not a valid RBA time series file.", basename(file))); 343 | data <- lapply( 344 | ## Only process sheets named: 'Data' 345 | sheet_names[grepl("data", excel_sheets(file), ignore.case=TRUE)], 346 | function(sheet_name) { 347 | ## Read metadata 348 | .data <- read_excel(file, sheet=sheet_name, col_names=FALSE, col_types="text", 349 | na=c("","--"), .name_repair="minimal"); 350 | ## Return pre-header information from RBA files 351 | header_row <- which(sapply(1:nrow(.data), 352 | function(i) 353 | grepl("series\\s*id", paste(.data[i,], collapse=" "), 354 | ignore.case=TRUE))); 355 | ## -- Extract table name & number -- 356 | ## Note use of 'word' character /here /here for 13a, 6b, etc. 357 | regex_table_name <- "^(\\w+\\d+(\\.\\d+)*)(.+)$"; 358 | table_no <- trimws(sub(regex_table_name, "\\1", 359 | paste(replace(.data[1,], is.na(.data[1,]), ""), collapse=""))); 360 | ## Return table name/number details 361 | table_name <- trimws(sub(regex_table_name, "\\3", 362 | paste(replace(.data[1,], is.na(.data[1,]), ""), collapse=""))); 363 | ## Extract metadata 364 | metadata <- .data[1:header_row,]; 365 | metadata <- metadata[complete.cases(metadata),]; ## Drop NA rows 366 | metadata <- as.data.frame(t(metadata), stringsAsFactors=FALSE); 367 | rownames(metadata) <- seq_len(nrow(metadata)); 368 | names(metadata) <- tolower(gsub("\\s","_", 369 | gsub("\\.", "", 370 | metadata[1,]))); ## Rename variables 371 | metadata <- metadata[-1,]; 372 | metadata$publication_date <- excel2Date(as.integer(metadata$publication_date)); 373 | ## Append to metadata table 374 | metadata <- transform(metadata, 375 | table_no = table_no, 376 | table_name = table_name); 377 | 378 | z <- .data[-(1:header_row),]; 379 | ## Rename variables, including renaming `Series ID` 380 | names(z) <- sub("series.*id", "date", .data[header_row,], ignore.case=TRUE); 381 | z <- gather(z, series_id, value, -date, convert=TRUE); ## Transform to key:value pairs 382 | z <- transform(z, 383 | date = excel2Date(as.integer(date)), 384 | value = as.numeric(value)); 385 | 386 | data <- left_join(z, metadata, by="series_id"); 387 | data <- data[complete.cases(data),]; 388 | names(data) <- tolower(names(data)); 389 | return(data) 390 | }); 391 | data <- do.call(rbind, data); 392 | return(data); 393 | } 394 | -------------------------------------------------------------------------------- /R/abs-api-functions.R: -------------------------------------------------------------------------------- 1 | ### ABS API functions 2 | 3 | #' @name abs_api_urls 4 | #' @title ABS URL addresses and paths used in ABS.Stat API calls 5 | #' @description This function returns a list of URLs and data paths used to construction ABS.Stat 6 | #' API call. It is used in other functions in this package and need not be called directly. 
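#' For example (illustrative only), \code{abs_api_call(path = abs_api_urls()$datastr_path, args = "CPI")}
#' uses the list returned here to build a GetDataStructure request for the dataset id \code{"CPI"};
#' the dataset id is an assumed example, not a call made by this function itself.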
7 | #' @return a list with a base url and a url section for formatting the JSON API calls
8 | #' @author David Mitchell
9 | #' @keywords internal
10 | abs_api_urls <- function()
11 |   list(base_url = "http://stat.data.abs.gov.au",
12 |        datastr_path = "restsdmx/sdmx.ashx/GetDataStructure",
13 |        sdmx_json_path = "SDMX-JSON/data")
14 | 
15 | 
16 | #' @name abs_api_call
17 | #' @title Create ABS.Stat API URL call
18 | #' @description This function creates the ABS.Stat API call URL.
19 | #' @param path Character vector specifying one or more ABS collections or catalogue numbers to
20 | #' download.
21 | #' @param args Named list of arguments to supply to the call.
22 | #' @return a character string containing the ABS.Stat API call URL
23 | #' @author David Mitchell
24 | #' @keywords internal
25 | abs_api_call <- function(path, args)
26 | {
27 |   if (missing(path))
28 |     stop("Argument path missing.")
29 | 
30 |   if (missing(args))
31 |     stop("Argument args missing.")
32 | 
33 |   url <- file.path(abs_api_urls()$base_url, path, args)
34 | 
35 |   return(url);
36 | }
37 | 
38 | 
39 | #' @name abs_call_api
40 | #' @title Submit API call to ABS.Stat
41 | #' @description This function submits the specified API call to ABS.Stat.
42 | #' @importFrom xml2 read_xml
43 | #' @importFrom httr http_error
44 | #' @param url Character vector specifying one or more ABS collections or catalogue numbers to
45 | #' download.
46 | #' @return an xml2 document containing the API response
47 | #' @author David Mitchell
48 | #' @keywords internal
49 | abs_call_api <- function(url)
50 | {
51 |   if (http_error(url))
52 |     stop(sprintf("HTTP error returned by url: %s", url))
53 | 
54 |   x <- read_xml(url)
55 |   return(x);
56 | }
57 | 
58 | 
59 | #' @name abs_datasets
60 | #' @title Download ABS.Stat datasets
61 | #' @description This function returns a list of all datasets available from ABS.Stat.
62 | #' @importFrom xml2 as_list read_xml read_html xml_name xml_find_all
63 | #' @param lang Preferred language (default 'en' - English).
64 | #' @param include_notes Include ABS annotation information for each series.
65 | #' @return data frame in long format
66 | #' @export
67 | #' @author David Mitchell
68 | #' @examples
69 | #' \donttest{
70 | #' datasets <- abs_datasets()
71 | #' datasets <- abs_datasets(include_notes=TRUE)
72 | #' }
73 | abs_datasets <- function(lang="en", include_notes=FALSE)
74 | {
75 |   ## Return xml document of ABS indicators
76 |   url <- abs_api_call(path=abs_api_urls()$datastr_path, args="all");
77 |   x <- abs_call_api(url);
78 |   ## Select the node name used for series entries (the most frequent node carrying an 'id' attribute)
79 |   no_ids <- table(xml_name(xml_find_all(x, "//*[@id]")));
80 |   series_node_name <- names(no_ids[no_ids == max(no_ids)])
81 |   ## Extract Series ID information
82 |   xpath_str <- sprintf("//*[name() = '%s']", series_node_name);
83 |   name_fld <- "Name"
84 |   ## The following code extracts the relevant ABS series information from the returned
85 |   ## XML document by first saving the relevant part of the XML document to an R list and
86 |   ## then explicitly extracting the relevant information from specific nodes by name.
87 |   ## A more general recursive process, impervious to name changes, would be preferred;
88 |   ## however, it is more complex than simply revising the following code in response to
89 |   ## potential future server-side changes.
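  ## Illustrative sketch of the intended result shape (values are examples only,
  ## not taken from a live ABS.Stat response):
  ##   id      agencyID   name                                notes
  ##   "CPI"   "ABS"      "Consumer Price Index, Australia"   "..."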
90 | y <- as_list(xml_find_all(x, xpath_str)); 91 | z <- lapply(y, 92 | function(m) 93 | list(agencyID = attr(m, "agencyID"), 94 | id = attr(m, "id"), 95 | name = unlist( 96 | if (length(m[names(m) == name_fld]) == 1) { 97 | m[[name_fld]] 98 | } else { 99 | m[names(m) == name_fld][sapply(m[names(m) == name_fld], 100 | function(p) attributes(p)) == lang] 101 | }), 102 | notes = paste(unlist(m$Annotations), collapse=": ")) 103 | ); 104 | z <- as.data.frame(do.call(rbind, z)); 105 | z <- z[, c("id","agencyID","name","notes")]; 106 | if (!include_notes) 107 | z <- z[, c("id","agencyID","name")]; 108 | return(z) 109 | } 110 | 111 | 112 | #' @name abs_metadata 113 | #' @title Download dataset metadata from the ABS API 114 | #' @description This function queries and returns all metadata associated with a specified dataset 115 | #' from ABS.Stat. 116 | #' @importFrom xml2 xml_name xml_children xml_child xml_length xml_attrs xml_attr xml_ns_strip 117 | #' xml_text xml_find_all xml_parent 118 | #' @param id ABS dataset ID. 119 | #' @param lang Preferred language (default 'en' - English). 120 | #' @return data frame in long format 121 | #' @export 122 | #' @author David Mitchell 123 | #' @examples 124 | #' \donttest{ 125 | #' datasets <- abs_datasets(); 126 | #' x <- abs_metadata("CPI"); 127 | #' x <- abs_metadata(grep("cpi", datasets$id, ignore.case=TRUE, value=TRUE)); 128 | #' names(x) 129 | #' y <- abs_metadata(datasets$id[1]); 130 | #' names(y) 131 | #' } 132 | abs_metadata <- function(id, lang="en") 133 | { 134 | ## Return xml document of ABS indicators 135 | url <- abs_api_call(path=abs_api_urls()$datastr_path, args=id); 136 | x <- abs_call_api(url); 137 | 138 | ## Return all codelists 139 | i_codelist <- grep("codelist", xml_name(xml_children(x)), ignore.case=TRUE); 140 | n_codelists <- xml_length(xml_child(x, i_codelist)); 141 | ## Dataset dimensions and codes 142 | codelists_attrs <- as.data.frame( 143 | do.call(rbind, 144 | lapply(seq_len(n_codelists), 145 | function(i) 146 | xml_attrs(xml_child(xml_child(x, 2),i)) 147 | )), 148 | stringsAsFactors = FALSE); 149 | ## Codelist content 150 | codelists <- lapply(seq_len(n_codelists), 151 | function(i) { 152 | ## Note 'xml_ns_strip' essential to extracting Description 153 | y <- xml_ns_strip(xml_child(xml_child(x, i_codelist), i)); 154 | 155 | codelist <- data.frame( 156 | Code = xml_text(xml_find_all(xml_children(y), "@value")), 157 | Description = xml_text(xml_find_all(y, 158 | sprintf(".//Code//Description[@xml:lang='%s']", 159 | lang))), 160 | stringsAsFactors=FALSE); 161 | }); 162 | ## Return components 163 | i_keyfamilies <- grep("keyfamilies", xml_name(xml_children(x)), ignore.case=TRUE); 164 | z <- xml_parent(xml_find_all(xml_children(xml_child(x, i_keyfamilies)), 165 | ".//@codelist")); 166 | components <- data.frame(codes = xml_text(xml_find_all(z, ".//@codelist")), 167 | conceptRef = xml_text(xml_find_all(z, ".//@conceptRef")), 168 | type = xml_name(z), 169 | stringsAsFactors=FALSE); 170 | ## Return concepts 171 | i_concepts <- grep("concepts", xml_name(xml_children(x)), ignore.case=TRUE); 172 | w <- xml_children(xml_child(x, i_concepts)); 173 | concepts <- data.frame(concept = xml_attr(xml_find_all(w, "."), "id"), 174 | agencyID = xml_attr(xml_find_all(w, "."), "agencyID"), 175 | conceptRef=xml_text(xml_find_all(w, sprintf(".//Name[@xml:lang='%s']", lang))), 176 | stringsAsFactors=FALSE); 177 | ## Set names/attributes 178 | names(codelists) <- components$codes; 179 | ## Add dataset and dataset_desc attributes 180 | attr(codelists, 
"concept") <- components$conceptRef; 181 | attr(codelists, "description") <- concepts$conceptRef[match(components$conceptRef, concepts$concept)]; 182 | attr(codelists, "type") <- components$type; 183 | return(codelists); 184 | } 185 | 186 | 187 | #' @name abs_dimensions 188 | #' @title Return available dimensions of ABS series 189 | #' @description This function returns the available dimeninsions for a specified ABS API dataset. 190 | #' @param dataset Character vector of dataset codes. These codes correspond to the 191 | #' \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 192 | #' \code{abs_cachelist}, or the result of \code{abs_indicators}. 193 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 194 | #' ABS.Stat datasets, if TRUE, update the list of available datasets. 195 | #' @return a data frame with available dataset dimensions. 196 | #' @export 197 | #' @author David Mitchell 198 | #' @examples 199 | #' \donttest{ 200 | #' ## CPI - Consumer Price Index 201 | #' x <- abs_dimensions("CPI"); 202 | #' str(x) 203 | #' ## LF - Labour Force 204 | #' x <- abs_dimensions("LF"); 205 | #' str(x) 206 | #' } 207 | abs_dimensions <- function(dataset, update_cache=FALSE) 208 | { 209 | ## Check dataset present and valid 210 | if (missing(dataset)) 211 | stop("No dataset name supplied."); 212 | if (update_cache) { 213 | cache <- abs_datasets(); 214 | } else { 215 | cache <- raustats::abs_cachelist; 216 | } 217 | if (!dataset %in% cache$id) 218 | stop(sprintf("%s not valid dataset name.", dataset)); 219 | metadata <- abs_metadata(dataset) 220 | ## Return data frame of dataset dimensions: 221 | z <- data.frame(name = attr(metadata, "concept"), 222 | type = attr(metadata, "type")); 223 | return(z) 224 | } 225 | 226 | 227 | #' @name abs_search 228 | #' @title Search dataset information from the ABS.Stat API 229 | #' @description This function finds datasets or dimensions within a specific that match a specified 230 | #' regular expresion and returns matching results. 231 | #' @param pattern Character string or regular expression to be matched. 232 | #' @param dataset Character vector of ABS.Stat dataset codes. These codes correspond to the 233 | #' \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 234 | #' \code{abs_cachelist}, or the result of \code{abs_indicators}. If NULL (default), then function 235 | #' undertakes a dataset mode search. If not NULL, function searches all dimensions of specified 236 | #' dataset. 237 | #' @param ignore.case Case senstive pattern match or not. 238 | #' @param code_only If FALSE (default), all column/fields are returned. If TRUE, only the dataset 239 | #' identifier or indicator code are returned. 240 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 241 | #' ABS.Stat datasets, if TRUE, update the list of available datasets. 242 | #' @return A data frame with datasets and data items that match the search pattern. 243 | #' @export 244 | #' @note With acknowledgements to \code{wb_search} function. 
245 | #' @author David Mitchell 246 | #' @examples 247 | #' ## ABS dataset search 248 | #' x <- abs_search(pattern = "consumer price index") 249 | #' x <- abs_search(pattern = "census") 250 | #' x <- abs_search(pattern = "labour force") 251 | #' 252 | #' ## ABS indicator search 253 | #' x <- abs_search(pattern = "all groups", dataset="CPI") 254 | #' x <- abs_search(pattern = c("all groups", "capital cities"), dataset="CPI") 255 | #' 256 | abs_search <- function(pattern, dataset=NULL, ignore.case=TRUE, code_only=FALSE, update_cache=FALSE) 257 | { 258 | if (missing(pattern)) 259 | stop("No regular expression provided.") 260 | if (update_cache) { 261 | cache <- abs_datasets(); 262 | } else { 263 | cache <- raustats::abs_cachelist; 264 | } 265 | ## 266 | if (is.null(dataset)) { 267 | ## 1. If dataset not specified, search through list of datasets 268 | ## Return list of matching ABS.Stat datasets 269 | match_index <- sapply(names(cache), ## cache_table 270 | function(i) grep(pattern, cache[, i], ignore.case=ignore.case), ## cache_table[, i] 271 | USE.NAMES = FALSE); 272 | match_index <- sort(unique(unlist(match_index))); 273 | if (length(match_index) == 0) 274 | warning(sprintf("No matches were found for the search term %s. Returning an empty data frame.", 275 | pattern)); 276 | match_df <- unique(cache[match_index, ]) ## unique(cache_table[match_index, ]) 277 | rownames(match_df) <- seq_len(nrow(match_df)); 278 | if (code_only) 279 | match_df <- as.character(match_df[,"id"]); 280 | return(match_df); 281 | } else { 282 | ## 2. If dataset specified, search through list of datasets 283 | if (!dataset %in% cache$id) 284 | stop(sprintf("Dataset: %s not available on ABS.Stat", dataset)) 285 | .cachelist <- abs_metadata(dataset); 286 | names(.cachelist) <- attr(.cachelist, "concept"); 287 | ## Return list of all dataset dimensions with matching elements 288 | filter_index <- lapply(.cachelist, 289 | function(x) { 290 | i <- grep(sprintf("(%s)", paste(pattern, collapse="|")), 291 | x$Description, ignore.case=ignore.case); 292 | z <- x[i,]; 293 | return(z); 294 | }); 295 | filter <- filter_index[sapply(filter_index, nrow) > 0] 296 | if (code_only) 297 | filter <- lapply(filter, function(x) as.character(x$Code)); 298 | return(filter) 299 | } 300 | } 301 | 302 | 303 | #' @name abs_stats 304 | #' @title Download data from the ABS API 305 | #' @description This function queries and returns data for a specified ABS dataset from the ABS API. 306 | #' @importFrom xml2 read_xml read_html 307 | #' @importFrom httr content GET http_error http_status http_type progress status_code 308 | #' @importFrom jsonlite fromJSON 309 | #' @importFrom stats setNames 310 | #' @param dataset Character vector of ABS.Stat dataset codes. These codes correspond to the 311 | #' \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 312 | #' \code{abs_cachelist}, or the result of \code{abs_indicators}. 313 | #' @param filter A list that contains filter of dimensions available in the specified \code{series} 314 | #' to use in the API call. If NULL, no filter is set and the query tries to return all dimensions 315 | #' of the dataset. Valid dimensions to include in the list supplied to filter include: MEASURE, 316 | #' REGION, INDEX, TSEST and FREQUENCY. 317 | #' @param start_date Numeric or character. If numeric it must be in %Y form (i.e. four digit 318 | #' year). 
For data at sub-annual granularity, the API supports the following formats: Monthly
319 | #' data -- '2016-M01', Quarterly data -- '2016-Q1', Semi-annual data -- '2016-B2', Financial year
320 | #' data -- '2016-17'.
321 | #' @param end_date Numeric or character (refer to \code{start_date}).
322 | #' @param lang Language in which to return the results. If \code{lang} is unspecified, English is
323 | #' the default.
324 | ## #' @param remove_na If \code{TRUE}, remove blank or NA observations. If \code{FALSE}, no blank or
325 | ## #'   NA values are removed from the return.
326 | ## #' @param include_unit If \code{TRUE}, the column unit is not removed from the return. If \code{FALSE}, this column is removed.
327 | ## #' @param include_obsStatus If \code{TRUE}, the column obsStatus is not removed from the return. If \code{FALSE}, this column is removed.
328 | #' @param dimensionAtObservation The identifier of the dimension to be attached at the observation
329 | #' level. The default order is: 'AllDimensions', 'TimeDimension' and 'MeasureDimension'.
330 | #' AllDimensions results in a flat list of observations without any grouping.
331 | #' @param detail This argument specifies the desired amount of information to be returned. Possible
332 | #' values are:
333 | #'
334 | #' \itemize{
335 | #' \item Full: all data and documentation, including annotations (default)
336 | #' \item DataOnly: attributes, and therefore groups, will be excluded
337 | #' \item SeriesKeysOnly: only the series elements and the dimensions that make up the series keys
338 | #' \item NoData: returns the groups and series, including attributes and annotations, without observations (all values = NA)
339 | #' }
340 | #'
341 | #' @param return_json Logical. Default is \code{FALSE}. If \code{TRUE}, the function returns the
342 | #' result in raw SDMX-JSON.
343 | #' @param return_url Default is \code{FALSE}. If \code{TRUE}, the function returns the generated
344 | #' request URL and does not submit the request.
345 | #' @param enforce_api_limits If \code{TRUE} (the default), the function enforces the ABS.Stat
346 | #' RESTful API limits and will not submit the query if the URL string length exceeds 1000
347 | #' characters or the query would return more than 1 million records. If \code{FALSE}, the function
348 | #' submits the API call regardless and attempts to return the results.
349 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available
350 | #' ABS.Stat datasets, if TRUE, update the list of available datasets.
351 | #' @return Returns a data frame of the selected series from the specified ABS dataset.
352 | #' @note The data query submitted by this function uses the ABS RESTful API based on the SDMX-JSON
353 | #' standard. It allows a maximum of 1000 characters in the data
354 | #' URL.
355 | #'
356 | #' Further limitations known at this time include:
357 | #' \itemize{
358 | #' \item Only anonymous queries are supported; there is no authentication
359 | #' \item Each response is limited to no more than 1 million observations
360 | #' \item Errors are not returned in the JSON format, but HTTP status codes and messages are
361 | #' set according to the Web Services Guidelines
362 | #' \item The lastNObservations parameter is not supported
363 | #' \item Observations follow the time series (or import-specific) order even if
364 | #' \code{dimensionAtObservation=AllDimensions} is used.
365 | #' } 366 | #' 367 | #' @export 368 | #' @author David Mitchell 369 | #' @examples 370 | #' \donttest{ 371 | #' x <- abs_stats(dataset="CPI", filter="all", return_url=TRUE); 372 | #' x <- abs_stats(dataset="CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 373 | #' INDEX=10001, TSEST=10, FREQUENCY="Q")); 374 | #' x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, 375 | #' INDEX=10001, TSEST=10, FREQUENCY="Q")); 376 | #' x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, INDEX=10001, 377 | #' TSEST=10, FREQUENCY="Q"), return_url=TRUE); 378 | #' } 379 | abs_stats <- function(dataset, filter, start_date, end_date, lang=c("en","fr"), 380 | dimensionAtObservation=c("AllDimensions","TimeDimension","MeasureDimension"), 381 | detail=c("Full","DataOnly","SeriesKeysOnly","NoData"), 382 | return_json=FALSE, return_url=FALSE, 383 | enforce_api_limits=TRUE, update_cache=FALSE) 384 | { 385 | ## Check dataset present and valid 386 | if (missing(dataset)) 387 | stop("No dataset supplied."); 388 | if (!dataset %in% abs_datasets()$id) 389 | stop(sprintf("%s not a valid ABS dataset.", dataset)); 390 | ## Check if filter provided 391 | if (missing(filter)) { 392 | dataset_dim <- abs_dimensions(dataset) 393 | stop(sprintf("No filter argument. Should be either 'all' or valid list with dataset dimensions: %s", 394 | paste(dataset_dim[grepl("^dimension$", dataset_dim$type, 395 | ignore.case=TRUE), "name"], collapse=", "))); 396 | } 397 | ## Check if start_date > end_date 398 | if (!missing(start_date) && !missing(end_date) && start_date > end_date) 399 | stop("start_date later than end_date, request not submitted.") 400 | ## Return metadata 401 | if (update_cache) { 402 | cache <- abs_datasets(); 403 | } else { 404 | cache <- raustats::abs_cachelist; 405 | } 406 | ## Get list of Dimension name: 407 | metadata <- abs_metadata(dataset); 408 | metadata_names <- abs_dimensions(dataset, ); 409 | metadata_dims <- as.character(metadata_names[grepl("^dimension$", metadata_names$type, ignore.case=TRUE), 410 | "name"]); 411 | names(metadata) <- metadata_names$name; 412 | ## Return agency name 413 | ## agency_name <- unlist(attr(cache[[dataset]], "agency")); 414 | ## -- Check the set of dimensions supplied in 'filter' -- 415 | if (length(filter) == 1 && filter == "all") { 416 | ## If filter='all', replace with detailed filter list including all dimensions 417 | .filter <- metadata; 418 | filter <- lapply(.filter, function(x) x$Code); 419 | filter <- filter[names(filter) %in% metadata_dims]; 420 | } else if (class(filter) == "list") { 421 | ## If filter is a list: 422 | if (any(!metadata_dims %in% names(filter))) { 423 | ## Check if any filter dimensions missing, and append missing elements (set to 'all') 424 | message(sprintf("Filter dimension(s): %s not in filter, dimensions added and set to 'all'.", 425 | paste(metadata_dims[!metadata_dims %in% names(filter)], collapse=", "))); 426 | for (name in metadata_dims[!metadata_dims %in% names(filter)]) 427 | filter[[name]] <- "all" 428 | } 429 | filter <- filter[metadata_dims]; 430 | for (name in names(filter)) 431 | if( length(filter[[name]]) == 1 && grepl("all", filter[[name]], ignore.case=TRUE) ) 432 | filter[[name]] <- metadata[[name]]$Code; 433 | } else { 434 | stop("Argument filter must be either the single character string: 'all' or a valid filter list."); 435 | } 436 | n_filter <- prod(lengths(filter)); 437 | ## Create ABS URL and open session 438 | url <- file.path(abs_api_urls()$base_url, abs_api_urls()$sdmx_json_path, 439 | dataset, 
440 |                    paste(lapply(filter,
441 |                                 function(x) paste(x, collapse="+")),
442 |                          collapse="."),
443 |                    "all");
444 |   ## dimensionAtObservation
445 |   dimensionAtObservation <- match.arg(dimensionAtObservation);
446 |   if (!dimensionAtObservation %in% c("AllDimensions","TimeDimension","MeasureDimension"))
447 |     stop("dimensionAtObservation argument invalid!")
448 |   detail <- match.arg(detail);
449 |   if (!detail %in% c("Full","DataOnly","SeriesKeysOnly","NoData"))
450 |     stop("detail argument invalid!")
451 |   ## Append 'detail' and 'dimensionAtObservation' values to URL query
452 |   url <- sprintf("%s?detail=%s&dimensionAtObservation=%s",
453 |                  url, detail, dimensionAtObservation);
454 |   ## Add start/end dates, and check validity
455 |   if (!missing(start_date))
456 |     url <- paste0(url, "&startPeriod=", start_date)
457 |   if (!missing(end_date))
458 |     url <- paste0(url, "&endPeriod=", end_date);
459 |   ## Return URL if specified
460 |   if (return_url) {
461 |     return(url)
462 |   } else {
463 |     ## Check URL length - ABS.Stat limit: 1000 characters
464 |     if (enforce_api_limits) {
465 |       if (nchar(url) > 1000)
466 |         stop(sprintf(paste("URL query length (%i) exceeds maximum request URL limit (1000 characters).",
467 |                            "Filter the query on one or more dimensions."),
468 |                      nchar(url)));
469 |       ## Check number of observations - ABS.Stat limit: 1 million observations
470 |       time_filter <- metadata$TIME$Code;
471 |       if(!missing(start_date))
472 |         time_filter <- time_filter[time_filter >= start_date]
473 |       if(!missing(end_date))
474 |         time_filter <- time_filter[time_filter <= end_date]  ## Keep periods up to end_date
475 |       ## Count approximate number of records to be returned
476 |       n_time <- sum(c(ifelse("A" %in% filter$FREQUENCY,
477 |                              length(grep("^\\d{4}$", time_filter)),
478 |                              NA_integer_)),
479 |                     c(ifelse("S" %in% filter$FREQUENCY,
480 |                              length(grep("^\\d{4}-B\\d+$", time_filter)),
481 |                              NA_integer_)),
482 |                     c(ifelse("Q" %in% filter$FREQUENCY,
483 |                              length(grep("^\\d{4}-Q\\d+$", time_filter)),
484 |                              NA_integer_)),
485 |                     c(ifelse("M" %in% filter$FREQUENCY,
486 |                              length(grep("^\\d{4}-M\\d+$", time_filter)),
487 |                              NA_integer_)),
488 |                     na.rm = TRUE);
489 |       if (n_filter * n_time > 10^6)
490 |         stop(sprintf(paste("Estimated number of records (%.0f) exceeds ABS.Stat limit (1 million).",
491 |                            "Filter the query on one or more dimensions."),
492 |                      n_filter * n_time));
493 |     }
494 | 
495 |     ## Download data
496 |     ## cat(sprintf("API query submitted: %s...\n", substr(url, 30)));
497 |     ## Error check URL call
498 |     raustats_check_url_available(url)
499 |     resp <- GET(url, raustats_ua(), progress())
500 |     ## ## Error check URL call
501 |     ## if (http_error(resp)) {
502 |     ##   stop(
503 |     ##     sprintf(
504 |     ##       "ABS.Stat API request failed [%s]\n%s\n<%s>",
505 |     ##       status_code(resp),
506 |     ##       http_status(resp)$message,
507 |     ##       http_status(resp)$reason,
508 |     ##     ),
509 |     ##     call. = FALSE
510 |     ##   )
511 |     ## }
512 |     ## Check content type
513 |     if (!grepl("draft-sdmx-json", http_type(resp))) {
514 |       stop("ABS.Stat API did not return SDMX-JSON format", call. = FALSE)
515 |     }
516 | 
517 |     if (return_json) {
518 |       ## Return results as sdmx-json text format
519 |       return(content(resp, as="text"))
520 |     } else {
521 |       cat("Converting query output to data frame ...
\n"); 522 | ## Convert JSON to list 523 | x_json <- fromJSON(content(resp, as="text")) ## , simplifyVector = FALSE) 524 | ## Check whether data contains any observations 525 | if (ncol(x_json$dataSets$observation) == 0) 526 | stop(paste("API call returns no observations.", 527 | "Check ABS.Stat or inspect JSON object with `return_json=TRUE`"), call. = FALSE); 528 | ## Convert JSON format to long (tidy) data frame 529 | x_obs <- x_json$dataSets$observation; 530 | x_str <- x_json$structure$dimensions$observation; 531 | y <- data.frame(do.call(rbind, unlist(x_obs, recursive=FALSE))); 532 | ## Set names of returned records 533 | y <- if (detail == "Full") { 534 | setNames(y, c("values","obs_status","unknown")) 535 | } else if (detail == "SeriesKeysOnly") { 536 | setNames(y, c("series_key")); 537 | } else if (detail == "DataOnly") { 538 | setNames(y, c("values")); 539 | } else { ## if (detail == NoData) { 540 | setNames(y, c("values","obs_status","unknown")) 541 | } 542 | y <- cbind(setNames(data.frame(do.call(rbind, strsplit(row.names(y), ":"))), 543 | tolower(sub("\\s+","_", x_str$name))), 544 | y); 545 | ## Re-index dimension IDs from 0-based to 1-based 546 | for (name in tolower(sub("\\s+","_", x_str$name))) 547 | y[,name] <- as.integer(as.character(y[,name])) + 1; 548 | names_y <- setNames(lapply(seq_len(nrow(x_str)), 549 | function(j) unlist(x_str[j,"values"], recursive=FALSE) 550 | ), 551 | tolower(sub("\\s+","_", x_str$name))); 552 | ## Substitute dimension IDs for Names 553 | for (name in names(names_y)) 554 | y[,name] <- names_y[[name]]$name[y[,name]] 555 | ## Insert dataset_name 556 | y$agency_id <- x_json$header$sender$id; 557 | y$agency_name <- x_json$header$sender$name; 558 | y$dataset_name <- x_json$structure$name; 559 | ## Re-index rows 560 | row.names(y) <- seq_len(nrow(y)); 561 | ## cat("completed.\n"); 562 | ## Return data 563 | return(y); 564 | } ## End: return_json 565 | } 566 | } 567 | -------------------------------------------------------------------------------- /R/abs-cat-functions.R: -------------------------------------------------------------------------------- 1 | ### ABS Catalogue functions 2 | 3 | #' @name abs_urls 4 | #' @title ABS URL addresses and paths used in accessing ABS Catalogue data calls 5 | #' @description This function returns a list of URLs and data paths used to construct ABS Catalogue 6 | #' data access calls. It is used in other functions in this package and need not be called 7 | #' directly. 8 | #' @return a list with a base url and a url section for formatting ABS Catalogue statistics calls 9 | #' @author David Mitchell 10 | #' @keywords internal 11 | abs_urls <- function() 12 | { 13 | list(base_url = "https://www.abs.gov.au", 14 | ausstats_path = "ausstats/abs@.nsf", 15 | mf_path = "mf", 16 | downloads_regex = "Downloads", 17 | releases_regex = "Past.*Future.*Releases"); 18 | } 19 | 20 | 21 | #' @name abs_filetypes 22 | #' @title Valid ABS file types 23 | #' @description This function returns a vector of valid ABS file types for using list of URLs and data paths used to construct ABS Catalogue 24 | #' data access calls. It is used in other functions in this package and need not be called 25 | #' directly. 26 | #' @return a vector containing a list of valid ABS file types. 
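#' For example (illustrative only), downloaded responses are checked against these MIME types
#' with \code{httr::http_type(resp) %in% abs_filetypes()} before being accepted, as in
#' \code{abs_cat_download}.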
27 | #' @author David Mitchell 28 | #' @keywords internal 29 | abs_filetypes <- function() 30 | { 31 | c(zip_files = "application/x-zip", 32 | excel_files = "application/vnd.ms-excel", 33 | openxml_files = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 34 | pdf_files = "application/pdf"); 35 | } 36 | 37 | 38 | #' @name abs_cat_stats 39 | #' @title Get ABS catalogue series data 40 | #' @description This function downloads ABS catalogue series statistics, by ABS catalogue number. 41 | #' @importFrom rvest html_session follow_link html_attr jump_to 42 | #' @importFrom xml2 read_xml read_html 43 | #' @param cat_no Character vector specifying one or more ABS collections or catalogue numbers to 44 | #' download. 45 | #' @param tables A character vector of regular expressions denoting tables to download. The default 46 | #' ('All') downloads all time series spreadsheet tables for each specified catalogue. Use a list 47 | #' to specify different table sets for each specified ABS catalogue number. 48 | #' @param releases Date or character string object specifying the month and year denoting which 49 | #' release to download. Default is "Latest", which downloads the latest available data. See 50 | #' examples for further details. 51 | #' @param types One of either 'tss' -- ABS time series spreadsheet (the default) or 'css' -- ABS 52 | #' data cube (cross-section spreadsheet). 53 | #' @param na.rm logical (default: \code{TRUE}) - remove observations containing missing values. 54 | #' @return data frame in long format 55 | #' @export 56 | #' @author David Mitchell 57 | #' @examples 58 | #' \donttest{ 59 | #' ## Download quarterly Australian National Accounts, Tables 1 & 2 60 | #' ana_q <- abs_cat_stats("5206.0", tables=c("Table 1\\W+", "Table 2\\W+")); 61 | #' 62 | #' ## Download December 2017 Australian National Accounts, Table 1 63 | #' ana_q_2017q4 <- abs_cat_stats("5206.0", tables="Table 1\\W+", release="Dec 2017"); 64 | #' } 65 | abs_cat_stats <- function(cat_no, tables="All", releases="Latest", types="tss", na.rm=TRUE) 66 | { 67 | if (missing(cat_no)) 68 | stop("No cat_no supplied."); 69 | ## if (tolower(releases) != "latest" || 70 | ## releases IS NOT A DATE ) 71 | ## stop("releases arguments ") 72 | if (any(!types %in% c("tss","css"))) 73 | stop("Allowable type arguments limited to one or both: 'tss' and 'css'."); 74 | ## Get available catalogue tables 75 | if (FALSE) { 76 | cat_no <- "5206.0"; tables <- c("Table 1\\W+", "Table 2\\W+"); 77 | releases <- "Latest"; types <- "tss"; include_urls <- FALSE; 78 | } 79 | cat_tables <- abs_cat_tables(cat_no=cat_no, releases=releases, types=types, include_urls=TRUE) 80 | ## Select only the user specified tables ('sel_tables') 81 | if (length(tables) == 1 && tolower(tables) == "all") { 82 | ## If 'all' tables, download all 83 | sel_tables <- if (any(grepl("^all time series.*", cat_tables$item_name, ignore.case=TRUE))) { 84 | ## If all tables provided as single compressed archive, select that 85 | cat_tables[grepl("^all time series.*", cat_tables$item_name, ignore.case=TRUE),] 86 | } else { 87 | ## Else, select all tables 88 | cat_tables 89 | }; 90 | } else { 91 | ## Else, return only selected tables 92 | sel_tables <- cat_tables[grepl(sprintf("(%s)", paste(tables, collapse="|")), 93 | cat_tables$item_name, ignore.case=TRUE),] 94 | ## Stop if regular expression does not return any tables 95 | if (nrow(sel_tables) == 0) 96 | stop(paste("Specified table regular expressions do not match any table names, re-specify.")) 97 | } 98 | ## 
Select only the user specified tables ('sel_tables') 99 | sel_urls <- apply(sel_tables, 1, 100 | function(y) { 101 | ## If zip in path_zip, select zip file, else select xls(x) file 102 | if (any(grepl("\\.zip", y, ignore.case=TRUE))) { 103 | unique(grep("\\.zip", unlist(y), ignore.case=TRUE, value=TRUE)) 104 | } else { 105 | unique(grep("\\.xlsx*", unlist(y), ignore.case=TRUE, value=TRUE)) 106 | } 107 | }); 108 | ## Download ABS TSS/Data Cubes .. 109 | z <- lapply(sel_urls, abs_cat_download); 110 | z <- lapply(z, 111 | function(x) 112 | if (!grepl("\\.zip", x, ignore.case=TRUE)) { 113 | x 114 | } else { 115 | abs_cat_unzip(files=x) 116 | }); 117 | ## .. and combine into single data frame 118 | data <- lapply(z, function(x) abs_read_tss(x, na.rm=na.rm)); 119 | data <- do.call(rbind, data); 120 | rownames(data) <- 1:nrow(data); 121 | return(data); 122 | } 123 | 124 | 125 | #' @name abs_cat_tables 126 | #' @title Return ABS catalogue tables 127 | #' @description Return list of data tables available from specified ABS catalogue number. 128 | #' @importFrom rvest html_session html_text html_nodes html_attr follow_link 129 | #' @importFrom httr http_error 130 | #' @importFrom dplyr case_when bind_rows 131 | #' @param cat_no ABS catalogue numbers. 132 | #' @param releases Date or character string object specifying the month and year denoting which 133 | #' release to download. Default is "Latest", which downloads the latest available data. See 134 | #' examples for further details. 135 | #' @param types ABS publication types to return. Permissable options include one or more of: 'tss' 136 | #' -- ABS Time Series Spreadsheets, 'css' - ABS Data Cubes and 'pub' -- ABS Publications. The 137 | #' default returns all Time Series Spreadsheets and Data Cubes. 138 | #' @param include_urls Include full URLs to returned ABS data files. Default (FALSE) does not 139 | #' include data file URLs. 140 | #' @return Returns a data frame listing the data collection tables and URLs for Excel (column: 141 | #' \code{path_xls}) and, if available, Zip (column: \code{path_zip}) files. 
142 | #' @export 143 | #' @author David Mitchell 144 | #' @examples 145 | #' \donttest{ 146 | #' ## List latest available quarterly National Accounts tables 147 | #' ana_tables <- abs_cat_tables("5206.0", releases="Latest"); 148 | #' ana_tables_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 149 | #' 150 | #' ## List latest available CPI Time Series Spreadsheet tables only 151 | #' cpi_tables <- abs_cat_tables("6401.0", releases="Latest", types="tss"); 152 | #' cpi_tables_url <- abs_cat_tables("5206.0", releases="Latest", types="tss", include_urls=TRUE); 153 | #' 154 | #' ## List latest available ASGS Volume 3 Data Cubes 155 | #' asgs_vol3_tables <- abs_cat_tables("1270.0.55.003", releases="Latest", types="css"); 156 | #' asgs_vol3_tables_url <- abs_cat_tables("1270.0.55.003", releases="Latest", 157 | #' types="css", include_urls=TRUE); 158 | #' 159 | #' ## List latest available ASGS ANZSIC publications (PDF) files 160 | #' anzsic_2006 <- abs_cat_tables("1292.0", releases="Latest", types="pub", include_urls=TRUE); 161 | #' } 162 | abs_cat_tables <- function(cat_no, releases="Latest", types=c("tss", "css"), include_urls=FALSE) 163 | { 164 | ## if (FALSE) { 165 | ## -- DEBUGGING CODE -- 166 | ## cat_no <- "6401.0"; types <- "tss"; releases <- "Latest"; include_urls <- TRUE; 167 | ## cat_no <- "5209.0.55.001"; types <- "css"; releases <- "Latest"; include_urls <- TRUE; 168 | ## cat_no <- "1270.0.55.001"; releases <- "Latest"; types <- "css"; include_urls <- TRUE; 169 | ## cat_no <- "6202.0"; releases <- "Latest"; types <- "css"; include_urls <- TRUE; 170 | ## cat_no <- "3105.0.65.001"; releases <- "Latest"; types <- "css"; include_urls <- TRUE; 171 | ## } 172 | if (missing(cat_no)) 173 | stop("No cat_no supplied."); 174 | if (any(!types %in% c("tss", "css", "pub"))) 175 | stop("Allowable type arguments limited to one or more of: 'tss', 'css' or 'pub'."); 176 | if (!is.logical(include_urls)) 177 | stop("include_urls must be either TRUE or FALSE"); 178 | ## Spell out type -- for ABS website scraping 179 | types <- sapply(types, 180 | function(x) switch(x, 181 | "tss" = "Time Series Spreadsheet", 182 | "css" = "Data Cubes", 183 | "pub" = "Publication")); 184 | ## Create ABS URL and open session 185 | url <- file.path(abs_urls()$base_url, abs_urls()$ausstats_path, abs_urls()$mf_path, cat_no); 186 | ## Check for HTTP errors 187 | raustats_check_url_available(url); 188 | ## -- OLD CODE -- 189 | ## if (http_error(url)) 190 | ## stop(sprintf("File cannot be downloaded. 
Check URL: %s", url)) 191 | ## Open html session 192 | suppressWarnings(s <- html_session(url)); 193 | releases <- unique(releases); 194 | if (length(releases) == 1 && tolower(releases) == "latest") { 195 | .paths <- ""; 196 | } else { 197 | ## Get path to 'Past & Future Releases' page 198 | .paths <- html_nodes(s, "a"); 199 | .paths <- .paths[grepl(abs_urls()$releases_regex, .paths)]; 200 | .paths <- html_attr(.paths, "href"); 201 | s <- jump_to(s, .paths) 202 | .paths <- html_nodes(s, "a"); 203 | .paths <- .paths[grepl(paste(releases, collapse="|"), .paths, ignore.case=TRUE)]; 204 | .paths <- html_attr(.paths, "href"); 205 | } 206 | ## Return list of all downloadable files, for specified catalogue tables ('cat_tables') 207 | v <- lapply(.paths, 208 | function(x) { 209 | ## Check for HTTP errors 210 | ## raustats_check_url_available(file.path(s, x)); 211 | y <- jump_to(s, x) 212 | l <- follow_link(y, abs_urls()$downloads_regex) 213 | ht <- html_nodes(html_nodes(l, "table"), "table") 214 | ## Return data table 215 | ## The ABS data catalogue lists the data inside a HTML table within a table, i.e. 216 | ## 217 | ##
218 | ## 219 | ## The following nested apply functions, exploits this structure to extract the 220 | ## list of available publication types and associated links. 221 | all_nodes <- lapply(sapply(ht, function(x) html_nodes(x, "tr")), 222 | function(x) 223 | c(html_text(html_nodes(x, "td")), 224 | ## html_attr(html_nodes(html_nodes(x, "td"), "a"), "href"))); 225 | paste0(abs_urls()$base_url, 226 | html_attr(html_nodes(html_nodes(x, "td"), "a"), "href"))) 227 | ); 228 | ## Remove ABS data download section heading from all_nodes 229 | ## Where ABS data download section titles that include links, are included 230 | ## in the node set, but are not conformant with publication information. 231 | ## The following code block, removes these entries. 232 | data_nodes <- lapply(all_nodes, 233 | function(x) { 234 | if (grepl(paste(c("(^\\W{0,1}$)", 235 | "(^data\\s*cubes\\W*$)", 236 | "(^time series spreadsheet\\W*$)"), 237 | collapse="|"), 238 | x[1], ignore.case=TRUE)) { 239 | NULL 240 | } else { 241 | x 242 | } 243 | }) 244 | data_nodes <- data_nodes[-which(sapply(data_nodes, is.null))]; 245 | ## Tidy and return data set names and urls 246 | nodes <- data_nodes[unlist(lapply(data_nodes, 247 | function(x) 248 | any(grepl(sprintf("(%s)", 249 | paste(types, collapse="|")), 250 | x, ignore.case=TRUE)) & 251 | any(grepl("ausstats", x, ignore.case=TRUE)) 252 | ))]; 253 | ## Remove non-breaking spaces ( ), and blank entries 254 | nodes <- lapply(nodes, 255 | function(x) { 256 | z <- trimws(gsub("\u00a0", "", x)); ## Remove non-breaking spaces 257 | z <- replace(z, z == "", NA_character_); ## Replace blank objects with NA 258 | ## Set entries not starting with 'https*' with 'NA_character_' 259 | z[-1] <- replace(z[-1], 260 | !grepl("^https*.+", z[-1], ignore.case=TRUE), 261 | NA_character_); 262 | ## Set entries containing 'INotes' with 'NA_character_' 263 | z <- replace(z, 264 | grepl("INotes", z, ignore.case=TRUE), 265 | NA_character_); 266 | z <- z[!is.na(z)]; ## Remove NA objects 267 | ## Set object names: First element = 'item_name' 268 | names(z)[1] <- "item_name"; 269 | names(z)[-1] <- case_when( 270 | ## !grepl("(^https*|^Releases|INotes)", z, ignore.case=TRUE) ~ "item_name", 271 | grepl("\\.xlsx*", z[-1], ignore.case=TRUE) ~ "path_xls", 272 | grepl("\\.zip", z[-1], ignore.case=TRUE) ~ "path_zip", 273 | grepl("\\.pdf", z[-1], ignore.case=TRUE) ~ "path_pdf", 274 | TRUE ~ NA_character_) 275 | z <- as.data.frame(t(cbind.data.frame(z, deparse.level=1)), 276 | stringsAsFactors=FALSE); 277 | return(z); 278 | }); 279 | ## Tidy nodes into data.frame (using dplyr::bind_rows) 280 | dt <- suppressWarnings(bind_rows(nodes)) 281 | ## Lastly replace spaces in all URL paths with '%20' string 282 | for(name in grep("^path_", names(dt), ignore.case=TRUE, value=TRUE)) # names(dt)[-1] 283 | dt[,name] <- gsub("\\s+", "%20", dt[,name]); 284 | return(dt); 285 | }); 286 | ## Add catalogue number and release information to table 287 | v <- lapply(seq_along(v), 288 | function(i) { 289 | v[[i]]$release <- sub("^$", "Latest", releases[i]); 290 | v[[i]]$cat_no <- cat_no; 291 | as.data.frame(v) 292 | }); 293 | ## Bind all results together 294 | z <- do.call(rbind, v); 295 | ## If rbind breaks on different row names try: 296 | ## z <- do.call(function(...) 
rbind(..., make.row.names=FALSE), v); 297 | ## names(z) <- c("item_name", ..., "cat_no", "release"); 298 | z <- if (!include_urls) { 299 | z[,c("cat_no", "release", "item_name")] 300 | } else { 301 | z[,c("cat_no", "release", "item_name", 302 | names(z)[!names(z) %in% c("cat_no", "release", "item_name")])] 303 | } 304 | row.names(z) <- seq_len(nrow(z)); 305 | return(z) 306 | } 307 | 308 | 309 | #' @name abs_cat_releases 310 | #' @title Return ABS catalogue table releases 311 | #' @description Return list of all releases available for specified ABS catalogue number. 312 | #' @importFrom rvest html_session html_table html_text html_nodes html_attr follow_link 313 | #' @importFrom httr http_error 314 | #' @param cat_no ABS catalogue numbers. 315 | #' @param include_urls Include full path URL to specified ABS catalogue releases. Default (FALSE) 316 | #' does not include release URLs. 317 | #' @return Returns a data frame listing available ABS catalogue releases. 318 | #' @export 319 | #' @author David Mitchell 320 | #' @examples 321 | #' \donttest{ 322 | #' ## List all available quarterly National Accounts tables 323 | #' ana_releases <- abs_cat_releases("5206.0"); 324 | #' ana_release_urls <- abs_cat_releases("5206.0", include_urls=TRUE); 325 | #' 326 | #' ## List latest available CPI Time Series Spreadsheet tables only 327 | #' cpi_releases <- abs_cat_releases("6401.0"); 328 | #' cpi_release_urls <- abs_cat_releases("6401.0", include_urls=TRUE); 329 | #' } 330 | abs_cat_releases <- function(cat_no, include_urls=FALSE) 331 | { 332 | ## if (FALSE) { 333 | ## ## -- DEBUGGING CODE -- 334 | ## cat_no <- "5206.0" 335 | ## include_urls <- FALSE 336 | ## } 337 | if (missing(cat_no)) 338 | stop("No cat_no supplied."); 339 | if (!is.logical(include_urls)) 340 | stop("include_urls must be either TRUE or FALSE"); 341 | ## Create ABS URL and open session 342 | url <- file.path(abs_urls()$base_url, abs_urls()$ausstats_path, abs_urls()$mf_path, cat_no); 343 | ## Check for HTTP errors 344 | raustats_check_url_available(url) 345 | ## if (http_error(url)) 346 | ## stop(sprintf("File cannot be downloaded. Check URL: %s", url)) 347 | suppressWarnings(s <- html_session(url)); 348 | ## Get path to 'Past & Future Releases' page 349 | .paths <- html_nodes(s, "a"); 350 | .paths <- .paths[grepl(abs_urls()$releases_regex, .paths)]; 351 | .paths <- html_attr(.paths, "href"); 352 | s <- jump_to(s, .paths) 353 | ## Get list of available ABS catalogue releases (See: https://devhints.io/xpath for Xpath hints) 354 | .tables <- html_nodes(s, "table"); 355 | .tables <- .tables[grepl("Past Releases", .tables, ignore.case=TRUE)]; 356 | .paths <- html_nodes(.tables, "a"); 357 | ## Return results 358 | if (!include_urls) { 359 | z <- data.frame(releases = html_text(.paths)) 360 | } else { 361 | z <- data.frame(releases = html_text(.paths), 362 | urls = file.path(abs_urls()$base_url, 363 | abs_urls()$ausstats_path, 364 | html_attr(.paths, "href"))) 365 | } 366 | row.names(z) <- seq_len(nrow(z)); 367 | return(z) 368 | } 369 | 370 | 371 | #' @name abs_cat_download 372 | #' @title Function to download files from the ABS website and store locally 373 | #' @description Downloads specified ABS catalogue data files from the ABS website, using a valid ABS 374 | #' data table URL. 375 | #' @importFrom httr GET http_type http_error progress status_code write_disk 376 | #' @param data_url Character vector specifying an ABS data URLs. 377 | #' @param exdir Target directory for downloaded files (defaults to \code{tempdir()}). 
Directory is 378 | #' created if it doesn't exist. 379 | #' @return Downloads data from the ABS website and returns a character vector listing the location 380 | #' where files are saved. 381 | #' @export 382 | #' @author David Mitchell 383 | abs_cat_download <- function(data_url, exdir=tempdir()) { 384 | if (!dir.exists(exdir)) dir.create(exdir); 385 | local_filenames <- 386 | sapply(data_url[!is.na(data_url)], 387 | function(url) { 388 | this_filename <- abs_local_filename(url); 389 | ## Check if any data_urls are not ABS data URLs 390 | if (!grepl("^https*:\\/\\/www\\.abs\\.gov\\.au\\/ausstats.+", 391 | url, ignore.case=TRUE)) 392 | stop(sprintf("Invalid ABS url: %s", url)); 393 | ## 394 | ## -- Download files -- 395 | cat(sprintf("Downloading: %s", this_filename)); 396 | ## Check for errors 397 | raustats_check_url_available(url) 398 | resp <- GET(url, write_disk(file.path(exdir, this_filename), overwrite=TRUE), 399 | raustats_ua(), progress()); 400 | ## ## File download validation code based on: 401 | ## ## https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html 402 | ## if (http_error(resp)) { 403 | ## stop( 404 | ## sprintf( 405 | ## "ABS catalogue file request failed (Error code: %s)\nInvalid URL: %s", 406 | ## status_code(resp), 407 | ## url 408 | ## ), 409 | ## call. = FALSE 410 | ## ) 411 | ## } 412 | ## Check content-type is compliant 413 | if (!http_type(resp) %in% abs_filetypes()) { 414 | stop("ABS file request did not return Excel, Zip or PDF file", call. = FALSE) 415 | } 416 | return(file.path(exdir, this_filename)); 417 | }) 418 | ## local_filename <- abs_local_filename(data_url); 419 | ## ## Check if any data_urls are not ABS data URLs 420 | ## if (!grepl("^https*:\\/\\/www\\.abs\\.gov\\.au\\/ausstats.+", 421 | ## data_url, ignore.case=TRUE)) 422 | ## stop(sprintf("Invalid ABS url: %s", data_url)); 423 | ## ## 424 | ## ## -- Download files -- 425 | ## cat(sprintf("Downloading: %s", local_filename)); 426 | ## resp <- GET(data_url, write_disk(file.path(exdir, local_filename), overwrite=TRUE), 427 | ## raustats_ua(), progress()); 428 | ## ## File download validation code based on: 429 | ## ## https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html 430 | ## if (http_error(resp)) { 431 | ## stop( 432 | ## sprintf( 433 | ## "ABS catalogue file request failed (Error code: %s)\nInvalid URL: %s", 434 | ## status_code(resp), 435 | ## data_url 436 | ## ), 437 | ## call. = FALSE 438 | ## ) 439 | ## } 440 | ## ## Check content-type is compliant 441 | ## if (!http_type(resp) %in% abs_filetypes()) { 442 | ## stop("ABS file request did not return Excel, Zip or PDF file", call. = FALSE) 443 | ## } 444 | ## Return results 445 | ## return(file.path(exdir, local_filename)); 446 | return(local_filenames); 447 | } 448 | 449 | 450 | #' @name abs_local_filename 451 | #' @title Create local file names for storing downloaded ABS data files 452 | #' @description Function to create local filename from web-based file name. 453 | #' @param url Character vector specifying one or more ABS data URLs. 454 | #' @return Returns a local file names (character vector) in which downloaded files will be saved. 
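#' Illustrative sketch (hypothetical URL): the file name and the dd.mm.yyyy release date
#' embedded in an ausstats download URL are combined, so a URL containing
#' "...&5206001_key_aggregates.xls&...&01.03.2019&..." would map to
#' "5206001_key_aggregates_20190301.xls".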
455 | #' @author David Mitchell 456 | #' @keywords internal 457 | abs_local_filename <- function(url) 458 | { 459 | sprintf("%s_%s.%s", 460 | sub("^.+&(.+)\\.(zip|xlsx*|pdf)&.+$", "\\1", url), 461 | sub("^.+(\\d{2}).(\\d{2}).(\\d{4}).+$", "\\3\\2\\1", url), 462 | sub("^.+&(.+)\\.(zip|xlsx*|pdf)&.+$", "\\2", url)); 463 | } 464 | 465 | 466 | #' @name abs_cat_unzip 467 | #' @title Uncompress locally-stored ABS Catalogue data file archives 468 | #' @description Function to uncompress locally-stored ABS Catalogue data file archives. 469 | #' @importFrom utils unzip zip 470 | #' @param files One or more local zip files. 471 | #' @param exdir Target directory for extracted archive files. Directory is created if it doesn't 472 | #' exist. If missing, creates a new subdirectory in \code{tempdir()} using the respective zip 473 | #' files (specified in \code{files}. 474 | #' @return Returns a character vector listing the names of all files extracted. 475 | #' @export 476 | #' @author David Mitchell 477 | abs_cat_unzip <- function(files, exdir) { 478 | if (any(!file.exists(files))) 479 | stop(sprintf("Files %s do not exist", 480 | paste(files[!file.exists(files)], collapse=", "))); 481 | if (missing(exdir)) 482 | exdir <- tempdir(); 483 | ## Only extract from zip files 484 | files <- files[grepl("\\.zip$", files, ignore.case=TRUE)]; 485 | xl_files <- sapply(files, 486 | function(x) 487 | if (grepl("\\.zip$", x, ignore.case=TRUE)) { 488 | ## If exdir NOT missing, then use it 489 | if (exdir == tempdir()) { 490 | exdir <- file.path(exdir, sub("\\.zip", "", basename(x))); 491 | } else { 492 | ## Else, use tempdir() 493 | if (!dir.exists(exdir)) 494 | dir.create(exdir) 495 | } 496 | unzip(x, exdir=exdir); 497 | file.path(exdir, unzip(x, list=TRUE)$Name); 498 | } else { 499 | x; 500 | }); 501 | return(xl_files); 502 | } 503 | 504 | 505 | ### Function: abs_read_tss 506 | #' @name abs_read_tss 507 | #' @title Extract data from an ABS time series data file 508 | #' @description This function extracts time series data from ABS data files. 509 | #' @param files Names of one or more ABS data files 510 | #' @param type One of either 'tss' -- ABS Time Series Spreadsheet (the Default) or 'css' -- Data 511 | #' Cube.R 512 | #' @param na.rm logical. If \code{TRUE} (default), remove observations containing missing values. 513 | #' @return data frame in long format 514 | #' @export 515 | #' @author David Mitchell 516 | #' @examples 517 | #' \donttest{ 518 | #' ## Read specified ABS Excel time series files 519 | #' tables <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 520 | #' downloaded_tables <- abs_cat_download(tables$path_zip, exdir=tempdir()) 521 | #' extracted_files <- abs_cat_unzip(downloaded_tables) 522 | #' x <- abs_read_tss(extracted_files); 523 | #' } 524 | abs_read_tss <- function(files, type="tss", na.rm=TRUE) { 525 | x <- lapply(files, 526 | function(file) 527 | abs_read_tss_(file, type=type, na.rm=na.rm)); 528 | z <- do.call(rbind, x); 529 | # rownames(z) <- seq_len(nrow(z)); 530 | return(z); 531 | } 532 | 533 | 534 | ### Function: abs_read_tss_ 535 | #' @name abs_read_tss 536 | #' @title Read ABS time series data file(s) 537 | #' @description This is the internal function that extracts time series data from ABS data files. 
538 | #' @importFrom readxl read_excel excel_sheets 539 | #' @importFrom dplyr left_join 540 | #' @importFrom tidyr gather 541 | #' @importFrom stats complete.cases 542 | #' @param files Names of one or more ABS data files 543 | #' @param type One of either 'tss' -- ABS Time Series Spreadsheet (the Default) or 'css' -- Data 544 | #' Cube.R 545 | #' @param na.rm logical. If \code{TRUE} (default), remove observations containing missing values. 546 | #' @author David Mitchell 547 | #' @keywords internal 548 | abs_read_tss_ <- function(file, type="tss", na.rm=na.rm) { 549 | ## Avoid 'No visible binding for global variables' note 550 | { series_start <- series_end <- no_obs <- collection_month <- series_id <- value <- NULL } 551 | 552 | sheet_names <- tolower(excel_sheets(file)); 553 | if (!all(c("index", "data1") %in% sheet_names)) 554 | stop(sprintf("File: %s is not a valid ABS time series file.", basename(file))); 555 | ## -- Read metadata -- 556 | .meta <- read_excel(file, 557 | sheet = grep("index", excel_sheets(file), ignore.case=TRUE, value=TRUE), 558 | .name_repair = "minimal"); 559 | ## Return pre-header information from ABS files 560 | header_row <- which(sapply(1:nrow(.meta), 561 | function(i) 562 | grepl("series\\s*id", paste(.meta[i,], collapse=" "), 563 | ignore.case=TRUE))); 564 | metadata <- .meta; 565 | names(metadata) <- tolower(gsub("\\s","_", 566 | gsub("\\.", "", 567 | .meta[header_row,]))); ## Rename variables 568 | metadata <- metadata[-(1:header_row), !is.na(names(metadata))]; ## Drop header rows & empty columns 569 | metadata <- metadata[complete.cases(metadata),]; ## Drop NA rows 570 | metadata <- metadata[grepl("\\w\\d{4,7}\\w", metadata$series_id),]; ## Drop if Series ID invalid 571 | metadata <- transform(metadata, 572 | series_start = excel2Date(as.integer(series_start)), 573 | series_end = excel2Date(as.integer(series_end)), 574 | no_obs = as.integer(no_obs), 575 | collection_month = as.integer(collection_month)); 576 | ## 577 | ## Get publication details 578 | ## -- Catalogue number & name -- 579 | regex_catno_name <- "^.*(\\d{4}\\.\\d+(\\.\\d+)*)\\s+(.+)$"; 580 | catno_name <- sapply(1:header_row, 581 | function(i) 582 | grep(regex_catno_name, paste(.meta[i,], collapse=" "), 583 | ignore.case=TRUE, value=TRUE)); 584 | catno_name <- gsub("(\\s*NA)+", "", 585 | sub(regex_catno_name, "\\1|\\3", unlist(catno_name), ignore.case=TRUE)); 586 | catno_name <- trimws(unlist(strsplit(catno_name, split="\\|"))); 587 | ## 588 | ## -- Table number & name -- 589 | ## Note use of 'word' character \/here \/here for 13a, 6b, etc. 
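  ## Illustrative matches for the pattern below (example headers only):
  ##   "Table 1. Key National Accounts Aggregates"  ->  table number "1", title "Key National Accounts Aggregates"
  ##   "Tables 13a and 13b. Employed persons"       ->  table number "13a and 13b", title "Employed persons"
  ## The table number(s) are captured in \1 and the title in \4.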
590 | regex_table_name <- "^.*Tables*\\s+(\\w+(\\s+\\w+\\s+\\w+)*)(\\.|:)*\\s+(.+)$"; 591 | ## Note use of alternative separators: .|: ^here 592 | tableno_name <- sapply(1:header_row, 593 | function(i) 594 | grep(regex_table_name, 595 | paste(.meta[i,], collapse=" "), 596 | ignore.case=TRUE, value=TRUE)); 597 | tableno_name <- gsub("(\\s*NA)+", "", 598 | sub(regex_table_name, "\\1|\\4", unlist(tableno_name), ignore.case=TRUE)); 599 | tableno_name <- trimws(unlist(strsplit(tableno_name, split="\\|"))); 600 | ## 601 | ## Add publication details to metadata table 602 | metadata <- transform(metadata, 603 | catalogue_no = catno_name[1], 604 | publication_title = catno_name[2], 605 | table_no = tableno_name[1], 606 | table_title = tableno_name[2]); 607 | ## Extract data 608 | data <- lapply(grep("data", excel_sheets(file), ignore.case=TRUE, value=TRUE), 609 | function(sheet_name) { 610 | z <- read_excel(file, sheet=sheet_name, .name_repair = "minimal"); 611 | ## Return pre-header information from ABS files 612 | header_row <- which(sapply(1:nrow(z), 613 | function(i) 614 | grepl("series\\s*id", paste(z[i,], collapse=" "), 615 | ignore.case=TRUE))); 616 | names(z) <- gsub("\\s","_", 617 | gsub("\\.","", z[header_row,])); ## Rename variables 618 | names(z) <- sub("series_id", "date", names(z), ## Rename Series_ID field 619 | ignore.case=TRUE); 620 | z <- z[-(1:header_row), !is.na(names(z))]; ## Drop empty columns 621 | z <- gather(z, series_id, value, -date, convert=TRUE); ## Transform data to key:value pairs 622 | z <- transform(z, 623 | date = excel2Date(as.integer(date)), 624 | value = as.numeric(value)); 625 | names(z) <- tolower(names(z)); 626 | return(z); 627 | }); 628 | data <- do.call(rbind, data); 629 | data <- left_join(data, metadata, by="series_id"); 630 | if (na.rm) 631 | data <- data[complete.cases(data),] 632 | names(data) <- tolower(names(data)); 633 | return(data); 634 | } 635 | -------------------------------------------------------------------------------- /R/abs-series-abbreviation.R: -------------------------------------------------------------------------------- 1 | ## ========================================================================= 2 | ## Filename: 3 | ## Created: 4 | ## Updated: <2019-06-19 10:46:32 david at grover> 5 | ## Author: 6 | ## Description: 7 | ## 8 | ## 9 | ## ========================================================================= 10 | 11 | #' 12 | #' Table - ABS ANA Series IDs and series names 13 | #' 14 | #' ABS Series ID | Series abb | Series name 15 | #' :---------------|:--------------|:-------------------------------------- 16 | #' A2304402X | gdp_cv_sa | GDP Chain Volume measures: Seasonally Adjusted 17 | #' A2304340C | gdp_cv_tr | " : Trend 18 | #' A2302459A | gdp_cv_or | " : Original 19 | #' A2304408L | gdi_cv_sa | Gross Domestic Income, Chain Volume measures: Seasonally Adjusted 20 | #' A2304342J | gdi_cv_tr | " : Trend 21 | #' A2302463T | gdi_cv_or | " : Original 22 | #' A2304412C | gni_cv_sa | GNI Chain Volume measures: Seasonally Adjusted 23 | #' A2304344L | gni_cv_tr | " : Trend 24 | #' A2302464V | gni_cv_or | " : Original 25 | #' A2304414J | nndi_cv_sa | NNDI Chain Volume measures: Seasonally Adjusted 26 | #' A2304346T | nndi_cv_tr | " : Trend 27 | #' A2302465W | nndi_cv_or | " : Original 28 | #' A2304404C | gdppc_cv_sa | GDP per capita Chain volume measures: Seasonally Adjusted 29 | #' A2304336L | gdppc_cv_tr | " : Trend 30 | #' A2302459A | gdppc_cv_or | " : Original 31 | #' A2304113C | gne_cv_sa | GNE Chain Volume measures: Seasonally 
Adjusted 32 | #' A2304237F | gne_cv_tr | " : Trend 33 | #' A2302514F | gne_cv_or | " : Original 34 | #' A2304111X | dfd_cv_sa | DFD Chain Volume measures: Seasonally Adjusted 35 | #' A2304235A | dfd_cv_tr | " : Trend 36 | #' A2302519T | dfd_cv_or | " : Original 37 | #' A2304114F | exp_cv_sa | Exports Chain Volume measures: Seasonally Adjusted 38 | #' A2304238J | exp_cv_tr | " : Trend 39 | #' A2302520A | exp_cv_or | " : Original 40 | #' A2304115J | imp_cv_sa | Imports Chain Volume measures: Seasonally Adjusted 41 | #' A2304239K | imp_cv_sa | " : Trend 42 | #' A2302521C | imp_cv_sa | " : Original 43 | #' 44 | #' Notes 45 | #' GDI = GDP - ToT effects (GDI - Gross Domestic Income) 46 | #' NNDI = ?? (NNDI - Net National Disposable Income) 47 | #' DFD = GNE - Inventory change (DFD - Domestic Final Demand) 48 | #' 49 | #' 50 | 51 | #### Add human-readable series model names (abbreviations) 52 | ## -- TO DO - INCLUDE IN ABS data package GENERAL FUNCTIONS 53 | 54 | ana_series_abb <- function(x) { 55 | x %>% 56 | ## Table abbreviations 57 | mutate(series_abb = 58 | case_when(grepl("^key national accounts aggregates", table_title, ignore.case=TRUE) 59 | ~ "ana", 60 | grepl("^expenditure.+GDP", table_title, ignore.case=TRUE) 61 | ~ "gdpe", 62 | grepl("^income from.+GDP", table_title, ignore.case=TRUE) 63 | ~ "gdpi", 64 | grepl("^gross value added.+industry", table_title, ignore.case=TRUE) 65 | ~ "gva", 66 | grepl("^gross value added.+industry.+current price", table_title, ignore.case=TRUE) 67 | ~ "gvacp", 68 | grepl("^household.*final.*consumption.*expenditure", table_title, ignore.case=TRUE) 69 | ~ "hfce", 70 | TRUE ~ ""), 71 | ## Series abbreviations 72 | series_abb = 73 | paste0(series_abb, 74 | case_when(grepl("^gross domestic product", data_item_description, ignore.case=TRUE) 75 | ~ "_gdp", 76 | grepl("^gdp", data_item_description, ignore.case=TRUE) 77 | ~ "_gdp", 78 | grepl("^gross value added", data_item_description, ignore.case=TRUE) 79 | ~ "_gva", 80 | grepl("^net domestic product", data_item_description, ignore.case=TRUE) 81 | ~ "_ndp", 82 | grepl("^net domestic product", data_item_description, ignore.case=TRUE) 83 | ~ "_ndp", 84 | grepl("gross domestic income", data_item_description, ignore.case=TRUE) 85 | ~ "_gdi", 86 | grepl("gross national income", data_item_description, ignore.case=TRUE) 87 | ~ "_gni", 88 | grepl("net national disposable income", data_item_description, ignore.case=TRUE) 89 | ~ "_ndi", 90 | grepl("terms of trade", data_item_description, ignore.case=TRUE) 91 | ~ "_tot", 92 | grepl("gross national expenditure", data_item_description, ignore.case=TRUE) 93 | ~ "_gne", 94 | grepl("exports of goods and services", data_item_description, ignore.case=TRUE) 95 | ~ "_exp", 96 | grepl("imports of goods and services", data_item_description, ignore.case=TRUE) 97 | ~ "_imp", 98 | grepl("domestic final demand", data_item_description, ignore.case=TRUE) 99 | ~ "_dfd", 100 | grepl("change.+inventories", data_item_description, ignore.case=TRUE) 101 | ~ "_chinv", 102 | grepl("final consumption expenditure", data_item_description, ignore.case=TRUE) 103 | ~ "_fce", 104 | grepl("gross fixed capital formation", data_item_description, ignore.case=TRUE) 105 | ~ "_gfcf", 106 | grepl("state final demand", data_item_description, ignore.case=TRUE) 107 | ~ "_sfd", 108 | grepl("hours worked market sector", data_item_description, ignore.case=TRUE) 109 | ~ "_hrsmk", 110 | grepl("hours worked", data_item_description, ignore.case=TRUE) 111 | ~ "_hrstl", 112 | grepl("real unit.*labour cost.*non.*farm", 
data_item_description, ignore.case=TRUE) 113 | ~ "_rulcnf", 114 | grepl("real unit.*labour cost.*", data_item_description, ignore.case=TRUE) 115 | ~ "_rulc", 116 | grepl("household saving ratio", data_item_description, ignore.case=TRUE) 117 | ~ "_hsr", 118 | grepl("net saving", data_item_description, ignore.case=TRUE) 119 | ~ "_netsav", 120 | grepl("statistical discrepancy", data_item_description, ignore.case=TRUE) 121 | ~ "_statdis", 122 | ## Industry gross value added 123 | ## grepl(sprintf("\\(%s\\)\\s*;", paste(letters, collapse="|")), 124 | ## data_item_description, ignore.case=TRUE) 125 | ## ~ sub(sprintf(".+\\((%s)\\)\\s*;.*", paste(letters, collapse="|")), 126 | ## tolower("_div\\1"), data_item_description, ignore.case=TRUE), 127 | ## Division A - Agriculture, forestry & fishing 128 | grepl("\\(a\\).+Agriculture", data_item_description, ignore.case=TRUE) 129 | ~ "_diva_ag", 130 | grepl("\\(a\\).+Forestry.*fishing", data_item_description, ignore.case=TRUE) 131 | ~ "_diva_ff", 132 | grepl("\\(a\\).+;$", data_item_description, ignore.case=TRUE) 133 | ~ "_diva_tot", 134 | ## Division B - Mining 135 | grepl("\\(b\\).+coal.*mining", data_item_description, ignore.case=TRUE) 136 | ~ "_divb_cl", 137 | grepl("\\(b\\).+oil.*gas", data_item_description, ignore.case=TRUE) 138 | ~ "_divb_og", 139 | grepl("\\(b\\).+iron.*ore", data_item_description, ignore.case=TRUE) 140 | ~ "_divb_fe", 141 | grepl("\\(b\\).+other.*mining", data_item_description, ignore.case=TRUE) 142 | ~ "_divb_ot", 143 | grepl("\\(b\\).+mining.*excluding.*exploration", data_item_description, 144 | ignore.case=TRUE) 145 | ~ "_divb_mn", 146 | grepl("\\(b\\).+exploration.*support", data_item_description, 147 | ignore.case=TRUE) 148 | ~ "_divb_es", 149 | grepl("\\(b\\).+;$", data_item_description, ignore.case=TRUE) 150 | ~ "_divb_tot", 151 | ## Division C - Manufacturing 152 | grepl("\\(c\\).+food.*beverage", data_item_description, ignore.case=TRUE) 153 | ~ "_divc_fb", 154 | grepl("\\(c\\).+petroleum.*coal", data_item_description, ignore.case=TRUE) 155 | ~ "_divc_pc", 156 | grepl("\\(c\\).+metal.*products", data_item_description, ignore.case=TRUE) 157 | ~ "_divc_mt", 158 | grepl("\\(c\\).+machinery.*equipment", data_item_description, ignore.case=TRUE) 159 | ~ "_divc_mc", 160 | grepl("\\(c\\).+other.*manufacturing", data_item_description, 161 | ignore.case=TRUE) 162 | ~ "_divc_ot", 163 | grepl("\\(c\\).+;$", data_item_description, ignore.case=TRUE) 164 | ~ "_divc_tot", 165 | ## Division D - Utilities 166 | grepl("\\(d\\).+electricity", data_item_description, ignore.case=TRUE) 167 | ~ "_divd_el", 168 | grepl("\\(d\\).+gas", data_item_description, ignore.case=TRUE) 169 | ~ "_divd_gs", 170 | grepl("\\(d\\).+water.*supply", data_item_description, ignore.case=TRUE) 171 | ~ "_divd_wt", 172 | grepl("\\(d\\).+;$", data_item_description, ignore.case=TRUE) 173 | ~ "_divd_tot", 174 | ## Division E - Construction 175 | grepl("\\(e\\).+building.*construction", data_item_description, ignore.case=TRUE) 176 | ~ "_dive_bc", 177 | grepl("\\(e\\).+civil.*engineering", data_item_description, ignore.case=TRUE) 178 | ~ "_dive_ce", 179 | grepl("\\(e\\).+construction.*services", data_item_description, ignore.case=TRUE) 180 | ~ "_dive_cs", 181 | grepl("\\(e\\).+;$", data_item_description, ignore.case=TRUE) 182 | ~ "_dive_tot", 183 | ## Division F - Wholesale trade 184 | grepl("\\(f\\).+;$", data_item_description, ignore.case=TRUE) 185 | ~ "_divf_tot", 186 | ## Division G - Retail trade 187 | grepl("\\(g\\).+;$", data_item_description, ignore.case=TRUE) 188 
| ~ "_divg_tot", 189 | ## Division H - Accommodation & food services 190 | grepl("\\(h\\).+;$", data_item_description, ignore.case=TRUE) 191 | ~ "_divh_tot", 192 | ## Division I - Transport 193 | grepl("\\(i\\).+road", data_item_description, ignore.case=TRUE) 194 | ~ "_divi_rd", 195 | grepl("\\(i\\).+air.*space", data_item_description, ignore.case=TRUE) 196 | ~ "_divi_as", 197 | grepl("\\(i\\).+rail.*pipeline", data_item_description, ignore.case=TRUE) 198 | ~ "_divi_rl", 199 | grepl("\\(i\\).+postal.*storage", data_item_description, ignore.case=TRUE) 200 | ~ "_divi_ps", 201 | grepl("\\(i\\).+;$", data_item_description, ignore.case=TRUE) 202 | ~ "_divi_tot", 203 | ## Division J - Telecommunications 204 | grepl("\\(j\\).+telecommunications", data_item_description, ignore.case=TRUE) 205 | ~ "_divj_tl", 206 | grepl("\\(j\\).+other.*information", data_item_description, ignore.case=TRUE) 207 | ~ "_divj_ot", 208 | grepl("\\(j\\).+;$", data_item_description, ignore.case=TRUE) 209 | ~ "_divj_tot", 210 | ## Division K - Finance & insurance 211 | grepl("\\(k\\).+finance", data_item_description, ignore.case=TRUE) 212 | ~ "_divk_fn", 213 | grepl("\\(k\\).+other.*financial", data_item_description, ignore.case=TRUE) 214 | ~ "_divk_ot", 215 | grepl("\\(k\\).+;$", data_item_description, ignore.case=TRUE) 216 | ~ "_divk_tot", 217 | ## Division L - Rental, hiring & real estate 218 | grepl("\\(l\\).+rental.*hiring", data_item_description, ignore.case=TRUE) 219 | ~ "_divl_rh", 220 | grepl("\\(l\\).+real.*estate", data_item_description, ignore.case=TRUE) 221 | ~ "_divl_re", 222 | grepl("\\(l\\).+;$", data_item_description, ignore.case=TRUE) 223 | ~ "_divl_tot", 224 | ## Division M - Professional and scientific services 225 | grepl("\\(m\\).+computer.*system", data_item_description, ignore.case=TRUE) 226 | ~ "_divm_cs", 227 | grepl("\\(m\\).+other.*professional", data_item_description, ignore.case=TRUE) 228 | ~ "_divm_op", 229 | grepl("\\(m\\).+;$", data_item_description, ignore.case=TRUE) 230 | ~ "_divm_tot", 231 | ## Division N - Administrative & support services 232 | grepl("\\(n\\).+;$", data_item_description, ignore.case=TRUE) 233 | ~ "_divn_tot", 234 | ## Division O - Public administration & safety 235 | grepl("\\(o\\).+;$", data_item_description, ignore.case=TRUE) 236 | ~ "_divo_tot", 237 | ## Division P - Education and training 238 | grepl("\\(p\\).+;$", data_item_description, ignore.case=TRUE) 239 | ~ "_divp_tot", 240 | ## Division Q - Health care & social assistance 241 | grepl("\\(q\\).+;$", data_item_description, ignore.case=TRUE) 242 | ~ "_divq_tot", 243 | ## Division R - Arts and recreation services 244 | grepl("\\(r\\).+;$", data_item_description, ignore.case=TRUE) 245 | ~ "_divr_tot", 246 | ## Division S - Other services 247 | grepl("\\(s\\).+;$", data_item_description, ignore.case=TRUE) 248 | ~ "_divs_tot", 249 | ## Ownership of dwellings 250 | grepl("ownership.*dwellings", data_item_description, ignore.case=TRUE) 251 | ~ "_dwell", 252 | grepl("taxes less subsidies", data_item_description, ignore.case=TRUE) 253 | ~ "_nettax", 254 | grepl("gross value added at basi prices taxes less", data_item_description, 255 | ignore.case=TRUE) 256 | ~ "_nettax", 257 | ## Household Final Consumption Expenditure items 258 | grepl("Food", data_item_description, ignore.case=TRUE) 259 | ~ "_01_food", 260 | grepl("Alcoholic.*beverage.*cigarettes.*tobacco", data_item_description, 261 | ignore.case=TRUE) 262 | ~ "_02_albt", 263 | grepl("Cigarettes.*tobacco", data_item_description, ignore.case=TRUE) 264 | ~ 
"_02a_tabc", 265 | grepl("Alcoholic.*beverage", data_item_description, ignore.case=TRUE) 266 | ~ "_02b_abev", 267 | grepl("Clothing.*footwear", data_item_description, ignore.case=TRUE) 268 | ~ "_03_clft", 269 | grepl("Housing.*water.*electricity.*gas", data_item_description, ignore.case=TRUE) 270 | ~ "_04_hhsv", 271 | grepl("Rent.*other.*dwelling.*services", data_item_description, ignore.case=TRUE) 272 | ~ "_04a_rnts", 273 | grepl("Actual.*imputed.*rent", data_item_description, ignore.case=TRUE) 274 | ~ "_04b_rent", 275 | grepl("Electricity.*gas.*other.*fuel", data_item_description, ignore.case=TRUE) 276 | ~ "_04c_util", 277 | grepl("Water.*sewerage.*charges", data_item_description, ignore.case=TRUE) 278 | ~ "_04d_watr", 279 | grepl("Furnishings.*household.*equip", data_item_description, ignore.case=TRUE) 280 | ~ "_05_furn", 281 | grepl("Furniture.*floor.*coverings", data_item_description, ignore.case=TRUE) 282 | ~ "_05a_furn", 283 | grepl("Household.*appliances", data_item_description, ignore.case=TRUE) 284 | ~ "_05b_appl", 285 | grepl("Household.*tools", data_item_description, ignore.case=TRUE) 286 | ~ "_05c_tool", 287 | grepl("Health", data_item_description, ignore.case=TRUE) 288 | ~ "_06_hlth", 289 | grepl("Medicines", data_item_description, ignore.case=TRUE) 290 | ~ "_06a_hlth", 291 | grepl("Total.*health.*services", data_item_description, ignore.case=TRUE) 292 | ~ "_06b_hlth", 293 | grepl("Purchase.*vehicles", data_item_description, ignore.case=TRUE) 294 | ~ "_07a_vcpx", 295 | grepl("Operation.*vehicles", data_item_description, ignore.case=TRUE) 296 | ~ "_07b_vopx", 297 | grepl("Transport.*services", data_item_description, ignore.case=TRUE) 298 | ~ "_07c_tran", 299 | grepl("Transport", data_item_description, ignore.case=TRUE) 300 | ~ "_07_tran", 301 | grepl("Communications", data_item_description, ignore.case=TRUE) 302 | ~ "_08_comm", 303 | grepl("Goods.*for.*recreation.*culture", data_item_description, ignore.case=TRUE) 304 | ~ "_09a_recg", 305 | grepl("^Recreational.*cultural.*services", data_item_description, 306 | ignore.case=TRUE) 307 | ~ "_09b_recs", 308 | grepl("Sporting.*recreational.*cultural.*services", data_item_description, 309 | ignore.case=TRUE) 310 | ~ "_09c_sprt", 311 | grepl("Net.*losses.*gambling", data_item_description, ignore.case=TRUE) 312 | ~ "_09d_gamb", 313 | grepl("Newspapers.*books.*stationery", data_item_description, ignore.case=TRUE) 314 | ~ "_09e_news", 315 | grepl("Recreation.*culture", data_item_description, ignore.case=TRUE) 316 | ~ "_09_recc", 317 | grepl("Education.*services", data_item_description, ignore.case=TRUE) 318 | ~ "_10_educ", 319 | grepl("Hotels.*cafes.*restaurants", data_item_description, ignore.case=TRUE) 320 | ~ "_11_acrs", 321 | grepl("Catering.*services", data_item_description, ignore.case=TRUE) 322 | ~ "_11a_cats", 323 | grepl("Accommodation.*services", data_item_description, ignore.case=TRUE) 324 | ~ "_11b_accs", 325 | grepl("Miscellaneous.*goods.*services", data_item_description, ignore.case=TRUE) 326 | ~ "_12_misc", 327 | grepl("Other.*goods", data_item_description, ignore.case=TRUE) 328 | ~ "_12a_othg", 329 | grepl("Insurance.*financial.*services", data_item_description, ignore.case=TRUE) 330 | ~ "_12b_fins", 331 | grepl("Other.*services", data_item_description, ignore.case=TRUE) 332 | ~ "_12c_oths", 333 | grepl("Net.*expenditure.*overseas", data_item_description, ignore.case=TRUE) 334 | ~ "_neo", 335 | grepl("Final.*consumption.*expenditure", data_item_description, ignore.case=TRUE) 336 | ~ "_totc", 337 | TRUE ~ "")), 338 | ## 
339 | ## Per capita/hour worked series 340 | series_abb = paste0(series_abb, 341 | case_when(grepl("per capita", data_item_description, ignore.case=TRUE) 342 | ~ "pc", 343 | grepl("per hour", data_item_description, ignore.case=TRUE) 344 | ~ "ph", 345 | TRUE ~ "")), 346 | ## 347 | ## Households, government, private, public 348 | series_abb = paste0(series_abb, 349 | case_when(grepl("general government", data_item_description, ignore.case=TRUE) 350 | ~ "_gov", 351 | TRUE ~ "")), 352 | ## -- General government options 353 | series_abb = paste0(series_abb, 354 | case_when(grepl("general government", 355 | data_item_description, ignore.case=TRUE) ~ "_gov", 356 | grepl("general government.+national", 357 | data_item_description, ignore.case=TRUE) ~ "_nat", 358 | grepl("general government.+national.+non-defence", 359 | data_item_description, ignore.case=TRUE) ~ "_ndf", 360 | grepl("general government.+national.+defence", 361 | data_item_description, ignore.case=TRUE) ~ "_def", 362 | grepl("general government.+state and local", 363 | data_item_description, ignore.case=TRUE) ~ "_stl", 364 | grepl("households", 365 | data_item_description, ignore.case=TRUE) ~ "_hhld", 366 | grepl("all sectors", 367 | data_item_description, ignore.case=TRUE) ~ "_tot", 368 | grepl("private", 369 | data_item_description, ignore.case=TRUE) ~ "_priv", 370 | grepl("public", data_item_description, ignore.case=TRUE) 371 | ~ "_pub", 372 | TRUE ~ "")), 373 | ## 374 | ## States/territories 375 | series_abb = paste0(series_abb, 376 | case_when(grepl("new south wales", 377 | data_item_description, ignore.case=TRUE) 378 | ~ "_nsw", 379 | grepl("victoria", data_item_description, ignore.case=TRUE) 380 | ~ "_vic", 381 | grepl("queensland", data_item_description, ignore.case=TRUE) 382 | ~ "_qld", 383 | grepl("south australia", data_item_description, ignore.case=TRUE) 384 | ~ "_sa", 385 | grepl("western australia", data_item_description, ignore.case=TRUE) 386 | ~ "_wa", 387 | grepl("tasmania", data_item_description, ignore.case=TRUE) 388 | ~ "_tas", 389 | grepl("northern territory", data_item_description, ignore.case=TRUE) 390 | ~ "_nt", 391 | grepl("australian capital territory", data_item_description, ignore.case=TRUE) 392 | ~ "_act", 393 | TRUE ~ "")), 394 | ## 395 | ## Chain volume/current prices 396 | series_abb = paste0(series_abb, 397 | case_when(grepl("chain volume measures", data_item_description, ignore.case=TRUE) | 398 | grepl("chain volume measures", table_title, ignore.case=TRUE) 399 | ~ "_cv", 400 | grepl("current prices", data_item_description, ignore.case=TRUE) | 401 | grepl("current prices", table_title, ignore.case=TRUE) 402 | ~ "_cp", 403 | grepl("price indexes", data_item_description, ignore.case=TRUE) | 404 | grepl("price indexes", table_title, ignore.case=TRUE) 405 | ~ "_ix", 406 | grepl("implicit price deflators", data_item_description, ignore.case=TRUE) | 407 | grepl("implicit price deflators", table_title, ignore.case=TRUE) 408 | ~ "_pd", 409 | TRUE ~ "")), 410 | ## 411 | ## Original/seasonally adjusted/trend/index 412 | series_abb = paste0(series_abb, 413 | case_when(grepl("original", series_type, ignore.case=TRUE) 414 | ~ "_or", 415 | grepl("seasonally adjusted", series_type, ignore.case=TRUE) 416 | ~ "_sa", 417 | grepl("trend", series_type, ignore.case=TRUE) 418 | ~ "_tr", 419 | TRUE ~ "")), 420 | ## 421 | ## Percentage change/ratio/index 422 | series_abb = paste0(series_abb, 423 | case_when(grepl("percent", unit, ignore.case=TRUE) 424 | ~ "_pc", 425 | grepl("\\$.*(million)*", unit, ignore.case=TRUE) 426 | 
~ "_aud", 427 | grepl("index", unit, ignore.case=TRUE) 428 | ~ "_ix", 429 | grepl("proportion", unit, ignore.case=TRUE) 430 | ~ "_rt", 431 | TRUE ~ "")) 432 | ); 433 | } 434 | 435 | ## ana_series_abb <- function(x) { 436 | ## x %>% 437 | ## ## Series abbreviations 438 | ## mutate(series_abb = ifelse(grepl("^gross domestic product", data_item_description, ignore.case=TRUE), 439 | ## "gdp", "")) %>% 440 | ## mutate(series_abb = ifelse(grepl("^gdp", data_item_description, ignore.case=TRUE), 441 | ## "gdp", series_abb)) %>% 442 | ## mutate(series_abb = ifelse(grepl("^gross value added", data_item_description, ignore.case=TRUE), 443 | ## "gva", series_abb)) %>% 444 | ## mutate(series_abb = ifelse(grepl("^net domestic product", data_item_description, ignore.case=TRUE), 445 | ## "ndp", series_abb)) %>% 446 | ## mutate(series_abb = ifelse(grepl("^net domestic product", data_item_description, ignore.case=TRUE), 447 | ## "ndp", series_abb)) %>% 448 | ## mutate(series_abb = ifelse(grepl("gross domestic income", data_item_description, ignore.case=TRUE), 449 | ## "gdi", series_abb)) %>% 450 | ## mutate(series_abb = ifelse(grepl("gross national income", data_item_description, ignore.case=TRUE), 451 | ## "gni", series_abb)) %>% 452 | ## mutate(series_abb = ifelse(grepl("net national disposable income", data_item_description, ignore.case=TRUE), 453 | ## "ndi", series_abb)) %>% 454 | ## mutate(series_abb = ifelse(grepl("terms of trade", data_item_description, ignore.case=TRUE), 455 | ## "tot", series_abb)) %>% 456 | ## mutate(series_abb = ifelse(grepl("gross national expenditure", data_item_description, ignore.case=TRUE), 457 | ## "gne", series_abb)) %>% 458 | ## mutate(series_abb = ifelse(grepl("exports of goods and services", data_item_description, ignore.case=TRUE), 459 | ## "exp", series_abb)) %>% 460 | ## mutate(series_abb = ifelse(grepl("imports of goods and services", data_item_description, ignore.case=TRUE), 461 | ## "imp", series_abb)) %>% 462 | ## mutate(series_abb = ifelse(grepl("final consumption expenditure", data_item_description, ignore.case=TRUE), 463 | ## "fce", series_abb)) %>% 464 | ## mutate(series_abb = ifelse(grepl("gross fixed capital formation", data_item_description, ignore.case=TRUE), 465 | ## "gfcf", series_abb)) %>% 466 | ## mutate(series_abb = ifelse(grepl("state final demand", data_item_description, ignore.case=TRUE), 467 | ## "sfd", series_abb)) %>% 468 | ## ## 469 | ## ## Per capita series 470 | ## mutate(series_abb = ifelse(grepl("per capita", data_item_description, ignore.case=TRUE), 471 | ## paste0(series_abb, "pc"), series_abb)) %>% 472 | ## ## 473 | ## ## Households, government, private, public 474 | ## mutate(series_abb = ifelse(grepl("general government", data_item_description, ignore.case=TRUE), 475 | ## paste0(series_abb, "_gov"), series_abb)) %>% 476 | ## ## -- General government options 477 | ## mutate(series_abb = ifelse(grepl("general government.+national", 478 | ## data_item_description, ignore.case=TRUE), 479 | ## paste0(series_abb, "_nat"), series_abb)) %>% 480 | ## mutate(series_abb = ifelse(grepl("general government.+national.+defence", 481 | ## data_item_description, ignore.case=TRUE), 482 | ## paste0(series_abb, "_def"), series_abb)) %>% 483 | ## mutate(series_abb = ifelse(grepl("general government.+national.+non-defence", 484 | ## data_item_description, ignore.case=TRUE), 485 | ## sub("_def", "_ndf", series_abb), series_abb)) %>% 486 | ## mutate(series_abb = ifelse(grepl("general government.+state and local", 487 | ## data_item_description, 
ignore.case=TRUE), 488 | ## paste0(series_abb, "_stl"), series_abb)) %>% 489 | ## mutate(series_abb = ifelse(grepl("households", data_item_description, ignore.case=TRUE), 490 | ## paste0(series_abb, "_hhld"), series_abb)) %>% 491 | ## mutate(series_abb = ifelse(grepl("all sectors", data_item_description, ignore.case=TRUE), 492 | ## paste0(series_abb, "_tot"), series_abb)) %>% 493 | ## mutate(series_abb = ifelse(grepl("private", data_item_description, ignore.case=TRUE), 494 | ## paste0(series_abb, "_priv"), series_abb)) %>% 495 | ## mutate(series_abb = ifelse(grepl("public", data_item_description, ignore.case=TRUE), 496 | ## paste0(series_abb, "_pub"), series_abb)) %>% 497 | ## ## 498 | ## ## States/territories 499 | ## mutate(series_abb = ifelse(grepl("new south wales", data_item_description, ignore.case=TRUE), 500 | ## paste0(series_abb, "_nsw"), series_abb)) %>% 501 | ## mutate(series_abb = ifelse(grepl("victoria", data_item_description, ignore.case=TRUE), 502 | ## paste0(series_abb, "_vic"), series_abb)) %>% 503 | ## mutate(series_abb = ifelse(grepl("queensland", data_item_description, ignore.case=TRUE), 504 | ## paste0(series_abb, "_qld"), series_abb)) %>% 505 | ## mutate(series_abb = ifelse(grepl("south australia", data_item_description, ignore.case=TRUE), 506 | ## paste0(series_abb, "_sa"), series_abb)) %>% 507 | ## mutate(series_abb = ifelse(grepl("western australia", data_item_description, ignore.case=TRUE), 508 | ## paste0(series_abb, "_wa"), series_abb)) %>% 509 | ## mutate(series_abb = ifelse(grepl("tasmania", data_item_description, ignore.case=TRUE), 510 | ## paste0(series_abb, "_tas"), series_abb)) %>% 511 | ## mutate(series_abb = ifelse(grepl("northern territory", data_item_description, ignore.case=TRUE), 512 | ## paste0(series_abb, "_nt"), series_abb)) %>% 513 | ## mutate(series_abb = ifelse(grepl("australian capital territory", data_item_description, ignore.case=TRUE), 514 | ## paste0(series_abb, "_act"), series_abb)) %>% 515 | ## ## 516 | ## ## Chain volume/current prices 517 | ## mutate(series_abb = ifelse(grepl("chain volume measures", data_item_description, ignore.case=TRUE) | 518 | ## grepl("chain volume measures", table_title, ignore.case=TRUE), 519 | ## paste0(series_abb, "_cv"), series_abb)) %>% 520 | ## mutate(series_abb = ifelse(grepl("current prices", data_item_description, ignore.case=TRUE) | 521 | ## grepl("current prices", table_title, ignore.case=TRUE), 522 | ## paste0(series_abb, "_cp"), series_abb)) %>% 523 | ## ## 524 | ## ## Original/seasonally adjusted/trend/index 525 | ## mutate(series_abb = ifelse(grepl("original", series_type, ignore.case=TRUE), 526 | ## paste0(series_abb, "_or"), series_abb)) %>% 527 | ## mutate(series_abb = ifelse(grepl("seasonally adjusted", series_type, ignore.case=TRUE), 528 | ## paste0(series_abb, "_sa"), series_abb)) %>% 529 | ## mutate(series_abb = ifelse(grepl("trend", series_type, ignore.case=TRUE), 530 | ## paste0(series_abb, "_tr"), series_abb)) %>% 531 | ## ## 532 | ## ## Percentage change/ratio/index 533 | ## mutate(series_abb = ifelse(grepl("percent", unit, ignore.case=TRUE), 534 | ## paste0(series_abb, "_pc"), series_abb)) %>% 535 | ## mutate(series_abb = ifelse(grepl("\\$.*million", unit, ignore.case=TRUE), 536 | ## paste0(series_abb, "_aud"), series_abb)) %>% 537 | ## mutate(series_abb = ifelse(grepl("index", unit, ignore.case=TRUE), 538 | ## paste0(series_abb, "_ix"), series_abb)) %>% 539 | ## mutate(series_abb = ifelse(grepl("proportion", unit, ignore.case=TRUE), 540 | ## paste0(series_abb, "_rt"), series_abb)) 541 
| ## } 542 | 543 | 544 | 545 | 546 | ## ppi_series_abb <- function(x) { 547 | ## x %>% 548 | ## ## Publication abbreviations 549 | ## mutate(series_abb = ifelse(grepl("^producer price indexes", publication_title, ignore.case=TRUE), 550 | ## "ppi", "")) %>% 551 | ## ## Sector abbreviations 552 | ## mutate(series_abb = ifelse(grepl("transport.+warehousing", table_title, ignore.case=TRUE), 553 | ## paste0(series_abb, "_tr"), series_abb)) %>% 554 | ## ## Series abbreviations 555 | ## mutate(series_abb = 556 | ## paste0(series_abb, 557 | ## case_when(grepl("road freight", data_item_description, ignore.case=TRUE) ~ "_rdfrt", 558 | ## grepl("urban bus", data_item_description, ignore.case=TRUE) ~ "_ubus", 559 | ## grepl("taxi", data_item_description, ignore.case=TRUE) ~ "_taxi", 560 | ## grepl("rail freight", data_item_description, ignore.case=TRUE) ~ "_rlfrt", 561 | ## grepl("water freight", data_item_description, ignore.case=TRUE) ~ "_wtfrt", 562 | ## grepl("pipeline", data_item_description, ignore.case=TRUE) ~ "_pipe", 563 | ## grepl("postal and courier", data_item_description, ignore.case=TRUE) ~ "_pstl", 564 | ## grepl("courier pick-up", data_item_description, ignore.case=TRUE) ~ "_cour", 565 | ## grepl("water transport support", data_item_description, ignore.case=TRUE) ~ "_wtspt", 566 | ## grepl("stevedoring", data_item_description, ignore.case=TRUE) ~ "_wtstv", 567 | ## grepl("port and water transport", data_item_description, ignore.case=TRUE) ~ "_wtprt", 568 | ## grepl("other water", data_item_description, ignore.case=TRUE) ~ "_wtoth", 569 | ## grepl("airport operations", data_item_description, ignore.case=TRUE) ~ "_arprt", 570 | ## grepl("customs agency", data_item_description, ignore.case=TRUE) ~ "_svcust", 571 | ## grepl("warehousing and storage", data_item_description, ignore.case=TRUE) ~ "_whgen", 572 | ## grepl("grain storage", data_item_description, ignore.case=TRUE) ~ "_whgrn", 573 | ## grepl("other warehousing", data_item_description, ignore.case=TRUE) ~ "_whoth", 574 | ## TRUE ~ "")) 575 | ## ) %>% 576 | ## ## Percentage change/ratio/index 577 | ## mutate(series_abb = 578 | ## paste0(series_abb, 579 | ## case_when(grepl("percent", unit, ignore.case=TRUE) ~ "_pc", 580 | ## grepl("\\$.*million", unit, ignore.case=TRUE) ~ "_aud", 581 | ## grepl("index", unit, ignore.case=TRUE) ~ "_ix", 582 | ## grepl("proportion", unit, ignore.case=TRUE) ~ "_rt", 583 | ## TRUE ~ "")) 584 | ## ); 585 | ## } 586 | 587 | 588 | ## =============================== EOF ===================================== 589 | --------------------------------------------------------------------------------
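A minimal usage sketch showing how the draft ana_series_abb() helper above could be chained with the package's exported catalogue functions. This is only an illustration: it assumes R/abs-series-abbreviation.R is sourced manually from a checkout (the file is listed in .Rbuildignore, so it is not built into the installed package), and that abs_read_tss() returns the lower-cased metadata columns the helper references (table_title, data_item_description, series_type, unit), which come from the ABS 'Index' sheet headers.

library(dplyr)
library(raustats)

## List, download and extract the latest Australian National Accounts tables (ABS Cat. no. 5206.0)
tables    <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE)
zip_files <- abs_cat_download(tables$path_zip, exdir=tempdir())
xl_files  <- abs_cat_unzip(zip_files)

## Read the extracted time series spreadsheets into a single long-format data frame
ana <- abs_read_tss(xl_files)

## Append human-readable series abbreviations using the draft helper above
## (requires: source("R/abs-series-abbreviation.R") from a checkout of this repository)
ana <- ana_series_abb(ana)
head(ana[, c("series_id", "date", "value", "series_abb")])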