├── tests ├── testthat.R └── testthat │ ├── test-url-availability.R │ ├── test-rba-functions.R │ ├── test-abs-cat-functions.R │ └── test-abs-api-functions.R ├── data ├── abs_cachelist.rda ├── rba_cachelist.rda ├── abs_cat_cachelist.rda └── aus_state_codes.rda ├── .gitignore ├── R ├── z-release-questions.R ├── zzz.R ├── raustats.R ├── z-debugging-code.R ├── httr-utilities.R ├── data.R ├── date-utilities.R ├── z-unused-functions.R ├── rba-functions.R ├── abs-api-functions.R ├── abs-cat-functions.R └── abs-series-abbreviation.R ├── data-raw ├── Australian-States-Territories.csv ├── build-data.R └── ABS-TSS-Catalogue-Numbers.csv ├── .Rbuildignore ├── man ├── excel2Date.Rd ├── last_day.Rd ├── raustats_ua.Rd ├── rba_table_cache.Rd ├── rba_urls.Rd ├── abs_call_api.Rd ├── fin_year.Rd ├── abs_cachelist.Rd ├── aus_state_codes.Rd ├── abs_api_urls.Rd ├── abs_filetypes.Rd ├── abs_api_call.Rd ├── raustats.Rd ├── abs_local_filename.Rd ├── abs_urls.Rd ├── rba_read_tss.Rd ├── abs_cat_cachelist.Rd ├── abs_datasets.Rd ├── quarter2Date.Rd ├── abs_cat_unzip.Rd ├── raustats_check_url_available.Rd ├── rba_cachelist.Rd ├── abs_cat_download.Rd ├── abs_metadata.Rd ├── rba_file_download.Rd ├── abs_dimensions.Rd ├── abs_cat_releases.Rd ├── rba_stats.Rd ├── rba_search.Rd ├── abs_read_tss.Rd ├── abs_cat_stats.Rd ├── abs_search.Rd ├── abs_cat_tables.Rd └── abs_stats.Rd ├── DESCRIPTION ├── NAMESPACE ├── cran-comments.md ├── NEWS.md ├── TODO ├── README.md └── README.Rmd /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(raustats) 3 | 4 | test_check("raustats") 5 | -------------------------------------------------------------------------------- /data/abs_cachelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/abs_cachelist.rda -------------------------------------------------------------------------------- /data/rba_cachelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/rba_cachelist.rda -------------------------------------------------------------------------------- /data/abs_cat_cachelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/abs_cat_cachelist.rda -------------------------------------------------------------------------------- /data/aus_state_codes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitcda/raustats/HEAD/data/aus_state_codes.rda -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | *~ 6 | R-example-code/* 7 | *discards* 8 | /README.html 9 | doc 10 | raustats_presentation.* 11 | -------------------------------------------------------------------------------- /R/z-release-questions.R: -------------------------------------------------------------------------------- 1 | ## Package checking and other miscellaneous functions 2 | 3 | release_questions <- function() { 4 | c( 5 | "Have you updated static package data (data-raw/build-data.R)?" 
6 | ) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /data-raw/Australian-States-Territories.csv: -------------------------------------------------------------------------------- 1 | state_code,state_abb,state_name 2 | 0,Aus,Australia 3 | 1,NSW,New South Wales 4 | 2,Vic,Victoria 5 | 3,Qld,Queensland 6 | 4,SA,South Australia 7 | 5,WA,Western Australia 8 | 6,Tas,Tasmania 9 | 7,NT,Northern Territory 10 | 8,ACT,Australian Capital Territory 11 | 9,OT,Other Territories 12 | 13 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(libname, pkgname) { 2 | packageStartupMessage("Attaching package: 'raustats'"); 3 | } 4 | 5 | ## .onLoad <- function(libname, pkgname) 6 | ## { 7 | ## } 8 | 9 | ## .onDetach <- function(libname, pkgname) 10 | ## { 11 | ## } 12 | 13 | ## .Last <- function() 14 | ## { 15 | ## } 16 | 17 | ## .First <- function() 18 | ## { 19 | ## } 20 | 21 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | # Rbuildignore 2 | # Ignore git repository 3 | ^.git/$ 4 | ^.gitignore$ 5 | ^.Rhistory$ 6 | ^Meta$ 7 | ^doc$ 8 | data-raw 9 | ^EXTRA-CODE$ 10 | ^README$ 11 | ^README.Rmd$ 12 | ^README.html$ 13 | ^CHANGES$ 14 | ^TODO$ 15 | ^R-example-code/$ 16 | abs-series-abbreviation.R 17 | z-debugging-code.R 18 | z-release-questions.R 19 | z-unused-functions.R 20 | raustats_presentation.Rmd 21 | raustats_presentation.pptx 22 | cran-comments.md 23 | -------------------------------------------------------------------------------- /man/excel2Date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{excel2Date} 4 | \alias{excel2Date} 5 | \title{Convert Excel numeric date to R Date object} 6 | \usage{ 7 | excel2Date(x) 8 | } 9 | \arguments{ 10 | \item{x}{Excel-based date numeric object} 11 | } 12 | \value{ 13 | Date object 14 | } 15 | \description{ 16 | Function to convert Excel numeric date to R Date object 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/last_day.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{last_day} 4 | \alias{last_day} 5 | \title{Set Date object to the last day of the month} 6 | \usage{ 7 | last_day(date) 8 | } 9 | \arguments{ 10 | \item{date}{date object} 11 | } 12 | \value{ 13 | Date object 14 | } 15 | \description{ 16 | Function to change the date of a Date object to the last day of the month 17 | } 18 | \author{ 19 | David Mitchell 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /R/raustats.R: -------------------------------------------------------------------------------- 1 | #' raustats: An R package for accessing data and statistics from the ABS and RBA websites 2 | #' 3 | #' The raustats package provides structured access to all data and statistics 4 | #' available from the Australian Bureau of Statistics and Reserve Bank of Australia 5 | #' website, as well as draft access to the ABS.Stat - Beta data catalogue API. 
6 | #' 7 | #' To learn more about the raustats package, start with the vignettes: 8 | #' \code{browseVignettes(package = "raustats")} 9 | #' 10 | #' @docType package 11 | #' @name raustats 12 | NULL 13 | -------------------------------------------------------------------------------- /man/raustats_ua.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/httr-utilities.R 3 | \name{raustats_ua} 4 | \alias{raustats_ua} 5 | \title{raustats package user agent} 6 | \usage{ 7 | raustats_ua() 8 | } 9 | \value{ 10 | a list with a base url and a url section for formatting the JSON API calls 11 | } 12 | \description{ 13 | This function specifies the package user agent, and is used inside 14 | GET/POST function calls 15 | } 16 | \author{ 17 | David Mitchell 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/rba_table_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_table_cache} 4 | \alias{rba_table_cache} 5 | \title{Return list of RBA tables} 6 | \usage{ 7 | rba_table_cache() 8 | } 9 | \value{ 10 | data frame in long format 11 | } 12 | \description{ 13 | Function to return an updated list of data tables available from the RBA website. 14 | } 15 | \examples{ 16 | \donttest{ 17 | rba_cachelist <- rba_table_cache(); 18 | } 19 | } 20 | \author{ 21 | David Mitchell 22 | } 23 | -------------------------------------------------------------------------------- /man/rba_urls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_urls} 4 | \alias{rba_urls} 5 | \title{RBA base URL and data paths} 6 | \usage{ 7 | rba_urls() 8 | } 9 | \value{ 10 | list of RBA base URL and data paths 11 | } 12 | \description{ 13 | This function returns a list of URLs and data paths used to construct RBA data 14 | access calls. It is used in other functions in this package and need not be called directly. 
15 | } 16 | \author{ 17 | David Mitchell 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/abs_call_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_call_api} 4 | \alias{abs_call_api} 5 | \title{Submit API call to ABS.Stat} 6 | \usage{ 7 | abs_call_api(url) 8 | } 9 | \arguments{ 10 | \item{url}{Character string specifying a valid ABS.Stat API call 11 | URL.} 12 | } 13 | \value{ 14 | data frame in long format 15 | } 16 | \description{ 17 | This function submits the specified API call to ABS.Stat. 18 | } 19 | \author{ 20 | David Mitchell 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/fin_year.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{fin_year} 4 | \alias{fin_year} 5 | \title{Create financial year date object} 6 | \usage{ 7 | fin_year(date, ending = "Jun") 8 | } 9 | \arguments{ 10 | \item{date}{date object} 11 | 12 | \item{ending}{character string abbreviation or number denoting the ending month of the financial year} 13 | } 14 | \value{ 15 | Date object 16 | } 17 | \description{ 18 | Function to create a financial year date object 19 | } 20 | \author{ 21 | David Mitchell 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/abs_cachelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{abs_cachelist} 5 | \alias{abs_cachelist} 6 | \title{Datasets available through the ABS API} 7 | \format{A data frame containing three columns: 8 | \itemize{ 9 | \item \code{id} ABS dataset identifier. 10 | \item \code{agencyID} Source agency identifier (ABS). 11 | \item \code{name} ABS dataset name. 12 | }} 13 | \usage{ 14 | abs_cachelist 15 | } 16 | \description{ 17 | This data set provides a list of all datasets, and the associated metadata, 18 | available through the ABS API. 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/aus_state_codes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{aus_state_codes} 5 | \alias{aus_state_codes} 6 | \title{Table of Australian state and territory codes} 7 | \format{A data frame containing three columns: 8 | \itemize{ 9 | \item \code{state_code} One-digit state code. 10 | \item \code{state_abb} State/territory abbreviation. 11 | \item \code{state_name} State/territory name. 
12 | }} 13 | \usage{ 14 | aus_state_codes 15 | } 16 | \description{ 17 | A list of Australian state and territory codes (including code 0 -- Australia) 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/abs_api_urls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_api_urls} 4 | \alias{abs_api_urls} 5 | \title{ABS URL addresses and paths used in ABS.Stat API calls} 6 | \usage{ 7 | abs_api_urls() 8 | } 9 | \value{ 10 | a list with a base url and a url section for formatting the JSON API calls 11 | } 12 | \description{ 13 | This function returns a list of URLs and data paths used to construct ABS.Stat 14 | API calls. It is used in other functions in this package and need not be called directly. 15 | } 16 | \author{ 17 | David Mitchell 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /data-raw/build-data.R: -------------------------------------------------------------------------------- 1 | ## Build datasets 2 | devtools::load_all("."); 3 | rba_cachelist <- rba_table_cache(); 4 | abs_cat_cachelist <- read.csv(here::here("data-raw", "ABS-TSS-Catalogue-Numbers.csv")); 5 | abs_cachelist <- abs_datasets(); 6 | aus_state_codes <- read.csv(here::here("data-raw", "Australian-States-Territories.csv")); 7 | 8 | ## Write data set files 9 | usethis::use_data(rba_cachelist, overwrite=TRUE); 10 | usethis::use_data(abs_cat_cachelist, overwrite=TRUE); 11 | usethis::use_data(abs_cachelist, overwrite=TRUE); 12 | usethis::use_data(aus_state_codes, overwrite=TRUE); 13 | 14 | ## ---------------------------------- EOF ------------------------------------- 15 | -------------------------------------------------------------------------------- /man/abs_filetypes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_filetypes} 4 | \alias{abs_filetypes} 5 | \title{Valid ABS file types} 6 | \usage{ 7 | abs_filetypes() 8 | } 9 | \value{ 10 | a vector containing a list of valid ABS file types. 11 | } 12 | \description{ 13 | This function returns a vector of the valid ABS file types used in constructing ABS Catalogue 14 | data access calls. It is used in other functions in this package and need not be called 15 | directly. 
16 | } 17 | \author{ 18 | David Mitchell 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/abs_api_call.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_api_call} 4 | \alias{abs_api_call} 5 | \title{Create ABS.Stat API URL call} 6 | \usage{ 7 | abs_api_call(path, args) 8 | } 9 | \arguments{ 10 | \item{path}{Character string specifying the ABS.Stat API data path to 11 | call.} 12 | 13 | \item{args}{Named list of arguments to supply to the call.} 14 | } 15 | \value{ 16 | data frame in long format 17 | } 18 | \description{ 19 | This function creates the ABS.Stat API call URL. 20 | } 21 | \author{ 22 | David Mitchell 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/raustats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/raustats.R 3 | \docType{package} 4 | \name{raustats} 5 | \alias{raustats} 6 | \title{raustats: An R package for accessing data and statistics from the ABS and RBA websites} 7 | \description{ 8 | The raustats package provides structured access to all data and statistics 9 | available from the Australian Bureau of Statistics and Reserve Bank of Australia 10 | website, as well as draft access to the ABS.Stat - Beta data catalogue API. 11 | } 12 | \details{ 13 | To learn more about the raustats package, start with the vignettes: 14 | \code{browseVignettes(package = "raustats")} 15 | } 16 | -------------------------------------------------------------------------------- /man/abs_local_filename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_local_filename} 4 | \alias{abs_local_filename} 5 | \title{Create local file names for storing downloaded ABS data files} 6 | \usage{ 7 | abs_local_filename(url) 8 | } 9 | \arguments{ 10 | \item{url}{Character vector specifying one or more ABS data URLs.} 11 | } 12 | \value{ 13 | Returns local file names (character vector) in which downloaded files will be saved. 14 | } 15 | \description{ 16 | Function to create local file names from web-based file names. 17 | } 18 | \author{ 19 | David Mitchell 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/abs_urls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_urls} 4 | \alias{abs_urls} 5 | \title{ABS URL addresses and paths used in accessing ABS Catalogue data calls} 6 | \usage{ 7 | abs_urls() 8 | } 9 | \value{ 10 | a list with a base url and a url section for formatting ABS Catalogue statistics calls 11 | } 12 | \description{ 13 | This function returns a list of URLs and data paths used to construct ABS Catalogue 14 | data access calls. It is used in other functions in this package and need not be called 15 | directly. 
16 | } 17 | \author{ 18 | David Mitchell 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/rba_read_tss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_read_tss} 4 | \alias{rba_read_tss} 5 | \title{Read RBA statistical time series spreadsheet} 6 | \usage{ 7 | rba_read_tss(files) 8 | } 9 | \arguments{ 10 | \item{files}{Names of one or more RBA data files} 11 | } 12 | \value{ 13 | data frame in long format 14 | } 15 | \description{ 16 | Function to extract data from a specified RBA time series spreadsheet. 17 | } 18 | \examples{ 19 | \donttest{ 20 | rba_urls <- rba_search(pattern = "Liabilities and Assets")$url 21 | rba_files <- sapply(rba_urls, rba_file_download) 22 | data <- rba_read_tss(rba_files); 23 | } 24 | } 25 | \author{ 26 | David Mitchell 27 | } 28 | -------------------------------------------------------------------------------- /man/abs_cat_cachelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{abs_cat_cachelist} 5 | \alias{abs_cat_cachelist} 6 | \title{List of ABS catalogue tables} 7 | \format{A data frame containing five columns: 8 | \itemize{ 9 | \item \code{publication_title} ABS publication title. 10 | \item \code{catalogue_no} ABS catalogue number. 11 | \item \code{abs_url} ABS URL. 12 | \item \code{last_updated} Publication last updated. 13 | \item \code{type} Publication type -- one of either 'time series', 'panel' or 'summary'. 14 | }} 15 | \usage{ 16 | abs_cat_cachelist 17 | } 18 | \description{ 19 | This data set provides a list of the most common ABS catalogue tables. 20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /man/abs_datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_datasets} 4 | \alias{abs_datasets} 5 | \title{Download ABS.Stat datasets} 6 | \usage{ 7 | abs_datasets(lang = "en", include_notes = FALSE) 8 | } 9 | \arguments{ 10 | \item{lang}{Preferred language (default 'en' - English).} 11 | 12 | \item{include_notes}{Include ABS annotation information for each series.} 13 | } 14 | \value{ 15 | data frame in long format 16 | } 17 | \description{ 18 | This function returns a list of all datasets available from ABS.Stat. 19 | } 20 | \examples{ 21 | \donttest{ 22 | datasets <- abs_datasets() 23 | datasets <- abs_datasets(include_notes=TRUE) 24 | } 25 | } 26 | \author{ 27 | David Mitchell 28 | } 29 | -------------------------------------------------------------------------------- /man/quarter2Date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/date-utilities.R 3 | \name{quarter2Date} 4 | \alias{quarter2Date} 5 | \title{Convert dates formatted as year-quarter to date objects} 6 | \usage{ 7 | quarter2Date(x, base.month = "Mar", format = "\%Y-Q\%q") 8 | } 9 | \arguments{ 10 | \item{x}{Year-quarter date format} 11 | 12 | \item{base.month}{Specifies base month for first quarter. 
Can be a scalar: 1,2,3 or character 13 | object: Jan, Feb, Mar.} 14 | 15 | \item{format}{The input date format. Default is "\%Y-Q\%q".} 16 | } 17 | \value{ 18 | This function returns a Date format object. 19 | } 20 | \description{ 21 | Function to convert dates formatted as year-quarter to date-format objects 22 | } 23 | \author{ 24 | David Mitchell 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/abs_cat_unzip.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_unzip} 4 | \alias{abs_cat_unzip} 5 | \title{Uncompress locally-stored ABS Catalogue data file archives} 6 | \usage{ 7 | abs_cat_unzip(files, exdir) 8 | } 9 | \arguments{ 10 | \item{files}{One or more local zip files.} 11 | 12 | \item{exdir}{Target directory for extracted archive files. Directory is created if it doesn't 13 | exist. If missing, creates a new subdirectory in \code{tempdir()} using the respective zip 14 | files (specified in \code{files}).} 15 | } 16 | \value{ 17 | Returns a character vector listing the names of all files extracted. 18 | } 19 | \description{ 20 | Function to uncompress locally-stored ABS Catalogue data file archives. 21 | } 22 | \author{ 23 | David Mitchell 24 | } 25 | -------------------------------------------------------------------------------- /man/raustats_check_url_available.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/httr-utilities.R 3 | \name{raustats_check_url_available} 4 | \alias{raustats_check_url_available} 5 | \title{Check specified ABS/RBA URL available} 6 | \usage{ 7 | raustats_check_url_available(url) 8 | } 9 | \arguments{ 10 | \item{url}{The base URL to check.} 11 | } 12 | \value{ 13 | \code{TRUE} if the API is available, otherwise \code{stop()} is called. 14 | } 15 | \description{ 16 | Function to ensure URL calls fail gracefully with an informative message if the 17 | resource is not available (and not give a check warning nor error). 18 | } 19 | \note{ 20 | Based on code in \code{opensensmapR} (\url{https://github.com/sensebox/opensensmapR/blob/f69cf62b2771d5b6ed605c04b7ddd618f5a272c2/R/api.R}{\code{api.R}}). 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/rba_cachelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{rba_cachelist} 5 | \alias{rba_cachelist} 6 | \title{Cached list of statistical tables provided by the RBA} 7 | \format{A data frame containing four columns: 8 | \itemize{ 9 | \item \code{table_code} RBA table code. 10 | \item \code{table_name} RBA table name. 11 | \item \code{table_type} One of either current statistical tables, historical data or discontinued data. 12 | \item \code{url} RBA URL. 13 | }} 14 | \usage{ 15 | rba_cachelist 16 | } 17 | \description{ 18 | This data is a cached result of the \code{\link{rba_table_cache}} function. By 19 | default functions \code{\link{rba_search}} and \code{\link{rba_stats}} use this data if the 20 | \code{update_cache} parameter is \code{FALSE}. 
21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /man/abs_cat_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_download} 4 | \alias{abs_cat_download} 5 | \title{Function to download files from the ABS website and store locally} 6 | \usage{ 7 | abs_cat_download(data_url, exdir = tempdir()) 8 | } 9 | \arguments{ 10 | \item{data_url}{Character vector specifying an ABS data URLs.} 11 | 12 | \item{exdir}{Target directory for downloaded files (defaults to \code{tempdir()}). Directory is 13 | created if it doesn't exist.} 14 | } 15 | \value{ 16 | Downloads data from the ABS website and returns a character vector listing the location 17 | where files are saved. 18 | } 19 | \description{ 20 | Downloads specified ABS catalogue data files from the ABS website, using a valid ABS 21 | data table URL. 22 | } 23 | \author{ 24 | David Mitchell 25 | } 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: raustats 2 | Type: Package 3 | Title: Access Data and Statistics from the ABS and RBA Websites 4 | Description: Functions for downloading Australian economic statistics 5 | from the Australian Bureau of Statistics (ABS) (see ) and 6 | Reserve Bank of Australia (RBA) (see ) websites. 7 | Version: 0.15.0 8 | Date: 2019-12-20 9 | Authors@R: c( 10 | person("David", "Mitchell", email = "david.pk.mitchell@gmail.com", role = c("aut", "cre")) 11 | ) 12 | Maintainer: David Mitchell 13 | URL: https://github.com/mitcda/raustats 14 | License: GPL-3 15 | Depends: R (>= 3.3.0), readxl 16 | Imports: stats, dplyr, httr, jsonlite, rvest, tidyr, xml2, lubridate 17 | Suggests: repo, knitr, rmarkdown, testthat, ggplot2 18 | LazyData: TRUE 19 | RoxygenNote: 7.0.2 20 | VignetteBuilder: knitr 21 | NeedsCompilation: no 22 | Encoding: UTF-8 23 | -------------------------------------------------------------------------------- /man/abs_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_metadata} 4 | \alias{abs_metadata} 5 | \title{Download dataset metadata from the ABS API} 6 | \usage{ 7 | abs_metadata(id, lang = "en") 8 | } 9 | \arguments{ 10 | \item{id}{ABS dataset ID.} 11 | 12 | \item{lang}{Preferred language (default 'en' - English).} 13 | } 14 | \value{ 15 | data frame in long format 16 | } 17 | \description{ 18 | This function queries and returns all metadata associated with a specified dataset 19 | from ABS.Stat. 
20 | } 21 | \examples{ 22 | \donttest{ 23 | datasets <- abs_datasets(); 24 | x <- abs_metadata("CPI"); 25 | x <- abs_metadata(grep("cpi", datasets$id, ignore.case=TRUE, value=TRUE)); 26 | names(x) 27 | y <- abs_metadata(datasets$id[1]); 28 | names(y) 29 | } 30 | } 31 | \author{ 32 | David Mitchell 33 | } 34 | -------------------------------------------------------------------------------- /R/z-debugging-code.R: -------------------------------------------------------------------------------- 1 | #' # Debugging code 2 | #' 3 | #' ## Debugging `abs_stats` function 4 | #' 5 | 6 | #' ### Testing empty returns 7 | ## Regional Statistics by ASGS 2016 8 | ## - Economy and Industry 9 | ## + Number of businesses 10 | ## library(magrittr) 11 | ## abs_id <- abs_search("regional statistics") %>% 12 | ## filter(grepl("regional\\s*statistics.*asgs\\s*2016", name, ignore.case=TRUE)); 13 | ## abs_meta <- abs_metadata(abs_id$id); 14 | ## abs_fltr <- abs_search("^total\\s*number.+business\\s*entries", dataset=abs_id$id, code_only=TRUE); 15 | 16 | ## ## Debugging settings 17 | ## dataset <- "ABS_REGIONAL_ASGS2016" 18 | ## filter <- list(MEASURE="CABEE_6", 19 | ## # MEASURE="CABEE_10", 20 | ## REGIONTYPE="STE", 21 | ## ASGS_2016=1:8); 22 | ## start_date <- 2011 23 | ## end_date <- 2018 24 | ## dimensionAtObservation <- "AllDimensions" 25 | ## detail <- "Full" 26 | ## enforce_api_limits <- TRUE 27 | 28 | -------------------------------------------------------------------------------- /man/rba_file_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_file_download} 4 | \alias{rba_file_download} 5 | \title{Function to download statistics files from the RBA website and store locally} 6 | \usage{ 7 | rba_file_download(data_url, exdir = tempdir(), update_cache = TRUE) 8 | } 9 | \arguments{ 10 | \item{data_url}{Character vector specifying an RBA data set URL.} 11 | 12 | \item{exdir}{Target directory for downloaded files (defaults to \code{tempdir()}). Directory is 13 | created if it doesn't exist.} 14 | 15 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 16 | RBA datasets, if TRUE, update the list of available datasets.} 17 | } 18 | \value{ 19 | Downloads data from the RBA website and returns a character vector listing the location 20 | where files are saved. 21 | } 22 | \description{ 23 | This function downloads one or more RBA data files from the specified URLs and 24 | saves a local copy of each. 25 | } 26 | \author{ 27 | David Mitchell 28 | } 29 | -------------------------------------------------------------------------------- /man/abs_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_dimensions} 4 | \alias{abs_dimensions} 5 | \title{Return available dimensions of ABS series} 6 | \usage{ 7 | abs_dimensions(dataset, update_cache = FALSE) 8 | } 9 | \arguments{ 10 | \item{dataset}{Character vector of dataset codes. 
These codes correspond to the 11 | \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 12 | \code{abs_cachelist}, or the result of \code{abs_indicators}.} 13 | 14 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 15 | ABS.Stat datasets, if TRUE, update the list of available datasets.} 16 | } 17 | \value{ 18 | a data frame with available dataset dimensions. 19 | } 20 | \description{ 21 | This function returns the available dimensions for a specified ABS API dataset. 22 | } 23 | \examples{ 24 | \donttest{ 25 | ## CPI - Consumer Price Index 26 | x <- abs_dimensions("CPI"); 27 | str(x) 28 | ## LF - Labour Force 29 | x <- abs_dimensions("LF"); 30 | str(x) 31 | } 32 | } 33 | \author{ 34 | David Mitchell 35 | } 36 | -------------------------------------------------------------------------------- /man/abs_cat_releases.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_releases} 4 | \alias{abs_cat_releases} 5 | \title{Return ABS catalogue table releases} 6 | \usage{ 7 | abs_cat_releases(cat_no, include_urls = FALSE) 8 | } 9 | \arguments{ 10 | \item{cat_no}{ABS catalogue numbers.} 11 | 12 | \item{include_urls}{Include full path URL to specified ABS catalogue releases. Default (FALSE) 13 | does not include release URLs.} 14 | } 15 | \value{ 16 | Returns a data frame listing available ABS catalogue releases. 17 | } 18 | \description{ 19 | Return list of all releases available for specified ABS catalogue number. 20 | } 21 | \examples{ 22 | \donttest{ 23 | ## List all available quarterly National Accounts releases 24 | ana_releases <- abs_cat_releases("5206.0"); 25 | ana_release_urls <- abs_cat_releases("5206.0", include_urls=TRUE); 26 | 27 | ## List all available CPI releases 28 | cpi_releases <- abs_cat_releases("6401.0"); 29 | cpi_release_urls <- abs_cat_releases("6401.0", include_urls=TRUE); 30 | } 31 | } 32 | \author{ 33 | David Mitchell 34 | } 35 | -------------------------------------------------------------------------------- /man/rba_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_stats} 4 | \alias{rba_stats} 5 | \title{Return data for a specified RBA time series} 6 | \usage{ 7 | rba_stats(table_no, pattern, url, update_cache = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{table_no}{Character vector specifying one or more RBA table numbers to download.} 11 | 12 | \item{pattern}{Character string or regular expression to be matched.} 13 | 14 | \item{url}{Valid URL for RBA dataset (Excel format only).} 15 | 16 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 17 | RBA datasets, if TRUE, update the list of available datasets.} 18 | 19 | \item{...}{Other arguments to \code{\link{rba_search}}, e.g. \code{series_type = "statistical_tables"}.} 20 | } 21 | \value{ 22 | data frame in long format 23 | } 24 | \description{ 25 | Function to download and return specified RBA time series data. 
26 | } 27 | \examples{ 28 | \donttest{ 29 | ## Example - Selecting by table_no 30 | x <- rba_stats("A1"); 31 | 32 | ## Example - Selecting by pattern 33 | x <- rba_stats(pattern="Liabilities and Assets"); 34 | } 35 | } 36 | \author{ 37 | David Mitchell 38 | } 39 | -------------------------------------------------------------------------------- /man/rba_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rba-functions.R 3 | \name{rba_search} 4 | \alias{rba_search} 5 | \title{Return list of data tables from RBA website} 6 | \usage{ 7 | rba_search( 8 | pattern, 9 | fields = c("table_no", "table_name"), 10 | series_type = "statistical tables", 11 | ignore.case = TRUE, 12 | update_cache = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{pattern}{Character string or regular expression to be matched.} 17 | 18 | \item{fields}{Character vector of column names through which to search. By default, the function 19 | searches 'table_no' and 'table_name'.} 20 | 21 | \item{series_type}{Character vector specifying one or more of 'statistical tables', 'historical data' or 22 | 'discontinued data'. By default, \code{series_type = 'statistical tables'}.} 23 | 24 | \item{ignore.case}{Case-sensitive pattern match or not.} 25 | 26 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 27 | RBA tables (\code{rba_cachelist}), if TRUE, update the list of available datasets.} 28 | } 29 | \value{ 30 | data frame in long format 31 | } 32 | \description{ 33 | Function to return a list of all RBA data tables. 34 | } 35 | \examples{ 36 | rba_datasets <- rba_search(pattern = "Liabilities and Assets"); 37 | } 38 | \author{ 39 | David Mitchell 40 | } 41 | -------------------------------------------------------------------------------- /man/abs_read_tss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_read_tss} 4 | \alias{abs_read_tss} 5 | \alias{abs_read_tss_} 6 | \title{Extract data from an ABS time series data file} 7 | \usage{ 8 | abs_read_tss(files, type = "tss", na.rm = TRUE) 9 | 10 | abs_read_tss_(file, type = "tss", na.rm = na.rm) 11 | } 12 | \arguments{ 13 | \item{files}{Names of one or more ABS data files} 14 | 15 | \item{type}{One of either 'tss' -- ABS Time Series Spreadsheet (the default) or 'css' -- Data 16 | Cube.} 17 | 18 | \item{na.rm}{logical. If \code{TRUE} (default), remove observations containing missing values.} 19 | } 20 | \value{ 21 | data frame in long format 22 | } 23 | \description{ 24 | This function extracts time series data from ABS data files. 25 | 26 | This is the internal function that extracts time series data from ABS data files. 
27 | } 28 | \examples{ 29 | \donttest{ 30 | ## Read specified ABS Excel time series files 31 | tables <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 32 | downloaded_tables <- abs_cat_download(tables$path_zip, exdir=tempdir()) 33 | extracted_files <- abs_cat_unzip(downloaded_tables) 34 | x <- abs_read_tss(extracted_files); 35 | } 36 | } 37 | \author{ 38 | David Mitchell 39 | 40 | David Mitchell 41 | } 42 | \keyword{internal} 43 | -------------------------------------------------------------------------------- /R/httr-utilities.R: -------------------------------------------------------------------------------- 1 | ## httr settings 2 | #' @name raustats_ua 3 | #' @title raustats package user agent 4 | #' @description This function specifies the package user agent, and is used inside 5 | #' GET/POST function calls 6 | #' @importFrom httr user_agent 7 | #' @return a list with a base url and a url section for formatting the JSON API calls 8 | #' @author David Mitchell 9 | #' @keywords internal 10 | raustats_ua <- function() 11 | user_agent("http://github.com/mitcda/raustats") 12 | 13 | 14 | ## Check if the specified ABS/RBA URL is available 15 | #' @name raustats_check_url_available 16 | #' @title Check specified ABS/RBA URL available 17 | #' @description Function to ensure URL calls fail gracefully with an informative message if the 18 | #' resource is not available (and not give a check warning nor error). 19 | #' @importFrom httr GET status_code 20 | #' @param url The base URL to check. 21 | #' @return \code{TRUE} if the API is available, otherwise \code{stop()} is called. 22 | #' @note Based on code in \code{opensensmapR} (\url{https://github.com/sensebox/opensensmapR/blob/f69cf62b2771d5b6ed605c04b7ddd618f5a272c2/R/api.R}{\code{api.R}}). 23 | #' @keywords internal 24 | raustats_check_url_available <- function(url) { 25 | code = FALSE 26 | try({ code = status_code(GET(url, raustats_ua())) }, silent = TRUE) 27 | 28 | if (code == 200) 29 | return(NULL) 30 | 31 | if (code != FALSE) { 32 | errtext = sprintf("The API at %s is currently not available. (HTTP code %s)", url, code) 33 | stop(paste(errtext, collapse='\n'), call. 
= FALSE) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(abs_cat_download) 4 | export(abs_cat_releases) 5 | export(abs_cat_stats) 6 | export(abs_cat_tables) 7 | export(abs_cat_unzip) 8 | export(abs_datasets) 9 | export(abs_dimensions) 10 | export(abs_metadata) 11 | export(abs_read_tss) 12 | export(abs_search) 13 | export(abs_stats) 14 | export(rba_file_download) 15 | export(rba_read_tss) 16 | export(rba_search) 17 | export(rba_stats) 18 | export(rba_table_cache) 19 | importFrom(dplyr,bind_rows) 20 | importFrom(dplyr,case_when) 21 | importFrom(dplyr,left_join) 22 | importFrom(httr,GET) 23 | importFrom(httr,content) 24 | importFrom(httr,http_error) 25 | importFrom(httr,http_status) 26 | importFrom(httr,http_type) 27 | importFrom(httr,progress) 28 | importFrom(httr,status_code) 29 | importFrom(httr,user_agent) 30 | importFrom(httr,write_disk) 31 | importFrom(jsonlite,fromJSON) 32 | importFrom(lubridate,ceiling_date) 33 | importFrom(lubridate,days) 34 | importFrom(readxl,excel_sheets) 35 | importFrom(readxl,read_excel) 36 | importFrom(rvest,follow_link) 37 | importFrom(rvest,html_attr) 38 | importFrom(rvest,html_nodes) 39 | importFrom(rvest,html_session) 40 | importFrom(rvest,html_table) 41 | importFrom(rvest,html_text) 42 | importFrom(rvest,jump_to) 43 | importFrom(stats,complete.cases) 44 | importFrom(stats,setNames) 45 | importFrom(tidyr,gather) 46 | importFrom(utils,unzip) 47 | importFrom(utils,zip) 48 | importFrom(xml2,as_list) 49 | importFrom(xml2,read_html) 50 | importFrom(xml2,read_xml) 51 | importFrom(xml2,xml_attr) 52 | importFrom(xml2,xml_attrs) 53 | importFrom(xml2,xml_child) 54 | importFrom(xml2,xml_children) 55 | importFrom(xml2,xml_find_all) 56 | importFrom(xml2,xml_length) 57 | importFrom(xml2,xml_name) 58 | importFrom(xml2,xml_ns_strip) 59 | importFrom(xml2,xml_parent) 60 | importFrom(xml2,xml_text) 61 | -------------------------------------------------------------------------------- /man/abs_cat_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_stats} 4 | \alias{abs_cat_stats} 5 | \title{Get ABS catalogue series data} 6 | \usage{ 7 | abs_cat_stats( 8 | cat_no, 9 | tables = "All", 10 | releases = "Latest", 11 | types = "tss", 12 | na.rm = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{cat_no}{Character vector specifying one or more ABS collections or catalogue numbers to 17 | download.} 18 | 19 | \item{tables}{A character vector of regular expressions denoting tables to download. The default 20 | ('All') downloads all time series spreadsheet tables for each specified catalogue. Use a list 21 | to specify different table sets for each specified ABS catalogue number.} 22 | 23 | \item{releases}{Date or character string object specifying the month and year denoting which 24 | release to download. Default is "Latest", which downloads the latest available data. 
See 25 | examples for further details.} 26 | 27 | \item{types}{One of either 'tss' -- ABS time series spreadsheet (the default) or 'css' -- ABS 28 | data cube (cross-section spreadsheet).} 29 | 30 | \item{na.rm}{logical (default: \code{TRUE}) - remove observations containing missing values.} 31 | } 32 | \value{ 33 | data frame in long format 34 | } 35 | \description{ 36 | This function downloads ABS catalogue series statistics, by ABS catalogue number. 37 | } 38 | \examples{ 39 | \donttest{ 40 | ## Download quarterly Australian National Accounts, Tables 1 & 2 41 | ana_q <- abs_cat_stats("5206.0", tables=c("Table 1\\\\W+", "Table 2\\\\W+")); 42 | 43 | ## Download December 2017 Australian National Accounts, Table 1 44 | ana_q_2017q4 <- abs_cat_stats("5206.0", tables="Table 1\\\\W+", release="Dec 2017"); 45 | } 46 | } 47 | \author{ 48 | David Mitchell 49 | } 50 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # Release version 0.15.0 2 | 3 | ## New submission 4 | 5 | ### Release summary 6 | 7 | This is a minor release 8 | 9 | 10 | ### Test environments 11 | * local Debian Linux 10, R 3.6.1 and R-devel 12 | * win-builder (release and devel) 13 | 14 | 15 | ### R CMD check results 16 | There were no ERRORs or WARNINGs. 17 | 18 | There was 1 NOTE: 19 | 20 | * checking CRAN incoming feasibility ... NOTE 21 | Maintainer: ‘David Mitchell ’ 22 | 23 | New submission 24 | 25 | Package was archived on CRAN 26 | 27 | CRAN repository db overrides: 28 | X-CRAN-Comment: Archived on 2019-12-19 for policy violation. 29 | 30 | On Internet access. 31 | 32 | - Added functionality to ensure URL calls fail gracefully with an 33 | informative message if the resource is not available. 34 | 35 | 36 | ### Downstream dependencies 37 | There are currently no downstream dependencies for this package. 38 | 39 | 40 | 41 | # Initial submission - version 0.1.0 42 | 43 | ## New submission 44 | 45 | ### Resubmission 46 | 47 | This is a resubmission. In this version I have: 48 | 49 | * Added links to the websites described in the Description text: 50 | and . 51 | 52 | * Replaced all instances of \dontrun{} with \donttest{} in Rd-files. 53 | 54 | * Removed examples for all unexported functions. 55 | 56 | 57 | ### Test environments 58 | * local Debian Linux 4.18, R 3.5.2 and R-devel 59 | * win-builder (release and devel) 60 | 61 | 62 | ### R CMD check results 63 | There were no ERRORs or WARNINGs. 64 | 65 | There was 1 NOTE: 66 | 67 | * checking CRAN incoming feasibility ... NOTE 68 | Maintainer: ‘David Mitchell ’ 69 | 70 | New submission 71 | 72 | 73 | ### Downstream dependencies 74 | There are currently no downstream dependencies for this package. 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /man/abs_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_search} 4 | \alias{abs_search} 5 | \title{Search dataset information from the ABS.Stat API} 6 | \usage{ 7 | abs_search( 8 | pattern, 9 | dataset = NULL, 10 | ignore.case = TRUE, 11 | code_only = FALSE, 12 | update_cache = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{pattern}{Character string or regular expression to be matched.} 17 | 18 | \item{dataset}{Character vector of ABS.Stat dataset codes. 
These codes correspond to the 19 | \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 20 | \code{abs_cachelist}, or the result of \code{abs_indicators}. If NULL (default), the function 21 | undertakes a dataset mode search. If not NULL, the function searches all dimensions of the specified 22 | dataset.} 23 | 24 | \item{ignore.case}{Case-sensitive pattern match or not.} 25 | 26 | \item{code_only}{If FALSE (default), all columns/fields are returned. If TRUE, only the dataset 27 | identifier or indicator code is returned.} 28 | 29 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 30 | ABS.Stat datasets, if TRUE, update the list of available datasets.} 31 | } 32 | \value{ 33 | A data frame with datasets and data items that match the search pattern. 34 | } 35 | \description{ 36 | This function finds datasets, or dimensions within a specified dataset, that match a specified 37 | regular expression and returns matching results. 38 | } 39 | \note{ 40 | With acknowledgements to the \code{wb_search} function. 41 | } 42 | \examples{ 43 | ## ABS dataset search 44 | x <- abs_search(pattern = "consumer price index") 45 | x <- abs_search(pattern = "census") 46 | x <- abs_search(pattern = "labour force") 47 | 48 | ## ABS indicator search 49 | x <- abs_search(pattern = "all groups", dataset="CPI") 50 | x <- abs_search(pattern = c("all groups", "capital cities"), dataset="CPI") 51 | 52 | } 53 | \author{ 54 | David Mitchell 55 | } 56 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # raustats 0.15.0 2 | --------------------------------------------------------------------- 3 | 4 | ## Changes: 5 | 6 | * `abs_cat_stats` includes new argument `na.rm` to provide option to remove rows 7 | with `NA` values. 8 | 9 | * `abs_read_tss` includes new argument `na.rm` to provide option to remove rows 10 | with `NA` values. 11 | 12 | * `abs_cat_download` now includes PDF files in set of downloadable ABS catalogue 13 | file types. 14 | 15 | * `abs_cat_releases` is a new function that returns the set of all available 16 | releases for a specified ABS catalogue number. 17 | 18 | * `abs_cat_tables` includes internal changes that specify separate columns for 19 | Excel, Zip and PDF resource URLs. 20 | 21 | * `abs_stats` includes new option `return_json` which enables return of data in 22 | raw JSON format. 23 | 24 | * `rba_search` (and by extension `rba_stats`) now includes new option 25 | `series_type` which enables the user to list only current *statistical tables* 26 | (the default), *historical data* or *discontinued data*. 27 | 28 | 29 | ## Bug fixes: 30 | 31 | * `abs_cat_stats` now avoids multiple file downloads and applies `abs_cat_unzip` 32 | only to compressed files. 33 | 34 | * `abs_cat_tables` includes revisions that correct errors thrown by 35 | `abs_cat_tables` and `abs_cat_stats` for some ABS catalogue numbers 36 | (e.g. 8731.0 and 3105.0.65.001). 37 | 38 | * `abs_stats` now gracefully handles zero-length (empty) returns. 39 | 40 | * `rba_stats` now downloads only current *statistical tables* by 41 | default. Previously, `rba_stats` would attempt to read all tables meeting 42 | search criteria, and fail in cases involving a mix of *statistical tables*, 43 | *historical data* and/or *discontinued data*. (Reported by David Stephan.) 
44 | 45 | * Added functionality to ensure URL calls fail gracefully with an informative 46 | message if the resource is not available. 47 | 48 | 49 | 50 | # raustats 0.1.0 51 | --------------------------------------------------------------------- 52 | 53 | * Initial package release 54 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | ### Data sets 2 | 3 | #' @name rba_cachelist 4 | #' @title Cached list of statistical tables provided by the RBA 5 | #' @description This data is a cached result of the \code{\link{rba_table_cache}} function. By 6 | #' default functions \code{\link{rba_search}} and \code{\link{rba_stats}} use this data if the 7 | #' \code{update_cache} parameter is \code{FALSE}. 8 | #' 9 | #' @format A data frame containing four columns: 10 | #' \itemize{ 11 | #' \item \code{table_code} RBA table code. 12 | #' \item \code{table_name} RBA table name. 13 | #' \item \code{table_type} One of either current statistical tables, historical data or discontinued data. 14 | #' \item \code{url} RBA URL. 15 | #' } 16 | "rba_cachelist" 17 | 18 | 19 | #' @name abs_cat_cachelist 20 | #' @title List of ABS catalogue tables 21 | #' @description This data set provides a list of the most common ABS catalogue tables. 22 | #' 23 | #' @format A data frame containing five columns: 24 | #' \itemize{ 25 | #' \item \code{publication_title} ABS publication title. 26 | #' \item \code{catalogue_no} ABS catalogue number. 27 | #' \item \code{abs_url} ABS URL. 28 | #' \item \code{last_updated} Publication last updated. 29 | #' \item \code{type} Publication type -- one of either 'time series', 'panel' or 'summary'. 30 | #' } 31 | "abs_cat_cachelist" 32 | 33 | 34 | #' @name abs_cachelist 35 | #' @title Datasets available through the ABS API 36 | #' @description This data set provides a list of all datasets, and the associated metadata, 37 | #' available through the ABS API. 38 | #' @format A data frame containing three columns: 39 | #' \itemize{ 40 | #' \item \code{id} ABS dataset identifier. 41 | #' \item \code{agencyID} Source agency identifier (ABS). 42 | #' \item \code{name} ABS dataset name. 43 | #' } 44 | "abs_cachelist" 45 | 46 | 47 | #' @name aus_state_codes 48 | #' @title Table of Australian state and territory codes 49 | #' @description A list of Australian state and territory codes (including code 0 -- Australia) 50 | #' 51 | #' @format A data frame containing three columns: 52 | #' \itemize{ 53 | #' \item \code{state_code} One-digit state code. 54 | #' \item \code{state_abb} State/territory abbreviation. 55 | #' \item \code{state_name} State/territory name. 56 | #' } 57 | "aus_state_codes" 58 | 59 | -------------------------------------------------------------------------------- /man/abs_cat_tables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-cat-functions.R 3 | \name{abs_cat_tables} 4 | \alias{abs_cat_tables} 5 | \title{Return ABS catalogue tables} 6 | \usage{ 7 | abs_cat_tables( 8 | cat_no, 9 | releases = "Latest", 10 | types = c("tss", "css"), 11 | include_urls = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{cat_no}{ABS catalogue numbers.} 16 | 17 | \item{releases}{Date or character string object specifying the month and year denoting which 18 | release to download. Default is "Latest", which downloads the latest available data. 
See 19 | examples for further details.} 20 | 21 | \item{types}{ABS publication types to return. Permissible options include one or more of: 'tss' 22 | -- ABS Time Series Spreadsheets, 'css' -- ABS Data Cubes and 'pub' -- ABS Publications. The 23 | default returns all Time Series Spreadsheets and Data Cubes.} 24 | 25 | \item{include_urls}{Include full URLs to returned ABS data files. Default (FALSE) does not 26 | include data file URLs.} 27 | } 28 | \value{ 29 | Returns a data frame listing the data collection tables and URLs for Excel (column: 30 | \code{path_xls}) and, if available, Zip (column: \code{path_zip}) files. 31 | } 32 | \description{ 33 | Return list of data tables available from specified ABS catalogue number. 34 | } 35 | \examples{ 36 | \donttest{ 37 | ## List latest available quarterly National Accounts tables 38 | ana_tables <- abs_cat_tables("5206.0", releases="Latest"); 39 | ana_tables_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 40 | 41 | ## List latest available CPI Time Series Spreadsheet tables only 42 | cpi_tables <- abs_cat_tables("6401.0", releases="Latest", types="tss"); 43 | cpi_tables_url <- abs_cat_tables("6401.0", releases="Latest", types="tss", include_urls=TRUE); 44 | 45 | ## List latest available ASGS Volume 3 Data Cubes 46 | asgs_vol3_tables <- abs_cat_tables("1270.0.55.003", releases="Latest", types="css"); 47 | asgs_vol3_tables_url <- abs_cat_tables("1270.0.55.003", releases="Latest", 48 | types="css", include_urls=TRUE); 49 | 50 | ## List latest available ANZSIC publications (PDF) files 51 | anzsic_2006 <- abs_cat_tables("1292.0", releases="Latest", types="pub", include_urls=TRUE); 52 | } 53 | } 54 | \author{ 55 | David Mitchell 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/test-url-availability.R: -------------------------------------------------------------------------------- 1 | context("raustats URL checking functions") 2 | 3 | test_that("raustats_check_url_available returns true results", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | ## Check valid ABS Catalogue URLs 10 | expect_null(raustats_check_url_available(abs_urls()$base_url)) 11 | expect_null(raustats_check_url_available(file.path(abs_urls()$base_url, 12 | abs_urls()$ausstats_path, 13 | mf_path = "mf"))); 14 | expect_null(raustats_check_url_available(file.path(abs_urls()$base_url, 15 | abs_urls()$ausstats_path, 16 | abs_urls()$mf_path, 17 | "5206.0"))); 18 | 19 | ## Check valid ABS API URLs 20 | expect_null(raustats_check_url_available(file.path(abs_api_urls()$base_url))); 21 | 22 | ## Check valid RBA paths 23 | expect_null(raustats_check_url_available(rba_urls()$base_url)); 24 | expect_null(raustats_check_url_available(file.path(rba_urls()$base_url, 25 | rba_urls()$stats_path))); 26 | expect_null(raustats_check_url_available(file.path(rba_urls()$base_url, 27 | rba_urls()$stats_path, 28 | rba_urls()$tables_path))); 29 | }) 30 | 31 | 32 | test_that("raustats_check_url_available fails gracefully", 33 | { 34 | skip_on_cran() 35 | skip_on_travis() 36 | skip_on_appveyor() 37 | 38 | ## Test mis-specified ABS Catalogue URL 39 | expect_error(raustats_check_url_available(file.path(abs_urls()$base_url, 40 | abs_urls()$ausstats_path))); 41 | 42 | ## Test mis-specified ABS API URL 43 | expect_error(raustats_check_url_available(file.path(abs_api_urls()$base_url, 44 | abs_api_urls()$datastr_path, 45 | abs_api_urls()$sdmx_json_path))); 46 | 47 | ## Test mis-specified RBA URLs 
expect_error(raustats_check_url_available(file.path(rba_urls()$base_url, 49 | rba_urls()$stats_path, 50 | "Table_1"))); 51 | }) 52 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | -*- mode: markdown -*- 2 | 3 | # TO DO - Feature requests 4 | 5 | * Enable `releases="July 2016"` to work for the latest releases in 6 | `abs_cat_tables` and `abs_cat_stats`. 7 | 8 | 9 | ## Completed functions 10 | 11 | * Add argument: `format = c("table", "raw")` argument to `abs_stats` function to 12 | provide option to return results as raw JSON 13 | 14 | * Add functionality to check for zero-length data sets returned by `abs_stats`, 15 | to avoid failing on bind_rows step. [COMPLETED 2019-12-20] 16 | 17 | * The ABS Engineering Construction tables seem to require that the 18 | `sub(regex_table_name, ...)` has `ignore.case = TRUE` (to capture "TABLE") - 19 | [COMPLETED 2018-09-15] 20 | 21 | * Add function to list all available releases for a specified catalogue 22 | number. [COMPLETED 2019-12-20 - `abs_cat_releases`] 23 | 24 | * Fix path variables in `abs_cat_tables` function. 25 | 26 | * Fix `rba_stats` function - error on loading tables D2 & G3, among possible 27 | others. (Reported by David Stephan ) - [COMPLETED 2019-12-20] 28 | 29 | 30 | ## ABS API functions 31 | 32 | * `abs_stats function` - Add `simplify` argument to `abs_stats` function to 33 | provide option to return raw sdmx-json format output 34 | 35 | * Simplify `abs_cachetable` to include only a list of datasets and call 36 | abs_metadata each time detailed dataset information is required. 37 | 38 | * Wrap all url calls inside a `try`/`catch` 39 | - Packages which use Internet resources should fail gracefully with an informative message 40 | if the resource is not available 41 | 42 | * Test all ABS catalogue functions 43 | 44 | * `abs_cat_stats` 45 | - Insert code to handle Date class `releases` arguments, e.g. releases=as.Date("2017-12-01"). 46 | - Add `header_start` and `header_rows` argument to `abs_cat_stats` function. 47 | - Add `return_urls` functionality to `abs_cat_stats` function - [DONE 2018-11-01] 48 | 49 | * `abs_cat_tables` 50 | - Add code to handle varying number of paths returned by `abs_cat_tables` 51 | - Remove Released 'dd/mm/yyyy' columns from final results. 52 | - Modify `abs_cat_tables` function to list tables for specified catalogue numbers. [DONE 2018-11-05] 53 | - Generalise `abs_cat_tables` function to handle non-time series results with only one column of URLs. [DONE 2018-11-06] 54 | 55 | * `abs_cat_unzip` 56 | - Added exdir argument - [DONE 2018-11-01] 57 | 58 | * `abs_cat_download` 59 | - Added exdir argument - [DONE 2018-11-01] 60 | 61 | * Candidate additional arguments for `abs_stats` 62 | #' `@param include_lastUpdated = FALSE` A character vector of regular expressions denoting 63 | #' tables to download. The default ('All') downloads all time series spreadsheet tables for each 64 | #' specified catalogue. Use a list to specify different table sets for each specified ABS 65 | #' catalogue number. 
66 | 67 | 68 | * Check warning messages issued by `rba_stats(url = ...)`: 69 | #' Warning messages: 70 | #' 1: In eval(substitute(list(...)), `_data`, parent.frame()) : 71 | #' NAs introduced by coercion 72 | -------------------------------------------------------------------------------- /data-raw/ABS-TSS-Catalogue-Numbers.csv: -------------------------------------------------------------------------------- 1 | publication_title,catalogue_no,abs_url,publication_url,last_updated,type 2 | Australian Demographic Statistics,"3101.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3101.0,NA,time series 3 | Overseas Arrivals and Departures,"3401.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3401.0,NA,time series 4 | Australian System of National Accounts,"5204.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5204.0,NA,time series 5 | "National Accounts: National Income, Expenditure and Product","5206.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5206.0,NA,time series 6 | National Accounts: Finance and Wealth,"5232.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5232.0,NA,time series 7 | Balance of Payments and International Investment,"5302.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5302.0,NA,time series 8 | International Trade in Goods and Services,"5368.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/5368.0,NA,time series 9 | Labour Force,"6202.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6202.0,NA,time series 10 | Average Weekly Earnings,"6302.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6302.0,NA,time series 11 | Employee Earnings and Hours,"6306.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6306.0,NA,time series 12 | Wage Price Indexes,"6345.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6345.0,NA,time series 13 | Consumer Price Index,"6401.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6401.0,NA,time series 14 | Producer Price Indexes,"6427.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/6427.0,NA,time series 15 | Retail Trade,"8501.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/8501.0,NA,time series 16 | Engineering Construction,"8762.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/8762.0,NA,time series 17 | Australian Historical Population Statistics,"3105.0.65.001",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3105.0.65.001,NA,panel 18 | "Population Projections, Australia","3222.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3222.0,NA,panel 19 | "Regional Population Growth, Australia","3218.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3218.0,NA,panel 20 | "Regional Population Growth, Australia","3236.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/3218.0,NA,panel 21 | "Value of Principal Agricultural Commodities Produced, Australia","7501.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7501.0,NA,summary 22 | "Value of Agricultural Commodities Produced, Australia","7503.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7503.0,NA,summary 23 | "Principal Agricultural Commodities, Australia, Preliminary","7111.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7111.0,NA,summary 24 | "Agricultural Commodities, 
Australia","7121.0",http://www.abs.gov.au/,http://www.abs.gov.au/AUSSTATS/abs@.nsf/mf/7121.0,NA,summary 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # raustats: An R package for accessing ABS and RBA statistics 5 | 6 | An R package for downloading Australian economic statistics from the 7 | Australian Bureau of Statistics (ABS) and Reserve Bank of Australia 8 | (RBA) websites. 9 | 10 | ## Installation 11 | 12 | You can install the released version of raustats from 13 | [CRAN](https://cran.r-project.org) with: 14 | 15 | ``` r 16 | install.packages("raustats") 17 | ``` 18 | 19 | or the latest development version from github with: 20 | 21 | ``` r 22 | devtools::install_github("mitcda/raustats") 23 | ``` 24 | 25 | ## How to use raustats 26 | 27 | To learn more about the raustats package, start with the vignettes: 28 | 29 | ``` r 30 | browseVignettes(package = "raustats") 31 | ``` 32 | 33 | ## Introduction 34 | 35 | The [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) is 36 | Australia’s national statistical agency, providing trusted official statistics 37 | on a wide range of economic, social, population and environmental matters of 38 | importance to Australia. Key ABS statistical collections include: 39 | 40 | - Australian National Accounts 41 | - International Trade 42 | - Consumer Price Index (CPI) 43 | - Labour Force 44 | - Population trends 45 | 46 | The [Reserve Bank of Australia (RBA)](https://www.rba.gov.au/) is Australia’s 47 | central bank. In addition to its legislative responsibilities, it collects and 48 | publishes statistics on money, credit, the Australian banking systems and other 49 | relevant economic metrics. Key RBA statistics include: 50 | 51 | - Banking system assets and liabilities 52 | - Money and credit statistics 53 | - Household and business finances 54 | - Interest rates 55 | - Exchange rates 56 | - Inflation and inflation expectations. 57 | 58 | The ABS and RBA make their statistics primarily available through Excel and/or 59 | CSV spreadsheets. 60 | 61 | This package provides functions to search and download data and statistics from 62 | the [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) and [Reserve 63 | Bank of Australia (RBA)](https://www.rba.gov.au/) websites, as well as draft 64 | access to the [ABS.Stat](http://stat.data.abs.gov.au/) - Beta data catalogue 65 | API. 66 | 67 | ## Examples 68 | 69 | ### Downloading ABS Catalogue Statistics 70 | 71 | ABS catalogue statistics may be downloaded, by catalogue number, using the 72 | `abs_cat_stats()` function. The following example downloads all Consumer Price 73 | Index (CPI) data series (ABS Catalogue no. 6401.0). 74 | 75 | ``` r 76 | cpi_all <- abs_cat_stats("6401.0") 77 | ``` 78 | 79 | To download only the latest statistics reported in Table 1 (ABS groups Tables 1 80 | and 2), simply provide a regular expression to the `tables` argument: 81 | 82 | ``` r 83 | cpi <- abs_cat_stats("6401.0", tables="Table.+1") 84 | ``` 85 | 86 | The package also provides functions to ABS statistics via the 87 | [ABS.Stat](http://stat.data.abs.gov.au/) Beta API. See the package help and 88 | vignettes for examples. 89 | 90 | ### Downloading RBA data 91 | 92 | RBA data series may be downloaded by table number, using the `rba_stats()` 93 | function. The following example downloads Table A1 - Liabilities and Assets of 94 | the RBA. 
95 | 96 | ``` r 97 | rba_bs <- rba_stats("A1") 98 | ``` 99 | -------------------------------------------------------------------------------- /tests/testthat/test-rba-functions.R: -------------------------------------------------------------------------------- 1 | context("RBA functions") 2 | 3 | test_that("rba_stats_url returns valid URL", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | expect_type(rba_urls(), "list"); 10 | expect_s3_class(rvest::html_session(rba_urls()$base_url), "session"); 11 | }) 12 | 13 | 14 | test_that("rba_table_cache returns data.frame class object", 15 | { 16 | skip_on_cran() 17 | skip_on_travis() 18 | skip_on_appveyor() 19 | 20 | expect_s3_class(rba_table_cache(), "data.frame"); 21 | }) 22 | 23 | 24 | test_that("rba_search fails well", 25 | { 26 | skip_on_cran() 27 | skip_on_travis() 28 | skip_on_appveyor() 29 | 30 | expect_error(rba_search()) 31 | }) 32 | 33 | 34 | test_that("rba_search returns valid results", 35 | { 36 | skip_on_cran() 37 | skip_on_travis() 38 | skip_on_appveyor() 39 | 40 | expect_s3_class(rba_search(pattern = "Liabilities and Assets"), "data.frame"); 41 | expect_s3_class(rba_search(pattern = "Consumer Prices"), "data.frame"); 42 | expect_s3_class(rba_search(pattern = "Population"), "data.frame"); 43 | }) 44 | 45 | 46 | test_that("rba_search returns valid results", 47 | { 48 | skip_on_cran() 49 | skip_on_travis() 50 | skip_on_appveyor() 51 | 52 | expect_s3_class(rba_search("G3"), "data.frame"); 53 | expect_s3_class(rba_search("G3", series_type="discontinued data", update_cache=TRUE), 54 | "data.frame"); 55 | }) 56 | 57 | 58 | test_that("rba_file_download returns valid data.frame", 59 | { 60 | skip_on_cran() 61 | skip_on_travis() 62 | skip_on_appveyor() 63 | 64 | downloaded_tables <- rba_file_download("https://www.rba.gov.au/statistics/tables/xls/d01hist.xls") 65 | expect_type(downloaded_tables, "character"); 66 | expect_match(downloaded_tables, "\\w+\\.xlsx*$"); 67 | expect_true(all(file.exists(downloaded_tables))); 68 | 69 | }) 70 | 71 | 72 | test_that("rba_read_tss returns valid data.frame", 73 | { 74 | skip_on_cran() 75 | skip_on_travis() 76 | skip_on_appveyor() 77 | 78 | rba_urls <- rba_search(pattern = "Liabilities and Assets")$url 79 | rba_files <- sapply(rba_urls, rba_file_download); 80 | expect_s3_class(rba_read_tss(rba_files), "data.frame"); 81 | }) 82 | 83 | 84 | test_that("rba_stats returns valid data.frame", 85 | { 86 | skip_on_cran() 87 | skip_on_travis() 88 | skip_on_appveyor() 89 | 90 | ## Test 'table_no' option function call 91 | expect_s3_class(rba_stats("A1"), "data.frame"); 92 | expect_s3_class(rba_stats(table_no="A1"), "data.frame"); 93 | ## Test 'pattern' option function call 94 | expect_s3_class(rba_stats(pattern="Liabilities and Assets"), "data.frame"); 95 | ## Test 'url' option function call 96 | url <- "https://www.rba.gov.au/statistics/tables/xls/d01hist.xls"; 97 | expect_false(httr::http_error(url)); 98 | expect_s3_class(rba_stats(url=url), "data.frame"); 99 | }) 100 | 101 | 102 | test_that("More tests that rba_stats returns valid data.frame", 103 | { 104 | skip_on_cran() 105 | skip_on_travis() 106 | skip_on_appveyor() 107 | 108 | ## Specific table tests: 109 | ## 1) Table G1 110 | expect_s3_class(rba_stats(table_no = "G1"), "data.frame"); 111 | ## 2) Table D2 112 | expect_s3_class(rba_stats(table_no = "D2"), "data.frame"); 113 | ## 3) Table G3 114 | expect_s3_class(rba_stats(table_no = "G3"), "data.frame"); 115 | }) 116 | 
-------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r setup, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | fig.path = "man/figures/README-", 10 | out.width = "100%" 11 | ) 12 | ``` 13 | 14 | 15 | 16 | # raustats: An R package for accessing ABS and RBA statistics 17 | 18 | An R package for downloading Australian economic statistics from the Australian 19 | Bureau of Statistics (ABS) and Reserve Bank of Australia (RBA) websites. 20 | 21 | 22 | ## Installation 23 | 24 | You can install the released version of `raustats` from [CRAN](https://CRAN.R-project.org) with: 25 | 26 | ```{r eval=FALSE} 27 | install.packages("raustats") 28 | ``` 29 | or the latest development version from github with: 30 | 31 | ```{r, eval = FALSE} 32 | remotes::install_github("mitcda/raustats") 33 | ``` 34 | 35 | ## How to use raustats 36 | 37 | To learn more about the raustats package, start with the vignettes: 38 | ```{r, eval = FALSE} 39 | browseVignettes(package = "raustats") 40 | ``` 41 | 42 | 43 | ## Introduction 44 | 45 | The [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) is 46 | Australia’s national statistical agency, providing trusted official statistics 47 | on a wide range of economic, social, population and environmental matters of 48 | importance to Australia. Key ABS statistical collections include: 49 | 50 | * Australian National Accounts 51 | * International Trade 52 | * Consumer Price Index (CPI) 53 | * Labour Force 54 | * Population trends 55 | 56 | 57 | The [Reserve Bank of Australia (RBA)](https://www.rba.gov.au/) is Australia's 58 | central bank. In addition to its legislative responsibilities, it collects and 59 | publishes statistics on money, credit, the Australian banking systems and other 60 | relevant economic metrics. Key RBA statistics include: 61 | 62 | * Banking system assets and liabilities 63 | * Money and credit statistics 64 | * Household and business finances 65 | * Interest rates 66 | * Exchange rates 67 | * Inflation and inflation expectations. 68 | 69 | The ABS and RBA make their statistics primarily available through Excel and/or 70 | CSV spreadsheets. 71 | 72 | This package provides functions to search and download data and statistics from 73 | the [Australian Bureau of Statistics (ABS)](http://www.abs.gov.au/) and [Reserve 74 | Bank of Australia (RBA)](https://www.rba.gov.au/) websites, as well as draft 75 | access to the [ABS.Stat](http://stat.data.abs.gov.au/) - Beta data catalogue 76 | API. 77 | 78 | 79 | ## Examples 80 | 81 | ### Downloading ABS Catalogue Statistics 82 | 83 | ABS catalogue statistics may be downloaded, by catalogue number, using the 84 | `abs_cat_stats()` function. The following example downloads all Consumer Price Index 85 | (CPI) data series (ABS Catalogue no. 6401.0). 86 | 87 | ```{r eval=FALSE} 88 | cpi_all <- abs_cat_stats("6401.0") 89 | ``` 90 | 91 | To download only the latest statistics reported in Table 1 (ABS groups Tables 1 and 92 | 2), simply provide a regular expression to the `tables` argument: 93 | 94 | ```{r eval=FALSE} 95 | cpi <- abs_cat_stats("6401.0", tables="Table.+1") 96 | ``` 97 | 98 | The package also provides functions to access ABS statistics via the 99 | [ABS.Stat](http://stat.data.abs.gov.au/) Beta API. See the package help and 100 | vignettes for examples. 
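For illustration, a minimal sketch of an ABS.Stat API query. It simply mirrors the example filter shown on the `abs_stats()` help page (the CPI dataset code and the dimension values are taken from that example; use `abs_dimensions("CPI")` to confirm the current codes):

```{r eval=FALSE}
## Find ABS.Stat datasets matching a keyword
abs_search("consumer price index")

## List the dimensions of the CPI dataset
abs_dimensions("CPI")

## Download CPI series using the example filter from ?abs_stats
cpi_api <- abs_stats(dataset = "CPI",
                     filter = list(MEASURE = 1, REGION = c(1:8, 50),
                                   INDEX = 10001, TSEST = 10, FREQUENCY = "Q"))
```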
101 | 102 | 103 | ### Downloading RBA data 104 | 105 | RBA data series may be downloaded by table number, using the `rba_stats()` 106 | function. The following example downloads Table A1 - Liabilities and Assets of 107 | the RBA. 108 | 109 | ```{r eval=FALSE} 110 | rba_bs <- rba_stats("A1") 111 | ``` 112 | -------------------------------------------------------------------------------- /R/date-utilities.R: -------------------------------------------------------------------------------- 1 | ### Function: excel2Date 2 | #' @name excel2Date 3 | #' @title Convert Excel numeric date to R Date object 4 | #' @description Function to convert Excel numeric date to R Date object 5 | #' @param x Excel-based date numeric object 6 | #' @return Date object 7 | ## #' @examples 8 | ## #' \donttest{ 9 | ## #' raustats:::excel2Date(43445); 10 | ## #' } 11 | #' @keywords internal 12 | excel2Date <- function(x) { 13 | as.Date(x, origin="1899-12-30"); 14 | } 15 | 16 | ### Function: quarter2Date 17 | #' @name quarter2Date 18 | #' @title Convert dates formatted as year-quarter to dates objects 19 | #' @description Function to convert dates formatted as year-quarter to date-format objects 20 | #' @param x Year-quarter date format 21 | #' @param base.month Specifies base month for first quarter. Can be a scalar: 1,2,3 or character 22 | #' object: Jan, Feb, Mar. 23 | #' @param format The input date format. Default is "\%Y-Q\%q". 24 | #' @return This function returns a Date format object. 25 | #' @author David Mitchell 26 | ## #' @examples 27 | ## #' \donttest{ 28 | ## #' x <- c("1960-Q1","1960-Q2","1960-Q3","1960-Q4","1961-Q1","1961-Q2"); 29 | ## #' quarter2Date(x); 30 | ## #' quarter2Date(x, base.month="Jan"); 31 | ## #' } 32 | #' @keywords internal 33 | quarter2Date <- function(x, base.month="Mar", format="%Y-Q%q") 34 | { 35 | ## Check format 36 | if (!grepl("%Y", format) & !grepl("%q", format)) 37 | stop("Format should contain year (%Y) and quarter (%q) regular expressions.") 38 | format <- sub("(%q)", "(\\\\d)", 39 | sub("(%Y)", "(\\\\d{4})", format)); 40 | Year <- as.integer(sub(format,"\\1", x)); 41 | Qtr <- as.integer(sub(format,"\\2", x)); 42 | ## Re-encode month 43 | Mth <- if (base.month == 1 | base.month == "Jan") { 44 | Qtr * 3 - 2; 45 | } else if (base.month == 2 | base.month == "Feb") { 46 | Qtr * 3 - 1; 47 | } else if (base.month == 3 | base.month == "Mar") { 48 | Qtr * 3; 49 | } else { 50 | stop(paste("base.month should be either a scalar = 1,2 or 3", 51 | "or a character object = \"Jan\", \"Feb\" or \"Mar\".")); 52 | } 53 | z <- as.Date(paste(Year, month.abb[Mth], "01", sep="-"), format="%Y-%b-%d"); 54 | return(z); 55 | } 56 | 57 | 58 | ### Function: last_day 59 | #' @name last_day 60 | #' @title Set Date object to the last day of the month 61 | #' @description Function to change the date of a Date object to the last day of the month 62 | #' @importFrom lubridate ceiling_date days 63 | #' @param date date object 64 | #' @return Date object 65 | #' @author David Mitchell 66 | ## #' @examples 67 | ## #' \donttest{ 68 | ## #' date <- seq.Date(as.Date("2005-06-01"), length=36, by="month"); 69 | ## #' last_day(date) 70 | ## #' } 71 | #' @keywords internal 72 | last_day <- function(date) 73 | ceiling_date(date, "month") - days(1); 74 | 75 | 76 | ### Function: fin_year 77 | #' @name fin_year 78 | #' @title Create financial year date object 79 | #' @description Function to create a financial year date object 80 | #' @param date date object 81 | #' @param ending character string abbreviation or number 
denoting ending month of the financial year 82 | #' @return Date object 83 | #' @author David Mitchell 84 | ## #' @examples 85 | ## #' \donttest{ 86 | ## #' x <- seq.Date(as.Date("2005-06-01"), length=36, by="month"); 87 | ## #' fin_year(x) 88 | ## #' } 89 | #' @keywords internal 90 | fin_year <- function(date, ending="Jun") 91 | { 92 | if (is.character(ending)) { 93 | if (!substr(ending,1,3) %in% month.abb) 94 | stop(sprintf("Invalid month supplied to ending: %s", ending)) 95 | ending <- match(ending, month.abb); 96 | } else { 97 | if (!ending %in% 1:12) 98 | stop(sprintf("Invalid month supplied: %d - should be in 1:12", ending)); 99 | } 100 | 101 | Year <- as.integer(format(date, "%Y")); 102 | Month <- as.integer(format(date, "%m")); 103 | Year <- ifelse(Month > ending, Year + 1, Year); 104 | z <- as.Date(paste(Year, month.abb[ending], "01", sep="-"), format="%Y-%b-%d"); 105 | return(z); 106 | } 107 | -------------------------------------------------------------------------------- /man/abs_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/abs-api-functions.R 3 | \name{abs_stats} 4 | \alias{abs_stats} 5 | \title{Download data from the ABS API} 6 | \usage{ 7 | abs_stats( 8 | dataset, 9 | filter, 10 | start_date, 11 | end_date, 12 | lang = c("en", "fr"), 13 | dimensionAtObservation = c("AllDimensions", "TimeDimension", "MeasureDimension"), 14 | detail = c("Full", "DataOnly", "SeriesKeysOnly", "NoData"), 15 | return_json = FALSE, 16 | return_url = FALSE, 17 | enforce_api_limits = TRUE, 18 | update_cache = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{dataset}{Character vector of ABS.Stat dataset codes. These codes correspond to the 23 | \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 24 | \code{abs_cachelist}, or the result of \code{abs_indicators}.} 25 | 26 | \item{filter}{A list that contains filter of dimensions available in the specified \code{series} 27 | to use in the API call. If NULL, no filter is set and the query tries to return all dimensions 28 | of the dataset. Valid dimensions to include in the list supplied to filter include: MEASURE, 29 | REGION, INDEX, TSEST and FREQUENCY.} 30 | 31 | \item{start_date}{Numeric or character. If numeric it must be in %Y form (i.e. four digit 32 | year). For data at the sub-annual granularity the API supports a format as follows: Monthly 33 | data -- '2016-M01', Quarterly data -- '2016-Q1', Semi-annual data -- '2016-B2', Financial year 34 | data -- '2016-17'.} 35 | 36 | \item{end_date}{Numeric or character (refer to \code{startdate}).} 37 | 38 | \item{lang}{Language in which to return the results. If \code{lang} is unspecified, english is 39 | the default. ## @param remove_na If \code{TRUE}, remove blank or NA observations. If 40 | \code{FALSE}, no blank or NA ## values are removed from the return. ## @param include_unit If 41 | \code{TRUE}, the column unit is not removed from the return. If ## \code{FALSE}, this column is 42 | removed. ## @param include_obsStatus If \code{TRUE}, the column obsStatus is not removed from 43 | the return. If ## \code{FALSE}, this column is removed.} 44 | 45 | \item{dimensionAtObservation}{The identifier of the dimension to be attached at the observation 46 | level. The default order is: 'AllDimensions', 'TimeDimension' and 'MeasureDimension'. 
47 | AllDimensions results in a flat list of observations without any grouping.} 48 | 49 | \item{detail}{This argument specifies the desired amount of information to be returned. Possible 50 | values are: 51 | 52 | \itemize{ 53 | \item Full: all data and documentation, including annotations (default) 54 | \item DataOnly: attributes – and therefore groups – will be excluded 55 | \item SeriesKeysOnly: only the series elements and the dimensions that make up the series keys 56 | \item NoData: returns the groups and series, including attributes and annotations, without observations (all values = NA) 57 | }} 58 | 59 | \item{return_json}{Logical. Default is \code{FALSE}. If \code{TRUE}, the function returns the 60 | result in raw sdmx-json.} 61 | 62 | \item{return_url}{Default is \code{FALSE}. If \code{TRUE}, the function returns the generated 63 | request URL and does not submit the request.} 64 | 65 | \item{enforce_api_limits}{If \code{TRUE} (the default), the function enforces the ABS.Stat 66 | RESTful API limits and will not submit the query if the URL string length exceeds 1000 67 | characters or the query would return more than 1 million records. If \code{FALSE}, the function 68 | submits the API call regardless and attempts to return the results.} 69 | 70 | \item{update_cache}{Logical expression, if FALSE (default), use the cached list of available 71 | ABS.Stat datasets, if TRUE, update the list of available datasets.} 72 | } 73 | \value{ 74 | Returns a data frame of the selected series from the specified ABS dataset. 75 | } 76 | \description{ 77 | This function queries and returns data for a specified ABS dataset from the ABS API. 78 | } 79 | \note{ 80 | The data query submitted by this function uses the ABS RESTful API based on the SDMX-JSON 81 | standard. It has a maximum allowable character limit of 1000 characters allowed in the data 82 | URL. 83 | 84 | Further limitations known at this time include: 85 | \itemize{ 86 | \item Only anonymous queries are supported, there is no authentication 87 | \item Each response is limited to no more than 1 million observations 88 | \item Errors are not returned in the JSON format but HTTP status codes and messages are 89 | set according to the Web Services Guidelines 90 | \item The lastNObservations parameter is not supported 91 | \item Observations follow the time series (or import-specific) order even if 92 | \code{dimensionAtObservation=AllDimensions} is used. 93 | } 94 | } 95 | \examples{ 96 | \donttest{ 97 | x <- abs_stats(dataset="CPI", filter="all", return_url=TRUE); 98 | x <- abs_stats(dataset="CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 99 | INDEX=10001, TSEST=10, FREQUENCY="Q")); 100 | x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, 101 | INDEX=10001, TSEST=10, FREQUENCY="Q")); 102 | x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, INDEX=10001, 103 | TSEST=10, FREQUENCY="Q"), return_url=TRUE); 104 | } 105 | } 106 | \author{ 107 | David Mitchell 108 | } 109 | -------------------------------------------------------------------------------- /R/z-unused-functions.R: -------------------------------------------------------------------------------- 1 | #' @name abs_cache 2 | #' @title Download updated list of datasets and dimensions information from the ABS API 3 | #' @description TBC 4 | #' @param lang Language in which to return the results. If \code{lang} is unspecified, English 5 | #' ('en') is the default. 6 | #' @param progress Report download progress. Arguments accepts integer, logical or NULL. 
Set 7 | #' \code{progress} to \code{NULL} (default) to disable progress 8 | #' reporting. Otherwise set progress equal to integer value frequency. 9 | #' 10 | #' @return A list of available ABS data series each comprising a list of available data dimensions, 11 | #' typically containing: 12 | #' \itemize{ 13 | #' \item \code{MEASURE}: Measurement units (e.g. Persons, $ million, Index, Percentage change, etc.) 14 | #' \item \code{REGION}: Australian region name 15 | #' \item \code{INDEX}: Data item code and description 16 | #' \item \code{TSEST}: Time series estimate type (e.g. Original, Seasonally Adjusted, etc.) 17 | #' \item \code{FREQUENCY}: Available data frequency (Monthly, Quarterly, Annual) 18 | #' \item \code{TIME}: Available observation period index 19 | #' \item \code{OBS_STATUS}: Observation status notes code and description 20 | #' (e.g. 'r' - revised, 'q' - not available, 'u' - not applicable) 21 | #' \item \code{TIME_FORMAT}: Available time format (e.g. Annual, Quarterly, Monthly, Daily). 22 | #' } 23 | #' 24 | #' @note Saving the results of this function and using it as the cache parameter in \code{abs_stats} 25 | #' and \code{abs_search} replaces the default cached version \code{abs_cachelist} that comes with 26 | #' the package. Note, however, that this function can take a long time to extract metadata for all 27 | #' ABS datasets (e.g. approximately 20 minutes for 400 data sets), so use sparingly. For this 28 | #' reason, we also recommend specifying a progress update using the \code{progress} argument 29 | #' (default: 10). 30 | 31 | #' Not all data returns have support for languages other than english. If the specific 32 | #' return does not support your requested language by default it will return NA. The options for 33 | #' \code{lang} on the ABS API are presently: 34 | #' \itemize{ 35 | #' \item en: English 36 | #' \item fr: French 37 | #' } 38 | #' 39 | #' @export 40 | #' @author David Mitchell 41 | #' @examples 42 | #' \donttest{ 43 | #' z <- abs_cache(lang='en', progress=5) 44 | #' } 45 | abs_cache <- function(lang="en", progress=10) 46 | { 47 | x <- abs_datasets(lang=lang) 48 | if ( !is.null(progress) ) { 49 | t0 <- proc.time(); 50 | i_report <- unique(c(seq(progress, nrow(x), by=progress), nrow(x))); 51 | } 52 | z <- lapply(seq_len(nrow(x)), 53 | function(i) { 54 | ## Download metadata 55 | y <- abs_metadata(x$id[i], lang=lang); 56 | ## Add dataset id & name information as attributes 57 | attr(y, "dataset") <- x$id[i]; 58 | attr(y, "agency") <- x$agencyID[i]; 59 | attr(y, "dataset_desc") <- x$name[i]; 60 | ## Report progress 61 | if (!is.null(progress)) 62 | if (i %in% i_report) 63 | cat(sprintf("Retrieved metadata for %d (of %d) datasets. Total time: %.2f", 64 | i, nrow(x), (proc.time() - t0)["elapsed"]), "\n"); 65 | return(y) 66 | }); 67 | names(z) <- x$id; 68 | return(z); 69 | } 70 | 71 | 72 | #' @name abs_cachelist2table 73 | #' @title Converts an abs_cachelist to abs_cachetable 74 | #' @description This function converts an \code{abs_cachelist} to an \code{abs_cachetable} suitable 75 | #' for use with \code{\link{abs_search}}. 76 | #' @importFrom stats setNames 77 | #' @param cache An existing cachelist of available ABS datasets created by \code{abs_cachelist}. If 78 | #' \code{NULL}, uses the stored package cachelist. 79 | #' 80 | #' @return A table containing three columns: 81 | #' \itemize{ 82 | #' \item \code{dataset}: ABS API dataset identifier. 83 | #' \item \code{dataset_description}: ABS API dataset description. 
84 | #' \item \code{measure}: ABS API dataset measure identifier. 85 | #' \item \code{measure_description}: ABS API dataset measure description 86 | #' } 87 | #' 88 | #' @author David Mitchell 89 | #' @note This is an internal library function and is not exported. 90 | #' @examples 91 | #' \donttest{ 92 | #' abs_ct <- abs_cachelist2table(raustats::abs_cachelist) 93 | #' } 94 | abs_cachelist2table <- function(cache) 95 | { 96 | if (missing(cache)) 97 | cache <- raustats::abs_cachelist; 98 | cache_table <- 99 | suppressWarnings(lapply(cache, 100 | function(x) { 101 | names(x) <- attr(x, "concept"); 102 | y <- setNames( 103 | data.frame(attr(x, "dataset"), 104 | attr(x, "dataset_desc")##, 105 | ## if(is.null(x$MEASURE$Code)) "" else x$MEASURE$Code, 106 | ## if(is.null(x$MEASURE$Description)) "" else x$MEASURE$Description, 107 | ## if(is.null(x$INDEX$Code)) "" else x$INDEX$Code, 108 | ## if(is.null(x$INDEX$Description)) "" else x$INDEX$Description 109 | ), 110 | c("dataset","dataset_description"##, 111 | ## "measure","measure_description", 112 | ## "index","index_description" 113 | )); 114 | return(y) 115 | }) 116 | ); 117 | cache_table <- do.call(rbind, cache_table); 118 | row.names(cache_table) <- seq_len(nrow(cache_table)) 119 | return(cache_table); 120 | } 121 | -------------------------------------------------------------------------------- /tests/testthat/test-abs-cat-functions.R: -------------------------------------------------------------------------------- 1 | context("ABS Catalogue functions") 2 | 3 | test_that("abs_ausstats_url returns valid URL", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | expect_type(abs_urls()$base_url, "character"); 10 | expect_type(abs_urls()$ausstats_path, "character"); 11 | expect_type(abs_urls()$downloads_regex, "character"); 12 | expect_type(abs_urls()$releases_regex, "character"); 13 | }) 14 | 15 | 16 | test_that("abs_cat_tables fails well", 17 | { 18 | skip_on_cran() 19 | skip_on_travis() 20 | skip_on_appveyor() 21 | 22 | invalid_cat_no <- "5205.0" 23 | expect_error(abs_cat_tables(invalid_cat_no)); 24 | }) 25 | 26 | 27 | test_that("abs_cat_tables returns a valid data.frame", 28 | { 29 | skip_on_cran() 30 | skip_on_travis() 31 | skip_on_appveyor() 32 | 33 | ## ABS Catalogue tables - 5206.0 34 | abs_tables_5206 <- abs_cat_tables("5206.0") 35 | expect_s3_class(abs_tables_5206, "data.frame"); 36 | 37 | ## ABS Catalogue tables - 5206.0, with URLs 38 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 39 | expect_s3_class(abs_tables_5206_url, "data.frame"); 40 | 41 | ## ABS Catalogue tables - 6401.0, types="tss" 42 | abs_tables_6401 <- abs_cat_tables("6401.0", releases="Latest", types="tss"); 43 | expect_s3_class(abs_tables_6401, "data.frame"); 44 | 45 | ## ABS Catalogue tables - 1270.0.55.003, types="css" 46 | abs_tables_1270.0.55.003 <- abs_cat_tables("1270.0.55.003", releases="Latest", types="css"); 47 | expect_s3_class(abs_tables_1270.0.55.003, "data.frame"); 48 | 49 | ## ABS Catalogue tables - 1292, types="pub" 50 | abs_tables_1292 <- abs_cat_tables("1292.0", releases="Latest", types="pub", include_urls=TRUE); 51 | expect_s3_class(abs_tables_1292, "data.frame"); 52 | 53 | ## ABS Catalogue tables - 8731 54 | abs_tables_8731 <- abs_cat_tables("8731.0", releases="Latest", include_urls=TRUE); 55 | expect_s3_class(abs_tables_8731, "data.frame"); 56 | }) 57 | 58 | 59 | test_that("abs_cat_releases fails well", 60 | { 61 | skip_on_cran() 62 | skip_on_travis() 63 | skip_on_appveyor() 64 | 65 | 
## Check error on invalid ABS Cat. no. 66 | bad_url <- "Invalid_Cat_no" 67 | expect_error(abs_cat_releases(bad_url)); 68 | ## No ABS Cat. no. 69 | expect_error(abs_cat_releases()); 70 | }) 71 | 72 | 73 | test_that("abs_cat_releases returns a valid data.frame", 74 | { 75 | skip_on_cran() 76 | skip_on_travis() 77 | skip_on_appveyor() 78 | 79 | ## ABS Catalogue releases - 5206.0 80 | abs_release_5206 <- abs_cat_releases("5206.0"); 81 | expect_s3_class(abs_release_5206, "data.frame"); 82 | 83 | ## ABS Catalogue releases - 5206.0, with URLs 84 | abs_release_5206_url <- abs_cat_releases("5206.0", include_urls=TRUE); 85 | expect_s3_class(abs_release_5206_url, "data.frame"); 86 | 87 | ## ABS Catalogue tables - 6401.0 88 | abs_release_6401 <- abs_cat_releases("6401.0"); 89 | expect_s3_class(abs_release_6401, "data.frame"); 90 | 91 | ## ABS Catalogue tables - 6401.0, with URLs 92 | abs_release_6401_url <- abs_cat_releases("6401.0", include_urls=TRUE); 93 | expect_s3_class(abs_release_6401_url, "data.frame"); 94 | }) 95 | 96 | 97 | 98 | test_that("abs_local_filename created valid file name", 99 | { 100 | skip_on_cran() 101 | skip_on_travis() 102 | skip_on_appveyor() 103 | 104 | test_all <- "http://www.abs.gov.au/ausstats/meisubs.NSF/log?openagent&all_time_series_workbooks.zip&5206.0&Time%20Series%20Spreadsheet&23EA5772544F27BECA2582FE001507D1&0&Jun%202018&05.09.2018&Latest" 105 | expect_match(abs_local_filename(test_all), "^\\w+\\.(zip|xlsx*)$"); 106 | 107 | test_table_xls <- "http://www.abs.gov.au/ausstats/meisubs.NSF/log?openagent&5206001_key_aggregates.xls&5206.0&Time%20Series%20Spreadsheet&C1145211D5AF80E5CA2582FE0014F063&0&Jun%202018&05.09.2018&Latest" 108 | expect_match(abs_local_filename(test_table_xls), "^\\w+\\.(zip|xlsx*)$"); 109 | 110 | test_table_zip <- "http://www.abs.gov.au/ausstats/meisubs.NSF/log?openagent&5206001_key_aggregates.zip&5206.0&Time%20Series%20Spreadsheet&C1145211D5AF80E5CA2582FE0014F063&0&Jun%202018&05.09.2018&Latest" 111 | expect_match(abs_local_filename(test_table_zip), "^\\w+\\.(zip|xlsx*)$"); 112 | }) 113 | 114 | 115 | test_that("abs_cat_download downloads specified table files", 116 | { 117 | skip_on_cran() 118 | skip_on_travis() 119 | skip_on_appveyor() 120 | 121 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 122 | downloaded_tables <- abs_cat_download(head(abs_tables_5206_url$path_xls, 3), exdir=tempdir()); 123 | expect_type(downloaded_tables, "character"); 124 | expect_match(downloaded_tables, "\\w+\\.(zip|xlsx*)$"); 125 | expect_true(all(file.exists(downloaded_tables))) 126 | }) 127 | 128 | 129 | test_that("abs_cat_unzip extracts from valid filenames", 130 | { 131 | skip_on_cran() 132 | skip_on_travis() 133 | skip_on_appveyor() 134 | 135 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 136 | downloaded_tables <- abs_cat_download(abs_tables_5206_url$path_zip %>% .[!is.na(.)], 137 | exdir=tempdir()); 138 | extracted_files <- abs_cat_unzip(downloaded_tables); 139 | expect_type(extracted_files, "character"); 140 | expect_match(extracted_files, "\\w+\\.xlsx*$"); 141 | expect_true(all(file.exists(extracted_files))); 142 | }) 143 | 144 | 145 | test_that("abs_read_tss returns valid data.frame", 146 | { 147 | skip_on_cran() 148 | skip_on_travis() 149 | skip_on_appveyor() 150 | 151 | abs_tables_5206_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 152 | downloaded_tables <- abs_cat_download(abs_tables_5206_url$path_zip %>% .[!is.na(.)], 153 | exdir=tempdir()); 154 | 
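## Unzip the downloaded archives before reading the extracted Time Series Spreadsheets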
extracted_files <- abs_cat_unzip(downloaded_tables) 155 | expect_s3_class(abs_read_tss(extracted_files[1]), "data.frame"); ## Extract one file 156 | expect_s3_class(abs_read_tss(extracted_files), "data.frame"); ## Extract multiple files 157 | }) 158 | 159 | 160 | test_that("abs_cat_stats tss call returns valid data frame", 161 | { 162 | skip_on_cran() 163 | skip_on_travis() 164 | skip_on_appveyor() 165 | 166 | ## ABS Catalogue no. 5206.0 167 | expect_s3_class(abs_cat_stats("5206.0", tables="Table 1\\W+"), "data.frame"); 168 | expect_s3_class(abs_cat_stats("5206.0", tables=c("Table 1\\W+", "Table 2\\W+")), "data.frame"); 169 | ## ABS Catalogue no. 6401.0 170 | expect_s3_class(abs_cat_stats("6401.0", tables="CPI.+All Groups"), "data.frame"); 171 | expect_s3_class(abs_cat_stats("6401.0", tables="CPI.+All Groups", releases="Dec 2017"), "data.frame"); 172 | ## ABS Catalogue no. 8731.0 173 | expect_s3_class(abs_cat_stats("8731.0", tables=c("TABLE 01\\W+", "TABLE 02\\W+")), "data.frame"); 174 | }) 175 | -------------------------------------------------------------------------------- /tests/testthat/test-abs-api-functions.R: -------------------------------------------------------------------------------- 1 | context("ABS API functions") 2 | 3 | test_that("abs_api_call creates proper url", 4 | { 5 | skip_on_cran() 6 | skip_on_travis() 7 | skip_on_appveyor() 8 | 9 | expect_match(abs_api_call(path=abs_api_urls()$datastr_path, args="all"), 10 | "http:\\/\\/stat\\.data\\.abs\\.gov\\.au\\/.+\\/all"); 11 | expect_false(httr::http_error(abs_api_call(path=abs_api_urls()$datastr_path, args="all"))); 12 | }) 13 | 14 | 15 | ## test_that("abs_api_call returns error if url is invalid", 16 | ## { 17 | ## skip_on_cran() 18 | ## skip_on_travis() 19 | ## skip_on_appveyor() 20 | 21 | ## expect_true(httr::http_error(sub("\\.au", "", 22 | ## abs_api_call(path=abs_api_urls()$datastr_path, args="all")))); 23 | ## expect_true(httr::http_error(sub("Structure", "", 24 | ## abs_api_call(path=abs_api_urls()$datastr_path, args="all")))); 25 | ## }) 26 | 27 | 28 | test_that("abs_call_api creates xml_document", 29 | { 30 | skip_on_cran() 31 | skip_on_travis() 32 | skip_on_appveyor() 33 | 34 | url <- abs_api_call(path=abs_api_urls()$datastr_path, args="all"); 35 | expect_s3_class(abs_call_api(url), "xml_document"); 36 | expect_s3_class(abs_call_api(url), "xml_node"); 37 | }) 38 | 39 | 40 | test_that("abs_datasets returns object of class data.frame with specified names", 41 | { 42 | skip_on_cran() 43 | skip_on_travis() 44 | skip_on_appveyor() 45 | 46 | x <- abs_datasets(include_notes=TRUE) 47 | expect_s3_class(x, "data.frame"); 48 | expect_named(x, c("agencyID", "id", "name", "notes"), ignore.order=TRUE) 49 | }) 50 | 51 | 52 | test_that("abs_metadata returns object of class list with specified names", 53 | { 54 | skip_on_cran() 55 | skip_on_travis() 56 | skip_on_appveyor() 57 | 58 | x <- abs_metadata("CPI"); 59 | expect_type(x, "list"); 60 | expect_named(x, c("CL_CPI_MEASURE","CL_CPI_REGION","CL_CPI_INDEX","CL_CPI_TSEST", 61 | "CL_CPI_FREQUENCY","CL_CPI_TIME","CL_CPI_OBS_STATUS","CL_CPI_TIME_FORMAT"), 62 | ignore.order=TRUE); 63 | }) 64 | 65 | 66 | ## test_that("abs_cache returns object of class list with specified names", 67 | ## { 68 | ## skip_on_cran() 69 | ## skip_on_travis() 70 | ## skip_on_appveyor() 71 | 72 | ## skip("abs_cache() test skipped -- takes long time to download all ABS series.") 73 | ## abs_cachelist <- abs_cache(progress=5) 74 | ## expect_type(abs_cachelist, "list"); 75 | ## }) 76 | 77 | ## 
test_that("abs_cachelist returns object of class table with specified names", 78 | ## { 79 | ## skip_on_cran() 80 | ## skip_on_travis() 81 | ## skip_on_appveyor() 82 | 83 | ## abs_ct <- abs_cachelist2table(raustats::abs_cachelist) 84 | ## expect_s3_class(abs_ct, "data.frame"); 85 | ## expect_named(abs_ct, c("dataset","dataset_description"), ignore.order=TRUE, ignore.case=TRUE); 86 | ## }) 87 | 88 | test_that("abs_dimensions returns named data frame", 89 | { 90 | skip_on_cran() 91 | skip_on_travis() 92 | skip_on_appveyor() 93 | 94 | abs_dim <- abs_dimensions("CPI") 95 | expect_s3_class(abs_dim, "data.frame"); 96 | expect_named(abs_dim, c("name","type"), ignore.order=TRUE, ignore.case=TRUE); 97 | }) 98 | 99 | test_that("abs_search returns a list with specified names", 100 | { 101 | skip_on_cran() 102 | skip_on_travis() 103 | skip_on_appveyor() 104 | 105 | abs_dataset_search <- abs_search("consumer price index") 106 | expect_s3_class(abs_dataset_search, "data.frame"); 107 | expect_named(abs_dataset_search, c("id", "agencyID", "name"), 108 | ignore.order=TRUE, ignore.case=TRUE); 109 | 110 | abs_indicator_search <- abs_search("all groups", dataset="CPI") 111 | expect_type(abs_indicator_search, "list"); 112 | expect_named(abs_indicator_search[[1]], c("code","description"), 113 | ignore.order=TRUE, ignore.case=TRUE); 114 | }) 115 | 116 | test_that("abs_stats fails well", 117 | { 118 | skip_on_cran() 119 | skip_on_travis() 120 | skip_on_appveyor() 121 | 122 | ## library(testthat); 123 | expect_error(abs_stats()); ## No dataset provided 124 | expect_error(abs_stats("INVALID_ID")); ## Non-existent dataset 125 | expect_error(abs_stats("CPI")); ## No filter supplied 126 | expect_error(abs_stats("CPI", filter="invalid_filter")); ## Invalid filter value 127 | expect_error(abs_stats("CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 128 | INDEX=10001, TSEST=10, FREQUENCY="Q"), 129 | start_date=2008, end_date=2006)); 130 | 131 | ## Test that calls returning no observations fail cleanly 132 | expect_error(abs_stats("ABS_REGIONAL_ASGS2016", 133 | filter=list(MEASURE="CABEE_6", 134 | REGIONTYPE="STE", 135 | ASGS_2016=1:8), 136 | start_date=2008, end_date=2006)); 137 | }) 138 | 139 | test_that("abs_stats returns valid URL", 140 | { 141 | skip_on_cran() 142 | skip_on_travis() 143 | skip_on_appveyor() 144 | 145 | expect_match(abs_stats("CPI", filter="all", return_url=TRUE), 146 | "^http:\\/\\/stat.data.abs.gov.au\\/SDMX-JSON\\/data\\/CPI"); 147 | }) 148 | 149 | test_that("abs_stats returns raw JSON object", 150 | { 151 | skip_on_cran() 152 | skip_on_travis() 153 | skip_on_appveyor() 154 | 155 | ## Test specific filter and start/end dates 156 | expect_type(abs_stats("CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 157 | INDEX=10001, TSEST=10, FREQUENCY="Q"), 158 | start_date="2008-Q3", end_date="2018-Q2", return_json=TRUE), 159 | "character"); 160 | }) 161 | 162 | 163 | test_that("abs_stats returns valid data frame", 164 | { 165 | skip_on_cran() 166 | skip_on_travis() 167 | skip_on_appveyor() 168 | 169 | ## Test ERP Quarterly data extraction 170 | expect_s3_class(abs_stats("ERP_QUARTERLY", 171 | filter = list(MEASURE = 1, ## Estimated Resident Population 172 | SEX_ABS = 3, ## Persons 173 | AGE = "TT")), ## All ages 174 | "data.frame"); 175 | 176 | ## Test specific filter and start/end dates 177 | expect_s3_class(abs_stats("CPI", 178 | filter=list(MEASURE=1, REGION=c(1:8,50), 179 | INDEX=10001, TSEST=10, FREQUENCY="Q"), 180 | start_date="2008-Q3", end_date="2018-Q2"), 181 | "data.frame"); 182 | ## Test incomplete 
filter set 183 | partial_flt <- list(REGION=c(1:8,50), INDEX=10001, TSEST=10, FREQUENCY="Q"); 184 | expect_message(abs_stats("CPI", filter=partial_flt, 185 | start_date="2008-Q3", end_date="2018-Q2")); 186 | expect_s3_class(suppressWarnings(abs_stats("CPI", filter=partial_flt, 187 | start_date="2008-Q3", end_date="2018-Q2")), 188 | "data.frame"); 189 | ## Test function returns character string 190 | expect_message(abs_stats("CPI", filter=partial_flt, 191 | start_date="2008-Q3", end_date="2018-Q2", return_url=TRUE)); 192 | expect_type(suppressWarnings(abs_stats("CPI", filter=partial_flt, 193 | start_date="2008-Q3", end_date="2018-Q2", return_url=TRUE)), 194 | "character"); 195 | }) 196 | -------------------------------------------------------------------------------- /R/rba-functions.R: -------------------------------------------------------------------------------- 1 | ### Function: rba_urls 2 | #' @name rba_urls 3 | #' @title RBA base URL and data paths 4 | #' @description This function returns a list of URLs and data paths used to construct RBA data 5 | #' access calls. It is used in other functions in this package and need not be called directly. 6 | #' @return list of RBA base URL and data paths 7 | #' @author David Mitchell 8 | #' @keywords internal 9 | rba_urls <- function() 10 | list(base_url = "https://www.rba.gov.au", 11 | stats_path = "statistics", 12 | tables_path = "tables"); 13 | 14 | 15 | ### Function: rba_table_cache 16 | #' @name rba_table_cache 17 | #' @title Return list of RBA tables 18 | #' @description Function to return an updated list of data tables available from the RBA website. 19 | #' @importFrom rvest html_session jump_to html_attr html_text html_nodes 20 | #' @return data frame in long format 21 | #' @export 22 | #' @author David Mitchell 23 | #' @examples 24 | #' \donttest{ 25 | #' rba_cachelist <- rba_table_cache(); 26 | #' } 27 | rba_table_cache <- function() 28 | { 29 | ## Avoid 'No visible binding for global variables' note 30 | { table_name <- NULL } 31 | ## Create RBA URL and open session 32 | url <- file.path(rba_urls()$base_url, rba_urls()$stats_path); 33 | ## Check url available 34 | raustats_check_url_available(url); 35 | s <- html_session(url); 36 | ## Get statistical data paths 37 | .paths <- html_nodes(s, "a"); 38 | path_statistical_data <- unique(html_attr(.paths, "href")[grepl("^statistical tables$", 39 | html_text(.paths), ignore.case=TRUE)]); 40 | path_historical_data <- unique(html_attr(.paths, "href")[grepl("^historical data$", 41 | html_text(.paths), ignore.case=TRUE)]); 42 | path_discontinued_data <- unique(html_attr(.paths, "href")[grepl("^discontinued data$", 43 | html_text(.paths), ignore.case=TRUE)]); 44 | ## 45 | ## Get list of current data tables 46 | rs <- jump_to(s, path_statistical_data); 47 | .paths <- html_nodes(rs, "a"); 48 | statistical_tables <- data.frame(table_type = "statistical tables", 49 | table = html_text(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)]), 50 | url = paste0(sub("/$", "", rba_urls()$base_url), 51 | html_attr(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)], 52 | "href"))); 53 | ## Include only Excel spreadsheet tables 54 | statistical_tables <- statistical_tables[grepl("\\.xls(x*)$", statistical_tables$url, ignore.case=TRUE),]; 55 | ## 56 | ## Get list of historical data tables 57 | rs <- jump_to(s, path_historical_data); 58 | .paths <- html_nodes(rs, "a"); 59 | historical_tables <- data.frame(table_type = "historical data", 60 | table = html_text(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)]), 
61 | url = paste0(sub("/$", "", rba_urls()$base_url), 62 | html_attr(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)], 63 | "href"))); 64 | ## Exclude: i) Occasional Paper 10 65 | historical_tables <- historical_tables[!grepl("Occasional Paper.+10", historical_tables$table, 66 | ignore.case=TRUE),]; 67 | ## and ii) Survey of consumers use of payments 68 | historical_tables <- historical_tables[!grepl("survey.+of.+consumers.+use", historical_tables$url, 69 | ignore.case=TRUE),]; 70 | ## 71 | ## Get list of discontinued data tables 72 | rs <- jump_to(s, path_discontinued_data); 73 | .paths <- html_nodes(rs, "a"); 74 | discontinued_tables <- data.frame(table_type = "discontinued data", 75 | table = html_text(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)]), 76 | url = paste0(sub("/$", "", rba_urls()$base_url), 77 | html_attr(.paths[grepl("xls(x*)", .paths, ignore.case=TRUE)], 78 | "href"))); 79 | z <- rbind(statistical_tables, 80 | historical_tables, 81 | discontinued_tables); 82 | z <- transform(z, 83 | table_name = sub("(.+)\\s(-|\u2013|\u2014)\\s(\\w\\d+(\\.\\d+)*)$", "\\1", table), 84 | table_no = sub("(.+)\\s(-|\u2013|\u2014)\\s(\\w\\d+(\\.\\d+)*)$", "\\3", table)); 85 | ## Replace en-dash/em-dash with hyphen (Regular expressions: en-dash - \u2013, and em-dash - \u2014 86 | z <- transform(z, 87 | table_name = gsub("\\s+"," ", gsub("(\u2013|\u2014)","-", table_name))); 88 | ## Re-order columns and return 89 | z <- z[,c("table_no", "table_name", "table_type", "url")]; 90 | return(z); 91 | } 92 | 93 | 94 | ### Function: rba_search 95 | #' @name rba_search 96 | #' @title Return list of data tables from RBA website 97 | #' @description Function to return a list of all RBA data tables. 98 | #' @param pattern Character string or regular expression to be matched 99 | #' @param fields Character vector of column names through which to search. By default, the function 100 | #' searches 'table_no' and 'table_name'. 101 | #' @param series_type Character vector specifying one or more one of 'statistical tables', 'historical data' or 102 | #' 'discontinued data'. By default, \code{series_type = 'statistical tables'}. 103 | #' @param ignore.case Case senstive pattern match or not. 104 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 105 | #' RBA tables (\code{rba_cachelist}), if TRUE, update the list of available datasets. 
106 | #' @return data frame in long format 107 | #' @author David Mitchell 108 | #' @export 109 | #' @examples 110 | #' rba_datasets <- rba_search(pattern = "Liabilities and Assets"); 111 | rba_search <- function(pattern, fields = c("table_no", "table_name"), 112 | series_type = "statistical tables", 113 | ignore.case = TRUE, update_cache = FALSE) 114 | { 115 | if (missing(pattern)) 116 | stop("No pattern supplied") 117 | if (update_cache) { 118 | rba_cache <- rba_table_cache(); 119 | } else { 120 | rba_cache <- raustats::rba_cachelist; 121 | } 122 | if (any(!fields %in% names(rba_cache))) 123 | stop(sprintf("Field names: %s not in cache", fields[!fields %in% names(rba_cache)])) 124 | ## Return list of matching RBA datasets 125 | match_index <- sapply(fields, 126 | function(field) 127 | grep(pattern, rba_cache[, field], ignore.case=ignore.case)); 128 | match_index <- sort(unique(unlist(match_index))); 129 | z <- rba_cache[match_index,]; 130 | ## Filter RBA data sets to specified series type(s) 131 | if (!any(series_type %in% c("statistical tables", "historical data", "discontinued data"))) 132 | stop(sprintf("Invalid series type(s): %s", 133 | paste(series_type[!series_type %in% 134 | c("statistical tables", "historical data", "discontinued data")], 135 | collapse=", "))) 136 | ## START - New code 137 | ## -- UP TO HERE -- 138 | ## if (any(!tolower(series_type) %in% c("statistical tables",'historical data','discontinued data'))) 139 | ## stop("series_type argument invalid!") 140 | z <- z[grepl(paste(series_type, collapse="|"), z$table_type, ignore.case=ignore.case),] 141 | ## series_index <- grep(paste(series_type, collapse="|"), 142 | ## z$table_type, ignore.case=ignore.case); 143 | ## ## series_index <- sort(unique(unlist(series_index))); 144 | ## z <- z[series_index,]; 145 | ## END - New code 146 | return(z); 147 | } 148 | 149 | 150 | #' @name rba_stats 151 | #' @title Return data for a specified RBA time series 152 | #' @description Function to download and return specified RBA time series data. 153 | #' @param table_no Character vector specifying one or more RBA table numbers to download. 154 | #' @param pattern Character string or regular expression to be matched. 155 | #' @param url Valid URL for RBA dataset (Excel format only). 156 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 157 | #' RBA datasets, if TRUE, update the list of available datasets. 158 | #' @param ... Other arguments to \code{\link{rba_search}}, e.g. \code{series_type = "statistical_tables"}. 159 | #' @return data frame in long format 160 | #' @export 161 | #' @author David Mitchell 162 | #' @examples 163 | #' \donttest{ 164 | #' ## Example - Selecting by table_no 165 | #' x <- rba_stats("A1"); 166 | #' 167 | #' ## Example - Selecting by pattern 168 | #' x <- rba_stats(pattern="Liabilities and Assets"); 169 | #' } 170 | rba_stats <- function(table_no, pattern, url, update_cache=FALSE, ...) 
171 | { 172 | if (FALSE) { 173 | table_no = "D2" 174 | rba_g1 <- rba_stats(table_no = "G1") 175 | rba_d2 <- rba_stats(table_no = "D2") 176 | } 177 | ## Deprecate: series_type 178 | if (missing(table_no) & missing(pattern) & missing(url)) 179 | stop("One of either table_no, pattern or url must be specified.") 180 | if (!missing(table_no) & !missing(pattern)) 181 | warning("Both table_no and pattern supplied, using table_no.") 182 | if (!missing(table_no) & !missing(url)) 183 | warning("Both table_no and url supplied, using table_no.") 184 | if (!missing(pattern) & !missing(url)) 185 | warning("Both pattern and url supplied, using pattern.") 186 | ## Update RBA table list 187 | if (update_cache) { 188 | rba_cache <- rba_table_cache(); 189 | } else { 190 | rba_cache <- raustats::rba_cachelist; 191 | } 192 | 193 | ## TO DO: Add table_type attribute to vector 'urls' 194 | if (!missing(table_no)) { 195 | if (!table_no %in% rba_cache$table_no) 196 | stop("table_no not valid RBA table code") 197 | ## Changed here 198 | urls <- rba_search(pattern=table_no, update_cache=update_cache, ...)$url 199 | ## urls <- as.character(table$url[which(table_no == rba_cache$table_no)]); 200 | } 201 | 202 | if (!missing(pattern)) 203 | urls <- as.character(rba_search(pattern, update_cache=update_cache, ...)$url) 204 | 205 | if (!missing(url)) { 206 | if (!any(url %in% rba_cache$url)) 207 | stop(sprintf("Following urls invalid: %s", 208 | paste(rba_cache$url[!url %in% rba_cache$url], collapse=", "))); 209 | urls <- as.character(url) 210 | } 211 | ## Download RBA statistical data 212 | ## Internet resource checking undertaken in 'rba_file_download' function. 213 | z <- lapply(urls, rba_file_download); 214 | ## Read data 215 | data <- lapply(z, rba_read_tss); 216 | data <- do.call(rbind, data); 217 | rownames(data) <- seq_len(nrow(data)); 218 | return(data); 219 | } 220 | 221 | 222 | #' @name rba_file_download 223 | #' @title Function to download statistics files from the RBA website and store locally 224 | #' @description This function downloads one or more RBA data files at the specified by URLs and 225 | #' saves a local copy. 226 | #' @importFrom httr GET http_type http_error progress status_code write_disk 227 | #' @param data_url Character vector specifying an RBA data set URL. 228 | #' @param exdir Target directory for downloaded files (defaults to \code{tempdir()}). Directory is 229 | #' created if it doesn't exist. 230 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 231 | #' RBA datasets, if TRUE, update the list of available datasets. 232 | #' @return Downloads data from the ABS website and returns a character vector listing the location 233 | #' where files are saved. 
234 | #' @author David Mitchell 235 | #' @export 236 | rba_file_download <- function(data_url, exdir=tempdir(), update_cache=TRUE) 237 | { 238 | ## DEBUGGING CODE 239 | ## if (FALSE) { 240 | ## exdir <- tempdir() 241 | ## data_url <- head(rba_table_cache()$url, 1); 242 | ## xx <- rba_file_download(rba_url); 243 | ## } 244 | if (!dir.exists(exdir)) dir.create(exdir) 245 | data_url <- as.character(data_url) 246 | local_filename <- basename(data_url); 247 | 248 | ## Update RBA table list 249 | if (update_cache) { 250 | rba_cache <- rba_table_cache(); 251 | } else { 252 | rba_cache <- raustats::rba_cachelist; 253 | } 254 | 255 | ## Check if url is not valid RBA data URL 256 | if (!data_url %in% rba_cache$url) 257 | stop(sprintf("Invalid RBA url: %s", data_url)); 258 | ## -- Download files -- 259 | cat(sprintf("Downloading: %s", local_filename)); 260 | ## Error check URL call 261 | raustats_check_url_available(data_url); 262 | resp <- GET(data_url, write_disk(file.path(exdir, local_filename), overwrite=TRUE), 263 | raustats_ua(), progress()); 264 | ## http_type(resp) 265 | ## File download validation code based on: 266 | ## https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html 267 | ## if (http_error(resp)) { 268 | ## stop( 269 | ## sprintf( 270 | ## "RBA data file request failed (Error code: %s)\nInvalid URL: %s", 271 | ## status_code(resp), 272 | ## data_url 273 | ## ), 274 | ## call. = FALSE 275 | ## ) 276 | ## } 277 | 278 | ## RBA website returns: content-type: application/octet-stream 279 | ## if (!http_type(resp) %in% c("text/csv", "application/vnd.ms-excel")) { 280 | ## stop("RBA file request did not return an Excel or CSV file", call. = FALSE) 281 | ## } 282 | 283 | ## Return results 284 | return(file.path(exdir, local_filename)); 285 | } 286 | 287 | 288 | ### Function: rba_read_tss 289 | #' @name rba_read_tss 290 | #' @title Read RBA statistical time series spreadsheet 291 | #' @description Functions to extract data from a specified RBA time series spreadsheet. 292 | #' @importFrom readxl read_excel excel_sheets 293 | #' @importFrom dplyr left_join 294 | #' @importFrom tidyr gather 295 | #' @importFrom stats complete.cases 296 | #' @param files Names of one or more ABS data file 297 | #' @return data frame in long format 298 | #' @author David Mitchell 299 | #' @export 300 | #' @examples 301 | #' \donttest{ 302 | #' rba_urls <- rba_search(pattern = "Liabilities and Assets")$url 303 | #' rba_files <- sapply(rba_urls, rba_file_download) 304 | #' data <- rba_read_tss(rba_files); 305 | #' } 306 | rba_read_tss <- function(files) 307 | { 308 | x <- lapply(files, 309 | function(file) 310 | rba_read_tss_(file) 311 | ) 312 | z <- do.call(rbind, x); 313 | return(z) 314 | } 315 | 316 | 317 | rba_read_tss_ <- function(file) 318 | { 319 | ## Debugging text 320 | if (FALSE) { 321 | rba_cache <- rba_table_cache(); 322 | rba_cache %>% write.csv(file.path(tempdir(), "rba_cache.csv")) 323 | table_no = "G3" 324 | urls <- as.character(rba_cache$url[which(table_no == rba_cache$table_no)]); 325 | file <- lapply(urls, rba_file_download)[[1]]; 326 | sheet_names <- excel_sheets(file)[grepl("data|series breaks", excel_sheets(file), ignore.case=TRUE)]; 327 | } 328 | 329 | ## Avoid 'No visible binding for global variables' note 330 | { series_id <- value <- NULL } 331 | sheet_names <- excel_sheets(file); 332 | ## CONSIDER: writing message for data sets containing series breaks, e.g. 
333 | ## if (any(grepl("series.+breaks", sheet_names, ignore.case=TRUE))) 334 | ## cat(sprintf("Note RBA data file %s contains Series Breaks. (See: %s for details).\n", 335 | ## basename(file), file)); 336 | ## CONSIDER: option for incorporating series breaks. 337 | 338 | ## TO DO 339 | ## 1. Require method to import historical and supplementary RBA data tables 340 | ## Check validity 341 | ## if (!all(c("notes", "data") %in% tolower(sheet_names))) 342 | ## stop(sprintf("File: %s is not a valid RBA time series file.", basename(file))); 343 | data <- lapply( 344 | ## Only process sheets named: 'Data' 345 | sheet_names[grepl("data", excel_sheets(file), ignore.case=TRUE)], 346 | function(sheet_name) { 347 | ## Read metadata 348 | .data <- read_excel(file, sheet=sheet_name, col_names=FALSE, col_types="text", 349 | na=c("","--"), .name_repair="minimal"); 350 | ## Return pre-header information from RBA files 351 | header_row <- which(sapply(1:nrow(.data), 352 | function(i) 353 | grepl("series\\s*id", paste(.data[i,], collapse=" "), 354 | ignore.case=TRUE))); 355 | ## -- Extract table name & number -- 356 | ## Note use of 'word' character /here /here for 13a, 6b, etc. 357 | regex_table_name <- "^(\\w+\\d+(\\.\\d+)*)(.+)$"; 358 | table_no <- trimws(sub(regex_table_name, "\\1", 359 | paste(replace(.data[1,], is.na(.data[1,]), ""), collapse=""))); 360 | ## Return table name/number details 361 | table_name <- trimws(sub(regex_table_name, "\\3", 362 | paste(replace(.data[1,], is.na(.data[1,]), ""), collapse=""))); 363 | ## Extract metadata 364 | metadata <- .data[1:header_row,]; 365 | metadata <- metadata[complete.cases(metadata),]; ## Drop NA rows 366 | metadata <- as.data.frame(t(metadata), stringsAsFactors=FALSE); 367 | rownames(metadata) <- seq_len(nrow(metadata)); 368 | names(metadata) <- tolower(gsub("\\s","_", 369 | gsub("\\.", "", 370 | metadata[1,]))); ## Rename variables 371 | metadata <- metadata[-1,]; 372 | metadata$publication_date <- excel2Date(as.integer(metadata$publication_date)); 373 | ## Append to metadata table 374 | metadata <- transform(metadata, 375 | table_no = table_no, 376 | table_name = table_name); 377 | 378 | z <- .data[-(1:header_row),]; 379 | ## Rename variables, including renaming `Series ID` 380 | names(z) <- sub("series.*id", "date", .data[header_row,], ignore.case=TRUE); 381 | z <- gather(z, series_id, value, -date, convert=TRUE); ## Transform to key:value pairs 382 | z <- transform(z, 383 | date = excel2Date(as.integer(date)), 384 | value = as.numeric(value)); 385 | 386 | data <- left_join(z, metadata, by="series_id"); 387 | data <- data[complete.cases(data),]; 388 | names(data) <- tolower(names(data)); 389 | return(data) 390 | }); 391 | data <- do.call(rbind, data); 392 | return(data); 393 | } 394 | -------------------------------------------------------------------------------- /R/abs-api-functions.R: -------------------------------------------------------------------------------- 1 | ### ABS API functions 2 | 3 | #' @name abs_api_urls 4 | #' @title ABS URL addresses and paths used in ABS.Stat API calls 5 | #' @description This function returns a list of URLs and data paths used to construction ABS.Stat 6 | #' API call. It is used in other functions in this package and need not be called directly. 
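#' For example (illustrative only), \code{abs_api_call(path = abs_api_urls()$datastr_path, args = "CPI")}
#' uses the list returned here to build a GetDataStructure request for the dataset id \code{"CPI"};
#' the dataset id is an assumed example, not a call made by this function itself.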
7 | #' @return a list with a base url and a url section for formatting the JSON API calls
8 | #' @author David Mitchell
9 | #' @keywords internal
10 | abs_api_urls <- function()
11 |   list(base_url = "http://stat.data.abs.gov.au",
12 |        datastr_path = "restsdmx/sdmx.ashx/GetDataStructure",
13 |        sdmx_json_path = "SDMX-JSON/data")
14 | 
15 | 
16 | #' @name abs_api_call
17 | #' @title Create ABS.Stat API URL call
18 | #' @description This function creates the ABS.Stat API call URL.
19 | #' @param path Character vector specifying one or more ABS collections or catalogue numbers to
20 | #' download.
21 | #' @param args Named list of arguments to supply to the call.
22 | #' @return a character string containing the ABS.Stat API call URL
23 | #' @author David Mitchell
24 | #' @keywords internal
25 | abs_api_call <- function(path, args)
26 | {
27 |   if (missing(path))
28 |     stop("Argument path missing.")
29 | 
30 |   if (missing(args))
31 |     stop("Argument args missing.")
32 | 
33 |   url <- file.path(abs_api_urls()$base_url, path, args)
34 | 
35 |   return(url);
36 | }
37 | 
38 | 
39 | #' @name abs_call_api
40 | #' @title Submit API call to ABS.Stat
41 | #' @description This function submits the specified API call to ABS.Stat.
42 | #' @importFrom xml2 read_xml
43 | #' @importFrom httr http_error
44 | #' @param url Character vector specifying one or more ABS collections or catalogue numbers to
45 | #' download.
46 | #' @return an xml2 document containing the API response
47 | #' @author David Mitchell
48 | #' @keywords internal
49 | abs_call_api <- function(url)
50 | {
51 |   if (http_error(url))
52 |     stop(sprintf("HTTP error returned by url: %s", url))
53 | 
54 |   x <- read_xml(url)
55 |   return(x);
56 | }
57 | 
58 | 
59 | #' @name abs_datasets
60 | #' @title Download ABS.Stat datasets
61 | #' @description This function returns a list of all datasets available from ABS.Stat.
62 | #' @importFrom xml2 as_list read_xml read_html xml_name xml_find_all
63 | #' @param lang Preferred language (default 'en' - English).
64 | #' @param include_notes Include ABS annotation information for each series.
65 | #' @return data frame in long format
66 | #' @export
67 | #' @author David Mitchell
68 | #' @examples
69 | #' \donttest{
70 | #' datasets <- abs_datasets()
71 | #' datasets <- abs_datasets(include_notes=TRUE)
72 | #' }
73 | abs_datasets <- function(lang="en", include_notes=FALSE)
74 | {
75 |   ## Return xml document of ABS indicators
76 |   url <- abs_api_call(path=abs_api_urls()$datastr_path, args="all");
77 |   x <- abs_call_api(url);
78 |   ## Select the node name used for series entries (the most frequent node carrying an 'id' attribute)
79 |   no_ids <- table(xml_name(xml_find_all(x, "//*[@id]")));
80 |   series_node_name <- names(no_ids[no_ids == max(no_ids)])
81 |   ## Extract Series ID information
82 |   xpath_str <- sprintf("//*[name() = '%s']", series_node_name);
83 |   name_fld <- "Name"
84 |   ## The following code extracts the relevant ABS series information from the returned
85 |   ## XML document by first saving the relevant part of the XML document to an R list and
86 |   ## then explicitly extracting the relevant information from specific nodes by name.
87 |   ## A more general recursive process, impervious to name changes, would be preferred;
88 |   ## however, it is more complex than simply revising the following code in response to
89 |   ## potential future server-side changes.
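  ## Illustrative sketch of the intended result shape (values are examples only,
  ## not taken from a live ABS.Stat response):
  ##   id      agencyID   name                                notes
  ##   "CPI"   "ABS"      "Consumer Price Index, Australia"   "..."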
90 | y <- as_list(xml_find_all(x, xpath_str)); 91 | z <- lapply(y, 92 | function(m) 93 | list(agencyID = attr(m, "agencyID"), 94 | id = attr(m, "id"), 95 | name = unlist( 96 | if (length(m[names(m) == name_fld]) == 1) { 97 | m[[name_fld]] 98 | } else { 99 | m[names(m) == name_fld][sapply(m[names(m) == name_fld], 100 | function(p) attributes(p)) == lang] 101 | }), 102 | notes = paste(unlist(m$Annotations), collapse=": ")) 103 | ); 104 | z <- as.data.frame(do.call(rbind, z)); 105 | z <- z[, c("id","agencyID","name","notes")]; 106 | if (!include_notes) 107 | z <- z[, c("id","agencyID","name")]; 108 | return(z) 109 | } 110 | 111 | 112 | #' @name abs_metadata 113 | #' @title Download dataset metadata from the ABS API 114 | #' @description This function queries and returns all metadata associated with a specified dataset 115 | #' from ABS.Stat. 116 | #' @importFrom xml2 xml_name xml_children xml_child xml_length xml_attrs xml_attr xml_ns_strip 117 | #' xml_text xml_find_all xml_parent 118 | #' @param id ABS dataset ID. 119 | #' @param lang Preferred language (default 'en' - English). 120 | #' @return data frame in long format 121 | #' @export 122 | #' @author David Mitchell 123 | #' @examples 124 | #' \donttest{ 125 | #' datasets <- abs_datasets(); 126 | #' x <- abs_metadata("CPI"); 127 | #' x <- abs_metadata(grep("cpi", datasets$id, ignore.case=TRUE, value=TRUE)); 128 | #' names(x) 129 | #' y <- abs_metadata(datasets$id[1]); 130 | #' names(y) 131 | #' } 132 | abs_metadata <- function(id, lang="en") 133 | { 134 | ## Return xml document of ABS indicators 135 | url <- abs_api_call(path=abs_api_urls()$datastr_path, args=id); 136 | x <- abs_call_api(url); 137 | 138 | ## Return all codelists 139 | i_codelist <- grep("codelist", xml_name(xml_children(x)), ignore.case=TRUE); 140 | n_codelists <- xml_length(xml_child(x, i_codelist)); 141 | ## Dataset dimensions and codes 142 | codelists_attrs <- as.data.frame( 143 | do.call(rbind, 144 | lapply(seq_len(n_codelists), 145 | function(i) 146 | xml_attrs(xml_child(xml_child(x, 2),i)) 147 | )), 148 | stringsAsFactors = FALSE); 149 | ## Codelist content 150 | codelists <- lapply(seq_len(n_codelists), 151 | function(i) { 152 | ## Note 'xml_ns_strip' essential to extracting Description 153 | y <- xml_ns_strip(xml_child(xml_child(x, i_codelist), i)); 154 | 155 | codelist <- data.frame( 156 | Code = xml_text(xml_find_all(xml_children(y), "@value")), 157 | Description = xml_text(xml_find_all(y, 158 | sprintf(".//Code//Description[@xml:lang='%s']", 159 | lang))), 160 | stringsAsFactors=FALSE); 161 | }); 162 | ## Return components 163 | i_keyfamilies <- grep("keyfamilies", xml_name(xml_children(x)), ignore.case=TRUE); 164 | z <- xml_parent(xml_find_all(xml_children(xml_child(x, i_keyfamilies)), 165 | ".//@codelist")); 166 | components <- data.frame(codes = xml_text(xml_find_all(z, ".//@codelist")), 167 | conceptRef = xml_text(xml_find_all(z, ".//@conceptRef")), 168 | type = xml_name(z), 169 | stringsAsFactors=FALSE); 170 | ## Return concepts 171 | i_concepts <- grep("concepts", xml_name(xml_children(x)), ignore.case=TRUE); 172 | w <- xml_children(xml_child(x, i_concepts)); 173 | concepts <- data.frame(concept = xml_attr(xml_find_all(w, "."), "id"), 174 | agencyID = xml_attr(xml_find_all(w, "."), "agencyID"), 175 | conceptRef=xml_text(xml_find_all(w, sprintf(".//Name[@xml:lang='%s']", lang))), 176 | stringsAsFactors=FALSE); 177 | ## Set names/attributes 178 | names(codelists) <- components$codes; 179 | ## Add dataset and dataset_desc attributes 180 | attr(codelists, 
"concept") <- components$conceptRef; 181 | attr(codelists, "description") <- concepts$conceptRef[match(components$conceptRef, concepts$concept)]; 182 | attr(codelists, "type") <- components$type; 183 | return(codelists); 184 | } 185 | 186 | 187 | #' @name abs_dimensions 188 | #' @title Return available dimensions of ABS series 189 | #' @description This function returns the available dimeninsions for a specified ABS API dataset. 190 | #' @param dataset Character vector of dataset codes. These codes correspond to the 191 | #' \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 192 | #' \code{abs_cachelist}, or the result of \code{abs_indicators}. 193 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 194 | #' ABS.Stat datasets, if TRUE, update the list of available datasets. 195 | #' @return a data frame with available dataset dimensions. 196 | #' @export 197 | #' @author David Mitchell 198 | #' @examples 199 | #' \donttest{ 200 | #' ## CPI - Consumer Price Index 201 | #' x <- abs_dimensions("CPI"); 202 | #' str(x) 203 | #' ## LF - Labour Force 204 | #' x <- abs_dimensions("LF"); 205 | #' str(x) 206 | #' } 207 | abs_dimensions <- function(dataset, update_cache=FALSE) 208 | { 209 | ## Check dataset present and valid 210 | if (missing(dataset)) 211 | stop("No dataset name supplied."); 212 | if (update_cache) { 213 | cache <- abs_datasets(); 214 | } else { 215 | cache <- raustats::abs_cachelist; 216 | } 217 | if (!dataset %in% cache$id) 218 | stop(sprintf("%s not valid dataset name.", dataset)); 219 | metadata <- abs_metadata(dataset) 220 | ## Return data frame of dataset dimensions: 221 | z <- data.frame(name = attr(metadata, "concept"), 222 | type = attr(metadata, "type")); 223 | return(z) 224 | } 225 | 226 | 227 | #' @name abs_search 228 | #' @title Search dataset information from the ABS.Stat API 229 | #' @description This function finds datasets or dimensions within a specific that match a specified 230 | #' regular expresion and returns matching results. 231 | #' @param pattern Character string or regular expression to be matched. 232 | #' @param dataset Character vector of ABS.Stat dataset codes. These codes correspond to the 233 | #' \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 234 | #' \code{abs_cachelist}, or the result of \code{abs_indicators}. If NULL (default), then function 235 | #' undertakes a dataset mode search. If not NULL, function searches all dimensions of specified 236 | #' dataset. 237 | #' @param ignore.case Case senstive pattern match or not. 238 | #' @param code_only If FALSE (default), all column/fields are returned. If TRUE, only the dataset 239 | #' identifier or indicator code are returned. 240 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available 241 | #' ABS.Stat datasets, if TRUE, update the list of available datasets. 242 | #' @return A data frame with datasets and data items that match the search pattern. 243 | #' @export 244 | #' @note With acknowledgements to \code{wb_search} function. 
245 | #' @author David Mitchell 246 | #' @examples 247 | #' ## ABS dataset search 248 | #' x <- abs_search(pattern = "consumer price index") 249 | #' x <- abs_search(pattern = "census") 250 | #' x <- abs_search(pattern = "labour force") 251 | #' 252 | #' ## ABS indicator search 253 | #' x <- abs_search(pattern = "all groups", dataset="CPI") 254 | #' x <- abs_search(pattern = c("all groups", "capital cities"), dataset="CPI") 255 | #' 256 | abs_search <- function(pattern, dataset=NULL, ignore.case=TRUE, code_only=FALSE, update_cache=FALSE) 257 | { 258 | if (missing(pattern)) 259 | stop("No regular expression provided.") 260 | if (update_cache) { 261 | cache <- abs_datasets(); 262 | } else { 263 | cache <- raustats::abs_cachelist; 264 | } 265 | ## 266 | if (is.null(dataset)) { 267 | ## 1. If dataset not specified, search through list of datasets 268 | ## Return list of matching ABS.Stat datasets 269 | match_index <- sapply(names(cache), ## cache_table 270 | function(i) grep(pattern, cache[, i], ignore.case=ignore.case), ## cache_table[, i] 271 | USE.NAMES = FALSE); 272 | match_index <- sort(unique(unlist(match_index))); 273 | if (length(match_index) == 0) 274 | warning(sprintf("No matches were found for the search term %s. Returning an empty data frame.", 275 | pattern)); 276 | match_df <- unique(cache[match_index, ]) ## unique(cache_table[match_index, ]) 277 | rownames(match_df) <- seq_len(nrow(match_df)); 278 | if (code_only) 279 | match_df <- as.character(match_df[,"id"]); 280 | return(match_df); 281 | } else { 282 | ## 2. If dataset specified, search through list of datasets 283 | if (!dataset %in% cache$id) 284 | stop(sprintf("Dataset: %s not available on ABS.Stat", dataset)) 285 | .cachelist <- abs_metadata(dataset); 286 | names(.cachelist) <- attr(.cachelist, "concept"); 287 | ## Return list of all dataset dimensions with matching elements 288 | filter_index <- lapply(.cachelist, 289 | function(x) { 290 | i <- grep(sprintf("(%s)", paste(pattern, collapse="|")), 291 | x$Description, ignore.case=ignore.case); 292 | z <- x[i,]; 293 | return(z); 294 | }); 295 | filter <- filter_index[sapply(filter_index, nrow) > 0] 296 | if (code_only) 297 | filter <- lapply(filter, function(x) as.character(x$Code)); 298 | return(filter) 299 | } 300 | } 301 | 302 | 303 | #' @name abs_stats 304 | #' @title Download data from the ABS API 305 | #' @description This function queries and returns data for a specified ABS dataset from the ABS API. 306 | #' @importFrom xml2 read_xml read_html 307 | #' @importFrom httr content GET http_error http_status http_type progress status_code 308 | #' @importFrom jsonlite fromJSON 309 | #' @importFrom stats setNames 310 | #' @param dataset Character vector of ABS.Stat dataset codes. These codes correspond to the 311 | #' \code{indicatorID} column from the indicator data frame of \code{abs_cache} or 312 | #' \code{abs_cachelist}, or the result of \code{abs_indicators}. 313 | #' @param filter A list that contains filter of dimensions available in the specified \code{series} 314 | #' to use in the API call. If NULL, no filter is set and the query tries to return all dimensions 315 | #' of the dataset. Valid dimensions to include in the list supplied to filter include: MEASURE, 316 | #' REGION, INDEX, TSEST and FREQUENCY. 317 | #' @param start_date Numeric or character. If numeric it must be in %Y form (i.e. four digit 318 | #' year). 
For data at sub-annual granularity, the API supports the following formats: Monthly
319 | #' data -- '2016-M01', Quarterly data -- '2016-Q1', Semi-annual data -- '2016-B2', Financial year
320 | #' data -- '2016-17'.
321 | #' @param end_date Numeric or character (refer to \code{start_date}).
322 | #' @param lang Language in which to return the results. If \code{lang} is unspecified, English is
323 | #' the default.
324 | ## #' @param remove_na If \code{TRUE}, remove blank or NA observations. If \code{FALSE}, no blank or
325 | ## #'   NA values are removed from the return.
326 | ## #' @param include_unit If \code{TRUE}, the column unit is not removed from the return. If \code{FALSE}, this column is removed.
327 | ## #' @param include_obsStatus If \code{TRUE}, the column obsStatus is not removed from the return. If \code{FALSE}, this column is removed.
328 | #' @param dimensionAtObservation The identifier of the dimension to be attached at the observation
329 | #' level. The default order is: 'AllDimensions', 'TimeDimension' and 'MeasureDimension'.
330 | #' AllDimensions results in a flat list of observations without any grouping.
331 | #' @param detail This argument specifies the desired amount of information to be returned. Possible
332 | #' values are:
333 | #'
334 | #' \itemize{
335 | #' \item Full: all data and documentation, including annotations (default)
336 | #' \item DataOnly: attributes, and therefore groups, will be excluded
337 | #' \item SeriesKeysOnly: only the series elements and the dimensions that make up the series keys
338 | #' \item NoData: returns the groups and series, including attributes and annotations, without observations (all values = NA)
339 | #' }
340 | #'
341 | #' @param return_json Logical. Default is \code{FALSE}. If \code{TRUE}, the function returns the
342 | #' result in raw SDMX-JSON.
343 | #' @param return_url Default is \code{FALSE}. If \code{TRUE}, the function returns the generated
344 | #' request URL and does not submit the request.
345 | #' @param enforce_api_limits If \code{TRUE} (the default), the function enforces the ABS.Stat
346 | #' RESTful API limits and will not submit the query if the URL string length exceeds 1000
347 | #' characters or the query would return more than 1 million records. If \code{FALSE}, the function
348 | #' submits the API call regardless and attempts to return the results.
349 | #' @param update_cache Logical expression, if FALSE (default), use the cached list of available
350 | #' ABS.Stat datasets, if TRUE, update the list of available datasets.
351 | #' @return Returns a data frame of the selected series from the specified ABS dataset.
352 | #' @note The data query submitted by this function uses the ABS RESTful API based on the SDMX-JSON
353 | #' standard. It allows a maximum of 1000 characters in the data
354 | #' URL.
355 | #'
356 | #' Further limitations known at this time include:
357 | #' \itemize{
358 | #' \item Only anonymous queries are supported; there is no authentication
359 | #' \item Each response is limited to no more than 1 million observations
360 | #' \item Errors are not returned in the JSON format, but HTTP status codes and messages are
361 | #' set according to the Web Services Guidelines
362 | #' \item The lastNObservations parameter is not supported
363 | #' \item Observations follow the time series (or import-specific) order even if
364 | #' \code{dimensionAtObservation=AllDimensions} is used.
365 | #' } 366 | #' 367 | #' @export 368 | #' @author David Mitchell 369 | #' @examples 370 | #' \donttest{ 371 | #' x <- abs_stats(dataset="CPI", filter="all", return_url=TRUE); 372 | #' x <- abs_stats(dataset="CPI", filter=list(MEASURE=1, REGION=c(1:8,50), 373 | #' INDEX=10001, TSEST=10, FREQUENCY="Q")); 374 | #' x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, 375 | #' INDEX=10001, TSEST=10, FREQUENCY="Q")); 376 | #' x <- abs_stats(dataset="CPI", filter=list(MEASURE="all", REGION=50, INDEX=10001, 377 | #' TSEST=10, FREQUENCY="Q"), return_url=TRUE); 378 | #' } 379 | abs_stats <- function(dataset, filter, start_date, end_date, lang=c("en","fr"), 380 | dimensionAtObservation=c("AllDimensions","TimeDimension","MeasureDimension"), 381 | detail=c("Full","DataOnly","SeriesKeysOnly","NoData"), 382 | return_json=FALSE, return_url=FALSE, 383 | enforce_api_limits=TRUE, update_cache=FALSE) 384 | { 385 | ## Check dataset present and valid 386 | if (missing(dataset)) 387 | stop("No dataset supplied."); 388 | if (!dataset %in% abs_datasets()$id) 389 | stop(sprintf("%s not a valid ABS dataset.", dataset)); 390 | ## Check if filter provided 391 | if (missing(filter)) { 392 | dataset_dim <- abs_dimensions(dataset) 393 | stop(sprintf("No filter argument. Should be either 'all' or valid list with dataset dimensions: %s", 394 | paste(dataset_dim[grepl("^dimension$", dataset_dim$type, 395 | ignore.case=TRUE), "name"], collapse=", "))); 396 | } 397 | ## Check if start_date > end_date 398 | if (!missing(start_date) && !missing(end_date) && start_date > end_date) 399 | stop("start_date later than end_date, request not submitted.") 400 | ## Return metadata 401 | if (update_cache) { 402 | cache <- abs_datasets(); 403 | } else { 404 | cache <- raustats::abs_cachelist; 405 | } 406 | ## Get list of Dimension name: 407 | metadata <- abs_metadata(dataset); 408 | metadata_names <- abs_dimensions(dataset, ); 409 | metadata_dims <- as.character(metadata_names[grepl("^dimension$", metadata_names$type, ignore.case=TRUE), 410 | "name"]); 411 | names(metadata) <- metadata_names$name; 412 | ## Return agency name 413 | ## agency_name <- unlist(attr(cache[[dataset]], "agency")); 414 | ## -- Check the set of dimensions supplied in 'filter' -- 415 | if (length(filter) == 1 && filter == "all") { 416 | ## If filter='all', replace with detailed filter list including all dimensions 417 | .filter <- metadata; 418 | filter <- lapply(.filter, function(x) x$Code); 419 | filter <- filter[names(filter) %in% metadata_dims]; 420 | } else if (class(filter) == "list") { 421 | ## If filter is a list: 422 | if (any(!metadata_dims %in% names(filter))) { 423 | ## Check if any filter dimensions missing, and append missing elements (set to 'all') 424 | message(sprintf("Filter dimension(s): %s not in filter, dimensions added and set to 'all'.", 425 | paste(metadata_dims[!metadata_dims %in% names(filter)], collapse=", "))); 426 | for (name in metadata_dims[!metadata_dims %in% names(filter)]) 427 | filter[[name]] <- "all" 428 | } 429 | filter <- filter[metadata_dims]; 430 | for (name in names(filter)) 431 | if( length(filter[[name]]) == 1 && grepl("all", filter[[name]], ignore.case=TRUE) ) 432 | filter[[name]] <- metadata[[name]]$Code; 433 | } else { 434 | stop("Argument filter must be either the single character string: 'all' or a valid filter list."); 435 | } 436 | n_filter <- prod(lengths(filter)); 437 | ## Create ABS URL and open session 438 | url <- file.path(abs_api_urls()$base_url, abs_api_urls()$sdmx_json_path, 439 | dataset, 
440 |                    paste(lapply(filter,
441 |                                 function(x) paste(x, collapse="+")),
442 |                          collapse="."),
443 |                    "all");
444 |   ## dimensionAtObservation
445 |   dimensionAtObservation <- match.arg(dimensionAtObservation);
446 |   if (!dimensionAtObservation %in% c("AllDimensions","TimeDimension","MeasureDimension"))
447 |     stop("dimensionAtObservation argument invalid!")
448 |   detail <- match.arg(detail);
449 |   if (!detail %in% c("Full","DataOnly","SeriesKeysOnly","NoData"))
450 |     stop("detail argument invalid!")
451 |   ## Append 'detail' and 'dimensionAtObservation' values to URL query
452 |   url <- sprintf("%s?detail=%s&dimensionAtObservation=%s",
453 |                  url, detail, dimensionAtObservation);
454 |   ## Add start/end dates, and check validity
455 |   if (!missing(start_date))
456 |     url <- paste0(url, "&startPeriod=", start_date)
457 |   if (!missing(end_date))
458 |     url <- paste0(url, "&endPeriod=", end_date);
459 |   ## Return URL if specified
460 |   if (return_url) {
461 |     return(url)
462 |   } else {
463 |     ## Check URL length - ABS.Stat limit: 1000 characters
464 |     if (enforce_api_limits) {
465 |       if (nchar(url) > 1000)
466 |         stop(sprintf(paste("URL query length (%i) exceeds maximum request URL limit (1000 characters).",
467 |                            "Filter the query on one or more dimensions."),
468 |                      nchar(url)));
469 |       ## Check number of observations - ABS.Stat limit: 1 million observations
470 |       time_filter <- metadata$TIME$Code;
471 |       if(!missing(start_date))
472 |         time_filter <- time_filter[time_filter >= start_date]
473 |       if(!missing(end_date))
474 |         time_filter <- time_filter[time_filter <= end_date]  ## Keep periods up to end_date
475 |       ## Count approximate number of records to be returned
476 |       n_time <- sum(c(ifelse("A" %in% filter$FREQUENCY,
477 |                              length(grep("^\\d{4}$", time_filter)),
478 |                              NA_integer_)),
479 |                     c(ifelse("S" %in% filter$FREQUENCY,
480 |                              length(grep("^\\d{4}-B\\d+$", time_filter)),
481 |                              NA_integer_)),
482 |                     c(ifelse("Q" %in% filter$FREQUENCY,
483 |                              length(grep("^\\d{4}-Q\\d+$", time_filter)),
484 |                              NA_integer_)),
485 |                     c(ifelse("M" %in% filter$FREQUENCY,
486 |                              length(grep("^\\d{4}-M\\d+$", time_filter)),
487 |                              NA_integer_)),
488 |                     na.rm = TRUE);
489 |       if (n_filter * n_time > 10^6)
490 |         stop(sprintf(paste("Estimated number of records (%.0f) exceeds ABS.Stat limit (1 million).",
491 |                            "Filter the query on one or more dimensions."),
492 |                      n_filter * n_time));
493 |     }
494 | 
495 |     ## Download data
496 |     ## cat(sprintf("API query submitted: %s...\n", substr(url, 30)));
497 |     ## Error check URL call
498 |     raustats_check_url_available(url)
499 |     resp <- GET(url, raustats_ua(), progress())
500 |     ## ## Error check URL call
501 |     ## if (http_error(resp)) {
502 |     ##   stop(
503 |     ##     sprintf(
504 |     ##       "ABS.Stat API request failed [%s]\n%s\n<%s>",
505 |     ##       status_code(resp),
506 |     ##       http_status(resp)$message,
507 |     ##       http_status(resp)$reason,
508 |     ##     ),
509 |     ##     call. = FALSE
510 |     ##   )
511 |     ## }
512 |     ## Check content type
513 |     if (!grepl("draft-sdmx-json", http_type(resp))) {
514 |       stop("ABS.Stat API did not return SDMX-JSON format", call. = FALSE)
515 |     }
516 | 
517 |     if (return_json) {
518 |       ## Return results as sdmx-json text format
519 |       return(content(resp, as="text"))
520 |     } else {
521 |       cat("Converting query output to data frame ...
\n"); 522 | ## Convert JSON to list 523 | x_json <- fromJSON(content(resp, as="text")) ## , simplifyVector = FALSE) 524 | ## Check whether data contains any observations 525 | if (ncol(x_json$dataSets$observation) == 0) 526 | stop(paste("API call returns no observations.", 527 | "Check ABS.Stat or inspect JSON object with `return_json=TRUE`"), call. = FALSE); 528 | ## Convert JSON format to long (tidy) data frame 529 | x_obs <- x_json$dataSets$observation; 530 | x_str <- x_json$structure$dimensions$observation; 531 | y <- data.frame(do.call(rbind, unlist(x_obs, recursive=FALSE))); 532 | ## Set names of returned records 533 | y <- if (detail == "Full") { 534 | setNames(y, c("values","obs_status","unknown")) 535 | } else if (detail == "SeriesKeysOnly") { 536 | setNames(y, c("series_key")); 537 | } else if (detail == "DataOnly") { 538 | setNames(y, c("values")); 539 | } else { ## if (detail == NoData) { 540 | setNames(y, c("values","obs_status","unknown")) 541 | } 542 | y <- cbind(setNames(data.frame(do.call(rbind, strsplit(row.names(y), ":"))), 543 | tolower(sub("\\s+","_", x_str$name))), 544 | y); 545 | ## Re-index dimension IDs from 0-based to 1-based 546 | for (name in tolower(sub("\\s+","_", x_str$name))) 547 | y[,name] <- as.integer(as.character(y[,name])) + 1; 548 | names_y <- setNames(lapply(seq_len(nrow(x_str)), 549 | function(j) unlist(x_str[j,"values"], recursive=FALSE) 550 | ), 551 | tolower(sub("\\s+","_", x_str$name))); 552 | ## Substitute dimension IDs for Names 553 | for (name in names(names_y)) 554 | y[,name] <- names_y[[name]]$name[y[,name]] 555 | ## Insert dataset_name 556 | y$agency_id <- x_json$header$sender$id; 557 | y$agency_name <- x_json$header$sender$name; 558 | y$dataset_name <- x_json$structure$name; 559 | ## Re-index rows 560 | row.names(y) <- seq_len(nrow(y)); 561 | ## cat("completed.\n"); 562 | ## Return data 563 | return(y); 564 | } ## End: return_json 565 | } 566 | } 567 | -------------------------------------------------------------------------------- /R/abs-cat-functions.R: -------------------------------------------------------------------------------- 1 | ### ABS Catalogue functions 2 | 3 | #' @name abs_urls 4 | #' @title ABS URL addresses and paths used in accessing ABS Catalogue data calls 5 | #' @description This function returns a list of URLs and data paths used to construct ABS Catalogue 6 | #' data access calls. It is used in other functions in this package and need not be called 7 | #' directly. 8 | #' @return a list with a base url and a url section for formatting ABS Catalogue statistics calls 9 | #' @author David Mitchell 10 | #' @keywords internal 11 | abs_urls <- function() 12 | { 13 | list(base_url = "https://www.abs.gov.au", 14 | ausstats_path = "ausstats/abs@.nsf", 15 | mf_path = "mf", 16 | downloads_regex = "Downloads", 17 | releases_regex = "Past.*Future.*Releases"); 18 | } 19 | 20 | 21 | #' @name abs_filetypes 22 | #' @title Valid ABS file types 23 | #' @description This function returns a vector of valid ABS file types for using list of URLs and data paths used to construct ABS Catalogue 24 | #' data access calls. It is used in other functions in this package and need not be called 25 | #' directly. 26 | #' @return a vector containing a list of valid ABS file types. 
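#' For example (illustrative only), downloaded responses are checked against these MIME types
#' with \code{httr::http_type(resp) %in% abs_filetypes()} before being accepted, as in
#' \code{abs_cat_download}.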
27 | #' @author David Mitchell 28 | #' @keywords internal 29 | abs_filetypes <- function() 30 | { 31 | c(zip_files = "application/x-zip", 32 | excel_files = "application/vnd.ms-excel", 33 | openxml_files = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 34 | pdf_files = "application/pdf"); 35 | } 36 | 37 | 38 | #' @name abs_cat_stats 39 | #' @title Get ABS catalogue series data 40 | #' @description This function downloads ABS catalogue series statistics, by ABS catalogue number. 41 | #' @importFrom rvest html_session follow_link html_attr jump_to 42 | #' @importFrom xml2 read_xml read_html 43 | #' @param cat_no Character vector specifying one or more ABS collections or catalogue numbers to 44 | #' download. 45 | #' @param tables A character vector of regular expressions denoting tables to download. The default 46 | #' ('All') downloads all time series spreadsheet tables for each specified catalogue. Use a list 47 | #' to specify different table sets for each specified ABS catalogue number. 48 | #' @param releases Date or character string object specifying the month and year denoting which 49 | #' release to download. Default is "Latest", which downloads the latest available data. See 50 | #' examples for further details. 51 | #' @param types One of either 'tss' -- ABS time series spreadsheet (the default) or 'css' -- ABS 52 | #' data cube (cross-section spreadsheet). 53 | #' @param na.rm logical (default: \code{TRUE}) - remove observations containing missing values. 54 | #' @return data frame in long format 55 | #' @export 56 | #' @author David Mitchell 57 | #' @examples 58 | #' \donttest{ 59 | #' ## Download quarterly Australian National Accounts, Tables 1 & 2 60 | #' ana_q <- abs_cat_stats("5206.0", tables=c("Table 1\\W+", "Table 2\\W+")); 61 | #' 62 | #' ## Download December 2017 Australian National Accounts, Table 1 63 | #' ana_q_2017q4 <- abs_cat_stats("5206.0", tables="Table 1\\W+", release="Dec 2017"); 64 | #' } 65 | abs_cat_stats <- function(cat_no, tables="All", releases="Latest", types="tss", na.rm=TRUE) 66 | { 67 | if (missing(cat_no)) 68 | stop("No cat_no supplied."); 69 | ## if (tolower(releases) != "latest" || 70 | ## releases IS NOT A DATE ) 71 | ## stop("releases arguments ") 72 | if (any(!types %in% c("tss","css"))) 73 | stop("Allowable type arguments limited to one or both: 'tss' and 'css'."); 74 | ## Get available catalogue tables 75 | if (FALSE) { 76 | cat_no <- "5206.0"; tables <- c("Table 1\\W+", "Table 2\\W+"); 77 | releases <- "Latest"; types <- "tss"; include_urls <- FALSE; 78 | } 79 | cat_tables <- abs_cat_tables(cat_no=cat_no, releases=releases, types=types, include_urls=TRUE) 80 | ## Select only the user specified tables ('sel_tables') 81 | if (length(tables) == 1 && tolower(tables) == "all") { 82 | ## If 'all' tables, download all 83 | sel_tables <- if (any(grepl("^all time series.*", cat_tables$item_name, ignore.case=TRUE))) { 84 | ## If all tables provided as single compressed archive, select that 85 | cat_tables[grepl("^all time series.*", cat_tables$item_name, ignore.case=TRUE),] 86 | } else { 87 | ## Else, select all tables 88 | cat_tables 89 | }; 90 | } else { 91 | ## Else, return only selected tables 92 | sel_tables <- cat_tables[grepl(sprintf("(%s)", paste(tables, collapse="|")), 93 | cat_tables$item_name, ignore.case=TRUE),] 94 | ## Stop if regular expression does not return any tables 95 | if (nrow(sel_tables) == 0) 96 | stop(paste("Specified table regular expressions do not match any table names, re-specify.")) 97 | } 98 | ## 
Select only the user specified tables ('sel_tables') 99 | sel_urls <- apply(sel_tables, 1, 100 | function(y) { 101 | ## If zip in path_zip, select zip file, else select xls(x) file 102 | if (any(grepl("\\.zip", y, ignore.case=TRUE))) { 103 | unique(grep("\\.zip", unlist(y), ignore.case=TRUE, value=TRUE)) 104 | } else { 105 | unique(grep("\\.xlsx*", unlist(y), ignore.case=TRUE, value=TRUE)) 106 | } 107 | }); 108 | ## Download ABS TSS/Data Cubes .. 109 | z <- lapply(sel_urls, abs_cat_download); 110 | z <- lapply(z, 111 | function(x) 112 | if (!grepl("\\.zip", x, ignore.case=TRUE)) { 113 | x 114 | } else { 115 | abs_cat_unzip(files=x) 116 | }); 117 | ## .. and combine into single data frame 118 | data <- lapply(z, function(x) abs_read_tss(x, na.rm=na.rm)); 119 | data <- do.call(rbind, data); 120 | rownames(data) <- 1:nrow(data); 121 | return(data); 122 | } 123 | 124 | 125 | #' @name abs_cat_tables 126 | #' @title Return ABS catalogue tables 127 | #' @description Return list of data tables available from specified ABS catalogue number. 128 | #' @importFrom rvest html_session html_text html_nodes html_attr follow_link 129 | #' @importFrom httr http_error 130 | #' @importFrom dplyr case_when bind_rows 131 | #' @param cat_no ABS catalogue numbers. 132 | #' @param releases Date or character string object specifying the month and year denoting which 133 | #' release to download. Default is "Latest", which downloads the latest available data. See 134 | #' examples for further details. 135 | #' @param types ABS publication types to return. Permissable options include one or more of: 'tss' 136 | #' -- ABS Time Series Spreadsheets, 'css' - ABS Data Cubes and 'pub' -- ABS Publications. The 137 | #' default returns all Time Series Spreadsheets and Data Cubes. 138 | #' @param include_urls Include full URLs to returned ABS data files. Default (FALSE) does not 139 | #' include data file URLs. 140 | #' @return Returns a data frame listing the data collection tables and URLs for Excel (column: 141 | #' \code{path_xls}) and, if available, Zip (column: \code{path_zip}) files. 
142 | #' @export 143 | #' @author David Mitchell 144 | #' @examples 145 | #' \donttest{ 146 | #' ## List latest available quarterly National Accounts tables 147 | #' ana_tables <- abs_cat_tables("5206.0", releases="Latest"); 148 | #' ana_tables_url <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 149 | #' 150 | #' ## List latest available CPI Time Series Spreadsheet tables only 151 | #' cpi_tables <- abs_cat_tables("6401.0", releases="Latest", types="tss"); 152 | #' cpi_tables_url <- abs_cat_tables("5206.0", releases="Latest", types="tss", include_urls=TRUE); 153 | #' 154 | #' ## List latest available ASGS Volume 3 Data Cubes 155 | #' asgs_vol3_tables <- abs_cat_tables("1270.0.55.003", releases="Latest", types="css"); 156 | #' asgs_vol3_tables_url <- abs_cat_tables("1270.0.55.003", releases="Latest", 157 | #' types="css", include_urls=TRUE); 158 | #' 159 | #' ## List latest available ASGS ANZSIC publications (PDF) files 160 | #' anzsic_2006 <- abs_cat_tables("1292.0", releases="Latest", types="pub", include_urls=TRUE); 161 | #' } 162 | abs_cat_tables <- function(cat_no, releases="Latest", types=c("tss", "css"), include_urls=FALSE) 163 | { 164 | ## if (FALSE) { 165 | ## -- DEBUGGING CODE -- 166 | ## cat_no <- "6401.0"; types <- "tss"; releases <- "Latest"; include_urls <- TRUE; 167 | ## cat_no <- "5209.0.55.001"; types <- "css"; releases <- "Latest"; include_urls <- TRUE; 168 | ## cat_no <- "1270.0.55.001"; releases <- "Latest"; types <- "css"; include_urls <- TRUE; 169 | ## cat_no <- "6202.0"; releases <- "Latest"; types <- "css"; include_urls <- TRUE; 170 | ## cat_no <- "3105.0.65.001"; releases <- "Latest"; types <- "css"; include_urls <- TRUE; 171 | ## } 172 | if (missing(cat_no)) 173 | stop("No cat_no supplied."); 174 | if (any(!types %in% c("tss", "css", "pub"))) 175 | stop("Allowable type arguments limited to one or more of: 'tss', 'css' or 'pub'."); 176 | if (!is.logical(include_urls)) 177 | stop("include_urls must be either TRUE or FALSE"); 178 | ## Spell out type -- for ABS website scraping 179 | types <- sapply(types, 180 | function(x) switch(x, 181 | "tss" = "Time Series Spreadsheet", 182 | "css" = "Data Cubes", 183 | "pub" = "Publication")); 184 | ## Create ABS URL and open session 185 | url <- file.path(abs_urls()$base_url, abs_urls()$ausstats_path, abs_urls()$mf_path, cat_no); 186 | ## Check for HTTP errors 187 | raustats_check_url_available(url); 188 | ## -- OLD CODE -- 189 | ## if (http_error(url)) 190 | ## stop(sprintf("File cannot be downloaded. 
Check URL: %s", url)) 191 | ## Open html session 192 | suppressWarnings(s <- html_session(url)); 193 | releases <- unique(releases); 194 | if (length(releases) == 1 && tolower(releases) == "latest") { 195 | .paths <- ""; 196 | } else { 197 | ## Get path to 'Past & Future Releases' page 198 | .paths <- html_nodes(s, "a"); 199 | .paths <- .paths[grepl(abs_urls()$releases_regex, .paths)]; 200 | .paths <- html_attr(.paths, "href"); 201 | s <- jump_to(s, .paths) 202 | .paths <- html_nodes(s, "a"); 203 | .paths <- .paths[grepl(paste(releases, collapse="|"), .paths, ignore.case=TRUE)]; 204 | .paths <- html_attr(.paths, "href"); 205 | } 206 | ## Return list of all downloadable files, for specified catalogue tables ('cat_tables') 207 | v <- lapply(.paths, 208 | function(x) { 209 | ## Check for HTTP errors 210 | ## raustats_check_url_available(file.path(s, x)); 211 | y <- jump_to(s, x) 212 | l <- follow_link(y, abs_urls()$downloads_regex) 213 | ht <- html_nodes(html_nodes(l, "table"), "table") 214 | ## Return data table 215 | ## The ABS data catalogue lists the data inside a HTML table within a table, i.e. 216 | ## 217 | ##
218 | ## 219 | ## The following nested apply functions, exploits this structure to extract the 220 | ## list of available publication types and associated links. 221 | all_nodes <- lapply(sapply(ht, function(x) html_nodes(x, "tr")), 222 | function(x) 223 | c(html_text(html_nodes(x, "td")), 224 | ## html_attr(html_nodes(html_nodes(x, "td"), "a"), "href"))); 225 | paste0(abs_urls()$base_url, 226 | html_attr(html_nodes(html_nodes(x, "td"), "a"), "href"))) 227 | ); 228 | ## Remove ABS data download section heading from all_nodes 229 | ## Where ABS data download section titles that include links, are included 230 | ## in the node set, but are not conformant with publication information. 231 | ## The following code block, removes these entries. 232 | data_nodes <- lapply(all_nodes, 233 | function(x) { 234 | if (grepl(paste(c("(^\\W{0,1}$)", 235 | "(^data\\s*cubes\\W*$)", 236 | "(^time series spreadsheet\\W*$)"), 237 | collapse="|"), 238 | x[1], ignore.case=TRUE)) { 239 | NULL 240 | } else { 241 | x 242 | } 243 | }) 244 | data_nodes <- data_nodes[-which(sapply(data_nodes, is.null))]; 245 | ## Tidy and return data set names and urls 246 | nodes <- data_nodes[unlist(lapply(data_nodes, 247 | function(x) 248 | any(grepl(sprintf("(%s)", 249 | paste(types, collapse="|")), 250 | x, ignore.case=TRUE)) & 251 | any(grepl("ausstats", x, ignore.case=TRUE)) 252 | ))]; 253 | ## Remove non-breaking spaces ( ), and blank entries 254 | nodes <- lapply(nodes, 255 | function(x) { 256 | z <- trimws(gsub("\u00a0", "", x)); ## Remove non-breaking spaces 257 | z <- replace(z, z == "", NA_character_); ## Replace blank objects with NA 258 | ## Set entries not starting with 'https*' with 'NA_character_' 259 | z[-1] <- replace(z[-1], 260 | !grepl("^https*.+", z[-1], ignore.case=TRUE), 261 | NA_character_); 262 | ## Set entries containing 'INotes' with 'NA_character_' 263 | z <- replace(z, 264 | grepl("INotes", z, ignore.case=TRUE), 265 | NA_character_); 266 | z <- z[!is.na(z)]; ## Remove NA objects 267 | ## Set object names: First element = 'item_name' 268 | names(z)[1] <- "item_name"; 269 | names(z)[-1] <- case_when( 270 | ## !grepl("(^https*|^Releases|INotes)", z, ignore.case=TRUE) ~ "item_name", 271 | grepl("\\.xlsx*", z[-1], ignore.case=TRUE) ~ "path_xls", 272 | grepl("\\.zip", z[-1], ignore.case=TRUE) ~ "path_zip", 273 | grepl("\\.pdf", z[-1], ignore.case=TRUE) ~ "path_pdf", 274 | TRUE ~ NA_character_) 275 | z <- as.data.frame(t(cbind.data.frame(z, deparse.level=1)), 276 | stringsAsFactors=FALSE); 277 | return(z); 278 | }); 279 | ## Tidy nodes into data.frame (using dplyr::bind_rows) 280 | dt <- suppressWarnings(bind_rows(nodes)) 281 | ## Lastly replace spaces in all URL paths with '%20' string 282 | for(name in grep("^path_", names(dt), ignore.case=TRUE, value=TRUE)) # names(dt)[-1] 283 | dt[,name] <- gsub("\\s+", "%20", dt[,name]); 284 | return(dt); 285 | }); 286 | ## Add catalogue number and release information to table 287 | v <- lapply(seq_along(v), 288 | function(i) { 289 | v[[i]]$release <- sub("^$", "Latest", releases[i]); 290 | v[[i]]$cat_no <- cat_no; 291 | as.data.frame(v) 292 | }); 293 | ## Bind all results together 294 | z <- do.call(rbind, v); 295 | ## If rbind breaks on different row names try: 296 | ## z <- do.call(function(...) 
rbind(..., make.row.names=FALSE), v); 297 | ## names(z) <- c("item_name", ..., "cat_no", "release"); 298 | z <- if (!include_urls) { 299 | z[,c("cat_no", "release", "item_name")] 300 | } else { 301 | z[,c("cat_no", "release", "item_name", 302 | names(z)[!names(z) %in% c("cat_no", "release", "item_name")])] 303 | } 304 | row.names(z) <- seq_len(nrow(z)); 305 | return(z) 306 | } 307 | 308 | 309 | #' @name abs_cat_releases 310 | #' @title Return ABS catalogue table releases 311 | #' @description Return list of all releases available for specified ABS catalogue number. 312 | #' @importFrom rvest html_session html_table html_text html_nodes html_attr follow_link 313 | #' @importFrom httr http_error 314 | #' @param cat_no ABS catalogue numbers. 315 | #' @param include_urls Include full path URL to specified ABS catalogue releases. Default (FALSE) 316 | #' does not include release URLs. 317 | #' @return Returns a data frame listing available ABS catalogue releases. 318 | #' @export 319 | #' @author David Mitchell 320 | #' @examples 321 | #' \donttest{ 322 | #' ## List all available quarterly National Accounts tables 323 | #' ana_releases <- abs_cat_releases("5206.0"); 324 | #' ana_release_urls <- abs_cat_releases("5206.0", include_urls=TRUE); 325 | #' 326 | #' ## List latest available CPI Time Series Spreadsheet tables only 327 | #' cpi_releases <- abs_cat_releases("6401.0"); 328 | #' cpi_release_urls <- abs_cat_releases("6401.0", include_urls=TRUE); 329 | #' } 330 | abs_cat_releases <- function(cat_no, include_urls=FALSE) 331 | { 332 | ## if (FALSE) { 333 | ## ## -- DEBUGGING CODE -- 334 | ## cat_no <- "5206.0" 335 | ## include_urls <- FALSE 336 | ## } 337 | if (missing(cat_no)) 338 | stop("No cat_no supplied."); 339 | if (!is.logical(include_urls)) 340 | stop("include_urls must be either TRUE or FALSE"); 341 | ## Create ABS URL and open session 342 | url <- file.path(abs_urls()$base_url, abs_urls()$ausstats_path, abs_urls()$mf_path, cat_no); 343 | ## Check for HTTP errors 344 | raustats_check_url_available(url) 345 | ## if (http_error(url)) 346 | ## stop(sprintf("File cannot be downloaded. Check URL: %s", url)) 347 | suppressWarnings(s <- html_session(url)); 348 | ## Get path to 'Past & Future Releases' page 349 | .paths <- html_nodes(s, "a"); 350 | .paths <- .paths[grepl(abs_urls()$releases_regex, .paths)]; 351 | .paths <- html_attr(.paths, "href"); 352 | s <- jump_to(s, .paths) 353 | ## Get list of available ABS catalogue releases (See: https://devhints.io/xpath for Xpath hints) 354 | .tables <- html_nodes(s, "table"); 355 | .tables <- .tables[grepl("Past Releases", .tables, ignore.case=TRUE)]; 356 | .paths <- html_nodes(.tables, "a"); 357 | ## Return results 358 | if (!include_urls) { 359 | z <- data.frame(releases = html_text(.paths)) 360 | } else { 361 | z <- data.frame(releases = html_text(.paths), 362 | urls = file.path(abs_urls()$base_url, 363 | abs_urls()$ausstats_path, 364 | html_attr(.paths, "href"))) 365 | } 366 | row.names(z) <- seq_len(nrow(z)); 367 | return(z) 368 | } 369 | 370 | 371 | #' @name abs_cat_download 372 | #' @title Function to download files from the ABS website and store locally 373 | #' @description Downloads specified ABS catalogue data files from the ABS website, using a valid ABS 374 | #' data table URL. 375 | #' @importFrom httr GET http_type http_error progress status_code write_disk 376 | #' @param data_url Character vector specifying an ABS data URLs. 377 | #' @param exdir Target directory for downloaded files (defaults to \code{tempdir()}). 
Directory is 378 | #' created if it doesn't exist. 379 | #' @return Downloads data from the ABS website and returns a character vector listing the location 380 | #' where files are saved. 381 | #' @export 382 | #' @author David Mitchell 383 | abs_cat_download <- function(data_url, exdir=tempdir()) { 384 | if (!dir.exists(exdir)) dir.create(exdir); 385 | local_filenames <- 386 | sapply(data_url[!is.na(data_url)], 387 | function(url) { 388 | this_filename <- abs_local_filename(url); 389 | ## Check if any data_urls are not ABS data URLs 390 | if (!grepl("^https*:\\/\\/www\\.abs\\.gov\\.au\\/ausstats.+", 391 | url, ignore.case=TRUE)) 392 | stop(sprintf("Invalid ABS url: %s", url)); 393 | ## 394 | ## -- Download files -- 395 | cat(sprintf("Downloading: %s", this_filename)); 396 | ## Check for errors 397 | raustats_check_url_available(url) 398 | resp <- GET(url, write_disk(file.path(exdir, this_filename), overwrite=TRUE), 399 | raustats_ua(), progress()); 400 | ## ## File download validation code based on: 401 | ## ## https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html 402 | ## if (http_error(resp)) { 403 | ## stop( 404 | ## sprintf( 405 | ## "ABS catalogue file request failed (Error code: %s)\nInvalid URL: %s", 406 | ## status_code(resp), 407 | ## url 408 | ## ), 409 | ## call. = FALSE 410 | ## ) 411 | ## } 412 | ## Check content-type is compliant 413 | if (!http_type(resp) %in% abs_filetypes()) { 414 | stop("ABS file request did not return Excel, Zip or PDF file", call. = FALSE) 415 | } 416 | return(file.path(exdir, this_filename)); 417 | }) 418 | ## local_filename <- abs_local_filename(data_url); 419 | ## ## Check if any data_urls are not ABS data URLs 420 | ## if (!grepl("^https*:\\/\\/www\\.abs\\.gov\\.au\\/ausstats.+", 421 | ## data_url, ignore.case=TRUE)) 422 | ## stop(sprintf("Invalid ABS url: %s", data_url)); 423 | ## ## 424 | ## ## -- Download files -- 425 | ## cat(sprintf("Downloading: %s", local_filename)); 426 | ## resp <- GET(data_url, write_disk(file.path(exdir, local_filename), overwrite=TRUE), 427 | ## raustats_ua(), progress()); 428 | ## ## File download validation code based on: 429 | ## ## https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html 430 | ## if (http_error(resp)) { 431 | ## stop( 432 | ## sprintf( 433 | ## "ABS catalogue file request failed (Error code: %s)\nInvalid URL: %s", 434 | ## status_code(resp), 435 | ## data_url 436 | ## ), 437 | ## call. = FALSE 438 | ## ) 439 | ## } 440 | ## ## Check content-type is compliant 441 | ## if (!http_type(resp) %in% abs_filetypes()) { 442 | ## stop("ABS file request did not return Excel, Zip or PDF file", call. = FALSE) 443 | ## } 444 | ## Return results 445 | ## return(file.path(exdir, local_filename)); 446 | return(local_filenames); 447 | } 448 | 449 | 450 | #' @name abs_local_filename 451 | #' @title Create local file names for storing downloaded ABS data files 452 | #' @description Function to create local filename from web-based file name. 453 | #' @param url Character vector specifying one or more ABS data URLs. 454 | #' @return Returns a local file names (character vector) in which downloaded files will be saved. 
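#' Illustrative sketch (hypothetical URL): the file name and the dd.mm.yyyy release date
#' embedded in an ausstats download URL are combined, so a URL containing
#' "...&5206001_key_aggregates.xls&...&01.03.2019&..." would map to
#' "5206001_key_aggregates_20190301.xls".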
455 | #' @author David Mitchell 456 | #' @keywords internal 457 | abs_local_filename <- function(url) 458 | { 459 | sprintf("%s_%s.%s", 460 | sub("^.+&(.+)\\.(zip|xlsx*|pdf)&.+$", "\\1", url), 461 | sub("^.+(\\d{2}).(\\d{2}).(\\d{4}).+$", "\\3\\2\\1", url), 462 | sub("^.+&(.+)\\.(zip|xlsx*|pdf)&.+$", "\\2", url)); 463 | } 464 | 465 | 466 | #' @name abs_cat_unzip 467 | #' @title Uncompress locally-stored ABS Catalogue data file archives 468 | #' @description Function to uncompress locally-stored ABS Catalogue data file archives. 469 | #' @importFrom utils unzip zip 470 | #' @param files One or more local zip files. 471 | #' @param exdir Target directory for extracted archive files. Directory is created if it doesn't 472 | #' exist. If missing, creates a new subdirectory in \code{tempdir()} using the respective zip 473 | #' files (specified in \code{files}. 474 | #' @return Returns a character vector listing the names of all files extracted. 475 | #' @export 476 | #' @author David Mitchell 477 | abs_cat_unzip <- function(files, exdir) { 478 | if (any(!file.exists(files))) 479 | stop(sprintf("Files %s do not exist", 480 | paste(files[!file.exists(files)], collapse=", "))); 481 | if (missing(exdir)) 482 | exdir <- tempdir(); 483 | ## Only extract from zip files 484 | files <- files[grepl("\\.zip$", files, ignore.case=TRUE)]; 485 | xl_files <- sapply(files, 486 | function(x) 487 | if (grepl("\\.zip$", x, ignore.case=TRUE)) { 488 | ## If exdir NOT missing, then use it 489 | if (exdir == tempdir()) { 490 | exdir <- file.path(exdir, sub("\\.zip", "", basename(x))); 491 | } else { 492 | ## Else, use tempdir() 493 | if (!dir.exists(exdir)) 494 | dir.create(exdir) 495 | } 496 | unzip(x, exdir=exdir); 497 | file.path(exdir, unzip(x, list=TRUE)$Name); 498 | } else { 499 | x; 500 | }); 501 | return(xl_files); 502 | } 503 | 504 | 505 | ### Function: abs_read_tss 506 | #' @name abs_read_tss 507 | #' @title Extract data from an ABS time series data file 508 | #' @description This function extracts time series data from ABS data files. 509 | #' @param files Names of one or more ABS data files 510 | #' @param type One of either 'tss' -- ABS Time Series Spreadsheet (the Default) or 'css' -- Data 511 | #' Cube.R 512 | #' @param na.rm logical. If \code{TRUE} (default), remove observations containing missing values. 513 | #' @return data frame in long format 514 | #' @export 515 | #' @author David Mitchell 516 | #' @examples 517 | #' \donttest{ 518 | #' ## Read specified ABS Excel time series files 519 | #' tables <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE); 520 | #' downloaded_tables <- abs_cat_download(tables$path_zip, exdir=tempdir()) 521 | #' extracted_files <- abs_cat_unzip(downloaded_tables) 522 | #' x <- abs_read_tss(extracted_files); 523 | #' } 524 | abs_read_tss <- function(files, type="tss", na.rm=TRUE) { 525 | x <- lapply(files, 526 | function(file) 527 | abs_read_tss_(file, type=type, na.rm=na.rm)); 528 | z <- do.call(rbind, x); 529 | # rownames(z) <- seq_len(nrow(z)); 530 | return(z); 531 | } 532 | 533 | 534 | ### Function: abs_read_tss_ 535 | #' @name abs_read_tss 536 | #' @title Read ABS time series data file(s) 537 | #' @description This is the internal function that extracts time series data from ABS data files. 
538 | #' @importFrom readxl read_excel excel_sheets 539 | #' @importFrom dplyr left_join 540 | #' @importFrom tidyr gather 541 | #' @importFrom stats complete.cases 542 | #' @param files Names of one or more ABS data files 543 | #' @param type One of either 'tss' -- ABS Time Series Spreadsheet (the Default) or 'css' -- Data 544 | #' Cube.R 545 | #' @param na.rm logical. If \code{TRUE} (default), remove observations containing missing values. 546 | #' @author David Mitchell 547 | #' @keywords internal 548 | abs_read_tss_ <- function(file, type="tss", na.rm=na.rm) { 549 | ## Avoid 'No visible binding for global variables' note 550 | { series_start <- series_end <- no_obs <- collection_month <- series_id <- value <- NULL } 551 | 552 | sheet_names <- tolower(excel_sheets(file)); 553 | if (!all(c("index", "data1") %in% sheet_names)) 554 | stop(sprintf("File: %s is not a valid ABS time series file.", basename(file))); 555 | ## -- Read metadata -- 556 | .meta <- read_excel(file, 557 | sheet = grep("index", excel_sheets(file), ignore.case=TRUE, value=TRUE), 558 | .name_repair = "minimal"); 559 | ## Return pre-header information from ABS files 560 | header_row <- which(sapply(1:nrow(.meta), 561 | function(i) 562 | grepl("series\\s*id", paste(.meta[i,], collapse=" "), 563 | ignore.case=TRUE))); 564 | metadata <- .meta; 565 | names(metadata) <- tolower(gsub("\\s","_", 566 | gsub("\\.", "", 567 | .meta[header_row,]))); ## Rename variables 568 | metadata <- metadata[-(1:header_row), !is.na(names(metadata))]; ## Drop header rows & empty columns 569 | metadata <- metadata[complete.cases(metadata),]; ## Drop NA rows 570 | metadata <- metadata[grepl("\\w\\d{4,7}\\w", metadata$series_id),]; ## Drop if Series ID invalid 571 | metadata <- transform(metadata, 572 | series_start = excel2Date(as.integer(series_start)), 573 | series_end = excel2Date(as.integer(series_end)), 574 | no_obs = as.integer(no_obs), 575 | collection_month = as.integer(collection_month)); 576 | ## 577 | ## Get publication details 578 | ## -- Catalogue number & name -- 579 | regex_catno_name <- "^.*(\\d{4}\\.\\d+(\\.\\d+)*)\\s+(.+)$"; 580 | catno_name <- sapply(1:header_row, 581 | function(i) 582 | grep(regex_catno_name, paste(.meta[i,], collapse=" "), 583 | ignore.case=TRUE, value=TRUE)); 584 | catno_name <- gsub("(\\s*NA)+", "", 585 | sub(regex_catno_name, "\\1|\\3", unlist(catno_name), ignore.case=TRUE)); 586 | catno_name <- trimws(unlist(strsplit(catno_name, split="\\|"))); 587 | ## 588 | ## -- Table number & name -- 589 | ## Note use of 'word' character \/here \/here for 13a, 6b, etc. 
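  ## Illustrative matches for the pattern below (example headers only):
  ##   "Table 1. Key National Accounts Aggregates"  ->  table number "1", title "Key National Accounts Aggregates"
  ##   "Tables 13a and 13b. Employed persons"       ->  table number "13a and 13b", title "Employed persons"
  ## The table number(s) are captured in \1 and the title in \4.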
590 | regex_table_name <- "^.*Tables*\\s+(\\w+(\\s+\\w+\\s+\\w+)*)(\\.|:)*\\s+(.+)$"; 591 | ## Note use of alternative separators: .|: ^here 592 | tableno_name <- sapply(1:header_row, 593 | function(i) 594 | grep(regex_table_name, 595 | paste(.meta[i,], collapse=" "), 596 | ignore.case=TRUE, value=TRUE)); 597 | tableno_name <- gsub("(\\s*NA)+", "", 598 | sub(regex_table_name, "\\1|\\4", unlist(tableno_name), ignore.case=TRUE)); 599 | tableno_name <- trimws(unlist(strsplit(tableno_name, split="\\|"))); 600 | ## 601 | ## Add publication details to metadata table 602 | metadata <- transform(metadata, 603 | catalogue_no = catno_name[1], 604 | publication_title = catno_name[2], 605 | table_no = tableno_name[1], 606 | table_title = tableno_name[2]); 607 | ## Extract data 608 | data <- lapply(grep("data", excel_sheets(file), ignore.case=TRUE, value=TRUE), 609 | function(sheet_name) { 610 | z <- read_excel(file, sheet=sheet_name, .name_repair = "minimal"); 611 | ## Return pre-header information from ABS files 612 | header_row <- which(sapply(1:nrow(z), 613 | function(i) 614 | grepl("series\\s*id", paste(z[i,], collapse=" "), 615 | ignore.case=TRUE))); 616 | names(z) <- gsub("\\s","_", 617 | gsub("\\.","", z[header_row,])); ## Rename variables 618 | names(z) <- sub("series_id", "date", names(z), ## Rename Series_ID field 619 | ignore.case=TRUE); 620 | z <- z[-(1:header_row), !is.na(names(z))]; ## Drop empty columns 621 | z <- gather(z, series_id, value, -date, convert=TRUE); ## Transform data to key:value pairs 622 | z <- transform(z, 623 | date = excel2Date(as.integer(date)), 624 | value = as.numeric(value)); 625 | names(z) <- tolower(names(z)); 626 | return(z); 627 | }); 628 | data <- do.call(rbind, data); 629 | data <- left_join(data, metadata, by="series_id"); 630 | if (na.rm) 631 | data <- data[complete.cases(data),] 632 | names(data) <- tolower(names(data)); 633 | return(data); 634 | } 635 | -------------------------------------------------------------------------------- /R/abs-series-abbreviation.R: -------------------------------------------------------------------------------- 1 | ## ========================================================================= 2 | ## Filename: 3 | ## Created: 4 | ## Updated: <2019-06-19 10:46:32 david at grover> 5 | ## Author: 6 | ## Description: 7 | ## 8 | ## 9 | ## ========================================================================= 10 | 11 | #' 12 | #' Table - ABS ANA Series IDs and series names 13 | #' 14 | #' ABS Series ID | Series abb | Series name 15 | #' :---------------|:--------------|:-------------------------------------- 16 | #' A2304402X | gdp_cv_sa | GDP Chain Volume measures: Seasonally Adjusted 17 | #' A2304340C | gdp_cv_tr | " : Trend 18 | #' A2302459A | gdp_cv_or | " : Original 19 | #' A2304408L | gdi_cv_sa | Gross Domestic Income, Chain Volume measures: Seasonally Adjusted 20 | #' A2304342J | gdi_cv_tr | " : Trend 21 | #' A2302463T | gdi_cv_or | " : Original 22 | #' A2304412C | gni_cv_sa | GNI Chain Volume measures: Seasonally Adjusted 23 | #' A2304344L | gni_cv_tr | " : Trend 24 | #' A2302464V | gni_cv_or | " : Original 25 | #' A2304414J | nndi_cv_sa | NNDI Chain Volume measures: Seasonally Adjusted 26 | #' A2304346T | nndi_cv_tr | " : Trend 27 | #' A2302465W | nndi_cv_or | " : Original 28 | #' A2304404C | gdppc_cv_sa | GDP per capita Chain volume measures: Seasonally Adjusted 29 | #' A2304336L | gdppc_cv_tr | " : Trend 30 | #' A2302459A | gdppc_cv_or | " : Original 31 | #' A2304113C | gne_cv_sa | GNE Chain Volume measures: Seasonally 
Adjusted 32 | #' A2304237F | gne_cv_tr | " : Trend 33 | #' A2302514F | gne_cv_or | " : Original 34 | #' A2304111X | dfd_cv_sa | DFD Chain Volume measures: Seasonally Adjusted 35 | #' A2304235A | dfd_cv_tr | " : Trend 36 | #' A2302519T | dfd_cv_or | " : Original 37 | #' A2304114F | exp_cv_sa | Exports Chain Volume measures: Seasonally Adjusted 38 | #' A2304238J | exp_cv_tr | " : Trend 39 | #' A2302520A | exp_cv_or | " : Original 40 | #' A2304115J | imp_cv_sa | Imports Chain Volume measures: Seasonally Adjusted 41 | #' A2304239K | imp_cv_sa | " : Trend 42 | #' A2302521C | imp_cv_sa | " : Original 43 | #' 44 | #' Notes 45 | #' GDI = GDP - ToT effects (GDI - Gross Domestic Income) 46 | #' NNDI = ?? (NNDI - Net National Disposable Income) 47 | #' DFD = GNE - Inventory change (DFD - Domestic Final Demand) 48 | #' 49 | #' 50 | 51 | #### Add human-readable series model names (abbreviations) 52 | ## -- TO DO - INCLUDE IN ABS data package GENERAL FUNCTIONS 53 | 54 | ana_series_abb <- function(x) { 55 | x %>% 56 | ## Table abbreviations 57 | mutate(series_abb = 58 | case_when(grepl("^key national accounts aggregates", table_title, ignore.case=TRUE) 59 | ~ "ana", 60 | grepl("^expenditure.+GDP", table_title, ignore.case=TRUE) 61 | ~ "gdpe", 62 | grepl("^income from.+GDP", table_title, ignore.case=TRUE) 63 | ~ "gdpi", 64 | grepl("^gross value added.+industry", table_title, ignore.case=TRUE) 65 | ~ "gva", 66 | grepl("^gross value added.+industry.+current price", table_title, ignore.case=TRUE) 67 | ~ "gvacp", 68 | grepl("^household.*final.*consumption.*expenditure", table_title, ignore.case=TRUE) 69 | ~ "hfce", 70 | TRUE ~ ""), 71 | ## Series abbreviations 72 | series_abb = 73 | paste0(series_abb, 74 | case_when(grepl("^gross domestic product", data_item_description, ignore.case=TRUE) 75 | ~ "_gdp", 76 | grepl("^gdp", data_item_description, ignore.case=TRUE) 77 | ~ "_gdp", 78 | grepl("^gross value added", data_item_description, ignore.case=TRUE) 79 | ~ "_gva", 80 | grepl("^net domestic product", data_item_description, ignore.case=TRUE) 81 | ~ "_ndp", 82 | grepl("^net domestic product", data_item_description, ignore.case=TRUE) 83 | ~ "_ndp", 84 | grepl("gross domestic income", data_item_description, ignore.case=TRUE) 85 | ~ "_gdi", 86 | grepl("gross national income", data_item_description, ignore.case=TRUE) 87 | ~ "_gni", 88 | grepl("net national disposable income", data_item_description, ignore.case=TRUE) 89 | ~ "_ndi", 90 | grepl("terms of trade", data_item_description, ignore.case=TRUE) 91 | ~ "_tot", 92 | grepl("gross national expenditure", data_item_description, ignore.case=TRUE) 93 | ~ "_gne", 94 | grepl("exports of goods and services", data_item_description, ignore.case=TRUE) 95 | ~ "_exp", 96 | grepl("imports of goods and services", data_item_description, ignore.case=TRUE) 97 | ~ "_imp", 98 | grepl("domestic final demand", data_item_description, ignore.case=TRUE) 99 | ~ "_dfd", 100 | grepl("change.+inventories", data_item_description, ignore.case=TRUE) 101 | ~ "_chinv", 102 | grepl("final consumption expenditure", data_item_description, ignore.case=TRUE) 103 | ~ "_fce", 104 | grepl("gross fixed capital formation", data_item_description, ignore.case=TRUE) 105 | ~ "_gfcf", 106 | grepl("state final demand", data_item_description, ignore.case=TRUE) 107 | ~ "_sfd", 108 | grepl("hours worked market sector", data_item_description, ignore.case=TRUE) 109 | ~ "_hrsmk", 110 | grepl("hours worked", data_item_description, ignore.case=TRUE) 111 | ~ "_hrstl", 112 | grepl("real unit.*labour cost.*non.*farm", 
data_item_description, ignore.case=TRUE) 113 | ~ "_rulcnf", 114 | grepl("real unit.*labour cost.*", data_item_description, ignore.case=TRUE) 115 | ~ "_rulc", 116 | grepl("household saving ratio", data_item_description, ignore.case=TRUE) 117 | ~ "_hsr", 118 | grepl("net saving", data_item_description, ignore.case=TRUE) 119 | ~ "_netsav", 120 | grepl("statistical discrepancy", data_item_description, ignore.case=TRUE) 121 | ~ "_statdis", 122 | ## Industry gross value added 123 | ## grepl(sprintf("\\(%s\\)\\s*;", paste(letters, collapse="|")), 124 | ## data_item_description, ignore.case=TRUE) 125 | ## ~ sub(sprintf(".+\\((%s)\\)\\s*;.*", paste(letters, collapse="|")), 126 | ## tolower("_div\\1"), data_item_description, ignore.case=TRUE), 127 | ## Division A - Agriculture, forestry & fishing 128 | grepl("\\(a\\).+Agriculture", data_item_description, ignore.case=TRUE) 129 | ~ "_diva_ag", 130 | grepl("\\(a\\).+Forestry.*fishing", data_item_description, ignore.case=TRUE) 131 | ~ "_diva_ff", 132 | grepl("\\(a\\).+;$", data_item_description, ignore.case=TRUE) 133 | ~ "_diva_tot", 134 | ## Division B - Mining 135 | grepl("\\(b\\).+coal.*mining", data_item_description, ignore.case=TRUE) 136 | ~ "_divb_cl", 137 | grepl("\\(b\\).+oil.*gas", data_item_description, ignore.case=TRUE) 138 | ~ "_divb_og", 139 | grepl("\\(b\\).+iron.*ore", data_item_description, ignore.case=TRUE) 140 | ~ "_divb_fe", 141 | grepl("\\(b\\).+other.*mining", data_item_description, ignore.case=TRUE) 142 | ~ "_divb_ot", 143 | grepl("\\(b\\).+mining.*excluding.*exploration", data_item_description, 144 | ignore.case=TRUE) 145 | ~ "_divb_mn", 146 | grepl("\\(b\\).+exploration.*support", data_item_description, 147 | ignore.case=TRUE) 148 | ~ "_divb_es", 149 | grepl("\\(b\\).+;$", data_item_description, ignore.case=TRUE) 150 | ~ "_divb_tot", 151 | ## Division C - Manufacturing 152 | grepl("\\(c\\).+food.*beverage", data_item_description, ignore.case=TRUE) 153 | ~ "_divc_fb", 154 | grepl("\\(c\\).+petroleum.*coal", data_item_description, ignore.case=TRUE) 155 | ~ "_divc_pc", 156 | grepl("\\(c\\).+metal.*products", data_item_description, ignore.case=TRUE) 157 | ~ "_divc_mt", 158 | grepl("\\(c\\).+machinery.*equipment", data_item_description, ignore.case=TRUE) 159 | ~ "_divc_mc", 160 | grepl("\\(c\\).+other.*manufacturing", data_item_description, 161 | ignore.case=TRUE) 162 | ~ "_divc_ot", 163 | grepl("\\(c\\).+;$", data_item_description, ignore.case=TRUE) 164 | ~ "_divc_tot", 165 | ## Division D - Utilities 166 | grepl("\\(d\\).+electricity", data_item_description, ignore.case=TRUE) 167 | ~ "_divd_el", 168 | grepl("\\(d\\).+gas", data_item_description, ignore.case=TRUE) 169 | ~ "_divd_gs", 170 | grepl("\\(d\\).+water.*supply", data_item_description, ignore.case=TRUE) 171 | ~ "_divd_wt", 172 | grepl("\\(d\\).+;$", data_item_description, ignore.case=TRUE) 173 | ~ "_divd_tot", 174 | ## Division E - Construction 175 | grepl("\\(e\\).+building.*construction", data_item_description, ignore.case=TRUE) 176 | ~ "_dive_bc", 177 | grepl("\\(e\\).+civil.*engineering", data_item_description, ignore.case=TRUE) 178 | ~ "_dive_ce", 179 | grepl("\\(e\\).+construction.*services", data_item_description, ignore.case=TRUE) 180 | ~ "_dive_cs", 181 | grepl("\\(e\\).+;$", data_item_description, ignore.case=TRUE) 182 | ~ "_dive_tot", 183 | ## Division F - Wholesale trade 184 | grepl("\\(f\\).+;$", data_item_description, ignore.case=TRUE) 185 | ~ "_divf_tot", 186 | ## Division G - Retail trade 187 | grepl("\\(g\\).+;$", data_item_description, ignore.case=TRUE) 188 
| ~ "_divg_tot", 189 | ## Division H - Accommodation & food services 190 | grepl("\\(h\\).+;$", data_item_description, ignore.case=TRUE) 191 | ~ "_divh_tot", 192 | ## Division I - Transport 193 | grepl("\\(i\\).+road", data_item_description, ignore.case=TRUE) 194 | ~ "_divi_rd", 195 | grepl("\\(i\\).+air.*space", data_item_description, ignore.case=TRUE) 196 | ~ "_divi_as", 197 | grepl("\\(i\\).+rail.*pipeline", data_item_description, ignore.case=TRUE) 198 | ~ "_divi_rl", 199 | grepl("\\(i\\).+postal.*storage", data_item_description, ignore.case=TRUE) 200 | ~ "_divi_ps", 201 | grepl("\\(i\\).+;$", data_item_description, ignore.case=TRUE) 202 | ~ "_divi_tot", 203 | ## Division J - Telecommunications 204 | grepl("\\(j\\).+telecommunications", data_item_description, ignore.case=TRUE) 205 | ~ "_divj_tl", 206 | grepl("\\(j\\).+other.*information", data_item_description, ignore.case=TRUE) 207 | ~ "_divj_ot", 208 | grepl("\\(j\\).+;$", data_item_description, ignore.case=TRUE) 209 | ~ "_divj_tot", 210 | ## Division K - Finance & insurance 211 | grepl("\\(k\\).+finance", data_item_description, ignore.case=TRUE) 212 | ~ "_divk_fn", 213 | grepl("\\(k\\).+other.*financial", data_item_description, ignore.case=TRUE) 214 | ~ "_divk_ot", 215 | grepl("\\(k\\).+;$", data_item_description, ignore.case=TRUE) 216 | ~ "_divk_tot", 217 | ## Division L - Rental, hiring & real estate 218 | grepl("\\(l\\).+rental.*hiring", data_item_description, ignore.case=TRUE) 219 | ~ "_divl_rh", 220 | grepl("\\(l\\).+real.*estate", data_item_description, ignore.case=TRUE) 221 | ~ "_divl_re", 222 | grepl("\\(l\\).+;$", data_item_description, ignore.case=TRUE) 223 | ~ "_divl_tot", 224 | ## Division M - Professional and scientific services 225 | grepl("\\(m\\).+computer.*system", data_item_description, ignore.case=TRUE) 226 | ~ "_divm_cs", 227 | grepl("\\(m\\).+other.*professional", data_item_description, ignore.case=TRUE) 228 | ~ "_divm_op", 229 | grepl("\\(m\\).+;$", data_item_description, ignore.case=TRUE) 230 | ~ "_divm_tot", 231 | ## Division N - Administrative & support services 232 | grepl("\\(n\\).+;$", data_item_description, ignore.case=TRUE) 233 | ~ "_divn_tot", 234 | ## Division O - Public administration & safety 235 | grepl("\\(o\\).+;$", data_item_description, ignore.case=TRUE) 236 | ~ "_divo_tot", 237 | ## Division P - Education and training 238 | grepl("\\(p\\).+;$", data_item_description, ignore.case=TRUE) 239 | ~ "_divp_tot", 240 | ## Division Q - Health care & social assistance 241 | grepl("\\(q\\).+;$", data_item_description, ignore.case=TRUE) 242 | ~ "_divq_tot", 243 | ## Division R - Arts and recreation services 244 | grepl("\\(r\\).+;$", data_item_description, ignore.case=TRUE) 245 | ~ "_divr_tot", 246 | ## Division S - Other services 247 | grepl("\\(s\\).+;$", data_item_description, ignore.case=TRUE) 248 | ~ "_divs_tot", 249 | ## Ownership of dwellings 250 | grepl("ownership.*dwellings", data_item_description, ignore.case=TRUE) 251 | ~ "_dwell", 252 | grepl("taxes less subsidies", data_item_description, ignore.case=TRUE) 253 | ~ "_nettax", 254 | grepl("gross value added at basi prices taxes less", data_item_description, 255 | ignore.case=TRUE) 256 | ~ "_nettax", 257 | ## Household Final Consumption Expenditure items 258 | grepl("Food", data_item_description, ignore.case=TRUE) 259 | ~ "_01_food", 260 | grepl("Alcoholic.*beverage.*cigarettes.*tobacco", data_item_description, 261 | ignore.case=TRUE) 262 | ~ "_02_albt", 263 | grepl("Cigarettes.*tobacco", data_item_description, ignore.case=TRUE) 264 | ~ 
"_02a_tabc", 265 | grepl("Alcoholic.*beverage", data_item_description, ignore.case=TRUE) 266 | ~ "_02b_abev", 267 | grepl("Clothing.*footwear", data_item_description, ignore.case=TRUE) 268 | ~ "_03_clft", 269 | grepl("Housing.*water.*electricity.*gas", data_item_description, ignore.case=TRUE) 270 | ~ "_04_hhsv", 271 | grepl("Rent.*other.*dwelling.*services", data_item_description, ignore.case=TRUE) 272 | ~ "_04a_rnts", 273 | grepl("Actual.*imputed.*rent", data_item_description, ignore.case=TRUE) 274 | ~ "_04b_rent", 275 | grepl("Electricity.*gas.*other.*fuel", data_item_description, ignore.case=TRUE) 276 | ~ "_04c_util", 277 | grepl("Water.*sewerage.*charges", data_item_description, ignore.case=TRUE) 278 | ~ "_04d_watr", 279 | grepl("Furnishings.*household.*equip", data_item_description, ignore.case=TRUE) 280 | ~ "_05_furn", 281 | grepl("Furniture.*floor.*coverings", data_item_description, ignore.case=TRUE) 282 | ~ "_05a_furn", 283 | grepl("Household.*appliances", data_item_description, ignore.case=TRUE) 284 | ~ "_05b_appl", 285 | grepl("Household.*tools", data_item_description, ignore.case=TRUE) 286 | ~ "_05c_tool", 287 | grepl("Health", data_item_description, ignore.case=TRUE) 288 | ~ "_06_hlth", 289 | grepl("Medicines", data_item_description, ignore.case=TRUE) 290 | ~ "_06a_hlth", 291 | grepl("Total.*health.*services", data_item_description, ignore.case=TRUE) 292 | ~ "_06b_hlth", 293 | grepl("Purchase.*vehicles", data_item_description, ignore.case=TRUE) 294 | ~ "_07a_vcpx", 295 | grepl("Operation.*vehicles", data_item_description, ignore.case=TRUE) 296 | ~ "_07b_vopx", 297 | grepl("Transport.*services", data_item_description, ignore.case=TRUE) 298 | ~ "_07c_tran", 299 | grepl("Transport", data_item_description, ignore.case=TRUE) 300 | ~ "_07_tran", 301 | grepl("Communications", data_item_description, ignore.case=TRUE) 302 | ~ "_08_comm", 303 | grepl("Goods.*for.*recreation.*culture", data_item_description, ignore.case=TRUE) 304 | ~ "_09a_recg", 305 | grepl("^Recreational.*cultural.*services", data_item_description, 306 | ignore.case=TRUE) 307 | ~ "_09b_recs", 308 | grepl("Sporting.*recreational.*cultural.*services", data_item_description, 309 | ignore.case=TRUE) 310 | ~ "_09c_sprt", 311 | grepl("Net.*losses.*gambling", data_item_description, ignore.case=TRUE) 312 | ~ "_09d_gamb", 313 | grepl("Newspapers.*books.*stationery", data_item_description, ignore.case=TRUE) 314 | ~ "_09e_news", 315 | grepl("Recreation.*culture", data_item_description, ignore.case=TRUE) 316 | ~ "_09_recc", 317 | grepl("Education.*services", data_item_description, ignore.case=TRUE) 318 | ~ "_10_educ", 319 | grepl("Hotels.*cafes.*restaurants", data_item_description, ignore.case=TRUE) 320 | ~ "_11_acrs", 321 | grepl("Catering.*services", data_item_description, ignore.case=TRUE) 322 | ~ "_11a_cats", 323 | grepl("Accommodation.*services", data_item_description, ignore.case=TRUE) 324 | ~ "_11b_accs", 325 | grepl("Miscellaneous.*goods.*services", data_item_description, ignore.case=TRUE) 326 | ~ "_12_misc", 327 | grepl("Other.*goods", data_item_description, ignore.case=TRUE) 328 | ~ "_12a_othg", 329 | grepl("Insurance.*financial.*services", data_item_description, ignore.case=TRUE) 330 | ~ "_12b_fins", 331 | grepl("Other.*services", data_item_description, ignore.case=TRUE) 332 | ~ "_12c_oths", 333 | grepl("Net.*expenditure.*overseas", data_item_description, ignore.case=TRUE) 334 | ~ "_neo", 335 | grepl("Final.*consumption.*expenditure", data_item_description, ignore.case=TRUE) 336 | ~ "_totc", 337 | TRUE ~ "")), 338 | ## 
339 | ## Per capita/hour worked series 340 | series_abb = paste0(series_abb, 341 | case_when(grepl("per capita", data_item_description, ignore.case=TRUE) 342 | ~ "pc", 343 | grepl("per hour", data_item_description, ignore.case=TRUE) 344 | ~ "ph", 345 | TRUE ~ "")), 346 | ## 347 | ## Households, government, private, public 348 | series_abb = paste0(series_abb, 349 | case_when(grepl("general government", data_item_description, ignore.case=TRUE) 350 | ~ "_gov", 351 | TRUE ~ "")), 352 | ## -- General government options 353 | series_abb = paste0(series_abb, 354 | case_when(grepl("general government", 355 | data_item_description, ignore.case=TRUE) ~ "_gov", 356 | grepl("general government.+national", 357 | data_item_description, ignore.case=TRUE) ~ "_nat", 358 | grepl("general government.+national.+non-defence", 359 | data_item_description, ignore.case=TRUE) ~ "_ndf", 360 | grepl("general government.+national.+defence", 361 | data_item_description, ignore.case=TRUE) ~ "_def", 362 | grepl("general government.+state and local", 363 | data_item_description, ignore.case=TRUE) ~ "_stl", 364 | grepl("households", 365 | data_item_description, ignore.case=TRUE) ~ "_hhld", 366 | grepl("all sectors", 367 | data_item_description, ignore.case=TRUE) ~ "_tot", 368 | grepl("private", 369 | data_item_description, ignore.case=TRUE) ~ "_priv", 370 | grepl("public", data_item_description, ignore.case=TRUE) 371 | ~ "_pub", 372 | TRUE ~ "")), 373 | ## 374 | ## States/territories 375 | series_abb = paste0(series_abb, 376 | case_when(grepl("new south wales", 377 | data_item_description, ignore.case=TRUE) 378 | ~ "_nsw", 379 | grepl("victoria", data_item_description, ignore.case=TRUE) 380 | ~ "_vic", 381 | grepl("queensland", data_item_description, ignore.case=TRUE) 382 | ~ "_qld", 383 | grepl("south australia", data_item_description, ignore.case=TRUE) 384 | ~ "_sa", 385 | grepl("western australia", data_item_description, ignore.case=TRUE) 386 | ~ "_wa", 387 | grepl("tasmania", data_item_description, ignore.case=TRUE) 388 | ~ "_tas", 389 | grepl("northern territory", data_item_description, ignore.case=TRUE) 390 | ~ "_nt", 391 | grepl("australian capital territory", data_item_description, ignore.case=TRUE) 392 | ~ "_act", 393 | TRUE ~ "")), 394 | ## 395 | ## Chain volume/current prices 396 | series_abb = paste0(series_abb, 397 | case_when(grepl("chain volume measures", data_item_description, ignore.case=TRUE) | 398 | grepl("chain volume measures", table_title, ignore.case=TRUE) 399 | ~ "_cv", 400 | grepl("current prices", data_item_description, ignore.case=TRUE) | 401 | grepl("current prices", table_title, ignore.case=TRUE) 402 | ~ "_cp", 403 | grepl("price indexes", data_item_description, ignore.case=TRUE) | 404 | grepl("price indexes", table_title, ignore.case=TRUE) 405 | ~ "_ix", 406 | grepl("implicit price deflators", data_item_description, ignore.case=TRUE) | 407 | grepl("implicit price deflators", table_title, ignore.case=TRUE) 408 | ~ "_pd", 409 | TRUE ~ "")), 410 | ## 411 | ## Original/seasonally adjusted/trend/index 412 | series_abb = paste0(series_abb, 413 | case_when(grepl("original", series_type, ignore.case=TRUE) 414 | ~ "_or", 415 | grepl("seasonally adjusted", series_type, ignore.case=TRUE) 416 | ~ "_sa", 417 | grepl("trend", series_type, ignore.case=TRUE) 418 | ~ "_tr", 419 | TRUE ~ "")), 420 | ## 421 | ## Percentage change/ratio/index 422 | series_abb = paste0(series_abb, 423 | case_when(grepl("percent", unit, ignore.case=TRUE) 424 | ~ "_pc", 425 | grepl("\\$.*(million)*", unit, ignore.case=TRUE) 426 | 
~ "_aud", 427 | grepl("index", unit, ignore.case=TRUE) 428 | ~ "_ix", 429 | grepl("proportion", unit, ignore.case=TRUE) 430 | ~ "_rt", 431 | TRUE ~ "")) 432 | ); 433 | } 434 | 435 | ## ana_series_abb <- function(x) { 436 | ## x %>% 437 | ## ## Series abbreviations 438 | ## mutate(series_abb = ifelse(grepl("^gross domestic product", data_item_description, ignore.case=TRUE), 439 | ## "gdp", "")) %>% 440 | ## mutate(series_abb = ifelse(grepl("^gdp", data_item_description, ignore.case=TRUE), 441 | ## "gdp", series_abb)) %>% 442 | ## mutate(series_abb = ifelse(grepl("^gross value added", data_item_description, ignore.case=TRUE), 443 | ## "gva", series_abb)) %>% 444 | ## mutate(series_abb = ifelse(grepl("^net domestic product", data_item_description, ignore.case=TRUE), 445 | ## "ndp", series_abb)) %>% 446 | ## mutate(series_abb = ifelse(grepl("^net domestic product", data_item_description, ignore.case=TRUE), 447 | ## "ndp", series_abb)) %>% 448 | ## mutate(series_abb = ifelse(grepl("gross domestic income", data_item_description, ignore.case=TRUE), 449 | ## "gdi", series_abb)) %>% 450 | ## mutate(series_abb = ifelse(grepl("gross national income", data_item_description, ignore.case=TRUE), 451 | ## "gni", series_abb)) %>% 452 | ## mutate(series_abb = ifelse(grepl("net national disposable income", data_item_description, ignore.case=TRUE), 453 | ## "ndi", series_abb)) %>% 454 | ## mutate(series_abb = ifelse(grepl("terms of trade", data_item_description, ignore.case=TRUE), 455 | ## "tot", series_abb)) %>% 456 | ## mutate(series_abb = ifelse(grepl("gross national expenditure", data_item_description, ignore.case=TRUE), 457 | ## "gne", series_abb)) %>% 458 | ## mutate(series_abb = ifelse(grepl("exports of goods and services", data_item_description, ignore.case=TRUE), 459 | ## "exp", series_abb)) %>% 460 | ## mutate(series_abb = ifelse(grepl("imports of goods and services", data_item_description, ignore.case=TRUE), 461 | ## "imp", series_abb)) %>% 462 | ## mutate(series_abb = ifelse(grepl("final consumption expenditure", data_item_description, ignore.case=TRUE), 463 | ## "fce", series_abb)) %>% 464 | ## mutate(series_abb = ifelse(grepl("gross fixed capital formation", data_item_description, ignore.case=TRUE), 465 | ## "gfcf", series_abb)) %>% 466 | ## mutate(series_abb = ifelse(grepl("state final demand", data_item_description, ignore.case=TRUE), 467 | ## "sfd", series_abb)) %>% 468 | ## ## 469 | ## ## Per capita series 470 | ## mutate(series_abb = ifelse(grepl("per capita", data_item_description, ignore.case=TRUE), 471 | ## paste0(series_abb, "pc"), series_abb)) %>% 472 | ## ## 473 | ## ## Households, government, private, public 474 | ## mutate(series_abb = ifelse(grepl("general government", data_item_description, ignore.case=TRUE), 475 | ## paste0(series_abb, "_gov"), series_abb)) %>% 476 | ## ## -- General government options 477 | ## mutate(series_abb = ifelse(grepl("general government.+national", 478 | ## data_item_description, ignore.case=TRUE), 479 | ## paste0(series_abb, "_nat"), series_abb)) %>% 480 | ## mutate(series_abb = ifelse(grepl("general government.+national.+defence", 481 | ## data_item_description, ignore.case=TRUE), 482 | ## paste0(series_abb, "_def"), series_abb)) %>% 483 | ## mutate(series_abb = ifelse(grepl("general government.+national.+non-defence", 484 | ## data_item_description, ignore.case=TRUE), 485 | ## sub("_def", "_ndf", series_abb), series_abb)) %>% 486 | ## mutate(series_abb = ifelse(grepl("general government.+state and local", 487 | ## data_item_description, 
ignore.case=TRUE), 488 | ## paste0(series_abb, "_stl"), series_abb)) %>% 489 | ## mutate(series_abb = ifelse(grepl("households", data_item_description, ignore.case=TRUE), 490 | ## paste0(series_abb, "_hhld"), series_abb)) %>% 491 | ## mutate(series_abb = ifelse(grepl("all sectors", data_item_description, ignore.case=TRUE), 492 | ## paste0(series_abb, "_tot"), series_abb)) %>% 493 | ## mutate(series_abb = ifelse(grepl("private", data_item_description, ignore.case=TRUE), 494 | ## paste0(series_abb, "_priv"), series_abb)) %>% 495 | ## mutate(series_abb = ifelse(grepl("public", data_item_description, ignore.case=TRUE), 496 | ## paste0(series_abb, "_pub"), series_abb)) %>% 497 | ## ## 498 | ## ## States/territories 499 | ## mutate(series_abb = ifelse(grepl("new south wales", data_item_description, ignore.case=TRUE), 500 | ## paste0(series_abb, "_nsw"), series_abb)) %>% 501 | ## mutate(series_abb = ifelse(grepl("victoria", data_item_description, ignore.case=TRUE), 502 | ## paste0(series_abb, "_vic"), series_abb)) %>% 503 | ## mutate(series_abb = ifelse(grepl("queensland", data_item_description, ignore.case=TRUE), 504 | ## paste0(series_abb, "_qld"), series_abb)) %>% 505 | ## mutate(series_abb = ifelse(grepl("south australia", data_item_description, ignore.case=TRUE), 506 | ## paste0(series_abb, "_sa"), series_abb)) %>% 507 | ## mutate(series_abb = ifelse(grepl("western australia", data_item_description, ignore.case=TRUE), 508 | ## paste0(series_abb, "_wa"), series_abb)) %>% 509 | ## mutate(series_abb = ifelse(grepl("tasmania", data_item_description, ignore.case=TRUE), 510 | ## paste0(series_abb, "_tas"), series_abb)) %>% 511 | ## mutate(series_abb = ifelse(grepl("northern territory", data_item_description, ignore.case=TRUE), 512 | ## paste0(series_abb, "_nt"), series_abb)) %>% 513 | ## mutate(series_abb = ifelse(grepl("australian capital territory", data_item_description, ignore.case=TRUE), 514 | ## paste0(series_abb, "_act"), series_abb)) %>% 515 | ## ## 516 | ## ## Chain volume/current prices 517 | ## mutate(series_abb = ifelse(grepl("chain volume measures", data_item_description, ignore.case=TRUE) | 518 | ## grepl("chain volume measures", table_title, ignore.case=TRUE), 519 | ## paste0(series_abb, "_cv"), series_abb)) %>% 520 | ## mutate(series_abb = ifelse(grepl("current prices", data_item_description, ignore.case=TRUE) | 521 | ## grepl("current prices", table_title, ignore.case=TRUE), 522 | ## paste0(series_abb, "_cp"), series_abb)) %>% 523 | ## ## 524 | ## ## Original/seasonally adjusted/trend/index 525 | ## mutate(series_abb = ifelse(grepl("original", series_type, ignore.case=TRUE), 526 | ## paste0(series_abb, "_or"), series_abb)) %>% 527 | ## mutate(series_abb = ifelse(grepl("seasonally adjusted", series_type, ignore.case=TRUE), 528 | ## paste0(series_abb, "_sa"), series_abb)) %>% 529 | ## mutate(series_abb = ifelse(grepl("trend", series_type, ignore.case=TRUE), 530 | ## paste0(series_abb, "_tr"), series_abb)) %>% 531 | ## ## 532 | ## ## Percentage change/ratio/index 533 | ## mutate(series_abb = ifelse(grepl("percent", unit, ignore.case=TRUE), 534 | ## paste0(series_abb, "_pc"), series_abb)) %>% 535 | ## mutate(series_abb = ifelse(grepl("\\$.*million", unit, ignore.case=TRUE), 536 | ## paste0(series_abb, "_aud"), series_abb)) %>% 537 | ## mutate(series_abb = ifelse(grepl("index", unit, ignore.case=TRUE), 538 | ## paste0(series_abb, "_ix"), series_abb)) %>% 539 | ## mutate(series_abb = ifelse(grepl("proportion", unit, ignore.case=TRUE), 540 | ## paste0(series_abb, "_rt"), series_abb)) 541 
| ## } 542 | 543 | 544 | 545 | 546 | ## ppi_series_abb <- function(x) { 547 | ## x %>% 548 | ## ## Publication abbreviations 549 | ## mutate(series_abb = ifelse(grepl("^producer price indexes", publication_title, ignore.case=TRUE), 550 | ## "ppi", "")) %>% 551 | ## ## Sector abbreviations 552 | ## mutate(series_abb = ifelse(grepl("transport.+warehousing", table_title, ignore.case=TRUE), 553 | ## paste0(series_abb, "_tr"), series_abb)) %>% 554 | ## ## Series abbreviations 555 | ## mutate(series_abb = 556 | ## paste0(series_abb, 557 | ## case_when(grepl("road freight", data_item_description, ignore.case=TRUE) ~ "_rdfrt", 558 | ## grepl("urban bus", data_item_description, ignore.case=TRUE) ~ "_ubus", 559 | ## grepl("taxi", data_item_description, ignore.case=TRUE) ~ "_taxi", 560 | ## grepl("rail freight", data_item_description, ignore.case=TRUE) ~ "_rlfrt", 561 | ## grepl("water freight", data_item_description, ignore.case=TRUE) ~ "_wtfrt", 562 | ## grepl("pipeline", data_item_description, ignore.case=TRUE) ~ "_pipe", 563 | ## grepl("postal and courier", data_item_description, ignore.case=TRUE) ~ "_pstl", 564 | ## grepl("courier pick-up", data_item_description, ignore.case=TRUE) ~ "_cour", 565 | ## grepl("water transport support", data_item_description, ignore.case=TRUE) ~ "_wtspt", 566 | ## grepl("stevedoring", data_item_description, ignore.case=TRUE) ~ "_wtstv", 567 | ## grepl("port and water transport", data_item_description, ignore.case=TRUE) ~ "_wtprt", 568 | ## grepl("other water", data_item_description, ignore.case=TRUE) ~ "_wtoth", 569 | ## grepl("airport operations", data_item_description, ignore.case=TRUE) ~ "_arprt", 570 | ## grepl("customs agency", data_item_description, ignore.case=TRUE) ~ "_svcust", 571 | ## grepl("warehousing and storage", data_item_description, ignore.case=TRUE) ~ "_whgen", 572 | ## grepl("grain storage", data_item_description, ignore.case=TRUE) ~ "_whgrn", 573 | ## grepl("other warehousing", data_item_description, ignore.case=TRUE) ~ "_whoth", 574 | ## TRUE ~ "")) 575 | ## ) %>% 576 | ## ## Percentage change/ratio/index 577 | ## mutate(series_abb = 578 | ## paste0(series_abb, 579 | ## case_when(grepl("percent", unit, ignore.case=TRUE) ~ "_pc", 580 | ## grepl("\\$.*million", unit, ignore.case=TRUE) ~ "_aud", 581 | ## grepl("index", unit, ignore.case=TRUE) ~ "_ix", 582 | ## grepl("proportion", unit, ignore.case=TRUE) ~ "_rt", 583 | ## TRUE ~ "")) 584 | ## ); 585 | ## } 586 | 587 | 588 | ## =============================== EOF ===================================== 589 | --------------------------------------------------------------------------------
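A minimal usage sketch showing how the draft ana_series_abb() helper above could be chained with the package's exported catalogue functions. This is only an illustration: it assumes R/abs-series-abbreviation.R is sourced manually from a checkout (the file is listed in .Rbuildignore, so it is not built into the installed package), and that abs_read_tss() returns the lower-cased metadata columns the helper references (table_title, data_item_description, series_type, unit), which come from the ABS 'Index' sheet headers.

library(dplyr)
library(raustats)

## List, download and extract the latest Australian National Accounts tables (ABS Cat. no. 5206.0)
tables    <- abs_cat_tables("5206.0", releases="Latest", include_urls=TRUE)
zip_files <- abs_cat_download(tables$path_zip, exdir=tempdir())
xl_files  <- abs_cat_unzip(zip_files)

## Read the extracted time series spreadsheets into a single long-format data frame
ana <- abs_read_tss(xl_files)

## Append human-readable series abbreviations using the draft helper above
## (requires: source("R/abs-series-abbreviation.R") from a checkout of this repository)
ana <- ana_series_abb(ana)
head(ana[, c("series_id", "date", "value", "series_abb")])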