├── .Rbuildignore ├── .all-contributorsrc ├── .dir-locals.el ├── .github ├── .gitignore └── workflows │ ├── main.yml │ └── pkgdown.yaml ├── .gitignore ├── .projectile ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R ├── clean.R ├── ons.R ├── util.R └── utils-pipe.R ├── README.md ├── THFOpenDataPipeline.Rproj ├── _pkgdown.yml ├── man ├── figures │ ├── logo.png │ ├── monstR_2.png │ └── monstR_sticker.png ├── generate_download_filename.Rd ├── monstr_clean.Rd ├── monstr_data.Rd ├── monstr_pipeline_defaults.Rd ├── monstr_read_file.Rd ├── monstr_write_clean.Rd ├── ons_api_call.Rd ├── ons_available_datasets.Rd ├── ons_available_editions.Rd ├── ons_available_versions.Rd ├── ons_dataset_by_id.Rd ├── ons_datasets_setup.Rd ├── ons_download.Rd ├── pipe.Rd ├── safe_download.Rd ├── write_csv.Rd ├── write_metadata.Rd ├── write_rds.Rd └── write_xlsx.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico └── vignettes ├── .gitignore ├── merged_table.PNG ├── mortality-rate-using-population-and-deaths.Rmd ├── mortality_by_region.png ├── pipeline.Rmd ├── pop_deaths.PNG ├── pop_deaths_aggregate.PNG └── pop_table.PNG /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^LICENSE\.md$ 4 | ^\.dir-locals\.el$ 5 | ^\.github/.*$ 6 | ^.projectile$ 7 | ^.*\.Rproj$ 8 | ^\.Rproj\.user$ 9 | ^_pkgdown\.yml$ 10 | ^docs$ 11 | ^pkgdown$ 12 | ^\.github$ 13 | -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "contributors": [ 8 | { 9 | "login": "emmavestesson", 10 | "name": "Emma Vestesson", 11 | "avatar_url": "https://avatars2.githubusercontent.com/u/31949401?v=4", 12 | "profile": "https://emmavestesson.netlify.com/", 13 | "contributions": [ 14 | "ideas", 15 | "content", 16 | "doc" 17 | ] 18 | }, 19 | { 20 | "login": "tomjemmett", 21 | "name": "Tom Jemmett", 22 | "avatar_url": "https://avatars1.githubusercontent.com/u/12023696?v=4", 23 | "profile": "https://www.strategyunitwm.nhs.uk/", 24 | "contributions": [ 25 | "bug" 26 | ] 27 | }, 28 | { 29 | "login": "JohnHC86", 30 | "name": "JohnHC86", 31 | "avatar_url": "https://avatars1.githubusercontent.com/u/12610020?v=4", 32 | "profile": "https://github.com/JohnHC86", 33 | "contributions": [ 34 | "bug" 35 | ] 36 | }, 37 | { 38 | "login": "sw1nn", 39 | "name": "Neale Swinnerton", 40 | "avatar_url": "https://avatars1.githubusercontent.com/u/373335?v=4", 41 | "profile": "http://sw1nn.com", 42 | "contributions": [ 43 | "code" 44 | ] 45 | }, 46 | { 47 | "login": "fiona-grimm", 48 | "name": "fiona-grimm", 49 | "avatar_url": "https://avatars1.githubusercontent.com/u/31844347?v=4", 50 | "profile": "https://github.com/fiona-grimm", 51 | "contributions": [ 52 | "ideas", 53 | "design" 54 | ] 55 | }, 56 | { 57 | "login": "SimonCRUK", 58 | "name": "SimonCRUK", 59 | "avatar_url": "https://avatars2.githubusercontent.com/u/58686505?v=4", 60 | "profile": "https://github.com/SimonCRUK", 61 | "contributions": [ 62 | "bug" 63 | ] 64 | }, 65 | { 66 | "login": "Lextuga007", 67 | "name": "Zoe Turner", 68 | "avatar_url": 
"https://avatars0.githubusercontent.com/u/39963221?v=4", 69 | "profile": "https://github.com/Lextuga007", 70 | "contributions": [ 71 | "bug" 72 | ] 73 | } 74 | ], 75 | "contributorsPerLine": 7, 76 | "projectName": "monstR", 77 | "projectOwner": "HFAnalyticsLab", 78 | "repoType": "github", 79 | "repoHost": "https://github.com", 80 | "skipCi": true 81 | } 82 | -------------------------------------------------------------------------------- /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ;;; Directory Local Variables 2 | ;;; For more information see (info "(emacs) Directory Variables") 3 | 4 | ((ess-mode 5 | (comment-column . 0))) 6 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | branches: 7 | - master 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: macOS-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: r-lib/actions/setup-r@master 17 | - name: Install dependencies 18 | run: | 19 | install.packages(c("remotes", "rcmdcheck")) 20 | remotes::install_deps(dependencies = TRUE) 21 | shell: Rscript {0} 22 | - name: Check 23 | run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") 24 | shell: Rscript {0} 25 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | 7 | name: pkgdown 8 | 9 | jobs: 10 | pkgdown: 11 | runs-on: macOS-latest 12 | env: 13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - uses: r-lib/actions/setup-r@v1 18 | 19 | - uses: r-lib/actions/setup-pandoc@v1 20 | 21 | - name: Query dependencies 22 | run: | 23 | install.packages('remotes') 24 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 25 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 26 | shell: Rscript {0} 27 | 28 | - name: Cache R packages 29 | uses: actions/cache@v2 30 | with: 31 | path: ${{ env.R_LIBS_USER }} 32 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 33 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 34 | 35 | - name: Install dependencies 36 | run: | 37 | remotes::install_deps(dependencies = TRUE) 38 | install.packages("pkgdown", type = "binary") 39 | shell: Rscript {0} 40 | 41 | - name: Install package 42 | run: R CMD INSTALL . 
43 | 44 | - name: Deploy package 45 | run: | 46 | git config --local user.email "actions@github.com" 47 | git config --local user.name "GitHub Actions" 48 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | inst/doc 4 | # History files 5 | .Rhistory 6 | .Rapp.history 7 | 8 | # Session Data files 9 | .RData 10 | # User-specific files 11 | .Ruserdata 12 | # Example code in package build process 13 | *-Ex.R 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | # Output files from R CMD check 17 | /*.Rcheck/ 18 | # RStudio files 19 | .Rproj.user/ 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 24 | .httr-oauth 25 | # knitr and R markdown default cache directories 26 | *_cache/ 27 | /cache/ 28 | # Temporary files created by R markdown 29 | *.utf8.md 30 | *.knit.md 31 | # R Environment Variables 32 | .Renviron 33 | docs 34 | -------------------------------------------------------------------------------- /.projectile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/.projectile -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: monstR 2 | Title: Download publically available data the ONS API 3 | Version: 0.0.0.9000 4 | Authors@R: 5 | c(person(given = "Neale", 6 | family = "Swinnerton", 7 | role = c("aut"), 8 | email = "neale@mastodonc.com"), 9 | person(given = "Emma", 10 | family = "Vestesson", 11 | role = c("cre", "ctb"), 12 | email = "emma.vestesson@gmail.com", 13 | comment = c(ORCID = "0000-0002-7284-9172"))) 14 | Description: Queries ONS API to download data. It can be used to retrieve publically available data and meta data from the ONS. 15 | License: MIT + file LICENSE 16 | Encoding: UTF-8 17 | LazyData: true 18 | Roxygen: list(markdown = TRUE) 19 | RoxygenNote: 7.1.1 20 | Imports: 21 | magrittr, 22 | logger, 23 | jsonlite, 24 | usethis, 25 | curl, 26 | purrr, 27 | dplyr, 28 | whisker, 29 | here, 30 | readr, 31 | janitor, 32 | readxl, 33 | writexl 34 | Suggests: 35 | knitr, 36 | rmarkdown 37 | VignetteBuilder: knitr 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 The Health Foundation Analytics Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 The Health Foundation Analytics Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(monstr_clean) 5 | export(monstr_data) 6 | export(monstr_pipeline_defaults) 7 | export(monstr_read_file) 8 | export(monstr_write_clean) 9 | export(ons_available_datasets) 10 | export(ons_available_editions) 11 | export(ons_available_versions) 12 | export(ons_dataset_by_id) 13 | export(ons_datasets_setup) 14 | export(ons_download) 15 | import(dplyr) 16 | import(here) 17 | import(janitor) 18 | import(jsonlite) 19 | import(logger) 20 | import(readr) 21 | import(readxl) 22 | import(whisker) 23 | import(writexl) 24 | importFrom(curl,curl_download) 25 | importFrom(magrittr,"%>%") 26 | importFrom(readr,write_csv) 27 | importFrom(readr,write_rds) 28 | importFrom(writexl,write_xlsx) 29 | -------------------------------------------------------------------------------- /R/clean.R: -------------------------------------------------------------------------------- 1 | ##' @title Create the MONSTR defaults 2 | ##' @param download_root Root of directory hierarchy. 
3 | ##' @return an augmented metadata 4 | ##' @author Neale Swinnerton 5 | ##' @export 6 | ##' @import here 7 | monstr_pipeline_defaults <- function(download_root="") { 8 | basedir <- "{{download_root}}/data" 9 | filepath <- "{{datasource}}/{{dataset}}/{{edition}}/{{dataset}}-v{{version}}.{{format}}" 10 | 11 | metadata <- list() 12 | metadata$download_filename_template = sprintf("%s/raw/%s", 13 | basedir, 14 | filepath) 15 | metadata$clean_filename_template = sprintf("%s/clean/%s", 16 | basedir, 17 | filepath) 18 | metadata$create_latest_symlink <- FALSE 19 | if (missing(download_root)) { 20 | metadata$download_root = here::here() # TODO here supposedly for 21 | # interactive use? 22 | } 23 | metadata$download_root = download_root 24 | metadata 25 | } 26 | 27 | ##' @title Read the file described by the metadata 28 | ##' @param metadata description of the downloaded file. 29 | ##' @return a metadata incorporating the data. The actually data can then be 30 | ##' extracted with \code{\link{monstr_data}} 31 | ##' @author Neale Swinnerton 32 | ##' @export 33 | ##' @import readr 34 | ##' @import readxl 35 | monstr_read_file <- function(metadata) { 36 | monstr <- metadata$monstr 37 | 38 | if (monstr$format == "csv") { 39 | metadata$monstr_data <- readr::read_csv(metadata$monstr$destfile) 40 | } else if (monstr$format %in% c("xls", "xlsx")) { 41 | metadata$monstr_data <- readxl::read_excel(metadata$monstr$destfile) 42 | } 43 | metadata$monstr <- monstr 44 | metadata 45 | } 46 | 47 | ##' @title Clean the data according to MONSTR rules. 48 | ##' @param metadata description the downloaded file. 49 | ##' @return description of the cleaned data 50 | ##' @author Neale Swinnerton 51 | ##' @export 52 | ##' @import janitor 53 | monstr_clean <- function(metadata) { 54 | metadata$monstr_data <- janitor::clean_names(metadata$monstr_data) 55 | metadata$monstr$is_clean <- TRUE 56 | metadata 57 | } 58 | 59 | ##' Extract the tibble of the actual data 60 | ##' 61 | ##' @title Get the Data 62 | ##' @param metadata description of the downloaded data 63 | ##' @return a \code{\link[tibble]{dplyr::tibble}} of the data from the 64 | ##' described download 65 | ##' @author Neale Swinnerton 66 | ##' @export 67 | monstr_data <- function(metadata) { 68 | metadata$monstr_data 69 | } 70 | 71 | ##' @title Writes the data to the 'clean' area 72 | ##' @param metadata description of the data. 73 | ##' @param format any known format or "all" to save a copy as all 74 | ##' known formats 75 | ##' @param create_directory boolean indicating whether directories 76 | ##' should be created. 77 | ##' @return a boolean indicating success 78 | ##' @author Neale Swinnerton 79 | ##' @export 80 | ##' @import logger 81 | ##' @importFrom readr write_csv write_rds 82 | ##' @importFrom writexl write_xlsx 83 | monstr_write_clean <- function(metadata, 84 | format="csv", 85 | create_directory=TRUE) { 86 | success <- TRUE 87 | monstr <- metadata$monstr 88 | 89 | if (monstr$is_clean) { 90 | 91 | data <- metadata$monstr_data 92 | csv <- format == "csv" 93 | xls <- format %in% c("xls", "xlsx") 94 | rds <- format == "rds" 95 | 96 | if (format == "all") { 97 | csv <- TRUE 98 | xls <- TRUE 99 | rds <- TRUE 100 | } 101 | 102 | # TODO - should success be a logical vector indicating which 103 | # have succeeded? 
104 | if (csv) { 105 | success <- success && write_csv(data, monstr, create_directory) 106 | } 107 | 108 | if (xls) { 109 | success <- success && write_xlsx(data, monstr, create_directory) 110 | } 111 | 112 | if (rds) { 113 | success <- success && write_rds(data, monstr, create_directory) 114 | } 115 | } else { 116 | logger::log_warn("Data has not been cleaned. NOT writing") 117 | success <- FALSE 118 | } 119 | 120 | success 121 | } 122 | -------------------------------------------------------------------------------- /R/ons.R: -------------------------------------------------------------------------------- 1 | 2 | api_base_url <- "https://api.beta.ons.gov.uk/v1/datasets" 3 | 4 | ## START TODO - make these fns more general? 5 | ## Something like this (but this example doesn't work): 6 | ## ons_get_item_by <- function(df, name, value) { 7 | ## df$items[df$items[name] %>% detect_index(~ . == value)] 8 | ## } 9 | 10 | ## TODO - fix weirdness here - should be able to df$items %>% 11 | ## filter(...) rather than this detect_index but some type confusion 12 | 13 | 14 | ons_item_by_id <- function(df, id) { 15 | df$items[df$items$id %>% purrr::detect_index(~ . == id), ] 16 | } 17 | 18 | ons_edition_by_name <- function(df, edition) { 19 | df$items[df$items$edition %>% purrr::detect_index(~ . == edition), ] 20 | } 21 | 22 | ons_version_by_version <- function(df, version) { 23 | df$items[df$items$version %>% purrr::detect_index(~ . == version), ] 24 | } 25 | 26 | ## END TODO - make these fns more general? 27 | 28 | ons_download_by_format <- function(metadata, format) { 29 | download <- metadata$downloads[[format]] 30 | if (is.null(download)) { 31 | valid_formats <- names(metadata$downloads) 32 | logger::log_error(sprintf("Format '%s' not found, valid formats for this dataset are %s", format, toString(names(metadata$downloads)))) 33 | stop() 34 | } 35 | 36 | download 37 | } 38 | 39 | ## TODO - is there a std fn for this? 40 | ##' @import logger 41 | log_panic <- function(...) { 42 | logger::log_error(...) 43 | quit(status = 1) 44 | } 45 | 46 | ##' Make request to given url, which is assumed to be the ONS api. 47 | ##' 48 | ##' data retrieved is converted to tidyverse tibble if possible. 49 | ##' 50 | ##' @title Call the ONS API 51 | ##' @param url url to call @seeAlso \code{\link{[api_base_url]}} 52 | ##' @return a list contained the API call results 53 | ##' @author Neale Swinnerton 54 | ##' @import dplyr 55 | ons_api_call <- function(url) { 56 | df <- jsonlite::fromJSON(url) 57 | if ("items" %in% colnames(df)) { 58 | df$items <- dplyr::as_tibble(df$items) 59 | } 60 | df 61 | } 62 | 63 | 64 | ##' This returns a dataframe containing details that can be passed to 65 | ##' other fns in this package for further processing 66 | ##' @title Datasets Setup 67 | ##' @param defaults a list with folder system. 
Valid values from \code{monstr_pipeline_defaults(...)} 68 | ##' @return a list describing available datasets 69 | ##' @author Neale Swinnerton 70 | ##' @export 71 | ##' @import jsonlite 72 | ##' @import dplyr 73 | ##' @examples 74 | ##' \dontrun{ 75 | ##' monstr_pipeline_defaults() %>% 76 | ##' ons_datasets_setup() # rooted in current project 77 | ##' } 78 | ##' \dontrun{ 79 | ##' monstr_pipeline_defaults(download_root="/path/to/download/root/") %>% 80 | ##' ons_datasets_setup() 81 | ##' } 82 | ons_datasets_setup <- function(defaults) { 83 | results <- ons_api_call(api_base_url) 84 | results$monstr <- defaults 85 | results$monstr$src_url <- api_base_url 86 | 87 | results 88 | } 89 | 90 | ##' Retrieves a dataframe describing the datasets available from ONS via the API. 91 | ##' @title Available Datasets 92 | ##' @return list of available datasets and associated metadata 93 | ##' @author Neale Swinnerton 94 | ##' @export 95 | ##' @import dplyr 96 | ##' @examples 97 | ##' \dontrun{ 98 | ##' # return information on all available datasets and then filter on specific id 99 | ##' datasets <- ons_available_datasets() 100 | ##' 101 | ##' datasets %>% 102 | ##' filter(id='health-accounts') 103 | ##' } 104 | ##' \dontrun{ 105 | ##' # display just the ids 106 | ##' ons_available_datasets() %>% select(id) 107 | ##' } 108 | ons_available_datasets <- function() { 109 | desc <- ons_api_call(api_base_url)$items %>% 110 | dplyr::select(id, title, description, unit_of_measure, next_release, release_frequency, publications) 111 | return(desc) 112 | 113 | } 114 | 115 | #' Retrieve the metadata for the given dataset. 116 | #' 117 | #' Makes calls to the ONS API and retrieves the metadata for the 118 | #' datasets. The dataset selection can be refined via the edition and 119 | #' version parameters 120 | #' 121 | #' @title Dataset By Id 122 | #' @param metadata data describing the dataset 123 | #' @param id the identifier of the dataset. Valid values from \code{ons_available_datasets()} 124 | #' @param edition the edition of the dataset (if empty, select latest). Valid values from \code{ons_available_editions(...)} 125 | #' @param version the version of the dataset (if empty, select latest). Valid values from \code{ons_available_available(...)} 126 | #' @return a dataframe describing the dataset. 
127 | #' @author Neale Swinnerton 128 | #' @export 129 | ##' @import logger 130 | ons_dataset_by_id <- function(metadata, id, edition, version) { 131 | links <- ons_item_by_id(metadata, id)$links 132 | monstr <- metadata$monstr # save for later 133 | 134 | if (missing(edition)) { 135 | logger::log_info("Edition not specified, defaulting to latest version") 136 | link <- links$latest_version$href 137 | is_latest <- TRUE 138 | } else { 139 | metadata <- 140 | ons_api_call(links$editions$href) %>% 141 | ons_edition_by_name(edition) 142 | 143 | is_latest <- FALSE 144 | if (missing(version)) { 145 | logger::log_info("Version of ", edition, 146 | " edition not specified, defaulting to latest version") 147 | link <- metadata$links$latest_version$href 148 | is_latest <- TRUE 149 | } else { 150 | version_metadata <- 151 | ons_api_call(metadata$links$versions$href) %>% 152 | ons_version_by_version(version) 153 | 154 | if (nrow(version_metadata) == 0) { 155 | log_panic("Version ", version, " of ", edition, 156 | " is not available") 157 | } else { 158 | logger::log_info("Version ", version, " of ", edition, 159 | " edition selected") 160 | } 161 | 162 | link <- version_metadata$links$self$href 163 | 164 | ## TODO should we work out whether the specified version is the latest here? 165 | ## is 'latest' highest version or newest release-date ? 166 | } 167 | } 168 | 169 | logger::log_info(sprintf("Retrieving dataset metadata from %s", link)) 170 | dataset <- ons_api_call(link) 171 | 172 | dataset$monstr <- monstr 173 | dataset$monstr$is_latest <- is_latest 174 | dataset$monstr$datasource <- "ons" 175 | dataset$monstr$dataset <- id 176 | dataset$monstr$edition <- dataset$edition 177 | dataset$monstr$version <- dataset$version 178 | dataset 179 | } 180 | 181 | ##' @title Available Editions 182 | ##' @param id dataset identifier. Valid values from \code{ons_available_datasets(...)} 183 | ##' @return a list of edition identifiers 184 | ##' @author Neale Swinnerton 185 | ##' @export 186 | ##' @import dplyr 187 | ##' @examples 188 | ##' \dontrun{ 189 | #' ons_available_editions(id = 'mid-year-pop-est') 190 | #' } 191 | ons_available_editions <- function(id) { 192 | metadata <- ons_api_call(sprintf("%s/%s/editions", api_base_url, id)) 193 | 194 | metadata$items %>% 195 | dplyr::select(matches("edition")) 196 | } 197 | 198 | ##' @title Available Versions 199 | ##' @param id dataset identifier. Valid values from \code{ons_available_datasets(...)} 200 | ##' @param edition edition identifier. 
Valid values from \code{ons_available_editions(...)} 201 | ##' @return a list of version identifiers 202 | ##' @author Neale Swinnerton 203 | ##' @export 204 | ##' @import dplyr 205 | ##' @examples 206 | #' \dontrun{ 207 | #' ons_available_versions(id = "regional-gdp-by-quarter", edition = "time-series") 208 | #' } 209 | ons_available_versions <- function(id, edition) { 210 | metadata <- ons_api_call(sprintf("%s/%s/editions/%s/versions", api_base_url, id, edition)) 211 | 212 | metadata$items %>% 213 | dplyr::select(version) 214 | } 215 | 216 | ##' Download 217 | ##' 218 | ##' \code{ons_download} retrieves the data described by the given df 219 | ##' @param metadata data describing the download 220 | ##' @param format a valid format for the download 221 | ##' @export 222 | ##' @import logger 223 | ons_download <- function(metadata, 224 | format="csv" ) { 225 | validate_file <- function(f) { 226 | expected_size <- as.numeric(download$size) 227 | 228 | if (file.size(f) != expected_size) { 229 | log_panic(sprintf("Inconsistent file size expected %d, got %d", 230 | expected_size, 231 | file.size(f))) 232 | FALSE 233 | } else { 234 | TRUE 235 | } 236 | } 237 | try (if(!(format %in% c('csv', 'xls'))) stop('Format not allowed')) 238 | download <- 239 | metadata %>% 240 | ons_download_by_format(format) ## TODO - error if format not found? 241 | 242 | metadata$monstr$format <- format 243 | 244 | logger::log_info(sprintf("Downloading data from %s", download$href)) 245 | 246 | destfile <- generate_download_filename(template=metadata$monstr$download_filename_template, 247 | root=metadata$monstr$download_root, 248 | data=metadata$monstr) 249 | 250 | if (safe_download(url = c(download$href), 251 | destfile = destfile, 252 | fvalidate = validate_file)) { 253 | write_metadata(metadata, sprintf("%s.meta.json", destfile)) 254 | logger::log_info(sprintf("File created at %s ", destfile)) 255 | } 256 | 257 | if (metadata$monstr$create_latest_symlink && 258 | metadata$monstr$is_latest) { 259 | 260 | version <- metadata$monstr$version 261 | metadata$monstr$version <- "LATEST" 262 | 263 | linkfile <- generate_download_filename(template=metadata$monstr$download_filename_template, 264 | root=metadata$monstr$download_root, 265 | data=metadata$monstr) 266 | 267 | metadata$monstr$version <- version 268 | if (file.exists(linkfile)) { 269 | file.remove(linkfile) 270 | } 271 | 272 | file.symlink(destfile, 273 | linkfile) 274 | log_info("Create symlink to LATEST file") 275 | } 276 | 277 | metadata$monstr$destfile <- destfile 278 | metadata 279 | } 280 | -------------------------------------------------------------------------------- /R/util.R: -------------------------------------------------------------------------------- 1 | 2 | ##' @title Safe Download 3 | ##' 4 | ##' Downloads a file and tries hard to tidy up in the event of 5 | ##' errors. Since these files are typically large we don't want to 6 | ##' leave them in temp directories. 7 | ##' 8 | ##' The destfile should only appear if the download was successful. 9 | ##' 10 | ##' @param url src for the download 11 | ##' @param destfile destination filename 12 | ##' @param fvalidate a fn that is passed the filename after download 13 | ##' to validate it in some way. The fn should return TRUE if the 14 | ##' file is valid. 
15 | ##' @importFrom curl curl_download 16 | safe_download <- function(url, destfile, fvalidate) { 17 | success <- TRUE 18 | 19 | tryCatch({ 20 | tmp <- tempfile() 21 | curl::curl_download(url = url, 22 | destfile = tmp) 23 | 24 | if (!missing(fvalidate) && !fvalidate(tmp)) { 25 | success <- FALSE 26 | ## report the destfile name to not confuse user, although 27 | ## not strictly true 28 | log_panic("file ", destfile, " failed validation. Deleting it") 29 | } 30 | 31 | ## rename to final destination. This is generally an atomic 32 | ## operation, so we can assume the final file only appears if 33 | ## this succeeds. 34 | if (success && !file.rename(from = tmp, 35 | to = destfile)) { 36 | success <- FALSE 37 | log_panic("file ", destfile, " Not created!") 38 | } 39 | }, 40 | finally = if (file.exists(tmp)) file.remove(tmp)) 41 | 42 | success 43 | } 44 | 45 | #' @title Write Metadata 46 | #' 47 | #' \code{(write_metadata)} writes some metadata about where the file 48 | #' came from. TODO - could do this with fs xattr, but maybe that's 49 | #' not well known by users? 50 | #' 51 | #' @param metadata a dataframe containing metadata 52 | #' @param destfile filename into which the metadata should be written 53 | #' as JSON 54 | write_metadata <- function(metadata, destfile) { 55 | json <- jsonlite::toJSON(metadata, pretty = TRUE, flatten = TRUE) 56 | tryCatch({ 57 | f <- file(destfile) 58 | writeLines(c(json), con = f, sep = "") 59 | }, 60 | finally = close(f) 61 | ) 62 | } 63 | 64 | 65 | ##' @title generate a filename for a download 66 | ##' 67 | ##' @param template same as whisker template 68 | ##' @param root the root of the directory hierarchy 69 | ##' @param data data used to populate the template 70 | ##' @param create_directory boolean indicating whether to 71 | ##' (recursively) create the directory hierarchy. 72 | ##' @return a filename 73 | ##' @import whisker 74 | generate_download_filename <- function(template, root, data, create_directory=TRUE) { 75 | 76 | path <- whisker.render(template, 77 | data) 78 | 79 | dir <- dirname(path) 80 | 81 | if (create_directory && !dir.exists(dir)) { 82 | logger::log_info("Creating directory ", dir) 83 | dir.create(dir, recursive=TRUE) 84 | } 85 | 86 | path 87 | 88 | } 89 | 90 | ##' @title write the data as a csv. 91 | ##' @param data The actual data 92 | ##' @param monstr metadata dataframe created by the pipeline 93 | ##' @param create_directory boolean indicating whether to 94 | ##' (recursively) create the directory hierarchy. 95 | ##' @return boolean indicating success 96 | ##' @author Neale Swinnerton 126 | ##' @import writexl 127 | ##' @import logger 128 | write_xlsx <- function(data, monstr, create_directory) { 129 | success <- TRUE 130 | monstr$format <- "xlsx" 131 | destfile <- generate_download_filename(monstr$clean_filename_template, 132 | monstr$download_root, 133 | monstr, 134 | create_directory) 135 | logger::log_info(sprintf("Writing %s data to %s", monstr$format, destfile)) 136 | tryCatch ( 137 | writexl::write_xlsx(x=data, path=destfile), 138 | error = function(e) { 139 | logger::log_error("Problem writing xlsx") 140 | success <- FALSE 141 | }) 142 | 143 | success 144 | } 145 | 146 | ##' @title write the data as a RDS. 147 | ##' @param data The actual data 148 | ##' @param monstr metadata dataframe created by the pipeline 149 | ##' @param create_directory boolean indicating whether to 150 | ##' (recursively) create the directory hierarchy. 
151 | ##' @return boolean indicating success 152 | ##' @author Neale Swinnerton 153 | ##' @import logger 154 | write_rds <- function(data, monstr,create_directory) { 155 | success <- TRUE 156 | monstr$format <- "rds" 157 | destfile <- generate_download_filename(monstr$clean_filename_template, 158 | monstr$download_root, 159 | monstr, 160 | create_directory) 161 | logger::log_info(sprintf("Writing %s data to %s", monstr$format, destfile)) 162 | tryCatch ( 163 | saveRDS(object=data, file=destfile), 164 | 165 | error = function(e) { 166 | logger::log_error("Problem writing rds") 167 | success <- FALSE 168 | } 169 | ) 170 | 171 | success 172 | } 173 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # monstR - making ONS tables readable 2 | 3 | [![All Contributors](https://img.shields.io/badge/all_contributors-7-orange.svg?style=flat-square)](#contributors-) 4 | 5 | 6 | ![R-CMD-check](https://github.com/HFAnalyticsLab/Open_data_pipelines/workflows/R-CMD-check/badge.svg) 7 | 8 | #### Project Status: in progress 9 | ## Project Description 10 | 11 | This package is a part of our open-source R pipeline to download and clean public data related to health and social care. The aim is to provide analysts, primarily at the Health Foundation, with clean and ready for analysis data. 12 | 13 | ## Overview 14 | 15 | monstR - making ONS tables readable is a package that queries the [Office for National Statistics (ONS) API](https://developer.beta.ons.gov.uk/) to download data. It can be used to retrieve publically available data and meta data from the ONS. 16 | 17 | - `ons_available_datasets()` returns information about available datasets 18 | - `ons_available_versions()` returns information about available dataset versions 19 | - `ons_available_editions()` returns information about available dataset editions 20 | - `ons_download()` downloads the specified data 21 | 22 | Please note that the ONS API that this package relies on is in beta and it might change. 23 | 24 | ## Installation 25 | 26 | 27 | If you have cloned a local copy of the repo, you should be able to load it using devtools 28 | 29 |
 30 | 
 31 | library(devtools)
 32 | setwd("{location of monstR repo}")
 33 | devtools::load_all()
 34 | 
35 | 36 | or to install direct from Github 37 | ``` 38 | remotes::install_github("HFAnalyticsLab/monstR", build_vignettes = TRUE ) 39 | ``` 40 | 41 | ## Examples 42 | 43 | This is an example of how to download weekly mortality data by region. Note that this will create folders and download data. 44 | 45 | ``` 46 | monstr_pipeline_defaults() %>% # Uses the monstr 'standards' for location and format 47 | ons_datasets_setup() %>% 48 | ons_dataset_by_id("weekly-deaths-region") %>% 49 | ons_download(format="csv") %>% 50 | monstr_read_file() %>% 51 | monstr_clean() %>% 52 | monstr_write_clean(format="all") 53 | 54 | ``` 55 | 56 | ## Resources 57 | 58 | ### Online documentation 59 | 60 | You can find the help pages at . 61 | 62 | ## Questions and bug reports 63 | 64 | This is a package under active development and we would love for you to contribute or flag any issues you might find. 65 | 66 | You can ask questions or flag a bug by [filing an issue](https://github.com/HFAnalyticsLab/monstR/issues). We are more likely to be able to help you if we can reproduce your issue. The `reprex` package is a good way of producing a minimal reproducible package and [So you've been asked to make a reprex](https://www.jessemaegan.com/post/so-you-ve-been-asked-to-make-a-reprex/) will help you get started. 67 | 68 | ### Contributing to the package development 69 | 70 | We aim to make the documentation as comprehensive as possible. Please contribute examples or suggest improvements to the 71 | documentation. 72 | 73 | If you have written a function that you think should be added to the package, or improved an existing function, please submit a pull request that includes: 74 | 75 | - the new/amended function(s) with code and roxygen tags (with examples) 76 | - a new section in the appropriate vignette that describes how to use 77 | the new function 78 | - corresponding tests in directory `tests/testthat`. 79 | 80 | ## Design Principles 81 | 82 | The monstrR Open Data Pipeline is designed to work well with tidyverse and in particular within pipelines created by the `%>%` pipe operator. With this in mind, most functions take a data structure in the first argument and return a data structure which has been augmented in some way. Typically this is metadata about the actual data, although once the data has been cleaned it can be accessed using `monstr_data(metadata)` to get at a tidyverse tibble of the data. 83 | 84 | 85 | ## Authors 86 | * **Neale Swinnerton** - [Github](https://github.com/sw1nn) 87 | * **Emma Vestesson** - [Github](https://github.com/emmavestesson) [Twitter](https://twitter.com/Gummifot) 88 | 89 | ## License 90 | 91 | This project is licensed under the [MIT License](https://github.com/HFAnalyticsLab/monstR/blob/master/LICENSE). 92 | 93 | ## Contributors ✨ 94 | 95 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 |

* Emma Vestesson 🤔 🖋 📖
* Tom Jemmett 🐛
* JohnHC86 🐛
* Neale Swinnerton 💻
* fiona-grimm 🤔 🎨
* SimonCRUK 🐛
* Zoe Turner 🐛
111 | 112 | 113 | 114 | 115 | 116 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! 117 | -------------------------------------------------------------------------------- /THFOpenDataPipeline.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | home: 2 | title: An R package to download publically available data from the ONS API 3 | description: Download publically available data from the ONS API 4 | 5 | template: 6 | opengraph: 7 | image: 8 | src: man/figures/monstR_sticker.png 9 | alt: "The logo for the monstR package - a grey monster" 10 | twitter: 11 | creator: "@gummifot" 12 | site: "@HealthFdn" 13 | card: summary_large_image -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/monstR_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/man/figures/monstR_2.png -------------------------------------------------------------------------------- /man/figures/monstR_sticker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/man/figures/monstR_sticker.png -------------------------------------------------------------------------------- /man/generate_download_filename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{generate_download_filename} 4 | \alias{generate_download_filename} 5 | \title{generate a filename for a download} 6 | \usage{ 7 | generate_download_filename(template, root, data, create_directory = TRUE) 8 | } 9 | \arguments{ 10 | \item{template}{same as whisker template} 11 | 12 | \item{root}{the root of the directory hierarchy} 13 | 14 | \item{data}{data used to populate the template} 15 | 16 | \item{create_directory}{boolean indicating whether to 17 | (recursively) create the directory hierarchy.} 18 | } 19 | \value{ 20 | a filename 21 | } 22 | \description{ 23 | generate a filename for a download 24 | } 25 | -------------------------------------------------------------------------------- /man/monstr_clean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.R 3 | \name{monstr_clean} 4 | \alias{monstr_clean} 5 | \title{Clean the data according 
to MONSTR rules.} 6 | \usage{ 7 | monstr_clean(metadata) 8 | } 9 | \arguments{ 10 | \item{metadata}{description the downloaded file.} 11 | } 12 | \value{ 13 | description of the cleaned data 14 | } 15 | \description{ 16 | Clean the data according to MONSTR rules. 17 | } 18 | \author{ 19 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/monstr_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.R 3 | \name{monstr_data} 4 | \alias{monstr_data} 5 | \title{Get the Data} 6 | \usage{ 7 | monstr_data(metadata) 8 | } 9 | \arguments{ 10 | \item{metadata}{description of the downloaded data} 11 | } 12 | \value{ 13 | a \code{\link[tibble]{dplyr::tibble}} of the data from the 14 | described download 15 | } 16 | \description{ 17 | Extract the tibble of the actual data 18 | } 19 | \author{ 20 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 21 | } 22 | -------------------------------------------------------------------------------- /man/monstr_pipeline_defaults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.R 3 | \name{monstr_pipeline_defaults} 4 | \alias{monstr_pipeline_defaults} 5 | \title{Create the MONSTR defaults} 6 | \usage{ 7 | monstr_pipeline_defaults(download_root = "") 8 | } 9 | \arguments{ 10 | \item{download_root}{Root of directory hierarchy.} 11 | } 12 | \value{ 13 | an augmented metadata 14 | } 15 | \description{ 16 | Create the MONSTR defaults 17 | } 18 | \author{ 19 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/monstr_read_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.R 3 | \name{monstr_read_file} 4 | \alias{monstr_read_file} 5 | \title{Read the file described by the metadata} 6 | \usage{ 7 | monstr_read_file(metadata) 8 | } 9 | \arguments{ 10 | \item{metadata}{description of the downloaded file.} 11 | } 12 | \value{ 13 | a metadata incorporating the data. 
The actually data can then be 14 | extracted with \code{\link{monstr_data}} 15 | } 16 | \description{ 17 | Read the file described by the metadata 18 | } 19 | \author{ 20 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 21 | } 22 | -------------------------------------------------------------------------------- /man/monstr_write_clean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.R 3 | \name{monstr_write_clean} 4 | \alias{monstr_write_clean} 5 | \title{Writes the data to the 'clean' area} 6 | \usage{ 7 | monstr_write_clean(metadata, format = "csv", create_directory = TRUE) 8 | } 9 | \arguments{ 10 | \item{metadata}{description of the data.} 11 | 12 | \item{format}{any known format or "all" to save a copy as all 13 | known formats} 14 | 15 | \item{create_directory}{boolean indicating whether directories 16 | should be created.} 17 | } 18 | \value{ 19 | a boolean indicating success 20 | } 21 | \description{ 22 | Writes the data to the 'clean' area 23 | } 24 | \author{ 25 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 26 | } 27 | -------------------------------------------------------------------------------- /man/ons_api_call.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_api_call} 4 | \alias{ons_api_call} 5 | \title{Call the ONS API} 6 | \usage{ 7 | ons_api_call(url) 8 | } 9 | \arguments{ 10 | \item{url}{url to call @seeAlso \code{\link{[api_base_url]}}} 11 | } 12 | \value{ 13 | a list contained the API call results 14 | } 15 | \description{ 16 | Make request to given url, which is assumed to be the ONS api. 17 | } 18 | \details{ 19 | data retrieved is converted to tidyverse tibble if possible. 20 | } 21 | \author{ 22 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 23 | } 24 | -------------------------------------------------------------------------------- /man/ons_available_datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_available_datasets} 4 | \alias{ons_available_datasets} 5 | \title{Available Datasets} 6 | \usage{ 7 | ons_available_datasets() 8 | } 9 | \value{ 10 | list of available datasets and associated metadata 11 | } 12 | \description{ 13 | Retrieves a dataframe describing the datasets available from ONS via the API. 
14 | } 15 | \examples{ 16 | \dontrun{ 17 | # return information on all available datasets and then filter on specific id 18 | datasets <- ons_available_datasets() 19 | 20 | datasets \%>\% 21 | filter(id='health-accounts') 22 | } 23 | \dontrun{ 24 | # display just the ids 25 | ons_available_datasets() \%>\% select(id) 26 | } 27 | } 28 | \author{ 29 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 30 | } 31 | -------------------------------------------------------------------------------- /man/ons_available_editions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_available_editions} 4 | \alias{ons_available_editions} 5 | \title{Available Editions} 6 | \usage{ 7 | ons_available_editions(id) 8 | } 9 | \arguments{ 10 | \item{id}{dataset identifier. Valid values from \code{ons_available_datasets(...)}} 11 | } 12 | \value{ 13 | a list of edition identifiers 14 | } 15 | \description{ 16 | Available Editions 17 | } 18 | \examples{ 19 | \dontrun{ 20 | ons_available_editions(id = 'mid-year-pop-est') 21 | } 22 | } 23 | \author{ 24 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 25 | } 26 | -------------------------------------------------------------------------------- /man/ons_available_versions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_available_versions} 4 | \alias{ons_available_versions} 5 | \title{Available Versions} 6 | \usage{ 7 | ons_available_versions(id, edition) 8 | } 9 | \arguments{ 10 | \item{id}{dataset identifier. Valid values from \code{ons_available_datasets(...)}} 11 | 12 | \item{edition}{edition identifier. Valid values from \code{ons_available_editions(...)}} 13 | } 14 | \value{ 15 | a list of version identifiers 16 | } 17 | \description{ 18 | Available Versions 19 | } 20 | \examples{ 21 | \dontrun{ 22 | ons_available_versions(id = "regional-gdp-by-quarter", edition = "time-series") 23 | } 24 | } 25 | \author{ 26 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 27 | } 28 | -------------------------------------------------------------------------------- /man/ons_dataset_by_id.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_dataset_by_id} 4 | \alias{ons_dataset_by_id} 5 | \title{Dataset By Id} 6 | \usage{ 7 | ons_dataset_by_id(metadata, id, edition, version) 8 | } 9 | \arguments{ 10 | \item{metadata}{data describing the dataset} 11 | 12 | \item{id}{the identifier of the dataset. Valid values from \code{ons_available_datasets()}} 13 | 14 | \item{edition}{the edition of the dataset (if empty, select latest). Valid values from \code{ons_available_editions(...)}} 15 | 16 | \item{version}{the version of the dataset (if empty, select latest). Valid values from \code{ons_available_available(...)}} 17 | } 18 | \value{ 19 | a dataframe describing the dataset. 20 | } 21 | \description{ 22 | Retrieve the metadata for the given dataset. 23 | } 24 | \details{ 25 | Makes calls to the ONS API and retrieves the metadata for the 26 | datasets. 
The dataset selection can be refined via the edition and 27 | version parameters 28 | } 29 | \author{ 30 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 31 | } 32 | -------------------------------------------------------------------------------- /man/ons_datasets_setup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_datasets_setup} 4 | \alias{ons_datasets_setup} 5 | \title{Datasets Setup} 6 | \usage{ 7 | ons_datasets_setup(defaults) 8 | } 9 | \arguments{ 10 | \item{defaults}{a list with folder system. Valid values from \code{monstr_pipeline_defaults(...)}} 11 | } 12 | \value{ 13 | a list describing available datasets 14 | } 15 | \description{ 16 | This returns a dataframe containing details that can be passed to 17 | other fns in this package for further processing 18 | } 19 | \examples{ 20 | \dontrun{ 21 | monstr_pipeline_defaults() \%>\% 22 | ons_datasets_setup() # rooted in current project 23 | } 24 | \dontrun{ 25 | monstr_pipeline_defaults(download_root="/path/to/download/root/") \%>\% 26 | ons_datasets_setup() 27 | } 28 | } 29 | \author{ 30 | Neale Swinnerton \href{mailto:neale@mastodonc.com}{neale@mastodonc.com} 31 | } 32 | -------------------------------------------------------------------------------- /man/ons_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ons.R 3 | \name{ons_download} 4 | \alias{ons_download} 5 | \title{Download} 6 | \usage{ 7 | ons_download(metadata, format = "csv") 8 | } 9 | \arguments{ 10 | \item{metadata}{data describing the download} 11 | 12 | \item{format}{a valid format for the download} 13 | } 14 | \description{ 15 | \code{ons_download} retrieves the data described by the given df 16 | } 17 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr]{\%>\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/safe_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{safe_download} 4 | \alias{safe_download} 5 | \title{Safe Download 6 | 7 | Downloads a file and tries hard to tidy up in the event of 8 | errors. Since these files are typically large we don't want to 9 | leave them in temp directories. 10 | 11 | The destfile should only appear if the download was successful.} 12 | \usage{ 13 | safe_download(url, destfile, fvalidate) 14 | } 15 | \arguments{ 16 | \item{url}{src for the download} 17 | 18 | \item{destfile}{destination filename} 19 | 20 | \item{fvalidate}{a fn that is passed the filename after download 21 | to validate it in some way. The fn should return TRUE if the 22 | file is valid.} 23 | } 24 | \description{ 25 | Safe Download 26 | 27 | Downloads a file and tries hard to tidy up in the event of 28 | errors. 
Since these files are typically large we don't want to 29 | leave them in temp directories. 30 | 31 | The destfile should only appear if the download was successful. 32 | } 33 | -------------------------------------------------------------------------------- /man/write_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{write_csv} 4 | \alias{write_csv} 5 | \title{write the data as a csv.} 6 | \usage{ 7 | write_csv(data, monstr, create_directory) 8 | } 9 | \arguments{ 10 | \item{data}{The actual data} 11 | 12 | \item{monstr}{metadata dataframe created by the pipeline} 13 | 14 | \item{create_directory}{boolean indicating whether to 15 | (recursively) create the directory hierarchy.} 16 | } 17 | \value{ 18 | boolean indicating success 19 | } 20 | \description{ 21 | write the data as a csv. 22 | } 23 | \author{ 24 | Neale Swinnerton 7 | %\VignetteIndexEntry{Mortality rate} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | 18 | # install.packages("dplyr") 19 | # install.packages("data.table") 20 | # install.packages("ggplot2") 21 | # install.packages("here") 22 | 23 | # library(monstR) 24 | # library(dplyr) 25 | # library(data.table) 26 | library(here) 27 | ``` 28 | 29 | ## Vignette background 30 | 31 | The `monstR` package interacts with the UK's Office for National Statistics (ONS) API, enabling us to easily extract their datasets for analysis. In this vignette, we will demonstrate how to use `monstR` to download the following ONS tables: 32 | 33 | - Mid-year population estimates by region, age and gender 34 | - Deaths registered weekly in England and Wales by region 35 | 36 | We will then merge both of those datasets to compute a time-series of mortality rates in England and Wales by sub-region. We will also plot those statistics. 37 | 38 | ## Find identifiers for desired ONS datasets 39 | 40 | The function `ons_available_datasets()` is useful to find out what datasets are available through the ONS API. 41 | 42 | ```{r,eval=FALSE} 43 | datasets <- ons_available_datasets() 44 | ``` 45 | 46 | 47 | Each dataset is associated with an identifier, which we can retrieve by inspecting the output from `ons_available_datasets()`. First, we will extract the identifier for the desired population dataset ('Mid-year population estimates by region, age and gender'). 48 | 49 | ```{r,eval=FALSE} 50 | population_id <- datasets %>% 51 | filter(str_detect(tolower(title),'population estimates for uk')) %>% 52 | pull(id) 53 | 54 | population_id 55 | 56 | datasets %>% 57 | filter(id %in% population_id) %>% 58 | select(title) 59 | ``` 60 | 61 | Then, we will do the same for our desired dataset recording deaths by region. 62 | 63 | ```{r,eval=FALSE} 64 | mortality_id <- datasets %>% 65 | filter(str_detect(tolower(title),'deaths')) %>% 66 | filter(str_detect(tolower(title),'by region')) %>% 67 | pull(id) 68 | 69 | mortality_id 70 | 71 | datasets %>% 72 | filter(id %in% mortality_id) %>% 73 | select(title) 74 | ``` 75 | 76 | ## Browse editions and versions available for each dataset 77 | 78 | ONS datasets are usually associated with multiple editions and versions. Different editions may contain different variables or be presented in different formats, while versions usually refresh or update the content with new data points. 
Before downloading a dataset with `monstR`, it is recommended to know beforehand which edition and version you would like to download. 79 | 80 | The function `ons_available_editions` returns available editions, while `ons_available_versions` returns available versions. 81 | 82 | We will check the editions and versions associated with the population dataset. 83 | 84 | ```{r,eval=FALSE} 85 | ### Editions available for the population dataset 86 | ids_and_editions_pop <- map(population_id, ons_available_editions) %>% 87 | set_names(population_id) %>% 88 | bind_rows(.id='id') %>% 89 | mutate(.,id_edition=paste(id,edition,sep="-")) 90 | 91 | ids_and_editions_pop 92 | 93 | ### Versions available for each edition 94 | ids_and_editions_and_versions_pop <- mapply(id=ids_and_editions_pop$id, 95 | edition=ids_and_editions_pop$edition, 96 | ons_available_versions) 97 | names(ids_and_editions_and_versions_pop) <- ids_and_editions_pop$id_edition 98 | 99 | ids_and_editions_and_versions_pop 100 | ``` 101 | 102 | From the dataset with identifier `mid-year-pop-est` we would like to download edition `mid-2019-april-2020-geography` and version `1`. 103 | 104 | We will also explore editions and versions for the deaths dataset. 105 | 106 | ```{r,eval=FALSE} 107 | ### Editions available for the deaths dataset 108 | ids_and_editions_deaths <- map(mortality_id, ons_available_editions) %>% 109 | set_names(mortality_id) %>% 110 | bind_rows(.id='id') %>% 111 | mutate(.,id_edition=paste(id,edition,sep="-")) 112 | 113 | ids_and_editions_deaths 114 | 115 | ### Versions available for each edition 116 | ids_and_editions_and_versions_deaths <- mapply(id=ids_and_editions_deaths$id, 117 | edition=ids_and_editions_deaths$edition, 118 | ons_available_versions) 119 | names(ids_and_editions_and_versions_deaths) <- ids_and_editions_deaths$id_edition 120 | 121 | ids_and_editions_and_versions_deaths 122 | ``` 123 | 124 | From the dataset with identifier `weekly-deaths-region` we would like to download edition `2010-19` and version `1`. 125 | 126 | ## Download both ONS datasets 127 | 128 | After inspection in the previous step, we have decided which edition and version pair we would like to download for each dataset. 129 | 130 | The following command sets up the pipeline using the `monstR` default settings. 131 | 132 | ```{r,eval=FALSE} 133 | set_up_df <- monstr_pipeline_defaults() %>% 134 | ons_datasets_setup() 135 | ``` 136 | 137 | We feed the edition and version to the function `ons_dataset_by_id`, while 138 | `ons_download` downloads the data. Additional functions `monstr_read_file`, `monstr_clean` and `monstr_write_clean` read, clean and write the desired datasets. 139 | 140 | We can now download our population dataset. 141 | 142 | ```{r echo=TRUE, eval=FALSE,results='hide'} 143 | set_up_df %>% 144 | ons_dataset_by_id(id=population_id,edition="mid-2019-april-2020-geography",version=1) %>% 145 | ons_download(format="csv") %>% 146 | monstr_read_file() %>% 147 | monstr_clean() %>% 148 | monstr_write_clean(format="all") 149 | ``` 150 | 151 | And our deaths dataset. 152 | 153 | ```{r echo=TRUE, eval=FALSE,results='hide'} 154 | set_up_df %>% 155 | ons_dataset_by_id(id=mortality_id,edition="2010-19",version=1) %>% 156 | ons_download(format="csv") %>% 157 | monstr_read_file() %>% 158 | monstr_clean() %>% 159 | monstr_write_clean(format="all") 160 | ``` 161 | 162 | ## Import and clean population dataset 163 | 164 | The cleaned data can be found in your root project folder under `data/clean/ons/`.
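If you want to check exactly which files were created, a minimal sketch (assuming the default `monstr_pipeline_defaults()` locations, where cleaned files follow the pattern `<dataset>/<edition>/<dataset>-v<version>.<format>`) is to list the contents of that folder:

```{r,eval=FALSE}
# List every file written to the clean data area by monstr_write_clean()
list.files(here("data", "clean", "ons"), recursive = TRUE)
```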
165 | 166 | ```{r,eval=FALSE} 167 | ons_midyear_pop <- fread(here("data","clean","ons","mid-year-pop-est","mid-2019-april-2020-geography","mid-year-pop-est-v1.csv"), header=TRUE, sep=",", check.names=TRUE) %>% 168 | rename(.,population=v4_0) 169 | ``` 170 | 171 | For this analysis, we will keep the population estimates for all ages and both sexes in each year and region. 172 | 173 | ```{r,eval=FALSE} 174 | ons_midyear_pop_total <- filter(ons_midyear_pop,age=="Total"&sex=="All") %>% 175 | select(.,population,calendar_years,admin_geography) %>% 176 | arrange(.,admin_geography,desc(calendar_years)) 177 | 178 | knitr::kable(head(ons_midyear_pop_total, 10)) 179 | ``` 180 | 181 | ```{r,echo=FALSE,out.width = "50%", fig.pos="h"} 182 | knitr::include_graphics(here('vignettes','pop_table.PNG'),dpi=500) 183 | ``` 184 | 185 | ## Import and clean deaths dataset 186 | 187 | ```{r,eval=FALSE} 188 | ons_weekly_deaths_region <- fread(here("data","clean","ons","weekly-deaths-region","2010-19","weekly-deaths-region-v1.csv"), header=TRUE, sep=",", check.names=TRUE) %>% 189 | rename(.,nr_deaths=v4_1) %>% 190 | arrange(.,admin_geography,desc(calendar_years)) 191 | 192 | knitr::kable(head(ons_weekly_deaths_region, 10)) 193 | ``` 194 | 195 | ```{r,echo=FALSE,out.width = "50%", fig.pos="h"} 196 | knitr::include_graphics(here('vignettes','pop_deaths.PNG'),dpi=500) 197 | ``` 198 | 199 | This data is currently presented as a weekly time series, with one row per week. We will aggregate it into a yearly time series, thus matching the ONS population dataset. 200 | 201 | ```{r,eval=FALSE} 202 | ons_weekly_deaths_region <- as.data.table(ons_weekly_deaths_region) 203 | 204 | ons_weekly_deaths_region_annual <- ons_weekly_deaths_region[, list( 205 | geography = first(geography), 206 | nr_deaths=sum(nr_deaths,na.rm=TRUE)), 207 | by = list(calendar_years,admin_geography)] 208 | 209 | knitr::kable(head(ons_weekly_deaths_region_annual, 10)) 210 | ``` 211 | 212 | ```{r,echo=FALSE,out.width = "50%", fig.pos="h"} 213 | knitr::include_graphics(here('vignettes','pop_deaths_aggregate.PNG'),dpi=500) 214 | ``` 215 | 216 | ## Merge datasets 217 | 218 | We are now ready to merge our population data into our deaths data, which will allow us to compute a new variable: the death rate per 100,000 residents for a given region and year. 219 | 220 | ```{r,eval=FALSE} 221 | ons_weekly_deaths_region_annual <- left_join(ons_weekly_deaths_region_annual, 222 | ons_midyear_pop_total, 223 | by=c("calendar_years" = "calendar_years", 224 | "admin_geography" = "admin_geography")) 225 | 226 | ons_weekly_deaths_region_annual <- mutate(ons_weekly_deaths_region_annual, 227 | deaths_per_100K=nr_deaths/population*100000) 228 | 229 | knitr::kable(head(ons_weekly_deaths_region_annual, 10)) 230 | ``` 231 | 232 | ```{r,echo=FALSE,out.width = "50%", fig.pos="h"} 233 | knitr::include_graphics(here('vignettes','merged_table.PNG'),dpi=500) 234 | ``` 235 | 236 | ## Present results in a chart 237 | 238 | Having used the `monstR` package to download our required ONS tables and processed the data, we are ready to display the mortality rate by year according to region.
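Before plotting, it is worth a quick sanity check that every region and year in the aggregated deaths data found a matching population estimate in the join above. The sketch below simply lists rows where `population` came back as `NA`; ideally it returns no rows.

```{r,eval=FALSE}
# Rows where the left join failed to find a population estimate have NA in
# `population`; any such region/year combinations are counted here.
ons_weekly_deaths_region_annual %>%
  filter(is.na(population)) %>%
  count(geography, calendar_years)
```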
239 | 240 | ```{r, eval=FALSE} 241 | ons_weekly_deaths_region_annual$bold <- ifelse(ons_weekly_deaths_region_annual$geography=="England and Wales",1,0) 242 | 243 | ggplot(ons_weekly_deaths_region_annual, 244 | aes(x=factor(calendar_years), y=deaths_per_100K, group=geography)) + 245 | geom_line(aes(color=geography,size = factor(bold)))+ 246 | geom_point(aes(color=geography)) + 247 | theme(text = element_text(size = 10), 248 | panel.border = element_blank(), 249 | panel.grid.major = element_blank(), 250 | panel.grid.minor = element_blank(), 251 | panel.background = element_blank(), 252 | legend.key=element_blank()) + 253 | ggtitle("Mortality rate by region in England and Wales") + 254 | xlab("Year") + ylab("Number of deaths per 100,000") + 255 | scale_color_brewer(palette="Set3",name = "Region") + 256 | scale_size_manual(values = c(0.5,1.25),guide = "none") + 257 | scale_y_continuous(labels = function(x) format(x, big.mark = ",", 258 | scientific = FALSE)) 259 | ``` 260 | 261 | ```{r,echo=FALSE,out.width = "50%", fig.pos="h"} 262 | knitr::include_graphics(here('vignettes','mortality_by_region.png'),dpi=800) 263 | ``` -------------------------------------------------------------------------------- /vignettes/mortality_by_region.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/vignettes/mortality_by_region.png -------------------------------------------------------------------------------- /vignettes/pipeline.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quick start" 3 | author: "The Health Foundation Analytics Lab" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Quick start} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | 13 | 14 | ## Discover what is available 15 | 16 | There are a few helper functions to help you find out which datasets are available, as well as the corresponding editions and versions. The `ons_available_datasets()` function returns a dataframe with information about all available datasets. The `id` column is what you need to download a dataset. 17 | 18 | ```{r , eval = FALSE, include = TRUE} 19 | 20 | datasets <- ons_available_datasets() 21 | 22 | datasets %>% 23 | select(id) 24 | id 25 | 1 cpih01 26 | 2 mid-year-pop-est 27 | 3 ashe-table-7-hours 28 | 4 ashe-table-7-earnings 29 | 5 ashe-table-8-hours 30 | 6 ashe-table-8-earnings 31 | 7 opss-rates 32 | 8 opss-membership 33 | 9 wellbeing-year-ending 34 | 10 wellbeing-local-authority 35 | ... 36 | 37 | 38 | ``` 39 | 40 | Once you have picked a dataset, you need to pick the edition you want. This can be done using `ons_available_editions()`. 41 | ```{r, eval = FALSE, include = TRUE} 42 | 43 | # Discover the available editions for a particular dataset 44 | ons_available_editions(id = "mid-year-pop-est") 45 | 46 | edition 47 | 48 | 1 mid-2018-april-2019-geography 49 | 2 mid-2019-april-2020-geography 50 | 3 time-series 51 | 52 | 53 | 54 | ``` 55 | 56 |
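Because the editions listing comes back as a data frame (as the output above suggests), it can be narrowed down with ordinary data-frame tools when a dataset has many editions. This is just a sketch, assuming `dplyr` is loaded; it keeps the geography-based editions and drops the time series.

```{r, eval = FALSE, include = TRUE}
# Keep only the editions whose name mentions "geography"
ons_available_editions(id = "mid-year-pop-est") %>%
  filter(grepl("geography", edition))
```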
Finally, you need to find out which versions are available for a specific edition of a dataset. 57 | ```{r, eval = FALSE, include = TRUE} 58 | # Discover the available versions for a particular edition 59 | 60 | ons_available_versions("mid-year-pop-est", "time-series") 61 | 62 | version 63 | 1 1 64 | 2 2 65 | 3 3 66 | 4 4 67 | 68 | ``` 69 | 70 | ## Download the data 71 | 72 | You should now be ready to download the data. Start by specifying where you want the data to be downloaded to. The `monstr_pipeline_defaults()` function returns a default folder structure (without creating it). You can specify the base file path using the `download_root` argument. If you do not specify `download_root`, the base file path will be your project root if you are using RStudio projects, and your working directory otherwise. The output from `monstr_pipeline_defaults()` is then fed to `ons_datasets_setup()`, which queries the ONS API to get the relevant information to prepare for downloading the data. Finally, `ons_download()` downloads the data. The rest of the piped code reads in, cleans and saves a clean version of the data. 73 | ```{r, eval=FALSE, include=TRUE} 74 | monstr_pipeline_defaults(download_root="/path/to/download/root/") %>% 75 | ons_datasets_setup() %>% # Uses the monstr 'standards' for location and format 76 | ons_dataset_by_id("weekly-deaths-local-authority") %>% 77 | ons_download(format="csv") %>% 78 | monstr_read_file() %>% 79 | monstr_clean() %>% 80 | monstr_write_clean(format="all") 81 | 82 | ``` 83 | 84 | 85 | 86 | 87 | ## Further Examples 88 | 89 | ### Download the latest weekly-deaths-local-authority data as a csv 90 | 91 | 92 | ```{r , eval = FALSE, include = TRUE} 93 | ons_datasets_setup(monstr_pipeline_defaults()) %>% 94 | ons_dataset_by_id("weekly-deaths-local-authority") %>% 95 | ons_download(format="csv") 96 | 97 | # file will be in `{{root}}/data/raw/ons/weekly-deaths-local-authority/time-series/vN.csv` 98 | # metadata about the file will be in `{{root}}/data/raw/ons/weekly-deaths-local-authority/time-series/vN.csv.meta.json` 99 | ``` 100 | 101 | ### Similarly, it can be downloaded as an xls 102 | 103 | ```{r , eval = FALSE, include = TRUE} 104 | ons_datasets_setup(monstr_pipeline_defaults()) %>% 105 | ons_dataset_by_id("weekly-deaths-local-authority") %>% 106 | ons_download(format="xls") 107 | ``` 108 | 109 | 110 | ### Specific versions can be selected 111 | 112 | ```{r , eval = FALSE, include = TRUE} 113 | datasets <- ons_datasets_setup(monstr_pipeline_defaults()) 114 | ## Get the metadata about v4 of the time-series edition of the weekly-deaths-local-authority dataset. 115 | wdla4_meta <- datasets %>% ons_dataset_by_id("weekly-deaths-local-authority", edition="time-series", version=4) 116 | 117 | # download it 118 | wdla4_meta %>% 119 | ons_download(format="csv") 120 | 121 | 122 | # Or get the latest 123 | wdla_latest <- datasets %>% ons_dataset_by_id("weekly-deaths-local-authority", edition="time-series") 124 | 125 | 126 | # Download the latest as csv; metadata about the schema of the data is written alongside.
127 | wdla_latest %>% ons_download(format="csv") 128 | 129 | ``` 130 | -------------------------------------------------------------------------------- /vignettes/pop_deaths.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/vignettes/pop_deaths.PNG -------------------------------------------------------------------------------- /vignettes/pop_deaths_aggregate.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/vignettes/pop_deaths_aggregate.PNG -------------------------------------------------------------------------------- /vignettes/pop_table.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAnalyticsLab/monstR/4f428e0ea5f896108e3ac78488d50a33edc8af65/vignettes/pop_table.PNG --------------------------------------------------------------------------------