├── .Rbuildignore ├── .github ├── .gitignore ├── CONTRIBUTING.md └── workflows │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── aggregate_data.R ├── aggregation_functions.R ├── daiquiri-package.R ├── field_types.R ├── main.R ├── reports.R ├── source_data.R ├── utilities.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── codemeta.json ├── daiquiri.Rproj ├── devtesting ├── changedetectiontesting.R └── testoutput │ └── .gitignore ├── docs ├── 404.html ├── CONTRIBUTING.html ├── LICENSE.html ├── apple-touch-icon-120x120.png ├── apple-touch-icon-152x152.png ├── apple-touch-icon-180x180.png ├── apple-touch-icon-60x60.png ├── apple-touch-icon-76x76.png ├── apple-touch-icon.png ├── articles │ ├── daiquiri.html │ ├── example_prescriptions.html │ ├── example_prescriptions_stratified.html │ ├── example_report.html │ ├── example_report_stratified.html │ └── index.html ├── authors.html ├── deps │ ├── bootstrap-5.1.3 │ │ ├── bootstrap.bundle.min.js │ │ ├── bootstrap.bundle.min.js.map │ │ └── bootstrap.min.css │ ├── data-deps.txt │ └── jquery-3.6.0 │ │ ├── jquery-3.6.0.js │ │ ├── jquery-3.6.0.min.js │ │ └── jquery-3.6.0.min.map ├── dev │ ├── .nojekyll │ ├── CONTRIBUTING.html │ ├── LICENSE.html │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── articles │ │ ├── daiquiri.html │ │ ├── example_report.html │ │ ├── example_report_stratified.html │ │ └── index.html │ ├── authors.html │ ├── deps │ │ ├── bootstrap-5.1.3 │ │ │ ├── bootstrap.bundle.min.js │ │ │ ├── bootstrap.bundle.min.js.map │ │ │ └── bootstrap.min.css │ │ ├── data-deps.txt │ │ └── jquery-3.6.0 │ │ │ ├── jquery-3.6.0.js │ │ │ ├── jquery-3.6.0.min.js │ │ │ └── jquery-3.6.0.min.map │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ ├── index.html │ ├── 
link.svg │ ├── logo.png │ ├── news │ │ └── index.html │ ├── pkgdown.js │ ├── pkgdown.yml │ ├── reference │ │ ├── Rplot001.png │ │ ├── aggregate_data.html │ │ ├── close_log.html │ │ ├── daiquiri-package.html │ │ ├── daiquiri_report.html │ │ ├── export_aggregated_data.html │ │ ├── field_types.html │ │ ├── field_types_available.html │ │ ├── figures │ │ │ ├── antibiotics_day_DurationEnteredByPrescriber_missing_perc.png │ │ │ ├── bchem_creatinine_day_Value_mean.png │ │ │ ├── example_prescriptions_aggregated_valuespresent.png │ │ │ ├── example_prescriptions_allfields_missing_perc.png │ │ │ ├── example_prescriptions_head.png │ │ │ ├── example_prescriptions_source_fieldsimported.png │ │ │ ├── example_prescriptions_stratified_midnight_perc.png │ │ │ ├── example_prescriptions_stratified_strata.png │ │ │ └── logo.png │ │ ├── index.html │ │ ├── initialise_log.html │ │ ├── prepare_data.html │ │ ├── read_data.html │ │ ├── report_data.html │ │ └── template_field_types.html │ ├── search.json │ └── sitemap.xml ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico ├── index.html ├── link.svg ├── logo.png ├── news │ └── index.html ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── aggregate_data.html │ ├── close_log.html │ ├── daiquiri-package.html │ ├── daiquiri_report.html │ ├── export_aggregated_data.html │ ├── field_types.html │ ├── field_types_available.html │ ├── figures │ │ ├── antibiotics_day_DurationEnteredByPrescriber_missing_perc.png │ │ ├── bchem_creatinine_day_Value_mean.png │ │ ├── example_prescriptions_aggregated_valuespresent.png │ │ ├── example_prescriptions_allfields_missing_perc.png │ │ ├── example_prescriptions_head.png │ │ ├── example_prescriptions_source_fieldsimported.png │ │ ├── example_prescriptions_stratified_midnight_perc.png │ │ ├── example_prescriptions_stratified_strata.png │ │ └── logo.png │ ├── index.html │ ├── initialise_log.html │ ├── prepare_data.html │ ├── read_data.html │ ├── report_data.html │ └── 
template_field_types.html ├── search.json └── sitemap.xml ├── inst ├── CITATION ├── extdata │ └── example_prescriptions.csv └── rmd │ └── report_htmldoc.Rmd ├── man ├── aggregate_data.Rd ├── close_log.Rd ├── daiquiri-package.Rd ├── daiquiri_report.Rd ├── export_aggregated_data.Rd ├── field_types.Rd ├── field_types_advanced.Rd ├── field_types_available.Rd ├── figures │ ├── antibiotics_day_DurationEnteredByPrescriber_missing_perc.png │ ├── bchem_creatinine_day_Value_mean.png │ ├── example_prescriptions_aggregated_valuespresent.png │ ├── example_prescriptions_allfields_missing_perc.png │ ├── example_prescriptions_head.png │ ├── example_prescriptions_source_fieldsimported.png │ ├── example_prescriptions_stratified_midnight_perc.png │ ├── example_prescriptions_stratified_strata.png │ └── logo.png ├── initialise_log.Rd ├── prepare_data.Rd ├── read_data.Rd ├── report_data.Rd └── template_field_types.Rd ├── paper ├── bchem_creatinine_day_Value_mean.png ├── example_prescriptions_admdate_missing_perc.png ├── example_prescriptions_aggregated_valuespresent.png ├── example_prescriptions_head.png ├── paper.bib └── paper.md ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── renv.lock ├── renv ├── .gitignore ├── activate.R └── settings.dcf ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── aggregate_data.md │ ├── aggregate_data │ │ ├── test_[ALL_FIELDS_COMBINED].csv │ │ ├── test_[DUPLICATES].csv │ │ ├── test_col1.csv │ │ └── test_col2.csv │ ├── field_types.md │ ├── main.md │ └── source_data.md │ ├── test-aggregate_data.R │ ├── test-aggregation_functions.R │ ├── test-field_types.R │ ├── test-main.R │ ├── test-reports.R │ ├── test-source_data.R │ ├── test-utilities.R │ └── testdata │ ├── completetestset.csv │ ├── completetestset.xlsx │ ├── 
specialchars_colnames.csv │ └── specialchars_excel.csv └── vignettes ├── .gitignore ├── articles ├── example_report.Rmd └── example_report_stratified.Rmd └── daiquiri.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^devtesting$ 6 | ^README\.Rmd$ 7 | ^daiquiri_report_*\.html$ 8 | ^.*\.log$ 9 | ^\.github$ 10 | ^codecov\.yml$ 11 | ^codemeta\.json$ 12 | ^_pkgdown\.yml$ 13 | ^docs$ 14 | ^pkgdown$ 15 | ^LICENSE\.md$ 16 | ^vignettes/articles$ 17 | daiquiri_report_*_files 18 | daiquiri_temp_* 19 | ^paper$ 20 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to daiquiri 2 | 3 | ## Reporting issues 4 | 5 | Please report any bugs or suggestions by opening a [github issue](https://github.com/ropensci/daiquiri/issues). 6 | 7 | 8 | ## Development guidelines 9 | 10 | So far, daiquiri has been developed by a single person so there is not yet a system in place for collaboration. 11 | If you'd like to contribute changes please raise an issue first and then we can probably use [GitHub flow](https://guides.github.com/introduction/flow/). 12 | 13 | 14 | ## Code of Conduct 15 | 16 | Please note that this package is released with a [Contributor Code of Conduct](https://ropensci.org/code-of-conduct/). 17 | By contributing to this project, you agree to abide by its terms. 
18 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 53 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::covr, any::xml2 29 | needs: coverage 30 | 31 | - name: Test coverage 32 | run: | 33 | cov <- covr::package_coverage( 34 | quiet = FALSE, 35 | clean = FALSE, 36 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 37 | ) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | ^daiquiri_report_*\.html$ 5 | *.log 6 | daiquiri_report_*_files 7 | daiquiri_temp_* 8 | -------------------------------------------------------------------------------- /DESCRIPTION: 
-------------------------------------------------------------------------------- 1 | Package: daiquiri 2 | Type: Package 3 | Title: Data Quality Reporting for Temporal Datasets 4 | Version: 1.1.1.9000 5 | Authors@R: c( 6 | person(c("T.", "Phuong"), "Quan", email = "phuong.quan@ndm.ox.ac.uk", 7 | role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8566-1817")), 8 | person("Jack", "Cregan", role = "ctb"), 9 | person(family = "University of Oxford", role = "cph"), 10 | person(family = "National Institute for Health Research (NIHR)", role = "fnd"), 11 | person("Brad", "Cannell", role = "rev") 12 | ) 13 | Description: Generate reports that enable quick visual review of 14 | temporal shifts in record-level data. Time series plots showing aggregated 15 | values are automatically created for each data field (column) depending on its 16 | contents (e.g. min/max/mean values for numeric data, no. of distinct 17 | values for categorical data), as well as overviews for missing values, 18 | non-conformant values, and duplicated rows. The resulting reports are shareable 19 | and can contribute to forming a transparent record of the entire analysis process. 20 | It is designed with Electronic Health Records in mind, but can be used for 21 | any type of record-level temporal data (i.e. tabular data where each row represents 22 | a single "event", one column contains the "event date", and other columns 23 | contain any associated values for the event). 
24 | URL: https://github.com/ropensci/daiquiri, https://ropensci.github.io/daiquiri/ 25 | BugReports: https://github.com/ropensci/daiquiri/issues 26 | License: GPL (>= 3) 27 | Encoding: UTF-8 28 | Imports: 29 | data.table (>= 1.12.8), 30 | readr (>= 2.0.0), 31 | ggplot2 (>= 3.1.0), 32 | scales (>= 1.1.0), 33 | cowplot (>= 0.9.3), 34 | rmarkdown, 35 | reactable (>= 0.2.3), 36 | utils, 37 | stats, 38 | xfun (>= 0.15) 39 | RoxygenNote: 7.3.1 40 | Suggests: 41 | covr, 42 | knitr, 43 | testthat (>= 3.0.0), 44 | codemetar 45 | VignetteBuilder: knitr 46 | Config/testthat/edition: 3 47 | Roxygen: list(markdown = TRUE) 48 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,daiquiri_aggregated_data) 4 | S3method(print,daiquiri_field_types) 5 | S3method(print,daiquiri_object) 6 | S3method(print,daiquiri_source_data) 7 | export(aggregate_data) 8 | export(close_log) 9 | export(daiquiri_report) 10 | export(export_aggregated_data) 11 | export(field_types) 12 | export(field_types_advanced) 13 | export(ft_categorical) 14 | export(ft_datetime) 15 | export(ft_freetext) 16 | export(ft_ignore) 17 | export(ft_numeric) 18 | export(ft_simple) 19 | export(ft_strata) 20 | export(ft_timepoint) 21 | export(ft_uniqueidentifier) 22 | export(initialise_log) 23 | export(prepare_data) 24 | export(read_data) 25 | export(report_data) 26 | export(template_field_types) 27 | importFrom(data.table,':=') 28 | importFrom(data.table,.EACHI) 29 | importFrom(data.table,.N) 30 | importFrom(data.table,.SD) 31 | importFrom(ggplot2,.data) 32 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # daiquiri (development version) 2 | 3 | ## New features 4 | 5 | * New `field_types_advanced()` function. 
Allows just a subset of the columns in the source df to be named explicitly in the specification, with the remaining columns set to the `.default_field_type` parameter. (#16) 6 | 7 | ## Bug fixes and minor improvements 8 | 9 | * Improved scaling of heatmaps when there are lots of fields. (#17) 10 | 11 | # daiquiri 1.1.1 (2023-07-18) 12 | 13 | ## New features 14 | 15 | * New `ft_strata()` field type, which when specified, produces a report where the aggregated data and individual data fields plots are stratified by the `ft_strata()` column's distinct values 16 | 17 | * Column-specific strings for missing values can now be set in the `field_types()` specification (#13) 18 | 19 | ## Bug fixes and minor improvements 20 | 21 | * Categorical data fields now show a heatmap plot as well as the individual time series plots when `aggregate_by_each_category` option is set to `TRUE` 22 | 23 | * Categorical data fields now retain special characters in labels when `aggregate_by_each_category` option is set to `TRUE` 24 | 25 | * Print method of `daiquiri_object` now displays the location the report was saved to 26 | 27 | * When a data field contains all missing values, this now shows correctly in the various tabs (#12) 28 | 29 | * When running package from within rmarkdown/quarto (rmd/qmd) files, the parent file can now contain a chunk labelled `setup` without causing an error. 
(#7) 30 | 31 | * Hex logo now appears on reports, adding dependency to `xfun` 32 | 33 | 34 | # daiquiri 1.0.3 (2022-12-06) 35 | 36 | ## Bug fixes and minor improvements 37 | 38 | * Validation warnings now match column names correctly when `field_types` are specified in a different order to the supplied data frame columns 39 | 40 | * Passing in a data frame containing integer columns no longer causes an aggregation error (#9) 41 | 42 | * Calling functions with package prefix no longer causes an error (#10) 43 | 44 | * Fixed (some) errors about duplicate chunk labels when running package from within rmarkdown/quarto (rmd/qmd) files (bug introduced in previous release 1.0.2). This now allows chunks in the parent file to be unlabelled but unfortunately still errors when there is a chunk labelled `setup`. (#7) 45 | 46 | 47 | # daiquiri 1.0.2 (2022-11-21) 48 | 49 | * When rendering reports, intermediate files are now written to `tempdir()` instead of to the directory of the `report_htmldoc.Rmd` file (the default behaviour of `rmarkdown::render()`). This fixes errors caused when the library location is read-only. 50 | 51 | 52 | # daiquiri 1.0.1 (2022-11-11) 53 | 54 | First release to CRAN 55 | 56 | * Replaced calls to deprecated function `aes_string()` in `ggplot2` 57 | 58 | 59 | # daiquiri 1.0.0 (2022-11-01) 60 | 61 | This release incorporates changes requested for acceptance into https://ropensci.org/. There are many breaking changes as objects have been renamed for better consistency and style. 62 | 63 | ## Breaking changes 64 | 65 | * `daiquiri_report()` replaces `create_report()` and some parameters have been renamed. 66 | 67 | * `field_types()` replaces `fieldtypes()`. 68 | 69 | * `prepare_data()`, `aggregate_data()`, and `report_data()` parameters have been renamed. 70 | 71 | * `initialise_log()` replaces `log_initialise()`. 72 | 73 | * `close_log()` replaces `log_close()`. 
74 | 75 | * `template_field_types()` replaces `fieldtypes_template()`. 76 | 77 | ## Bug fixes and minor improvements 78 | 79 | * Fixed error when user passes in a `data.table` (to `daiquiri_report()` or `prepare_data()`) that contains non-character columns. 80 | 81 | * `daiquiri_report()` (formerly `create_report()`) and `report_data()` accept a new parameter `report_title`. 82 | 83 | * `report_data()` now accepts `...` parameter to be passed through to `rmarkdown::render()`. 84 | 85 | * `close_log()` now returns the path to the closed log file (if any). 86 | 87 | * `example_prescriptions.csv` replaces `example_dataset.csv` as the example dataset supplied with the package. 88 | 89 | # daiquiri 0.7.0 (2022-04-20) 90 | 91 | This release moves the reading of csv files out into a separate function in order to make it more configurable and to handle the parsing of all fields as character data for the user. 92 | 93 | ## Breaking changes 94 | 95 | * `create_report()` now only accepts a dataframe as the first parameter. The `textfile_contains_columnnames` parameter has been removed. 96 | 97 | * `load_data()` has been replaced with `read_data()` and `prepare_data()`. 98 | 99 | * `log_initialise()` function: `dirpath` parameter renamed to `log_directory`. 100 | 101 | ## New features 102 | 103 | * New function `read_data()` reads data from a delimited file, with all columns read in as character type. 104 | 105 | * New function `prepare_data()` validates a dataframe against a fieldtypes specification, and prepares it for aggregation. 106 | 107 | * `create_report()` accepts a new parameter `dataset_shortdesc` for the user to specify a dataset description to appear on the report. 108 | 109 | * `export_aggregated_data()` function accepts new `save_fileprefix` parameter. 110 | 111 | * New function `fieldtypes_template()` generates template code for creating a fieldtypes specification based on an existing dataframe, and outputs it to the console. 
112 | 113 | ## Bug fixes and minor improvements 114 | 115 | * Fixed ALL_FIELDS_COMBINED calculated field rowsumming NAs incorrectly. 116 | 117 | * Fixed plots failing when all values are missing. 118 | 119 | * Fixed `log_message()` trying to write to different log file when called from Rmd folder (and relative path used). 120 | 121 | * Made '[DUPLICATES]' and '[ALL_FIELDS_COMBINED]' reserved names for data fields. 122 | 123 | * Allow column names in supplied dataframe to contain special characters. 124 | 125 | * Reduced real estate at top of report. 126 | 127 | * Removed datatype column and fixed validation warnings total from Source data tab in report. 128 | 129 | * Updated example data. 130 | 131 | * Added further validation checks for user-supplied params. 132 | 133 | * Added CITATION file. 134 | 135 | 136 | # daiquiri 0.6.1 (2022-02-23) 137 | 138 | Beta release. Complete list of functions exported: 139 | 140 | * `aggregate_data()` 141 | * `create_report()` accepts either a dataframe or csv filename as the first parameter. This may change in future. 142 | * `export_aggregated_data()` 143 | * `field_types()` 144 | * `ft_categorical()` 145 | * `ft_datetime()` 146 | * `ft_freetext()` 147 | * `ft_ignore()` 148 | * `ft_numeric()` 149 | * `ft_simple()` 150 | * `ft_timepoint()` 151 | * `ft_uniqueidentifier()` 152 | * `load_data()` accepts either a dataframe or csv filename as the first parameter. This may change in future. 
153 | * `log_close()` 154 | * `log_initialise()` 155 | * `report_data()` 156 | -------------------------------------------------------------------------------- /R/daiquiri-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | ## usethis namespace: start 5 | ## usethis namespace: end 6 | NULL 7 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # initialise package environment (doesn't work if put in .onLoad) 2 | package_environment <- new.env(parent = emptyenv()) 3 | # tell data.table that you as a package developer have designed your code to intentionally 4 | # rely on data.table functionality even though it is not mentioned in NAMESPACE file. 5 | # This is to ensure the "[" function works even though it can't be prefixed with data.table:: 6 | .datatable.aware <- TRUE 7 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include=FALSE} 8 | knitr::opts_chunk$set(echo = TRUE) 9 | ``` 10 | 11 | # daiquiri 12 | 13 | 14 | [![CRAN Status](https://www.r-pkg.org/badges/version/daiquiri)](https://cran.r-project.org/package=daiquiri) 15 | [![R-CMD-check](https://github.com/ropensci/daiquiri/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/daiquiri/actions/workflows/R-CMD-check.yaml) 16 | [![Codecov test coverage](https://codecov.io/gh/ropensci/daiquiri/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ropensci/daiquiri?branch=master) 17 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 18 
| [![Status at rOpenSci Software Peer Review](https://badges.ropensci.org/535_status.svg)](https://github.com/ropensci/software-review/issues/535) 19 | [![JOSS paper](https://joss.theoj.org/papers/10.21105/joss.05034/status.svg)](https://doi.org/10.21105/joss.05034) 20 | 21 | 22 | The daiquiri package generates data quality reports that enable quick visual review of temporal shifts in record-level data. Time series plots showing aggregated values are automatically created for each data field (column) depending on its contents (e.g. min/max/mean values for numeric data, no. of distinct values for categorical data), as well as overviews for missing values, non-conformant values, and duplicated rows. 23 | 24 | Essentially, it takes input such as this: 25 | 26 | 27 | 28 | And outputs this: 29 | 30 | 31 | 32 | The resulting html reports are shareable and can contribute to forming a transparent record of the entire analysis process. It is designed with electronic health records in mind, but can be used for any type of record-level temporal data. 33 | 34 | ## Why should I use it? 35 | 36 | Large routinely-collected datasets are increasingly being used in research. However, given their data are collected for operational rather than research purposes, there is a greater-than-usual need for them to be checked for data quality issues before any analyses are conducted. Events occurring at the institutional level such as software updates, new machinery or processes can cause temporal artefacts that, if not identified and taken into account, can lead to biased results and incorrect conclusions. For example, the figures below show real data from a large hospital in the UK, and how it has changed over time. 37 | 38 | 39 | 40 | The first figure shows the percentage of missing values in the 'Duration' field of a dataset containing antibiotic prescriptions, and the second figure shows the mean value of all laboratory tests checking for levels of 'creatinine' in the blood. 
As you can see, there are points in time where these values shift up or down suddenly and unnaturally, indicating that something changed in the way the data was collected or processed. A careful researcher needs to take these sudden changes into account, particularly if comparing or combining the data before and after these 'change points'. 41 | 42 | While these checks should theoretically be conducted by the researcher at the initial data analysis stage, in practice it is unclear to what extent this is actually done, since it is rarely, if ever, reported in published papers. With the increasing drive towards greater transparency and reproducibility within the scientific community, this essential yet often-overlooked part of the analysis process will inevitably begin to come under greater scrutiny. The daiquiri package helps researchers conduct this part of the process more thoroughly, consistently and transparently, hence increasing the quality of their studies as well as trust in the scientific process. 
43 | 44 | ## Installation 45 | 46 | ```{r, eval = FALSE} 47 | # install the latest release from CRAN 48 | install.packages("daiquiri") 49 | 50 | # or install the development version from rOpenSci 51 | install.packages("daiquiri", repos = "https://ropensci.r-universe.dev") 52 | 53 | # or install direct from source 54 | # install.packages("remotes") 55 | remotes::install_github("ropensci/daiquiri") 56 | ``` 57 | 58 | ## Usage 59 | 60 | ```{r} 61 | library(daiquiri) 62 | 63 | # load delimited file into a data.frame without doing any datatype conversion 64 | path <- system.file("extdata", "example_prescriptions.csv", package = "daiquiri") 65 | raw_data <- read_data(path, show_progress = FALSE) 66 | 67 | head(raw_data) 68 | 69 | # specify the type of data expected in each column of the data.frame 70 | fts <- field_types( 71 | PrescriptionID = ft_uniqueidentifier(), 72 | PrescriptionDate = ft_timepoint(), 73 | AdmissionDate = ft_datetime(includes_time = FALSE), 74 | Drug = ft_freetext(), 75 | Dose = ft_numeric(), 76 | DoseUnit = ft_categorical(), 77 | PatientID = ft_ignore(), 78 | Location = ft_categorical(aggregate_by_each_category = TRUE) 79 | ) 80 | ``` 81 | 82 | ```{r, eval = FALSE} 83 | # create a report in the current directory 84 | daiq_obj <- daiquiri_report( 85 | raw_data, 86 | field_types = fts 87 | ) 88 | ``` 89 | 90 | An [example report](https://ropensci.github.io/daiquiri/articles/example_prescriptions.html) is available from the [package website](https://ropensci.github.io/daiquiri/index.html). 91 | 92 | More detailed guidance can be found in the [walkthrough vignette](https://ropensci.github.io/daiquiri/articles/daiquiri.html): 93 | 94 | ```{r, eval = FALSE} 95 | vignette("daiquiri", package = "daiquiri") 96 | ``` 97 | 98 | ## How to cite this package 99 | 100 | > Quan, T. P., (2022). daiquiri: Data Quality Reporting for Temporal Datasets. 
Journal of Open Source Software, 7(80), 5034, https://doi.org/10.21105/joss.05034 101 | 102 | ## Acknowledgements 103 | 104 | This work was supported by the National Institute for Health Research Health Protection Research Unit (NIHR HPRU) in Healthcare Associated Infections and Antimicrobial Resistance at the University of Oxford in partnership with Public Health England (PHE) (NIHR200915), and by the NIHR Oxford Biomedical Research Centre. 105 | 106 | 107 | ## Contributing to this package 108 | 109 | Please report any bugs or suggestions by opening a [github issue](https://github.com/ropensci/daiquiri/issues). 110 | 111 | Please note that this package is released with a [Contributor Code of Conduct](https://ropensci.org/code-of-conduct/). 112 | By contributing to this project, you agree to abide by its terms. 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # daiquiri 5 | 6 | 7 | 8 | [![CRAN 9 | Status](https://www.r-pkg.org/badges/version/daiquiri)](https://cran.r-project.org/package=daiquiri) 10 | [![R-CMD-check](https://github.com/ropensci/daiquiri/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/daiquiri/actions/workflows/R-CMD-check.yaml) 11 | [![Codecov test 12 | coverage](https://codecov.io/gh/ropensci/daiquiri/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ropensci/daiquiri?branch=master) 13 | [![Project Status: Active – The project has reached a stable, usable 14 | state and is being actively 15 | developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 16 | [![Status at rOpenSci Software Peer 17 | Review](https://badges.ropensci.org/535_status.svg)](https://github.com/ropensci/software-review/issues/535) 18 | [![JOSS 19 | paper](https://joss.theoj.org/papers/10.21105/joss.05034/status.svg)](https://doi.org/10.21105/joss.05034) 
20 | 21 | 22 | The daiquiri package generates data quality reports that enable quick 23 | visual review of temporal shifts in record-level data. Time series plots 24 | showing aggregated values are automatically created for each data field 25 | (column) depending on its contents (e.g. min/max/mean values for numeric 26 | data, no. of distinct values for categorical data), as well as overviews 27 | for missing values, non-conformant values, and duplicated rows. 28 | 29 | Essentially, it takes input such as this: 30 | 31 | 32 | 33 | And outputs this: 34 | 35 | 36 | 37 | The resulting html reports are shareable and can contribute to forming a 38 | transparent record of the entire analysis process. It is designed with 39 | electronic health records in mind, but can be used for any type of 40 | record-level temporal data. 41 | 42 | ## Why should I use it? 43 | 44 | Large routinely-collected datasets are increasingly being used in 45 | research. However, given their data are collected for operational rather 46 | than research purposes, there is a greater-than-usual need for them to 47 | be checked for data quality issues before any analyses are conducted. 48 | Events occurring at the institutional level such as software updates, 49 | new machinery or processes can cause temporal artefacts that, if not 50 | identified and taken into account, can lead to biased results and 51 | incorrect conclusions. For example, the figures below show real data 52 | from a large hospital in the UK, and how it has changed over time. 53 | 54 | 55 | 56 | The first figure shows the percentage of missing values in the 57 | ‘Duration’ field of a dataset containing antibiotic prescriptions, and 58 | the second figure shows the mean value of all laboratory tests checking 59 | for levels of ‘creatinine’ in the blood. 
As you can see, there are 60 | points in time where these values shift up or down suddenly and 61 | unnaturally, indicating that something changed in the way the data was 62 | collected or processed. A careful researcher needs to take these sudden 63 | changes into account, particularly if comparing or combining the data 64 | before and after these ‘change points’. 65 | 66 | While these checks should theoretically be conducted by the researcher 67 | at the initial data analysis stage, in practice it is unclear to what 68 | extent this is actually done, since it is rarely, if ever, reported in 69 | published papers. With the increasing drive towards greater transparency 70 | and reproducibility within the scientific community, this essential yet 71 | often-overlooked part of the analysis process will inevitably begin to 72 | come under greater scrutiny. The daiquiri package helps researchers 73 | conduct this part of the process more thoroughly, consistently and 74 | transparently, hence increasing the quality of their studies as well as 75 | trust in the scientific process. 
76 | 77 | ## Installation 78 | 79 | ``` r 80 | # install the latest release from CRAN 81 | install.packages("daiquiri") 82 | 83 | # or install the development version from rOpenSci 84 | install.packages("daiquiri", repos = "https://ropensci.r-universe.dev") 85 | 86 | # or install direct from source 87 | # install.packages("remotes") 88 | remotes::install_github("ropensci/daiquiri") 89 | ``` 90 | 91 | ## Usage 92 | 93 | ``` r 94 | library(daiquiri) 95 | 96 | # load delimited file into a data.frame without doing any datatype conversion 97 | path <- system.file("extdata", "example_prescriptions.csv", package = "daiquiri") 98 | raw_data <- read_data(path, show_progress = FALSE) 99 | 100 | head(raw_data) 101 | ``` 102 | 103 | ## # A tibble: 6 × 8 104 | ## PrescriptionID PrescriptionDate Admis…¹ Drug Dose DoseU…² Patie…³ Locat…⁴ 105 | ## 106 | ## 1 6000 2021-01-01 00:00:00 2020-1… Ceft… 500 mg 4993679 SITE3 107 | ## 2 6001 NULL 2020-1… Fluc… 1000 mg 819452 SITE1 108 | ## 3 6002 NULL 2020-1… Teic… 400 mg 275597 SITE2 109 | ## 4 6003 2021-01-01 01:00:00 1800-0… Fluc… 1000 NULL 819452 SITE2 110 | ## 5 6004 2021-01-01 02:00:00 1800-0… Fluc… 1000 NULL 528071 SITE1 111 | ## 6 6005 2021-01-01 03:00:00 2020-1… Co-a… 1.2 g 1001434 SITE3 112 | ## # … with abbreviated variable names ¹​AdmissionDate, ²​DoseUnit, ³​PatientID, 113 | ## # ⁴​Location 114 | 115 | ``` r 116 | # specify the type of data expected in each column of the data.frame 117 | fts <- field_types( 118 | PrescriptionID = ft_uniqueidentifier(), 119 | PrescriptionDate = ft_timepoint(), 120 | AdmissionDate = ft_datetime(includes_time = FALSE), 121 | Drug = ft_freetext(), 122 | Dose = ft_numeric(), 123 | DoseUnit = ft_categorical(), 124 | PatientID = ft_ignore(), 125 | Location = ft_categorical(aggregate_by_each_category = TRUE) 126 | ) 127 | ``` 128 | 129 | ``` r 130 | # create a report in the current directory 131 | daiq_obj <- daiquiri_report( 132 | raw_data, 133 | field_types = fts 134 | ) 135 | ``` 136 | 137 | An 
[example 138 | report](https://ropensci.github.io/daiquiri/articles/example_prescriptions.html) 139 | is available from the [package 140 | website](https://ropensci.github.io/daiquiri/index.html). 141 | 142 | More detailed guidance can be found in the [walkthrough 143 | vignette](https://ropensci.github.io/daiquiri/articles/daiquiri.html): 144 | 145 | ``` r 146 | vignette("daiquiri", package = "daiquiri") 147 | ``` 148 | 149 | ## How to cite this package 150 | 151 | > Quan, T. P., (2022). daiquiri: Data Quality Reporting for Temporal 152 | > Datasets. Journal of Open Source Software, 7(80), 5034, 153 | > <https://doi.org/10.21105/joss.05034> 154 | 155 | ## Acknowledgements 156 | 157 | This work was supported by the National Institute for Health Research 158 | Health Protection Research Unit (NIHR HPRU) in Healthcare Associated 159 | Infections and Antimicrobial Resistance at the University of Oxford in 160 | partnership with Public Health England (PHE) (NIHR200915), and by the 161 | NIHR Oxford Biomedical Research Centre. 162 | 163 | ## Contributing to this package 164 | 165 | Please report any bugs or suggestions by opening a [github 166 | issue](https://github.com/ropensci/daiquiri/issues). 167 | 168 | Please note that this package is released with a [Contributor Code of 169 | Conduct](https://ropensci.org/code-of-conduct/). By contributing to this 170 | project, you agree to abide by its terms. 
171 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: ~ 2 | template: 3 | bootstrap: 5 4 | development: 5 | mode: auto 6 | 7 | navbar: 8 | structure: 9 | left: [intro, reference, example-report, news] 10 | right: [search, github] 11 | components: 12 | example-report: 13 | text: Example reports 14 | menu: 15 | - text: Standard report 16 | href: articles/example_report.html 17 | - text: Stratified report 18 | href: articles/example_report_stratified.html 19 | 20 | reference: 21 | - title: Basic usage 22 | contents: 23 | - daiquiri 24 | - read_data 25 | - daiquiri_report 26 | 27 | - title: Specifying the type of data in each field 28 | contents: 29 | - field_types_available 30 | - field_types 31 | - template_field_types 32 | - field_types_advanced 33 | 34 | - title: Advanced usage 35 | contents: 36 | - prepare_data 37 | - aggregate_data 38 | - report_data 39 | - export_aggregated_data 40 | - initialise_log 41 | - close_log 42 | 43 | authors: 44 | footer: 45 | roles: [cre, cph] 46 | sidebar: 47 | roles: [cre, cph] 48 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /daiquiri.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: 
pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /devtesting/testoutput/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • daiquiri 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | Skip to contents 28 | 29 | 30 |
77 |
78 |
82 | 83 | Content not found. Please use links in the navbar. 84 | 85 |
86 |
87 | 88 | 89 |
93 | 94 | 98 | 99 |
100 |
101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.html: -------------------------------------------------------------------------------- 1 | 2 | Contributing to daiquiri • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
56 | 57 |
58 | 59 |
60 |

Reporting issues

61 |

Please report any bugs or suggestions by opening a GitHub issue.

62 |
63 |
64 |

Development guidelines

65 |

So far, daiquiri has been developed by a single person, so there is not yet a system in place for collaboration. If you’d like to contribute changes, please raise an issue first and then we can probably use GitHub flow.

66 |
67 |
68 |

Code of Conduct

69 |

Please note that this package is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.

70 |
71 |
72 | 73 |
75 | 76 | 77 |
80 | 81 | 84 | 85 |
86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /docs/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/apple-touch-icon.png 
-------------------------------------------------------------------------------- /docs/articles/example_report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example report • daiquiri 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 27 | 28 | 29 | Skip to contents 30 | 31 | 32 |
79 | 80 | 81 | 82 | 83 |
84 |
91 | 92 | 93 | 94 | 99 | 101 |
102 |
103 | 104 | 105 | 106 |
110 | 111 | 115 | 116 |
117 |
118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /docs/articles/example_report_stratified.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example stratified report • daiquiri 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 27 | 28 | 29 | Skip to contents 30 | 31 | 32 |
79 | 80 | 81 | 82 | 83 |
84 |
91 | 92 | 93 | 94 | 99 | 101 |
102 |
103 | 104 | 105 | 106 |
110 | 111 | 115 | 116 |
117 |
118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
55 | 56 |
57 |

All vignettes

58 |

59 | 60 |
Example report
61 |
62 |
Example stratified report
63 |
64 |
Walkthrough for the daiquiri package
65 |
66 |
67 |
68 | 69 | 70 |
73 | 74 | 77 | 78 |
79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/dev/.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/dev/CONTRIBUTING.html: -------------------------------------------------------------------------------- 1 | 2 | Contributing to daiquiri • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
56 | 57 |
58 | 59 |
60 |

Reporting issues

61 |

Please report any bugs or suggestions by opening a GitHub issue.

62 |
63 |
64 |

Development guidelines

65 |

So far, daiquiri has been developed by a single person, so there is not yet a system in place for collaboration. If you’d like to contribute changes, please raise an issue first and then we can probably use GitHub flow.

66 |
67 |
68 |

Code of Conduct

69 |

Please note that this package is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.

70 |
71 |
72 | 73 |
75 | 76 | 77 |
80 | 81 | 84 | 85 |
86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs/dev/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /docs/dev/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /docs/dev/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /docs/dev/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /docs/dev/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /docs/dev/apple-touch-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/dev/articles/example_report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example report • daiquiri 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 | 30 | Skip to contents 31 | 32 | 33 |
80 | 81 | 82 | 83 | 84 |
85 |
92 | 93 | 94 | 95 | 100 | 102 |
103 |
104 | 105 | 106 | 107 |
111 | 112 | 116 | 117 |
118 |
119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /docs/dev/articles/example_report_stratified.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Example stratified report • daiquiri 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 | 30 | Skip to contents 31 | 32 | 33 |
80 | 81 | 82 | 83 | 84 |
85 |
92 | 93 | 94 | 95 | 100 | 102 |
103 |
104 | 105 | 106 | 107 |
111 | 112 | 116 | 117 |
118 |
119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /docs/dev/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
55 | 56 |
57 |

All vignettes

58 |

59 | 60 |
Example report
61 |
62 |
Example stratified report
63 |
64 |
Walkthrough for the daiquiri package
65 |
66 |
67 |
68 | 69 | 70 |
73 | 74 | 77 | 78 |
79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/dev/authors.html: -------------------------------------------------------------------------------- 1 | 2 | Authors and Citation • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
55 | 56 |
57 |

Authors

58 | 59 |
  • 60 |

    T. Phuong Quan. Author, maintainer. 61 |

    62 |
  • 63 |
  • 64 |

    Jack Cregan. Contributor. 65 |

    66 |
  • 67 |
  • 68 |

    University of Oxford. Copyright holder. 69 |

    70 |
  • 71 |
  • 72 |

    National Institute for Health Research (NIHR). Funder. 73 |

    74 |
  • 75 |
  • 76 |

    Brad Cannell. Reviewer. 77 |

    78 |
  • 79 |
80 | 81 |
82 |

Citation

83 |

Source: inst/CITATION

84 | 85 |

Quan TP (2022). 86 | “daiquiri: Data Quality Reporting for Temporal Datasets.” 87 | Journal of Open Source Software, 7(80), 5034. 88 | doi:10.21105/joss.05034. 89 |

90 |
@Article{,
 91 |   title = {daiquiri: Data Quality Reporting for Temporal Datasets},
 92 |   author = {T. Phuong Quan},
 93 |   doi = {10.21105/joss.05034},
 94 |   year = {2022},
 95 |   publisher = {The Open Journal},
 96 |   volume = {7},
 97 |   number = {80},
 98 |   pages = {5034},
 99 |   journal = {Journal of Open Source Software},
100 | }
101 |
102 |
104 | 105 | 106 |
109 | 110 | 113 | 114 |
115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /docs/dev/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/dev/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/favicon-16x16.png -------------------------------------------------------------------------------- /docs/dev/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/favicon-32x32.png -------------------------------------------------------------------------------- /docs/dev/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/favicon.ico -------------------------------------------------------------------------------- /docs/dev/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/dev/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/logo.png -------------------------------------------------------------------------------- /docs/dev/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | 
$('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 34 | element.setAttribute('data-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function() { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("main#main"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | 
limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if (s.title == s.what) { 141 | return `${s.dir} >
${s.title}
`; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
${s.title}
> ${s.what}`; 144 | } else { 145 | return `${s.dir} >
${s.title}
> ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/dev/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.19.2 2 | pkgdown: 2.0.6 3 | pkgdown_sha: ~ 4 | articles: 5 | example_report: example_report.html 6 | example_report_stratified: example_report_stratified.html 7 | daiquiri: daiquiri.html 8 | last_built: 2023-05-23T09:09Z 9 | 10 | -------------------------------------------------------------------------------- /docs/dev/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/dev/reference/close_log.html: -------------------------------------------------------------------------------- 1 | 2 | Close any active log file — close_log • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
57 | 58 |
59 |

Close any active log file

60 |
61 | 62 |
63 |

Usage

64 |
close_log()
65 |
66 | 67 |
68 |

Value

69 | 70 | 71 |

If a log file was found, the path to the log file that was closed, 72 | otherwise an empty string

73 |
74 | 75 |
76 |

Examples

77 |
close_log()
 78 | #> [1] ""
 79 | 
80 |
81 |
83 | 84 | 85 |
88 | 89 | 92 | 93 |
94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /docs/dev/reference/figures/antibiotics_day_DurationEnteredByPrescriber_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/antibiotics_day_DurationEnteredByPrescriber_missing_perc.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/bchem_creatinine_day_Value_mean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/bchem_creatinine_day_Value_mean.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/example_prescriptions_aggregated_valuespresent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/example_prescriptions_aggregated_valuespresent.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/example_prescriptions_allfields_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/example_prescriptions_allfields_missing_perc.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/example_prescriptions_head.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/example_prescriptions_head.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/example_prescriptions_source_fieldsimported.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/example_prescriptions_source_fieldsimported.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/example_prescriptions_stratified_midnight_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/example_prescriptions_stratified_midnight_perc.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/example_prescriptions_stratified_strata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/example_prescriptions_stratified_strata.png -------------------------------------------------------------------------------- /docs/dev/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/dev/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/dev/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /dev/articles/daiquiri.html 5 | 6 | 7 | /dev/articles/example_report.html 8 | 9 | 10 | 
/dev/articles/example_report_stratified.html 11 | 12 | 13 | /dev/articles/index.html 14 | 15 | 16 | /dev/authors.html 17 | 18 | 19 | /dev/CONTRIBUTING.html 20 | 21 | 22 | /dev/index.html 23 | 24 | 25 | /dev/LICENSE.html 26 | 27 | 28 | /dev/news/index.html 29 | 30 | 31 | /dev/reference/aggregate_data.html 32 | 33 | 34 | /dev/reference/close_log.html 35 | 36 | 37 | /dev/reference/daiquiri-package.html 38 | 39 | 40 | /dev/reference/daiquiri_report.html 41 | 42 | 43 | /dev/reference/export_aggregated_data.html 44 | 45 | 46 | /dev/reference/field_types.html 47 | 48 | 49 | /dev/reference/field_types_available.html 50 | 51 | 52 | /dev/reference/index.html 53 | 54 | 55 | /dev/reference/initialise_log.html 56 | 57 | 58 | /dev/reference/prepare_data.html 59 | 60 | 61 | /dev/reference/read_data.html 62 | 63 | 64 | /dev/reference/report_data.html 65 | 66 | 67 | /dev/reference/template_field_types.html 68 | 69 | 70 | -------------------------------------------------------------------------------- /docs/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/favicon-16x16.png -------------------------------------------------------------------------------- /docs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/favicon-32x32.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/favicon.ico -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 
| 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/logo.png -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 34 | element.setAttribute('data-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return 
trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function() { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("main#main"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 
😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if (s.title == s.what) { 141 | return `${s.dir} >
${s.title}
`; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
${s.title}
> ${s.what}`; 144 | } else { 145 | return `${s.dir} >
${s.title}
> ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 3.1.1 2 | pkgdown: 2.0.7 3 | pkgdown_sha: ~ 4 | articles: 5 | example_report: example_report.html 6 | example_report_stratified: example_report_stratified.html 7 | daiquiri: daiquiri.html 8 | last_built: 2023-07-18T13:37Z 9 | 10 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/close_log.html: -------------------------------------------------------------------------------- 1 | 2 | Close any active log file — close_log • daiquiri 6 | Skip to contents 7 | 8 | 9 |
51 |
52 |
57 | 58 |
59 |

Close any active log file

60 |
61 | 62 |
63 |

Usage

64 |
close_log()
65 |
66 | 67 |
68 |

Value

69 | 70 | 71 |

If a log file was found, the path to the log file that was closed, 72 | otherwise an empty string

73 |
74 | 75 |
76 |

Examples

77 |
close_log()
 78 | #> [1] ""
 79 | 
80 |
81 |
83 | 84 | 85 |
88 | 89 | 92 | 93 |
94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /docs/reference/figures/antibiotics_day_DurationEnteredByPrescriber_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/antibiotics_day_DurationEnteredByPrescriber_missing_perc.png -------------------------------------------------------------------------------- /docs/reference/figures/bchem_creatinine_day_Value_mean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/bchem_creatinine_day_Value_mean.png -------------------------------------------------------------------------------- /docs/reference/figures/example_prescriptions_aggregated_valuespresent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/example_prescriptions_aggregated_valuespresent.png -------------------------------------------------------------------------------- /docs/reference/figures/example_prescriptions_allfields_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/example_prescriptions_allfields_missing_perc.png -------------------------------------------------------------------------------- /docs/reference/figures/example_prescriptions_head.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/example_prescriptions_head.png -------------------------------------------------------------------------------- /docs/reference/figures/example_prescriptions_source_fieldsimported.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/example_prescriptions_source_fieldsimported.png -------------------------------------------------------------------------------- /docs/reference/figures/example_prescriptions_stratified_midnight_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/example_prescriptions_stratified_midnight_perc.png -------------------------------------------------------------------------------- /docs/reference/figures/example_prescriptions_stratified_strata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/example_prescriptions_stratified_strata.png -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /articles/daiquiri.html 8 | 9 | 10 | /articles/example_prescriptions.html 11 | 12 | 13 | 
/articles/example_prescriptions_stratified.html 14 | 15 | 16 | /articles/example_report.html 17 | 18 | 19 | /articles/example_report_stratified.html 20 | 21 | 22 | /articles/index.html 23 | 24 | 25 | /authors.html 26 | 27 | 28 | /CONTRIBUTING.html 29 | 30 | 31 | /index.html 32 | 33 | 34 | /LICENSE.html 35 | 36 | 37 | /news/index.html 38 | 39 | 40 | /reference/aggregate_data.html 41 | 42 | 43 | /reference/close_log.html 44 | 45 | 46 | /reference/daiquiri-package.html 47 | 48 | 49 | /reference/daiquiri_report.html 50 | 51 | 52 | /reference/export_aggregated_data.html 53 | 54 | 55 | /reference/field_types.html 56 | 57 | 58 | /reference/field_types_available.html 59 | 60 | 61 | /reference/index.html 62 | 63 | 64 | /reference/initialise_log.html 65 | 66 | 67 | /reference/prepare_data.html 68 | 69 | 70 | /reference/read_data.html 71 | 72 | 73 | /reference/report_data.html 74 | 75 | 76 | /reference/template_field_types.html 77 | 78 | 79 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | title = "daiquiri: Data Quality Reporting for Temporal Datasets", 4 | author = person(c("T.", "Phuong"), "Quan"), 5 | doi = "10.21105/joss.05034", 6 | year = 2022, 7 | publisher = "The Open Journal", 8 | volume = 7, 9 | number = 80, 10 | pages = 5034, 11 | journal = "Journal of Open Source Software" 12 | ) 13 | -------------------------------------------------------------------------------- /man/aggregate_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aggregate_data.R 3 | \name{aggregate_data} 4 | \alias{aggregate_data} 5 | \title{Aggregate source data} 6 | \usage{ 7 | aggregate_data(source_data, aggregation_timeunit = "day", show_progress = TRUE) 8 | } 9 | \arguments{ 10 | \item{source_data}{A 
\code{daiquiri_source_data} object returned from 11 | \code{\link[=prepare_data]{prepare_data()}} function} 12 | 13 | \item{aggregation_timeunit}{Unit of time to aggregate over. Specify one of 14 | \code{"day"}, \code{"week"}, \code{"month"}, \code{"quarter"}, \code{"year"}. The \code{"week"} option is 15 | Monday-based. Default = \code{"day"}} 16 | 17 | \item{show_progress}{Print progress to console. Default = \code{TRUE}} 18 | } 19 | \value{ 20 | A \code{daiquiri_aggregated_data} object 21 | } 22 | \description{ 23 | Aggregates a \code{daiquiri_source_data} object based on the \code{\link[=field_types]{field_types()}} specified at load time. 24 | Default time period for aggregation is a calendar day 25 | } 26 | \examples{ 27 | \dontshow{ 28 | # restrict threads for CRAN compliance 29 | dt_threads <- data.table::getDTthreads() 30 | data.table::setDTthreads(1) 31 | } 32 | 33 | # load example data into a data.frame 34 | raw_data <- read_data( 35 | system.file("extdata", "example_prescriptions.csv", package = "daiquiri"), 36 | delim = ",", 37 | col_names = TRUE 38 | ) 39 | 40 | # validate and prepare the data for aggregation 41 | source_data <- prepare_data( 42 | raw_data, 43 | field_types = field_types( 44 | PrescriptionID = ft_uniqueidentifier(), 45 | PrescriptionDate = ft_timepoint(), 46 | AdmissionDate = ft_datetime(includes_time = FALSE), 47 | Drug = ft_freetext(), 48 | Dose = ft_numeric(), 49 | DoseUnit = ft_categorical(), 50 | PatientID = ft_ignore(), 51 | Location = ft_categorical(aggregate_by_each_category = TRUE) 52 | ), 53 | override_column_names = FALSE, 54 | na = c("", "NULL") 55 | ) 56 | 57 | # aggregate the data 58 | aggregated_data <- aggregate_data( 59 | source_data, 60 | aggregation_timeunit = "day" 61 | ) 62 | 63 | aggregated_data 64 | \dontshow{ 65 | # restore thread setting 66 | data.table::setDTthreads(dt_threads) 67 | } 68 | 69 | } 70 | \seealso{ 71 | \code{\link[=prepare_data]{prepare_data()}}, \code{\link[=report_data]{report_data()}} 72 | } 
73 | -------------------------------------------------------------------------------- /man/close_log.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{close_log} 4 | \alias{close_log} 5 | \title{Close any active log file} 6 | \usage{ 7 | close_log() 8 | } 9 | \value{ 10 | If a log file was found, the path to the log file that was closed, 11 | otherwise an empty string 12 | } 13 | \description{ 14 | Close any active log file 15 | } 16 | \examples{ 17 | close_log() 18 | } 19 | -------------------------------------------------------------------------------- /man/daiquiri-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/daiquiri-package.R 3 | \docType{package} 4 | \name{daiquiri-package} 5 | \alias{daiquiri} 6 | \alias{daiquiri-package} 7 | \title{daiquiri: Data Quality Reporting for Temporal Datasets} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Generate reports that enable quick visual review of temporal shifts in record-level data. Time series plots showing aggregated values are automatically created for each data field (column) depending on its contents (e.g. min/max/mean values for numeric data, no. of distinct values for categorical data), as well as overviews for missing values, non-conformant values, and duplicated rows. The resulting reports are shareable and can contribute to forming a transparent record of the entire analysis process. It is designed with Electronic Health Records in mind, but can be used for any type of record-level temporal data (i.e. tabular data where each row represents a single "event", one column contains the "event date", and other columns contain any associated values for the event). 
12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://github.com/ropensci/daiquiri} 17 | \item \url{https://ropensci.github.io/daiquiri/} 18 | \item Report bugs at \url{https://github.com/ropensci/daiquiri/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: T. Phuong Quan \email{phuong.quan@ndm.ox.ac.uk} (\href{https://orcid.org/0000-0001-8566-1817}{ORCID}) 24 | 25 | Other contributors: 26 | \itemize{ 27 | \item Jack Cregan [contributor] 28 | \item University of Oxford [copyright holder] 29 | \item National Institute for Health Research (NIHR) [funder] 30 | \item Brad Cannell [reviewer] 31 | } 32 | 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/daiquiri_report.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/main.R 3 | \name{daiquiri_report} 4 | \alias{daiquiri_report} 5 | \title{Create a data quality report from a data frame} 6 | \usage{ 7 | daiquiri_report( 8 | df, 9 | field_types, 10 | override_column_names = FALSE, 11 | na = c("", "NA", "NULL"), 12 | dataset_description = NULL, 13 | aggregation_timeunit = "day", 14 | report_title = "daiquiri data quality report", 15 | save_directory = ".", 16 | save_filename = NULL, 17 | show_progress = TRUE, 18 | log_directory = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{df}{A data frame. Rectangular data can be read from file using 23 | \code{\link[=read_data]{read_data()}}. See Details.} 24 | 25 | \item{field_types}{\code{\link[=field_types]{field_types()}} object specifying names and types of 26 | fields (columns) in the supplied \code{df}. See also \link{field_types_available}.} 27 | 28 | \item{override_column_names}{If \code{FALSE}, column names in the supplied \code{df} 29 | must match the names specified in \code{field_types} exactly. 
If \code{TRUE}, column 30 | names in the supplied \code{df} will be replaced with the names specified in 31 | \code{field_types}. The specification must therefore contain the columns in the 32 | correct order. Default = \code{FALSE}} 33 | 34 | \item{na}{vector containing strings that should be interpreted as missing 35 | values. Default = \code{c("","NA","NULL")}.} 36 | 37 | \item{dataset_description}{Short description of the dataset being checked. 38 | This will appear on the report. If blank, the name of the data frame object 39 | will be used} 40 | 41 | \item{aggregation_timeunit}{Unit of time to aggregate over. Specify one of 42 | \code{"day"}, \code{"week"}, \code{"month"}, \code{"quarter"}, \code{"year"}. The \code{"week"} option is 43 | Monday-based. Default = \code{"day"}} 44 | 45 | \item{report_title}{Title to appear on the report} 46 | 47 | \item{save_directory}{String specifying directory in which to save the 48 | report. Default is current directory.} 49 | 50 | \item{save_filename}{String specifying filename for the report, excluding any 51 | file extension. If no filename is supplied, one will be automatically 52 | generated with the format \code{daiquiri_report_YYMMDD_HHMMSS}.} 53 | 54 | \item{show_progress}{Print progress to console. Default = \code{TRUE}} 55 | 56 | \item{log_directory}{String specifying directory in which to save the log file. 57 | If no directory is supplied, progress is not logged.} 58 | } 59 | \value{ 60 | A list containing information relating to the supplied parameters as 61 | well as the resulting \code{daiquiri_source_data} and \code{daiquiri_aggregated_data} 62 | objects. 63 | } 64 | \description{ 65 | Accepts record-level data from a data frame, validates it against the 66 | expected type of content of each column, generates a collection of time 67 | series plots for visual inspection, and saves a report to disk.
68 | } 69 | \section{Details}{ 70 | In order for the package to detect any non-conformant 71 | values in numeric or datetime fields, these should be present in the data 72 | frame in their raw character format. Rectangular data from a text file will 73 | automatically be read in as character type if you use the \code{\link[=read_data]{read_data()}} 74 | function. Data frame columns that are not of class character will still be 75 | processed according to the \code{field_types} specified. 76 | } 77 | 78 | \examples{ 79 | \donttest{ 80 | # load example data into a data.frame 81 | raw_data <- read_data( 82 | system.file("extdata", "example_prescriptions.csv", package = "daiquiri"), 83 | delim = ",", 84 | col_names = TRUE 85 | ) 86 | 87 | # create a report in the current directory 88 | daiq_obj <- daiquiri_report( 89 | raw_data, 90 | field_types = field_types( 91 | PrescriptionID = ft_uniqueidentifier(), 92 | PrescriptionDate = ft_timepoint(), 93 | AdmissionDate = ft_datetime(includes_time = FALSE, na = "1800-01-01"), 94 | Drug = ft_freetext(), 95 | Dose = ft_numeric(), 96 | DoseUnit = ft_categorical(), 97 | PatientID = ft_ignore(), 98 | Location = ft_categorical(aggregate_by_each_category = TRUE) 99 | ), 100 | override_column_names = FALSE, 101 | na = c("", "NULL"), 102 | dataset_description = "Example data provided with package", 103 | aggregation_timeunit = "day", 104 | report_title = "daiquiri data quality report", 105 | save_directory = ".", 106 | save_filename = "example_data_report", 107 | show_progress = TRUE, 108 | log_directory = NULL 109 | ) 110 | \dontshow{file.remove("./example_data_report.html")} 111 | } 112 | 113 | } 114 | \seealso{ 115 | \code{\link[=read_data]{read_data()}}, \code{\link[=field_types]{field_types()}}, 116 | \code{\link[=field_types_available]{field_types_available()}} 117 | } 118 | -------------------------------------------------------------------------------- /man/export_aggregated_data.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aggregate_data.R 3 | \name{export_aggregated_data} 4 | \alias{export_aggregated_data} 5 | \title{Export aggregated data} 6 | \usage{ 7 | export_aggregated_data( 8 | aggregated_data, 9 | save_directory, 10 | save_file_prefix = "", 11 | save_file_type = "csv" 12 | ) 13 | } 14 | \arguments{ 15 | \item{aggregated_data}{A \code{daiquiri_aggregated_data} object} 16 | 17 | \item{save_directory}{String. Full or relative path for save folder} 18 | 19 | \item{save_file_prefix}{String. Optional prefix for the exported filenames} 20 | 21 | \item{save_file_type}{String. Filetype extension supported by \code{readr}, 22 | currently only csv allowed} 23 | } 24 | \value{ 25 | (invisibly) The \code{daiquiri_aggregated_data} object that was passed in 26 | } 27 | \description{ 28 | Export aggregated data to disk. Creates a separate file for each aggregated 29 | field in dataset. 
30 | } 31 | \examples{ 32 | \donttest{ 33 | raw_data <- read_data( 34 | system.file("extdata", "example_prescriptions.csv", package = "daiquiri"), 35 | delim = ",", 36 | col_names = TRUE 37 | ) 38 | 39 | source_data <- prepare_data( 40 | raw_data, 41 | field_types = field_types( 42 | PrescriptionID = ft_uniqueidentifier(), 43 | PrescriptionDate = ft_timepoint(), 44 | AdmissionDate = ft_datetime(includes_time = FALSE), 45 | Drug = ft_freetext(), 46 | Dose = ft_numeric(), 47 | DoseUnit = ft_categorical(), 48 | PatientID = ft_ignore(), 49 | Location = ft_categorical(aggregate_by_each_category = TRUE) 50 | ), 51 | override_column_names = FALSE, 52 | na = c("", "NULL") 53 | ) 54 | 55 | aggregated_data <- aggregate_data( 56 | source_data, 57 | aggregation_timeunit = "day" 58 | ) 59 | 60 | export_aggregated_data( 61 | aggregated_data, 62 | save_directory = ".", 63 | save_file_prefix = "ex_" 64 | ) 65 | 66 | \dontshow{ 67 | f <- list.files(".", "^ex_.*csv$") 68 | file.remove(f) 69 | } 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /man/field_types.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/field_types.R 3 | \name{field_types} 4 | \alias{field_types} 5 | \title{Create field_types specification} 6 | \usage{ 7 | field_types(...) 8 | } 9 | \arguments{ 10 | \item{...}{names and types of fields (columns) in source data.} 11 | } 12 | \value{ 13 | A \code{field_types} object 14 | } 15 | \description{ 16 | Specify the names and types of fields in the source data frame. This is 17 | important because the data in each field will be aggregated in different 18 | ways, depending on its \code{field_type}. 
See \link{field_types_available} 19 | } 20 | \examples{ 21 | fts <- field_types( 22 | PatientID = ft_uniqueidentifier(), 23 | TestID = ft_ignore(), 24 | TestDate = ft_timepoint(), 25 | TestName = ft_categorical(aggregate_by_each_category = FALSE), 26 | TestResult = ft_numeric(), 27 | ResultDate = ft_datetime(), 28 | ResultComment = ft_freetext(), 29 | Location = ft_categorical() 30 | ) 31 | 32 | fts 33 | } 34 | \seealso{ 35 | \code{\link[=field_types_available]{field_types_available()}}, \code{\link[=template_field_types]{template_field_types()}} 36 | } 37 | -------------------------------------------------------------------------------- /man/field_types_advanced.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/field_types.R 3 | \name{field_types_advanced} 4 | \alias{field_types_advanced} 5 | \title{Create field_types_advanced specification} 6 | \usage{ 7 | field_types_advanced(..., .default_field_type = ft_simple()) 8 | } 9 | \arguments{ 10 | \item{...}{names and types of fields (columns) in source data.} 11 | 12 | \item{.default_field_type}{\code{field_type} to use for any remaining fields (columns) in source 13 | data. Note, this means there cannot be a field in the data named \code{.default_field_type}} 14 | } 15 | \value{ 16 | A \code{field_types} object 17 | } 18 | \description{ 19 | Specify only a subset of the names and types of fields in the source data frame. The remaining 20 | fields will be given the same 'default' type.
21 | } 22 | \examples{ 23 | fts <- field_types_advanced( 24 | PrescriptionDate = ft_timepoint(), 25 | PatientID = ft_ignore(), 26 | .default_field_type = ft_simple() 27 | ) 28 | 29 | fts 30 | } 31 | \seealso{ 32 | \code{\link[=field_types]{field_types()}}, \code{\link[=field_types_available]{field_types_available()}}, \code{\link[=template_field_types]{template_field_types()}} 33 | } 34 | -------------------------------------------------------------------------------- /man/field_types_available.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/field_types.R 3 | \name{field_types_available} 4 | \alias{field_types_available} 5 | \alias{ft_timepoint} 6 | \alias{ft_uniqueidentifier} 7 | \alias{ft_categorical} 8 | \alias{ft_numeric} 9 | \alias{ft_datetime} 10 | \alias{ft_freetext} 11 | \alias{ft_simple} 12 | \alias{ft_strata} 13 | \alias{ft_ignore} 14 | \title{Types of data fields available for specification} 15 | \usage{ 16 | ft_timepoint(includes_time = TRUE, format = "", na = NULL) 17 | 18 | ft_uniqueidentifier(na = NULL) 19 | 20 | ft_categorical(aggregate_by_each_category = FALSE, na = NULL) 21 | 22 | ft_numeric(na = NULL) 23 | 24 | ft_datetime(includes_time = TRUE, format = "", na = NULL) 25 | 26 | ft_freetext(na = NULL) 27 | 28 | ft_simple(na = NULL) 29 | 30 | ft_strata(na = NULL) 31 | 32 | ft_ignore() 33 | } 34 | \arguments{ 35 | \item{includes_time}{If \code{TRUE}, additional aggregated values will be 36 | generated using the time portion (and if no time portion is present then 37 | midnight will be assumed). If \code{FALSE}, aggregated values will ignore any 38 | time portion. 
Default = \code{TRUE}} 39 | 40 | \item{format}{Where datetime values are not in the format \code{YYYY-MM-DD} or 41 | \verb{YYYY-MM-DD HH:MM:SS}, an alternative format can be specified at the per 42 | field level, using \code{\link[readr:parse_datetime]{readr::col_datetime()}} format specifications, e.g. 43 | \code{format = "\%d/\%m/\%Y"}. When a format is supplied, it must match the 44 | complete string.} 45 | 46 | \item{na}{Column-specific vector of strings that should be interpreted as missing 47 | values (in addition to those specified at dataset level)} 48 | 49 | \item{aggregate_by_each_category}{If \code{TRUE}, aggregated values will be 50 | generated for each distinct subcategory as well as for the field overall. 51 | If \code{FALSE}, aggregated values will only be generated for the field overall. 52 | Default = \code{FALSE}} 53 | } 54 | \value{ 55 | A \code{field_type} object denoting the type of data in the column 56 | } 57 | \description{ 58 | Each column in the source dataset must be assigned to a particular \code{ft_xx} 59 | depending on the type of data that it contains. This is done through a 60 | \code{\link[=field_types]{field_types()}} specification. 61 | } 62 | \section{Details}{ 63 | \code{ft_timepoint()} - identifies the data field which should 64 | be used as the independent time variable. There should be one and only one 65 | of these specified. 66 | 67 | \code{ft_uniqueidentifier()} - identifies data fields which 68 | contain a (usually computer-generated) identifier for an entity, e.g. a 69 | patient. It does not need to be unique within the dataset. 70 | 71 | \code{ft_categorical()} - identifies data fields which should 72 | be treated as categorical. 73 | 74 | 75 | \code{ft_numeric()} - identifies data fields which contain numeric values that 76 | should be treated as continuous. 
Any values which contain non-numeric 77 | characters (including grouping marks) will be classed as non-conformant 78 | 79 | \code{ft_datetime()} - identifies data fields which contain date 80 | values that should be treated as continuous. 81 | 82 | \code{ft_freetext()} - identifies data fields which contain 83 | free text values. Only presence/missingness will be evaluated. 84 | 85 | \code{ft_simple()} - identifies data fields where you only 86 | want presence/missingness to be evaluated (but which are not necessarily 87 | free text). 88 | 89 | \code{ft_strata()} - identifies a categorical data field which should 90 | be used to stratify the rest of the data. 91 | 92 | \code{ft_ignore()} - identifies data fields which should be 93 | ignored. These will not be loaded. 94 | } 95 | 96 | \examples{ 97 | fts <- field_types( 98 | PatientID = ft_uniqueidentifier(), 99 | TestID = ft_ignore(), 100 | TestDate = ft_timepoint(), 101 | TestName = ft_categorical(aggregate_by_each_category = FALSE), 102 | TestResult = ft_numeric(), 103 | ResultDate = ft_datetime(), 104 | ResultComment = ft_freetext(), 105 | Location = ft_categorical() 106 | ) 107 | 108 | ft_simple() 109 | } 110 | \seealso{ 111 | \code{\link[=field_types]{field_types()}}, \code{\link[=template_field_types]{template_field_types()}} 112 | } 113 | -------------------------------------------------------------------------------- /man/figures/antibiotics_day_DurationEnteredByPrescriber_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/antibiotics_day_DurationEnteredByPrescriber_missing_perc.png -------------------------------------------------------------------------------- /man/figures/bchem_creatinine_day_Value_mean.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/bchem_creatinine_day_Value_mean.png -------------------------------------------------------------------------------- /man/figures/example_prescriptions_aggregated_valuespresent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/example_prescriptions_aggregated_valuespresent.png -------------------------------------------------------------------------------- /man/figures/example_prescriptions_allfields_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/example_prescriptions_allfields_missing_perc.png -------------------------------------------------------------------------------- /man/figures/example_prescriptions_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/example_prescriptions_head.png -------------------------------------------------------------------------------- /man/figures/example_prescriptions_source_fieldsimported.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/example_prescriptions_source_fieldsimported.png -------------------------------------------------------------------------------- /man/figures/example_prescriptions_stratified_midnight_perc.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/example_prescriptions_stratified_midnight_perc.png -------------------------------------------------------------------------------- /man/figures/example_prescriptions_stratified_strata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/example_prescriptions_stratified_strata.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/man/figures/logo.png -------------------------------------------------------------------------------- /man/initialise_log.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{initialise_log} 4 | \alias{initialise_log} 5 | \title{Initialise a log file} 6 | \usage{ 7 | initialise_log(log_directory) 8 | } 9 | \arguments{ 10 | \item{log_directory}{String containing directory to save log file} 11 | } 12 | \value{ 13 | Character string containing the full path to the newly-created log 14 | file 15 | } 16 | \description{ 17 | Choose a directory in which to save the log file. If this is not called, no 18 | log file is created. 
19 | } 20 | \examples{ 21 | log_name <- initialise_log(".") 22 | 23 | log_name 24 | \dontshow{ 25 | close_log() 26 | file.remove(log_name) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /man/prepare_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/source_data.R 3 | \name{prepare_data} 4 | \alias{prepare_data} 5 | \title{Prepare source data} 6 | \usage{ 7 | prepare_data( 8 | df, 9 | field_types, 10 | override_column_names = FALSE, 11 | na = c("", "NA", "NULL"), 12 | dataset_description = NULL, 13 | show_progress = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{A data frame} 18 | 19 | \item{field_types}{\code{\link[=field_types]{field_types()}} object specifying names and types of 20 | fields (columns) in the supplied \code{df}. See also \link{field_types_available}.} 21 | 22 | \item{override_column_names}{If \code{FALSE}, column names in the supplied \code{df} 23 | must match the names specified in \code{field_types} exactly. If \code{TRUE}, column 24 | names in the supplied \code{df} will be replaced with the names specified in 25 | \code{field_types}. The specification must therefore contain the columns in the 26 | correct order. Default = \code{FALSE}} 27 | 28 | \item{na}{vector containing strings that should be interpreted as missing 29 | values. Default = \code{c("","NA","NULL")}. Additional column-specific values 30 | can be specified in the \code{\link[=field_types]{field_types()}} object} 31 | 32 | \item{dataset_description}{Short description of the dataset being checked. 33 | This will appear on the report. If blank, the name of the data frame object 34 | will be used} 35 | 36 | \item{show_progress}{Print progress to console. 
Default = \code{TRUE}} 37 | } 38 | \value{ 39 | A \code{daiquiri_source_data} object 40 | } 41 | \description{ 42 | Validate a data frame against a \code{\link[=field_types]{field_types()}} specification, and prepare 43 | for aggregation. 44 | } 45 | \examples{ 46 | # load example data into a data.frame 47 | raw_data <- read_data( 48 | system.file("extdata", "example_prescriptions.csv", package = "daiquiri"), 49 | delim = ",", 50 | col_names = TRUE 51 | ) 52 | 53 | # validate and prepare the data for aggregation 54 | source_data <- prepare_data( 55 | raw_data, 56 | field_types = field_types( 57 | PrescriptionID = ft_uniqueidentifier(), 58 | PrescriptionDate = ft_timepoint(), 59 | AdmissionDate = ft_datetime(includes_time = FALSE), 60 | Drug = ft_freetext(), 61 | Dose = ft_numeric(), 62 | DoseUnit = ft_categorical(), 63 | PatientID = ft_ignore(), 64 | Location = ft_categorical(aggregate_by_each_category = TRUE) 65 | ), 66 | override_column_names = FALSE, 67 | na = c("", "NULL"), 68 | dataset_description = "Example data provided with package" 69 | ) 70 | 71 | source_data 72 | } 73 | \seealso{ 74 | \code{\link[=field_types]{field_types()}}, \code{\link[=field_types_available]{field_types_available()}}, 75 | \code{\link[=aggregate_data]{aggregate_data()}}, \code{\link[=report_data]{report_data()}}, 76 | \code{\link[=daiquiri_report]{daiquiri_report()}} 77 | } 78 | -------------------------------------------------------------------------------- /man/read_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/main.R 3 | \name{read_data} 4 | \alias{read_data} 5 | \title{Read delimited data for optimal use with daiquiri} 6 | \usage{ 7 | read_data( 8 | file, 9 | delim = NULL, 10 | col_names = TRUE, 11 | quote = "\\"", 12 | trim_ws = TRUE, 13 | comment = "", 14 | skip = 0, 15 | n_max = Inf, 16 | show_progress = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | 
\item{file}{A string containing path of file containing data to load, or a 21 | URL starting \verb{http://}, \verb{file://}, etc. Compressed files with extension 22 | \code{.gz}, \code{.bz2}, \code{.xz} and \code{.zip} are supported.} 23 | 24 | \item{delim}{Single character used to separate fields within a record. E.g. 25 | \code{","} or \code{"\\t"}} 26 | 27 | \item{col_names}{Either \code{TRUE}, \code{FALSE} or a character vector of column 28 | names. If \code{TRUE}, the first row of the input will be used as the column 29 | names, and will not be included in the data frame. If \code{FALSE}, column names 30 | will be generated automatically. Default = \code{TRUE}} 31 | 32 | \item{quote}{Single character used to quote strings.} 33 | 34 | \item{trim_ws}{Should leading and trailing whitespace be trimmed from each 35 | field?} 36 | 37 | \item{comment}{A string used to identify comments. Any text after the comment 38 | characters will be silently ignored} 39 | 40 | \item{skip}{Number of lines to skip before reading data. If \code{comment} is 41 | supplied any commented lines are ignored after skipping} 42 | 43 | \item{n_max}{Maximum number of lines to read.} 44 | 45 | \item{show_progress}{Display a progress bar? Default = \code{TRUE}} 46 | } 47 | \value{ 48 | A data frame 49 | } 50 | \description{ 51 | Popular file readers such as \code{readr::read_delim()} perform datatype 52 | conversion by default, which can interfere with daiquiri's ability to detect 53 | non-conformant values. Use this function instead to ensure optimal 54 | compatibility with daiquiri's features. 55 | } 56 | \details{ 57 | This function is aimed at non-expert users of R, and operates as a restricted 58 | implementation of \code{\link[readr:read_delim]{readr::read_delim()}}. 
If you prefer to use \code{read_delim()} 59 | directly, ensure you set the following parameters: \code{col_types = readr::cols(.default = "c")} and \code{na = character()} 60 | } 61 | \examples{ 62 | raw_data <- read_data( 63 | system.file("extdata", "example_prescriptions.csv", package = "daiquiri"), 64 | delim = ",", 65 | col_names = TRUE 66 | ) 67 | 68 | head(raw_data) 69 | } 70 | \seealso{ 71 | \code{\link[=field_types]{field_types()}}, \code{\link[=field_types_available]{field_types_available()}}, 72 | \code{\link[=aggregate_data]{aggregate_data()}}, \code{\link[=report_data]{report_data()}}, 73 | \code{\link[=daiquiri_report]{daiquiri_report()}} 74 | } 75 | -------------------------------------------------------------------------------- /man/report_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reports.R 3 | \name{report_data} 4 | \alias{report_data} 5 | \title{Generate report from existing objects} 6 | \usage{ 7 | report_data( 8 | source_data, 9 | aggregated_data, 10 | report_title = "daiquiri data quality report", 11 | save_directory = ".", 12 | save_filename = NULL, 13 | format = "html", 14 | show_progress = TRUE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{source_data}{A \code{daiquiri_source_data} object returned from 20 | \code{\link[=prepare_data]{prepare_data()}} function} 21 | 22 | \item{aggregated_data}{A \code{daiquiri_aggregated_data} object returned from 23 | \code{\link[=aggregate_data]{aggregate_data()}} function} 24 | 25 | \item{report_title}{Title to appear on the report} 26 | 27 | \item{save_directory}{String specifying directory in which to save the 28 | report. Default is current directory.} 29 | 30 | \item{save_filename}{String specifying filename for the report, excluding any 31 | file extension. 
If no filename is supplied, one will be automatically 32 | generated with the format \code{daiquiri_report_YYMMDD_HHMMSS}.} 33 | 34 | \item{format}{File format of the report. Currently only \code{"html"} is supported} 35 | 36 | \item{show_progress}{Print progress to console. Default = \code{TRUE}} 37 | 38 | \item{...}{Further parameters to be passed to \code{rmarkdown::render()}. Cannot 39 | include any of \code{input}, \code{output_dir}, \code{output_file}, \code{params}, \code{quiet}.} 40 | } 41 | \value{ 42 | A string containing the name and path of the saved report 43 | } 44 | \description{ 45 | Generate report from previously-created \code{daiquiri_source_data} and 46 | \code{daiquiri_aggregated_data} objects 47 | } 48 | \examples{ 49 | \donttest{ 50 | # load example data into a data.frame 51 | raw_data <- read_data( 52 | system.file("extdata", "example_prescriptions.csv", package = "daiquiri"), 53 | delim = ",", 54 | col_names = TRUE 55 | ) 56 | 57 | # validate and prepare the data for aggregation 58 | source_data <- prepare_data( 59 | raw_data, 60 | field_types = field_types( 61 | PrescriptionID = ft_uniqueidentifier(), 62 | PrescriptionDate = ft_timepoint(), 63 | AdmissionDate = ft_datetime(includes_time = FALSE), 64 | Drug = ft_freetext(), 65 | Dose = ft_numeric(), 66 | DoseUnit = ft_categorical(), 67 | PatientID = ft_ignore(), 68 | Location = ft_categorical(aggregate_by_each_category = TRUE) 69 | ), 70 | override_column_names = FALSE, 71 | na = c("", "NULL"), 72 | dataset_description = "Example data provided with package", 73 | show_progress = TRUE 74 | ) 75 | 76 | # aggregate the data 77 | aggregated_data <- aggregate_data( 78 | source_data, 79 | aggregation_timeunit = "day", 80 | show_progress = TRUE 81 | ) 82 | 83 | # save a report in the current directory using the previously-created objects 84 | report_data( 85 | source_data, 86 | aggregated_data, 87 | report_title = "daiquiri data quality report", 88 | save_directory = ".", 89 | save_filename = 
"example_data_report", 90 | show_progress = TRUE 91 | ) 92 | \dontshow{file.remove("./example_data_report.html")} 93 | } 94 | 95 | } 96 | \seealso{ 97 | \code{\link[=prepare_data]{prepare_data()}}, \code{\link[=aggregate_data]{aggregate_data()}}, 98 | \code{\link[=daiquiri_report]{daiquiri_report()}} 99 | } 100 | -------------------------------------------------------------------------------- /man/template_field_types.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/field_types.R 3 | \name{template_field_types} 4 | \alias{template_field_types} 5 | \title{Print a template field_types() specification to console} 6 | \usage{ 7 | template_field_types(df, default_field_type = ft_ignore()) 8 | } 9 | \arguments{ 10 | \item{df}{data frame including the column names for the template 11 | specification} 12 | 13 | \item{default_field_type}{\code{field_type} to be used for each column. Default = 14 | \code{\link[=ft_ignore]{ft_ignore()}}. See \code{\link[=field_types_available]{field_types_available()}}} 15 | } 16 | \value{ 17 | (invisibly) Character string containing the template code 18 | } 19 | \description{ 20 | Helper function to generate template code for a \code{\link[=field_types]{field_types()}} specification, 21 | based on the supplied data frame. All fields (columns) in the specification 22 | will be defined using the \code{default_field_type}, and the console output can be 23 | copied and edited before being used as input to \code{\link[=daiquiri_report]{daiquiri_report()}} 24 | or \code{\link[=prepare_data]{prepare_data()}}. 
25 | } 26 | \examples{ 27 | df <- data.frame( 28 | col1 = rep("2022-01-01", 5), 29 | col2 = rep(1, 5), 30 | col3 = 1:5, 31 | col4 = rnorm(5) 32 | ) 33 | 34 | template_field_types(df, default_field_type = ft_numeric()) 35 | } 36 | \seealso{ 37 | \code{\link[=field_types]{field_types()}} 38 | } 39 | -------------------------------------------------------------------------------- /paper/bchem_creatinine_day_Value_mean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/paper/bchem_creatinine_day_Value_mean.png -------------------------------------------------------------------------------- /paper/example_prescriptions_admdate_missing_perc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/paper/example_prescriptions_admdate_missing_perc.png -------------------------------------------------------------------------------- /paper/example_prescriptions_aggregated_valuespresent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/paper/example_prescriptions_aggregated_valuespresent.png -------------------------------------------------------------------------------- /paper/example_prescriptions_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/paper/example_prescriptions_head.png -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @Manual{Rcore, 2 | title = {R: A Language and Environment for Statistical Computing}, 3 | author = {{R Core 
Team}}, 4 | organization = {R Foundation for Statistical Computing}, 5 | address = {Vienna, Austria}, 6 | year = {2016}, 7 | url = {https://www.R-project.org/}, 8 | } 9 | 10 | @article{dataquier2021, 11 | doi = {10.21105/joss.03093}, 12 | url = {https://doi.org/10.21105/joss.03093}, 13 | year = {2021}, 14 | publisher = {The Open Journal}, 15 | volume = {6}, 16 | number = {61}, 17 | pages = {3093}, 18 | author = {Adrian Richter and Carsten Oliver Schmidt and Markus Krüger and Stephan Struckmann}, 19 | title = {dataquieR: assessment of data quality in epidemiological research}, 20 | journal = {Journal of Open Source Software} 21 | } 22 | 23 | @article{smarteda2019, 24 | doi = {10.21105/joss.01509}, 25 | url = {https://doi.org/10.21105/joss.01509}, 26 | year = {2019}, 27 | publisher = {The Open Journal}, 28 | volume = {4}, 29 | number = {41}, 30 | pages = {1509}, 31 | author = {Sayan Putatunda and Dayananda Ubrangala and Kiran Rama and Ravi Kondapalli}, 32 | title = {SmartEDA: An R Package for Automated Exploratory Data Analysis}, 33 | journal = {Journal of Open Source Software} 34 | } 35 | 36 | @article{datamaid2019, 37 | title={dataMaid: Your Assistant for Documenting Supervised Data Quality Screening in R}, 38 | volume={90}, 39 | url={https://www.jstatsoft.org/index.php/jss/article/view/v090i06}, 40 | doi={10.18637/jss.v090.i06}, 41 | number={6}, 42 | journal={Journal of Statistical Software}, 43 | author={Anne Helby Petersen and Claus Thorn Ekstrøm}, 44 | year={2019}, 45 | pages={1–38} 46 | } 47 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'daiquiri: Data Quality Reporting for Temporal Datasets' 3 | tags: 4 | - R 5 | - data quality 6 | - time series 7 | - reproducible research 8 | - initial data analysis 9 | authors: 10 | - name: T. 
Phuong Quan 11 | orcid: 0000-0001-8566-1817 12 | corresponding: true 13 | affiliation: 1 14 | affiliations: 15 | - name: University of Oxford, UK 16 | index: 1 17 | date: 13 December 2022 18 | bibliography: paper.bib 19 | --- 20 | 21 | # Summary 22 | 23 | The `daiquiri` R package generates data quality reports that enable quick visual 24 | review of temporal shifts in record-level data. It is designed with electronic 25 | health records in mind, but can be used for any type of record-level temporal 26 | data (i.e. tabular data where each row represents a single "event", one column 27 | contains the "event date", and other columns contain any associated values for 28 | the event, see \autoref{fig:example_prescriptions_head} for an example). 29 | 30 | ![Example dataset containing information on antibiotic prescriptions.\label{fig:example_prescriptions_head}](example_prescriptions_head.png) 31 | 32 | The package automatically creates time series plots showing aggregated values 33 | for each data field (column) depending on its contents (e.g. min/max/mean values 34 | for numeric data, no. of distinct values for categorical data), see 35 | \autoref{fig:example_prescriptions_individual}, as well as 36 | overviews for missing values, non-conformant values, and duplicated rows, see 37 | \autoref{fig:example_prescriptions_overview}. 38 | 39 | The resulting html reports are shareable and can contribute to forming a 40 | transparent record of the entire analysis process. 41 | 42 | ![Screenshot showing percentage of missing values per day, for the AdmissionDate field of the example dataset. \label{fig:example_prescriptions_individual}](example_prescriptions_admdate_missing_perc.png){ width=80% } 43 | 44 | ![Screenshot showing number of values present per day, across all fields of the example dataset. 
\label{fig:example_prescriptions_overview}](example_prescriptions_aggregated_valuespresent.png){ width=80% } 45 | 46 | # Statement of need 47 | 48 | Large routinely-collected datasets are increasingly being used in research. 49 | However, given their data are collected for operational rather than research 50 | purposes, there is a greater-than-usual need for them to be checked for data 51 | quality issues before any analyses are conducted. Events occurring at the 52 | institutional level such as software updates, new machinery or processes can 53 | cause temporal artefacts that, if not identified and taken into account, can 54 | lead to biased results and incorrect conclusions. 55 | 56 | For example, 57 | \autoref{fig:bchem_creatinine_mean} shows the mean value of all 58 | laboratory tests checking for levels of creatinine in the blood, from a large 59 | hospital group in the UK. As you can see, there are points in time where these 60 | values shift up or down suddenly and unnaturally, indicating that something 61 | changed in the way the data was collected or processed. A careful researcher 62 | needs to take these sudden changes into account, particularly if comparing or 63 | combining the data before and after these 'change points'. 64 | 65 | ![The mean value per day, of all laboratory tests checking for levels of creatinine in the blood. \label{fig:bchem_creatinine_mean}](bchem_creatinine_day_Value_mean.png){ width=80% } 66 | 67 | While these checks should theoretically be conducted by the researcher at the 68 | initial data analysis stage, in practice it is unclear to what extent this is 69 | actually done, since it is rarely, if ever, reported in published papers. With 70 | the increasing drive towards greater transparency and reproducibility within the 71 | scientific community, this essential yet often-overlooked part of the analysis 72 | process will inevitably begin to come under greater scrutiny. 
The `daiquiri` 73 | package helps researchers conduct this part of the process more thoroughly, 74 | consistently and transparently, hence increasing the quality of their studies as 75 | well as trust in the scientific process. 76 | 77 | There are a number of existing R packages which generate reports that provide an 78 | overview of a dataset's contents, such as `dataReporter` (formerly `dataMaid`) 79 | [@datamaid2019], `SmartEDA` [@smarteda2019], and `dataquieR` [@dataquier2021]. 80 | In these packages, summary statistics are calculated 81 | across all rows in the dataset, or perhaps stratified by a categorical field. In 82 | contrast, `daiquiri` focuses on how these summary statistics may change over the 83 | time scale of the dataset, which can reveal data quality issues that might otherwise be 84 | missed when using these other packages. 85 | 86 | # Acknowledgements 87 | 88 | This package was created as part of a PhD project, supervised by A. Sarah Walker, Tim Peto, Martin Landray, and Ben Lacey. 89 | 90 | I would like to thank the [rOpenSci](https://ropensci.org/) team for their valuable input. 91 | 92 | This work was supported by the National Institute for Health Research Health Protection Research Unit (NIHR HPRU) in Healthcare Associated Infections and Antimicrobial Resistance at the University of Oxford in partnership with Public Health England (PHE) (NIHR200915), and by the NIHR Oxford Biomedical Research Centre. 
93 | 94 | # References 95 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /renv/.gitignore: -------------------------------------------------------------------------------- 1 | library/ 2 | lock/ 3 | python/ 4 | staging/ 5 | -------------------------------------------------------------------------------- /renv/settings.dcf: -------------------------------------------------------------------------------- 1 | external.libraries: 2 | ignored.packages: 3 | package.dependency.fields: Imports, Depends, LinkingTo 4 | snapshot.type: implicit 5 | use.cache: TRUE 6 | vcs.ignore.library: TRUE 7 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(daiquiri) 3 | 4 | test_check("daiquiri") 5 | 
-------------------------------------------------------------------------------- /tests/testthat/_snaps/aggregate_data.md: -------------------------------------------------------------------------------- 1 | # aggregated_data object prints to console ok 2 | 3 | Dataset: completetestset 4 | 5 | Overall: 6 | Number of data fields: 4 7 | Column used for timepoint: col1 8 | Timepoint aggregation unit: day 9 | Min timepoint value: 2022-01-01 10 | Max timepoint value: 2022-01-01 11 | Total number of timepoints: 1 12 | Number of empty timepoints: 0 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/aggregate_data/test_[ALL_FIELDS_COMBINED].csv: -------------------------------------------------------------------------------- 1 | col1_byday,n,missing_n,nonconformant_n,missing_perc,nonconformant_perc 2 | 2022-01-01,2,0,0,0,0 3 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/aggregate_data/test_[DUPLICATES].csv: -------------------------------------------------------------------------------- 1 | col1_byday,sum,nonzero_perc 2 | 2022-01-01,4,100 3 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/aggregate_data/test_col1.csv: -------------------------------------------------------------------------------- 1 | col1_byday,n,midnight_n,midnight_perc 2 | 2022-01-01,1,1,100 3 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/aggregate_data/test_col2.csv: -------------------------------------------------------------------------------- 1 | col1_byday,n,missing_n,missing_perc 2 | 2022-01-01,1,0,0 3 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/field_types.md: -------------------------------------------------------------------------------- 1 | # template_field_types() generates template 
field_types output 2 | 3 | field_types( 4 | "col1" = ft_ignore(), 5 | "col2" = ft_ignore(), 6 | "col3" = ft_ignore() 7 | ) 8 | 9 | # field_types object prints to console ok 10 | 11 | Col_tp options: includes_time 12 | Col_uid 13 | Col_cat 14 | Col_cat2 15 | Col_num 16 | Col_dt options: includes_time 17 | Col_dt2 18 | Col_ft 19 | Col_sim 20 | Col_ign 21 | Col_str 22 | 23 | # field_types_advanced object prints to console ok 24 | 25 | Col_tp options: includes_time 26 | Col_uid 27 | .default_field_type 28 | 29 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/main.md: -------------------------------------------------------------------------------- 1 | # daiquiri_object prints to console ok 2 | 3 | Class: daiquiri_object 4 | Dataset: data.frame(col1 = rep("2022-01-01", 5), col2 = rep(1, 5), col3 = 1:5) 5 | Saved to: [tempdir]/daiquiri_testthatreport.html 6 | 7 | Columns in source: 3 8 | Columns imported: 2 9 | Rows in source: 5 10 | Duplicate rows removed: 4 11 | Rows imported: 1 12 | Column used for timepoint: col1 13 | Rows missing timepoint values removed: 0 14 | Total validation warnings: 1 15 | 16 | Min timepoint value: 2022-01-01 17 | Max timepoint value: 2022-01-01 18 | Timepoint aggregation unit: day 19 | Total number of timepoints: 1 20 | Number of empty timepoints: 0 21 | 22 | -------------------------------------------------------------------------------- /tests/testthat/test-field_types.R: -------------------------------------------------------------------------------- 1 | test_that("Valid field_types can be specified", { 2 | expect_s3_class( 3 | field_types( 4 | Col_tp = ft_timepoint(), 5 | Col_uid = ft_uniqueidentifier(), 6 | Col_cat = ft_categorical(), 7 | Col_cat2 = ft_categorical(), 8 | Col_num = ft_numeric(), 9 | Col_dt = ft_datetime(), 10 | Col_dt2 = ft_datetime(includes_time = FALSE), 11 | Col_ft = ft_freetext(), 12 | Col_sim = ft_simple(), 13 | Col_ign = ft_ignore(), 14 | Col_str = ft_strata() 
15 | ), 16 | "daiquiri_field_types" 17 | ) 18 | }) 19 | 20 | test_that("Invalid field_types cannot be specified", { 21 | expect_error(field_types(Col_bad = readr::col_character()), class = "invalid_field_types") 22 | expect_error(field_types(Col_bad = "hello"), class = "invalid_field_types") 23 | }) 24 | 25 | test_that("Duplicate column names in field_types specification not allowed", { 26 | expect_error( 27 | field_types( 28 | Col_dup = ft_timepoint(), 29 | Col_dup = ft_uniqueidentifier() 30 | ), 31 | class = "invalid_field_types" 32 | ) 33 | 34 | expect_error( 35 | field_types( 36 | Col_tp = ft_timepoint(), 37 | Col_dup = ft_uniqueidentifier(), 38 | Col_dup = ft_uniqueidentifier() 39 | ), 40 | class = "invalid_field_types" 41 | ) 42 | }) 43 | 44 | test_that("field_types object must include a timepoint field", { 45 | expect_error(field_types(Col_dt = ft_datetime()), 46 | class = "invalid_field_types" 47 | ) 48 | }) 49 | 50 | test_that("field_types object must not contain more than one timepoint field", { 51 | expect_error( 52 | field_types( 53 | Col_tp1 = ft_timepoint(), 54 | Col_tp2 = ft_timepoint() 55 | ), 56 | class = "invalid_field_types" 57 | ) 58 | }) 59 | 60 | test_that("field_types object must not contain more than one strata field", { 61 | expect_error( 62 | field_types( 63 | Col_tp1 = ft_timepoint(), 64 | Col_1 = ft_strata(), 65 | Col_2 = ft_strata() 66 | ), 67 | class = "invalid_field_types" 68 | ) 69 | }) 70 | 71 | test_that("field_types object must not use aggregate_by_each_category option if strata field present", { 72 | expect_error( 73 | field_types( 74 | Col_tp1 = ft_timepoint(), 75 | Col_cat = ft_categorical(), 76 | Col_cat2 = ft_categorical(aggregate_by_each_category = TRUE), 77 | Col_2 = ft_strata() 78 | ), 79 | class = "invalid_field_types" 80 | ) 81 | }) 82 | 83 | test_that("field_types_strata_field_name() returns correct strata field name if present", { 84 | field_types <- field_types( 85 | Col_tp = ft_timepoint(), 86 | Col_uid = 
ft_uniqueidentifier(), 87 | Col_cat = ft_categorical(), 88 | Col_num = ft_numeric(), 89 | Col_dt = ft_datetime(), 90 | Col_dt2 = ft_datetime(includes_time = FALSE), 91 | Col_ft = ft_freetext(), 92 | Col_sim = ft_simple(), 93 | Col_ign = ft_ignore(), 94 | Col_str = ft_strata() 95 | ) 96 | 97 | expect_equal(field_types_strata_field_name(field_types), "Col_str") 98 | }) 99 | 100 | test_that("field_types_strata_field_name() returns NULL if no strata field present", { 101 | field_types <- field_types( 102 | Col_tp = ft_timepoint(), 103 | Col_uid = ft_uniqueidentifier() 104 | ) 105 | 106 | expect_null(field_types_strata_field_name(field_types)) 107 | }) 108 | 109 | test_that("[DUPLICATES] cannot be used as a field_type colname as it is a reserved word", { 110 | expect_error(field_types("[DUPLICATES]" = ft_timepoint()), 111 | class = "invalid_field_types" 112 | ) 113 | 114 | expect_error( 115 | field_types( 116 | Col_tp = ft_timepoint(), 117 | "[DUPLICATES]" = ft_simple() 118 | ), 119 | class = "invalid_field_types" 120 | ) 121 | }) 122 | 123 | test_that("[ALL_FIELDS_COMBINED] cannot be used as a field_type colname as it is a reserved word", { 124 | expect_error(field_types("[ALL_FIELDS_COMBINED]" = ft_timepoint()), 125 | class = "invalid_field_types" 126 | ) 127 | }) 128 | 129 | 130 | test_that("template_field_types() requires a df param", { 131 | expect_error(template_field_types(), 132 | class = "invalid_param_missing" 133 | ) 134 | }) 135 | 136 | test_that("template_field_types() requires df param to be a data frame", { 137 | expect_error(template_field_types(df = c("Fieldname", 123)), 138 | class = "invalid_param_type" 139 | ) 140 | }) 141 | 142 | test_that("template_field_types() requires default_field_type param to be a field_type", { 143 | expect_error( 144 | template_field_types( 145 | df = data.frame("Fieldname" = 123), 146 | default_field_type = TRUE 147 | ), 148 | class = "invalid_param_type" 149 | ) 150 | }) 151 | 152 | test_that("template_field_types() 
generates template field_types output", { 153 | expect_snapshot_output(template_field_types(df = data.frame( 154 | "col1" = 123, 155 | "col2" = 123, 156 | "col3" = "hello" 157 | ))) 158 | }) 159 | 160 | test_that("field_types object prints to console ok", { 161 | testfield_types <- field_types( 162 | Col_tp = ft_timepoint(), 163 | Col_uid = ft_uniqueidentifier(), 164 | Col_cat = ft_categorical(), 165 | Col_cat2 = ft_categorical(), 166 | Col_num = ft_numeric(), 167 | Col_dt = ft_datetime(), 168 | Col_dt2 = ft_datetime(includes_time = FALSE), 169 | Col_ft = ft_freetext(), 170 | Col_sim = ft_simple(), 171 | Col_ign = ft_ignore(), 172 | Col_str = ft_strata() 173 | ) 174 | 175 | expect_snapshot_output(print(testfield_types)) 176 | }) 177 | 178 | test_that("field_types_advanced object prints to console ok", { 179 | testfield_types <- field_types_advanced( 180 | Col_tp = ft_timepoint(), 181 | Col_uid = ft_uniqueidentifier(), 182 | .default_field_type = ft_simple() 183 | ) 184 | 185 | expect_snapshot_output(print(testfield_types)) 186 | }) 187 | 188 | 189 | test_that(".default_field_type must be a valid field_type", { 190 | expect_error( 191 | field_types_advanced( 192 | Col_tp1 = ft_timepoint(), 193 | .default_field_type = readr::col_character() 194 | ), 195 | class = "invalid_field_types" 196 | ) 197 | }) 198 | 199 | test_that(".default_field_type cannot be a timepoint or strata field_type", { 200 | expect_error( 201 | field_types_advanced( 202 | Col_tp1 = ft_numeric(), 203 | .default_field_type = ft_timepoint() 204 | ), 205 | class = "invalid_field_types" 206 | ) 207 | 208 | expect_error( 209 | field_types_advanced( 210 | Col_tp1 = ft_timepoint(), 211 | .default_field_type = ft_strata() 212 | ), 213 | class = "invalid_field_types" 214 | ) 215 | 216 | }) 217 | 218 | test_that(".default_field_type cannot be the only field_type", { 219 | expect_error( 220 | field_types_advanced( 221 | .default_field_type = ft_simple() 222 | ), 223 | class = "invalid_field_types" 224 | ) 
225 | }) 226 | 227 | test_that(".default_field_type cannot be supplied to field_types()", { 228 | expect_error( 229 | field_types( 230 | Col_tp1 = ft_timepoint(), 231 | .default_field_type = ft_simple() 232 | ), 233 | class = "invalid_field_types" 234 | ) 235 | }) 236 | 237 | -------------------------------------------------------------------------------- /tests/testthat/test-reports.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("report_data() requires a source_data param", { 3 | expect_error( 4 | report_data(aggregated_data = structure(list(data_fields = NA), 5 | class = "daiquiri_aggregated_data" 6 | )), 7 | class = "invalid_param_missing" 8 | ) 9 | }) 10 | 11 | test_that("report_data() requires a aggregated_data param", { 12 | expect_error( 13 | report_data(source_data = structure(list(data_fields = NA), 14 | class = "daiquiri_source_data" 15 | )), 16 | class = "invalid_param_missing" 17 | ) 18 | }) 19 | 20 | test_that("report_data() requires source_data param to be a source_data object", { 21 | expect_error( 22 | report_data( 23 | source_data = data.frame("Fieldname" = 123), 24 | aggregated_data = structure(list(data_fields = NA), 25 | class = "daiquiri_aggregated_data" 26 | ) 27 | ), 28 | class = "invalid_param_type" 29 | ) 30 | }) 31 | 32 | test_that("report_data() requires aggregated_data param to be an aggregated_data object", { 33 | expect_error( 34 | report_data( 35 | source_data = structure(list(data_fields = NA), 36 | class = "daiquiri_source_data" 37 | ), 38 | aggregated_data = data.frame("Fieldname" = 123) 39 | ), 40 | class = "invalid_param_type" 41 | ) 42 | }) 43 | 44 | test_that("report_data() creates report and returns path successfully", { 45 | df <- read_data(test_path("testdata", "completetestset.csv")) 46 | source_data <- prepare_data( 47 | df, 48 | field_types = field_types( 49 | col_timepoint_err = ft_ignore(), 50 | col_timepoint = ft_timepoint(), 51 | col_date_time_err = ft_ignore(), 
52 | col_date_time = ft_datetime(), 53 | col_date_only_err = ft_ignore(), 54 | col_date_only = ft_datetime(includes_time = FALSE), 55 | col_date_uk_err = ft_ignore(), 56 | col_date_uk = ft_datetime(includes_time = FALSE, format = "%d/%m/%Y"), 57 | col_id_num_err = ft_ignore(), 58 | col_id_num = ft_uniqueidentifier(), 59 | col_id_string_err = ft_ignore(), 60 | col_id_string = ft_uniqueidentifier(), 61 | col_numeric_clean_err = ft_ignore(), 62 | col_numeric_clean = ft_numeric(), 63 | col_numeric_dirty_err = ft_ignore(), 64 | col_numeric_dirty = ft_numeric(), 65 | col_categorical_small_err = ft_ignore(), 66 | col_categorical_small = ft_categorical(aggregate_by_each_category = TRUE), 67 | col_categorical_large_err = ft_ignore(), 68 | col_categorical_large = ft_categorical(), 69 | col_freetext_err = ft_ignore(), 70 | col_freetext = ft_freetext(), 71 | col_simple_err = ft_ignore(), 72 | col_simple = ft_simple(), 73 | col_numeric_missing_err = ft_ignore(), 74 | col_numeric_missing = ft_numeric() 75 | ), 76 | dataset_description = "completetestset", 77 | show_progress = FALSE 78 | ) 79 | aggregated_data <- 80 | aggregate_data(source_data, 81 | aggregation_timeunit = "week", 82 | show_progress = FALSE 83 | ) 84 | reportpath <- 85 | report_data( 86 | source_data, 87 | aggregated_data, 88 | report_title = "Complete Test Set", 89 | save_directory = tempdir(), 90 | save_filename = "daiquiri_testthatreport", 91 | show_progress = FALSE 92 | ) 93 | 94 | expect_type(reportpath, "character") 95 | 96 | # clean up 97 | expect_true(file.remove(reportpath)) 98 | }) 99 | 100 | test_that("plots still work when all values are missing", { 101 | df <- 102 | data.table::data.table( 103 | "col_timepoint" = paste0("2022-01-", seq(10, 31)), 104 | "col_numeric_missing" = "" 105 | ) 106 | source_data <- 107 | prepare_data( 108 | df, 109 | field_types = field_types( 110 | col_timepoint = ft_timepoint(), 111 | col_numeric_missing = ft_numeric() 112 | ), 113 | dataset_description = "blankplottest", 
114 | override_column_names = FALSE, 115 | na = c("", "NULL"), 116 | show_progress = FALSE 117 | ) 118 | aggregated_data <- 119 | aggregate_data(source_data, 120 | aggregation_timeunit = "day", 121 | show_progress = FALSE 122 | ) 123 | 124 | expect_s3_class( 125 | plot_timeseries_static( 126 | agg_field = aggregated_data$aggregatefields$col_numeric_missing, 127 | agg_fun_colname = "missing_n" 128 | ), 129 | "ggplot" 130 | ) 131 | expect_s3_class( 132 | plot_overview_totals_static( 133 | agg_field = aggregated_data$aggregatefields$col_numeric_missing, 134 | aggregation_function = "missing_n" 135 | ), 136 | "ggplot" 137 | ) 138 | }) 139 | 140 | -------------------------------------------------------------------------------- /tests/testthat/test-utilities.R: -------------------------------------------------------------------------------- 1 | test_that("validate_params_required() checks is silent if required params are supplied", { 2 | # NOTE: testfn_params_required() defined in utilities.R as devtools::test() can't find it when it's defined here 3 | # Think it is something to do with environments but haven't figured it out 4 | 5 | expect_silent(testfn_params_required(1, 2)) 6 | expect_silent(testfn_params_required(p2 = 1, p1 = 2)) 7 | }) 8 | 9 | test_that("validate_params_required() checks returns an error if any required params are not supplied", { 10 | # NOTE: testfn_params_required() defined in utilities.R as devtools::test() can't find it when it's defined here 11 | 12 | expect_error(testfn_params_required(), 13 | class = "invalid_param_missing" 14 | ) 15 | expect_error(testfn_params_required(1), 16 | class = "invalid_param_missing" 17 | ) 18 | expect_error(testfn_params_required(p2 = 1), 19 | class = "invalid_param_missing" 20 | ) 21 | }) 22 | 23 | test_that("validate_params_required() allows arbitrary additional params to be supplied via ...", { 24 | expect_silent(testfn_params_required(p2 = 1, p1 = 2, passthrough = 1)) 25 | }) 26 | 27 | 
test_that("validate_params_required() works with package prefix", { 28 | expect_error(daiquiri::initialise_log(), 29 | class = "invalid_param_missing" 30 | ) 31 | }) 32 | 33 | 34 | test_that("validate_params_type() is silent if all params are of correct type", { 35 | # NOTE: testfn_params_type() defined in utilities.R as devtools::test() can't find it when it's defined here 36 | 37 | # all default args are valid 38 | expect_silent(testfn_params_type()) 39 | }) 40 | 41 | test_that("validate_params_type() checks df params are of correct type", { 42 | expect_error(testfn_params_type(df = c("Fieldname" = 123)), 43 | class = "invalid_param_type" 44 | ) 45 | }) 46 | 47 | test_that("validate_params_type() checks override_column_names params are of correct type", { 48 | expect_error(testfn_params_type(override_column_names = c("col1", "col2")), 49 | class = "invalid_param_type" 50 | ) 51 | }) 52 | 53 | test_that("validate_params_type() checks na params are of correct type", { 54 | expect_error(testfn_params_type(na = NA), 55 | class = "invalid_param_type" 56 | ) 57 | expect_error(testfn_params_type(na = c(NULL)), 58 | class = "invalid_param_type" 59 | ) 60 | expect_error(testfn_params_type(na = c(1, 2, 3)), 61 | class = "invalid_param_type" 62 | ) 63 | }) 64 | 65 | test_that("validate_params_type() checks dataset_description params are of correct type", { 66 | expect_silent(testfn_params_type(dataset_description = "")) 67 | expect_silent(testfn_params_type(dataset_description = NULL)) 68 | expect_error(testfn_params_type(dataset_description = 123), 69 | class = "invalid_param_type" 70 | ) 71 | expect_error(testfn_params_type(dataset_description = c("col1", "col2")), 72 | class = "invalid_param_type" 73 | ) 74 | }) 75 | 76 | test_that("validate_params_type() checks report_title params are of correct type", { 77 | expect_silent(testfn_params_type(report_title = "")) 78 | expect_silent(testfn_params_type(report_title = NULL)) 79 | expect_error(testfn_params_type(report_title 
= 123), 80 | class = "invalid_param_type" 81 | ) 82 | expect_error(testfn_params_type(report_title = c("col1", "col2")), 83 | class = "invalid_param_type" 84 | ) 85 | }) 86 | 87 | test_that("validate_params_type() checks aggregation_timeunit params are one of day/week/month/quarter/year", { 88 | expect_silent(testfn_params_type(aggregation_timeunit = "day")) 89 | expect_silent(testfn_params_type(aggregation_timeunit = "week")) 90 | expect_silent(testfn_params_type(aggregation_timeunit = "month")) 91 | expect_silent(testfn_params_type(aggregation_timeunit = "quarter")) 92 | expect_silent(testfn_params_type(aggregation_timeunit = "year")) 93 | expect_error(testfn_params_type(aggregation_timeunit = "other"), 94 | class = "invalid_param_type" 95 | ) 96 | }) 97 | 98 | test_that("validate_params_type() checks aggregation_timeunit params cannot be a vector", { 99 | expect_error(testfn_params_type(aggregation_timeunit = c("day", "week")), 100 | class = "invalid_param_type" 101 | ) 102 | }) 103 | 104 | test_that("validate_params_type() checks save_directory params are of correct type", { 105 | # Existing directory (the testthat directory itself) is accepted 106 | expect_silent(testfn_params_type(save_directory = test_path())) 107 | # Existing directory with a trailing slash is accepted too 108 | expect_silent(testfn_params_type(save_directory = paste0(test_path(), "/"))) 109 | # Path that is expected not to exist is rejected 110 | expect_error(testfn_params_type(save_directory = "fakedir"), 111 | class = "invalid_param_type" 112 | ) 113 | # Path to an existing file (this test file) rather than a directory is rejected 114 | expect_error(testfn_params_type(save_directory = test_path("test-utilities.R")), 115 | class = "invalid_param_type" 116 | ) 117 | }) 118 | 119 | test_that("validate_params_type() checks save_filename params are allowed to contain alphanumerics, - and _", { 120 | expect_silent(testfn_params_type(save_filename = "alpha123")) 121 | expect_silent(testfn_params_type(save_filename = "alpha-123")) 122 | expect_silent(testfn_params_type(save_filename = "alpha_123")) 123 | }) 124 | 125 | test_that("validate_params_type() checks
save_filename params are allowed to be NULL", { 126 | expect_silent(testfn_params_type(save_filename = NULL)) 127 | }) 128 | 129 | test_that("validate_params_type() checks save_filename params are not allowed to contain the extension", { 130 | expect_error(testfn_params_type(save_filename = "badname.html"), 131 | class = "invalid_param_type" 132 | ) 133 | }) 134 | 135 | test_that("validate_params_type() checks save_filename params are not allowed to contain punctuation other than - and _", { 136 | expect_error(testfn_params_type(save_filename = "bad.name"), 137 | class = "invalid_param_type" 138 | ) 139 | expect_error(testfn_params_type(save_filename = "badname&"), 140 | class = "invalid_param_type" 141 | ) 142 | expect_error(testfn_params_type(save_filename = "badname*"), 143 | class = "invalid_param_type" 144 | ) 145 | }) 146 | 147 | test_that("validate_params_type() checks show_progress params are of correct type", { 148 | expect_error(testfn_params_type(show_progress = 1), 149 | class = "invalid_param_type" 150 | ) 151 | }) 152 | 153 | test_that("validate_params_type() checks log_directory params are of correct type", { 154 | expect_silent(testfn_params_type(log_directory = NULL)) 155 | expect_error(testfn_params_type(log_directory = "fakedir"), 156 | class = "invalid_param_type" 157 | ) 158 | }) 159 | 160 | test_that("validate_params_type() checks source_data params are of correct type", { 161 | expect_error(testfn_params_type(source_data = 1), 162 | class = "invalid_param_type" 163 | ) 164 | }) 165 | 166 | test_that("validate_params_type() checks aggregated_data params are of correct type", { 167 | expect_error(testfn_params_type(aggregated_data = 1), 168 | class = "invalid_param_type" 169 | ) 170 | }) 171 | 172 | test_that("validate_params_type() works with package prefix", { 173 | expect_error(daiquiri::initialise_log(log_directory = "hello"), 174 | class = "invalid_param_type" 175 | ) 176 | }) 177 | 178 | 179 | test_that("initialise_log() requires a 
log_directory param", { 180 | expect_error(initialise_log(), 181 | class = "invalid_param_missing" 182 | ) 183 | }) 184 | 185 | test_that("initialise_log() requires log_directory param to be a valid path", { 186 | expect_error(initialise_log(log_directory = "hello"), 187 | class = "invalid_param_type" 188 | ) 189 | }) 190 | 191 | test_that("initialise_log() creates a file", { 192 | log_filename <- initialise_log(log_directory = tempdir()) 193 | # clean up 194 | expect_true(file.remove(log_filename)) 195 | }) 196 | 197 | test_that("log_message() writes to log", { 198 | log_filename <- initialise_log(log_directory = tempdir()) 199 | expect_silent(log_message("test message", show_progress = FALSE)) 200 | log_text <- readLines(log_filename) 201 | expect_true(any(grepl("test message", log_text))) 202 | # clean up 203 | expect_true(file.remove(log_filename)) 204 | }) 205 | 206 | test_that("close_log() returns the name of the closed log file", { 207 | log_filename <- initialise_log(log_directory = tempdir()) 208 | expect_equal(close_log(), log_filename) 209 | # clean up: unlink() is a no-op if the file is already gone 210 | unlink(log_filename) 211 | }) 212 | 213 | test_that("close_log() returns empty string if no log file found", { 214 | expect_equal(close_log(), "") 215 | }) 216 | -------------------------------------------------------------------------------- /tests/testthat/testdata/completetestset.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/daiquiri/b0a38a6988108acd81dfe499d75706ab1801615b/tests/testthat/testdata/completetestset.xlsx -------------------------------------------------------------------------------- /tests/testthat/testdata/specialchars_colnames.csv: -------------------------------------------------------------------------------- 1 | col_underscore,col space,col-dash,col.dot,col!exclamation, col%percent, col&ersand,"col""doublequote", col'singlequote, col[]squarebrackets, col()brackets 2 |
2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01,2001-01-01 3 | 2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01,2001-02-01 4 | 2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01,2001-03-01 5 | 2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01,2001-04-01 6 | 2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01,2001-05-01 7 | 2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01,2001-06-01 8 | -------------------------------------------------------------------------------- /tests/testthat/testdata/specialchars_excel.csv: -------------------------------------------------------------------------------- 1 | date,freetext,numeric_good,numeric_mixed 2 | 01/01/2001,plain string,5,5 3 | 02/01/2001,"hello, there",, 4 | 03/01/2001,the show's on,7,7.544 5 | 04/01/2001,"""so,"" he said",7.256,8g 6 | 05/01/2001,"lot's of ""quotes""",0.00001,"""9""" 7 | 06/01/2001,"just "" here",0,'10' 8 | 07/01/2001,"line 9 | return",11000,"11,000" 10 | 08/01/2001,end,-78,-78 11 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/articles/example_report.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example report" 3 | --- 4 | 5 | ```{r example-options, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | echo = FALSE 10 | ) 11 | ``` 12 | 13 | ```{r example-setup} 14 | library(daiquiri) 15 | ``` 16 | 17 | 
```{r generate-report} 18 | raw_data <- read_data( 19 | system.file("extdata", "example_prescriptions.csv", 20 | package = "daiquiri"), 21 | show_progress = FALSE 22 | ) 23 | 24 | fts <- field_types( 25 | PrescriptionID = ft_uniqueidentifier(), 26 | PrescriptionDate = ft_timepoint(), 27 | AdmissionDate = ft_datetime(includes_time = FALSE, na = "1800-01-01"), 28 | Drug = ft_freetext(), 29 | Dose = ft_numeric(), 30 | DoseUnit = ft_categorical(), 31 | PatientID = ft_ignore(), 32 | Location = ft_categorical(aggregate_by_each_category = TRUE) 33 | ) 34 | 35 | daiqobj <- daiquiri_report(raw_data, 36 | field_types = fts, 37 | dataset_description = "Example prescription data", 38 | save_directory = "../../docs/articles", 39 | save_filename = "example_prescriptions", 40 | show_progress = FALSE) 41 | 42 | ``` 43 | 44 | ```{css hide-header-logo, echo=FALSE} 45 | img.logo { 46 | display: none; 47 | } 48 | ``` 49 | 50 | ```{=html} 51 | 53 | ``` 54 | 55 | -------------------------------------------------------------------------------- /vignettes/articles/example_report_stratified.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example stratified report" 3 | --- 4 | 5 | ```{r example-options, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | echo = FALSE 10 | ) 11 | ``` 12 | 13 | ```{r example-setup} 14 | library(daiquiri) 15 | ``` 16 | 17 | ```{r generate-report} 18 | raw_data <- read_data( 19 | system.file("extdata", "example_prescriptions.csv", 20 | package = "daiquiri"), 21 | show_progress = FALSE 22 | ) 23 | 24 | fts <- field_types( 25 | PrescriptionID = ft_uniqueidentifier(), 26 | PrescriptionDate = ft_timepoint(), 27 | AdmissionDate = ft_datetime(includes_time = FALSE, na = "1800-01-01"), 28 | Drug = ft_freetext(), 29 | Dose = ft_numeric(), 30 | DoseUnit = ft_categorical(), 31 | PatientID = ft_ignore(), 32 | Location = ft_strata() 33 | ) 34 | 35 | daiqobj <- daiquiri_report( 36 
| raw_data, 37 | field_types = fts, 38 | dataset_description = "Example prescription data", 39 | save_directory = "../../docs/articles", 40 | save_filename = "example_prescriptions_stratified", 41 | show_progress = FALSE 42 | ) 43 | 44 | ``` 45 | 46 | ```{css hide-header-logo, echo=FALSE} 47 | img.logo { 48 | display: none; 49 | } 50 | ``` 51 | 52 | ```{=html} 53 | 55 | ``` 56 | 57 | --------------------------------------------------------------------------------