├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R ├── complete-periods.R ├── create-metrics.R ├── cross-dimensions.R ├── cross-join.R ├── cross-periods.R ├── dimensions.R ├── examples.R ├── generate-date-periods.R ├── globals.R ├── tbl-metric-group.R ├── tbl-metric.R ├── use-metrics-scaffold.R └── utils.R ├── README.md ├── codecov.yml ├── data-raw └── nycflight_metrics.R ├── data └── flights_nyc_avg_arr_delay.rda ├── docker-compose.yml ├── inst ├── WORDLIST └── extdata │ └── metrics_flights_nyc.Rmd ├── man ├── check_metric.Rd ├── complete_periods.Rd ├── condense_metric.Rd ├── create_metrics.Rd ├── cross_by_dimensions.Rd ├── cross_by_periods.Rd ├── cross_join.Rd ├── discard_constant_dimensions.Rd ├── discard_dimensions.Rd ├── flights_nyc_avg_arr_delay.Rd ├── gather_metrics.Rd ├── generate_date_periods.Rd ├── keep_dimensions.Rd ├── metric-group-s3.Rd ├── metric-methods.Rd ├── metric-s3.Rd ├── metric_group.Rd ├── print.tbl_metric_group.Rd ├── reclass.Rd ├── reexports.Rd ├── remove_attribute_all.Rd ├── use_metrics_scaffold.Rd ├── var_names_dimensions.Rd └── var_names_not_dimensions.Rd ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── helper-src.R │ ├── test-create-metrics.R │ ├── test-cross-dimensions.R │ ├── test-cross-periods.R │ ├── test-discard-dimensions.R │ ├── test-keep-dimensions.R │ ├── test-metric-group.R │ └── test-use-metrics-scaffold.R └── tidymetrics.Rproj /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CODE_OF_CONDUCT\.md$ 2 | ^codecov\.yml$ 3 | ^\.travis\.yml$ 4 | ^data-raw$ 5 | ^.*\.Rproj$ 6 | ^\.Rproj\.user$ 7 | LICENSE.md 8 | docker-compose.yml 9 | ^\.github$ 10 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: R-CMD-check 4 | 5 | jobs: 6 | R-CMD-check: 7 | runs-on: macOS-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - uses: r-lib/actions/setup-r@master 11 | - name: Install dependencies 12 | run: | 13 | install.packages(c("remotes", "rcmdcheck")) 14 | remotes::install_deps(dependencies = TRUE) 15 | shell: Rscript {0} 16 | - name: Check 17 | run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") 18 | shell: Rscript {0} 19 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | runs-on: macOS-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: r-lib/actions/pr-fetch@master 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | - uses: r-lib/actions/setup-r@master 16 | - name: Install dependencies 17 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 18 | - name: Document 19 | run: Rscript -e 'roxygen2::roxygenise()' 20 | - name: commit 21 | run: | 22 | git add man/\* NAMESPACE 23 | git commit -m 'Document' 24 | - uses: r-lib/actions/pr-push@master 25 | with: 26 | repo-token: ${{ secrets.GITHUB_TOKEN }} 27 | style: 28 | if: startsWith(github.event.comment.body, '/style') 29 | name: style 30 | runs-on: macOS-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: r-lib/actions/pr-fetch@master 34 | with: 35 | repo-token: ${{ secrets.GITHUB_TOKEN }} 36 | - uses: r-lib/actions/setup-r@master 37 | - name: Install dependencies 38 | run: Rscript -e 'install.packages("styler")' 39 | - name: Style 40 | run: Rscript -e 'styler::style_pkg()' 41 | - name: commit 42 | run: | 43 | git add \*.R 44 | git commit -m 'Style' 45 | - uses: r-lib/actions/pr-push@master 46 | with: 47 | repo-token: ${{ secrets.GITHUB_TOKEN }} 48 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | branches: 7 | - master 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: macOS-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - uses: r-lib/actions/setup-r@master 18 | 19 | - uses: r-lib/actions/setup-pandoc@master 20 | 21 | - name: Query dependencies 22 | run: | 23 | install.packages('remotes') 24 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 25 | shell: Rscript {0} 26 | 27 | - name: Cache R packages 28 | uses: actions/cache@v1 29 | with: 30 | path: ${{ env.R_LIBS_USER }} 31 | key: macOS-r-4.0-1-${{ hashFiles('.github/depends.Rds') }} 32 | restore-keys: macOS-r-4.0-1- 33 | 34 | - name: Install dependencies 35 | run: | 36 | install.packages(c("remotes")) 37 | remotes::install_deps(dependencies = TRUE) 38 | remotes::install_cran("covr") 39 | shell: Rscript {0} 40 | 41 | - name: Test coverage 42 | run: covr::codecov() 43 | shell: Rscript {0} 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | 6 | .DS_Store 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | services: 7 | - postgresql 8 | after_success: 9 | - Rscript -e 'covr::codecov()' 10 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (https://www.contributor-covenant.org), version 1.0.0, available at 25 | https://contributor-covenant.org/version/1/0/0/. 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: tidymetrics 2 | Type: Package 3 | Title: A Tidy Approach to Dimensional Modeling 4 | Version: 0.0.1 5 | Authors@R: c(person("Ramnath", "Vaidyanathan", email = "ramnath.vaidya@gmail.com", role = c("cre", "aut")), 6 | person("David", "Robinson", email = "admiral.david@gmail.com", role = "aut")) 7 | Maintainer: Ramnath Vaidyanathan 8 | Description: Offers tools for aggregating data while segmenting by time, periods, 9 | and dimensions, while allowing documentation and metadata to be associated with it. 10 | License: MIT + file LICENSE 11 | Encoding: UTF-8 12 | LazyData: true 13 | Depends: R (>= 3.4.0) 14 | Imports: 15 | dplyr, 16 | stringr, 17 | tidyr, 18 | pillar, 19 | lubridate, 20 | magrittr, 21 | rlang, 22 | yaml, 23 | tibble, 24 | rmarkdown, 25 | assertthat, 26 | forcats, 27 | glue, 28 | purrr, 29 | rstudioapi 30 | RoxygenNote: 7.1.1 31 | Suggests: 32 | ggplot2, 33 | testthat, 34 | nycflights13, 35 | covr, 36 | dbplyr, 37 | RPostgreSQL, 38 | DBI, 39 | spelling 40 | Roxygen: list(markdown = TRUE) 41 | Language: en-US 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Ramnath Vaidyanathan and David Robinson 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 Ramnath Vaidyanathan and David Robinson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(anti_join,tbl_metric) 4 | S3method(anti_join,tbl_metric_group) 5 | S3method(arrange,tbl_metric) 6 | S3method(arrange,tbl_metric_group) 7 | S3method(as_tibble,tbl_metric) 8 | S3method(as_tibble,tbl_metric_group) 9 | S3method(count,tbl_metric_group) 10 | S3method(cross_by_periods,tbl_df) 11 | S3method(cross_by_periods,tbl_lazy) 12 | S3method(cross_join,data.frame) 13 | S3method(cross_join,tbl_sql) 14 | S3method(distinct,tbl_metric) 15 | S3method(filter,tbl_metric) 16 | S3method(filter,tbl_metric_group) 17 | S3method(group_by,tbl_metric) 18 | S3method(group_by,tbl_metric_group) 19 | S3method(inner_join,tbl_metric) 20 | S3method(inner_join,tbl_metric_group) 21 | S3method(left_join,tbl_metric) 22 | S3method(left_join,tbl_metric_group) 23 | S3method(mutate,tbl_metric) 24 | S3method(mutate,tbl_metric_group) 25 | S3method(print,tbl_metric) 26 | S3method(print,tbl_metric_group) 27 | S3method(reclass,tbl_metric) 28 | S3method(reclass,tbl_metric_group) 29 | S3method(rename,tbl_metric) 30 | S3method(rename,tbl_metric_group) 31 | S3method(right_join,tbl_metric) 32 | S3method(right_join,tbl_metric_group) 33 | S3method(select,tbl_metric) 34 | S3method(select,tbl_metric_group) 35 | S3method(semi_join,tbl_metric) 36 | S3method(semi_join,tbl_metric_group) 37 | S3method(summarise,tbl_metric) 38 | S3method(summarise,tbl_metric_group) 39 | S3method(transmute,tbl_metric) 40 | S3method(transmute,tbl_metric_group) 41 | export(check_metric) 42 | export(complete_periods) 43 | export(condense_metric) 44 | export(create_metric_group) 45 | export(create_metrics) 46 | export(cross_by_dimensions) 47 | export(cross_by_periods) 48 | export(cross_by_periods_cumulative) 49 | export(cross_join) 50 | export(discard_constant_dimensions) 51 | export(discard_dimensions) 52 | export(filter) 53 | export(keep_dimensions) 54 | export(reclass) 55 | export(remove_attribute_all) 56 | export(use_metrics_scaffold) 57 | import(dplyr) 58 | importFrom(dplyr,anti_join) 59 | importFrom(dplyr,arrange) 60 | importFrom(dplyr,as_data_frame) 61 | importFrom(dplyr,as_tibble) 62 | importFrom(dplyr,filter) 63 | importFrom(dplyr,group_by) 64 | importFrom(dplyr,inner_join) 65 | importFrom(dplyr,left_join) 66 | importFrom(dplyr,mutate) 67 | importFrom(dplyr,rename) 68 | importFrom(dplyr,right_join) 69 | importFrom(dplyr,select) 70 | importFrom(dplyr,semi_join) 71 | importFrom(dplyr,summarise) 72 | importFrom(dplyr,transmute) 73 | importFrom(purrr,keep) 74 | importFrom(purrr,map) 75 | importFrom(rlang,":=") 76 | importFrom(tidyr,gather) 77 | -------------------------------------------------------------------------------- /R/complete-periods.R: -------------------------------------------------------------------------------- 1 | #' Given a cumulative metric, add dates for the end of each period 2 | #' 3 | #' Some metrics like ARR are measured cumulatively, so in order to create a bar plot 4 | #' per month or quarter we need to pick the last value from each period. For example, 5 | #' the ARR for January 2019 would be measured as of 2019-01-31. Analogously 6 | #' to the tidyr function `complete()`, this adds rows representing each period 7 | #' present in the data. 8 | #' 9 | #' @param metric A metric table in wide format, containing "date" and "period" columns as 10 | #' well as one or more dimensions and metric values. 11 | #' @param periods Vector of periods to add: one or more of "week", "month", "quarter" or "year". 12 | #' @param add_incomplete If TRUE a value of the running incomplete period will be added. 13 | #' @param week_start when unit is \code{weeks}, specifies the reference day. 7 14 | #' represents Sunday and 1 represents Monday. Note that we use a default of 1 15 | #' instead of 7, in order to be consistent with SQL. 16 | #' 17 | #' @examples 18 | #' 19 | #' library(dplyr) 20 | #' 21 | #' flights <- nycflights13::flights %>% 22 | #' mutate(date = as.Date(ISOdate(year, month, day))) 23 | #' 24 | #' # Include number and cumulative number of flights 25 | #' cumulative_summary <- flights %>% 26 | #' cross_by_periods(periods = "day") %>% 27 | #' summarize(nb_flights = n()) %>% 28 | #' arrange(date) %>% 29 | #' mutate(cumulative_flights = cumsum(nb_flights)) %>% 30 | #' ungroup() 31 | #' 32 | #' # Have periods for week and month as well, representing the end of that period 33 | #' library(ggplot2) 34 | #' 35 | #' cumulative_day_week_month <- cumulative_summary %>% 36 | #' complete_periods(periods = c("week", "month")) 37 | #' 38 | #' cumulative_day_week_month %>% 39 | #' ggplot(aes(date, cumulative_flights, color = period)) + 40 | #' geom_point() 41 | #' @export 42 | complete_periods <- function(metric, 43 | periods = c("month"), 44 | add_incomplete = FALSE, 45 | week_start = getOption('lubridate.week.start', 1)) { 46 | # Check the arguments 47 | if (!"period" %in% colnames(metric)) { 48 | stop("Metric must have a period column (is this a metric data frame)?") 49 | } 50 | if (!"day" %in% metric$period) { 51 | stop("Metric must have a day period to be completed") 52 | } 53 | if (any(!periods %in% c("week", "month", "quarter", "year"))) { 54 | stop( 55 | "Only periods that can be added by complete_periods_end are ", 56 | "week, month, quarter and year" 57 | ) 58 | } 59 | 60 | # only add periods that aren't already in there 61 | periods <- setdiff(periods, unique(metric$period)) 62 | 63 | # last date we have values for 64 | last_date <- max(metric$date) 65 | 66 | new_periods <- metric %>% 67 | dplyr::filter(period == "day") %>% 68 | dplyr::select(-period) %>% 69 | tidyr::crossing(period = periods) %>% 70 | dplyr::group_by(period) %>% 71 | dplyr::filter(date == as.Date(lubridate::ceiling_date(date, period[1], week_start = week_start)) - 1 | (add_incomplete & date == last_date)) %>% 72 | dplyr::mutate(date = as.Date(lubridate::floor_date(date, period[1], week_start = week_start))) %>% 73 | dplyr::ungroup() 74 | 75 | bind_rows(metric, new_periods) 76 | } 77 | -------------------------------------------------------------------------------- /R/create-metrics.R: -------------------------------------------------------------------------------- 1 | #' Given a metric tbl and an Rmd file, turn into a named list of metric objects 2 | #' 3 | #' @param ... One or more metric tables in wide metric format: one column for each metric. 4 | #' @param rmd_file The Rmd file that generated the compact metrics, which has 5 | #' documentation for the metrics and dimensions stored in the YAML front matter. 6 | #' If no Rmd file is given, it uses the currently running one. 7 | #' 8 | #' @return A named list of metric objects. Each of these has both the data and the metadata 9 | #' (documentation, dimensions, owner, etc) to make an interactive visualization. 10 | #' 11 | #' @examples 12 | #' 13 | #' # TODO 14 | #' @export 15 | create_metrics <- function(..., rmd_file = NULL) { 16 | metrics <- list(...) 17 | 18 | if (length(metrics) == 0) { 19 | stop("create_metrics takes at least one argument") 20 | } 21 | 22 | if (length(metrics) > 1) { 23 | metrics_each <- purrr::map(metrics, create_metrics, rmd_file = rmd_file) 24 | all_metrics <- do.call(c, metrics_each) 25 | 26 | # A structure check ensuring uniqueness across all objects 27 | assertthat::assert_that(length(unique(names(all_metrics))) == length(all_metrics), 28 | msg = "Metrics don't have unique names" 29 | ) 30 | 31 | return(all_metrics) 32 | } 33 | # Now there's just one metric dataset, so construct it 34 | data <- metrics[[1]] 35 | 36 | metric_docs <- get_metric_docs(rmd_file) 37 | 38 | # an Rmd always has same category/subcategory 39 | category <- metric_docs[[1]]$category 40 | subcategory <- metric_docs[[1]]$subcategory 41 | 42 | data_nested <- data %>% 43 | gather_metrics() %>% 44 | filter(!is.na(value)) %>% 45 | tidyr::nest_legacy(-metric) %>% 46 | dplyr::mutate(metric_full = paste(category, subcategory, metric, sep = "_")) 47 | 48 | missing_metrics <- setdiff(data_nested$metric_full, names(metric_docs)) 49 | if (length(missing_metrics) > 0) { 50 | stop("Couldn't find documentation for metric(s): ", paste(missing_metrics, collapse = ", ")) 51 | } 52 | 53 | metrics_combined <- data_nested %>% 54 | mutate( 55 | documentation = metric_docs[metric_full], 56 | combined = purrr::map2(data, documentation, combine_metric) 57 | ) 58 | 59 | ret <- metrics_combined$combined %>% 60 | purrr::map(~ { 61 | attr(.x, "metadata")$updated_at <- Sys.time() 62 | return(.x) 63 | }) 64 | names(ret) <- metrics_combined$metric_full 65 | 66 | # sanity and structure checks 67 | context_name <- paste(category, subcategory, sep = "_") 68 | 69 | assertthat::assert_that(length(ret) > 0, 70 | msg = "No metrics found ({ context_name })" 71 | ) 72 | assertthat::assert_that(length(unique(names(ret))) == length(ret), 73 | msg = "Metrics don't have unique names ({ context_name })" 74 | ) 75 | 76 | for (metric in ret) { 77 | check_metric(metric) 78 | } 79 | 80 | purrr::map(ret, prune_dimensions) 81 | } 82 | 83 | get_metric_docs <- function(rmd_file = NULL) { 84 | if (!is.null(rmd_file)) { 85 | metric_docs <- parse_metrics_header(rmarkdown::yaml_front_matter(rmd_file)) 86 | } else if (length(rmarkdown::metadata) > 0) { 87 | metric_docs <- parse_metrics_header(rmarkdown::metadata) 88 | } else { 89 | # If running in RStudio, get the current document 90 | rmd_file <- rstudioapi::getActiveDocumentContext()$path 91 | 92 | if (!stringr::str_detect(rmd_file, "\\.Rmd$")) { 93 | stop( 94 | "create_metrics must either be given the path to an Rmd file, run in a rendered Rmd, ", 95 | "or be run in RStudio as part of the Rmd (that is, by pressing CMD-RETURN with your ", 96 | "cursor in the Rmd, not e.g. copy-pasted into the R terminal)." 97 | ) 98 | } 99 | 100 | metric_docs <- parse_metrics_header(rmarkdown::yaml_front_matter(rmd_file)) 101 | } 102 | return(metric_docs) 103 | } 104 | 105 | 106 | ## Internal utility functions for create_metrics 107 | 108 | parse_metrics_header <- function(y) { 109 | name_components <- stringr::str_split(y$name, "_")[[1]] 110 | 111 | shared <- c( 112 | list( 113 | category = name_components[2], 114 | subcategory = name_components[3] 115 | ), 116 | y[c("owner", "dimensions")] 117 | ) 118 | 119 | ret <- purrr::map(names(y$metrics), ~ c( 120 | list( 121 | metric = ., 122 | metric_full = paste(name_components[2], 123 | name_components[3], 124 | ., 125 | sep = "_" 126 | ) 127 | ), 128 | y$metrics[[.]], 129 | shared 130 | )) 131 | names(ret) <- purrr::map(ret, "metric_full") 132 | ret 133 | } 134 | 135 | combine_metric <- function(data, metadata) { 136 | # reorder dimensions 137 | for (dimension_name in names(metadata$dimensions)) { 138 | levs <- metadata$dimensions[[dimension_name]]$levels 139 | 140 | if (is.list(levs)) { 141 | # named list of colors. Grab the name within each of the key-value pairs 142 | levs <- purrr::map_chr(levs, names) 143 | } 144 | 145 | if (!is.null(levs) && dimension_name %in% colnames(data)) { 146 | if (any(duplicated(levs))) { 147 | stop(glue::glue("Duplicated levels in { dimension_name } in { metadata$metric }")) 148 | } 149 | 150 | data[[dimension_name]] <- forcats::fct_relevel(data[[dimension_name]], c("All", levs)) 151 | } 152 | } 153 | 154 | class(data) <- c("tbl_metric", class(data)) 155 | attr(data, "metadata") <- metadata 156 | 157 | # condense it if requested 158 | if (!is.null(metadata$store_dimensions)) { 159 | data <- condense_metric(data, metadata$store_dimensions) 160 | } 161 | 162 | data 163 | } 164 | -------------------------------------------------------------------------------- /R/cross-dimensions.R: -------------------------------------------------------------------------------- 1 | #' Cross by dimensions 2 | #' 3 | #' This function stacks an extra copy of the table for each dimension column 4 | #' specified as an argument, replaces the value of the column with the word 5 | #' "All", and finally groups by all the columns. It acts as an extended 6 | #' `group_by` that allows complete summaries across each individual 7 | #' dimension and possible combinations. It works both in-database and in-memory. 8 | #' 9 | #' @param tbl A table 10 | #' @param ... A selection of columns 11 | #' @param add Whether to leave the existing groups as well instead of replacing 12 | #' them (by default, yes). 13 | #' @param max_dimensions The number of (non-All) dimensions that each row 14 | #' can have. This reduces the size of a metrics table, by limiting the number 15 | #' of dimensions that can be anything besides All at the same time. 16 | #' @param collect_fun A function to collect or materialize intermediate tables. 17 | #' This is useful when dealing with large tables in which case the resulting 18 | #' SQL queries can become very complex and expensive to execute. Materializing 19 | #' intermediate tables as temporary tables can improve the efficiency of 20 | #' the query. 21 | #' 22 | #' @importFrom rlang := 23 | #' 24 | #' @seealso [discard_dimensions()] 25 | #' 26 | #' @examples 27 | #' # Data Frame 28 | #' library(dplyr) 29 | #' 30 | #' mtcars %>% 31 | #' cross_by_dimensions(cyl, am) %>% 32 | #' summarize(avg_mpg = mean(mpg)) 33 | #' 34 | #' flights <- nycflights13::flights %>% 35 | #' mutate(date = as.Date(ISOdate(year, month, day))) 36 | #' 37 | #' # find flight delays by carrier, origin, and Overall 38 | #' flight_summary <- nycflights13::flights %>% 39 | #' cross_by_dimensions(carrier, origin) %>% 40 | #' summarize( 41 | #' nb_flights = n(), 42 | #' avg_arr_delay = mean(arr_delay, na.rm = TRUE) 43 | #' ) 44 | #' 45 | #' flight_summary 46 | #' 47 | #' flight_summary <- nycflights13::flights %>% 48 | #' cross_by_dimensions(carrier, origin, max_dimensions = 1) %>% 49 | #' summarize( 50 | #' nb_flights = n(), 51 | #' avg_arr_delay = mean(arr_delay, na.rm = TRUE) 52 | #' ) 53 | #' 54 | #' flight_summary 55 | #' 56 | #' # This works well when combined with discard_dimensions, which filters for 57 | #' # an All level and removes the column 58 | #' 59 | #' # Look just by carrier 60 | #' flight_summary %>% 61 | #' discard_dimensions(origin) 62 | #' 63 | #' flight_summary %>% 64 | #' discard_dimensions(carrier) 65 | #' @export 66 | cross_by_dimensions <- function(tbl, ..., add = TRUE, max_dimensions = NULL, 67 | collect_fun = NULL) { 68 | g_vars <- dplyr::group_vars(tbl) 69 | 70 | columns <- ensyms(...) 71 | 72 | # Set up all columns as characters (since they can be "All") 73 | tbl <- tbl %>% 74 | ungroup() %>% 75 | mutate_at(vars(!!!columns), as.character) 76 | 77 | # Separate cases if there's a max_dimensions argument 78 | if (!is.null(max_dimensions)) { 79 | tbl <- tbl %>% 80 | cross_by_dimensions_limited(columns, 81 | max_dimensions = max_dimensions, 82 | collect_fun = collect_fun 83 | ) 84 | } else { 85 | # Combine with k unions, instead of the 2 ^ n that cross_by_dimensions_limited would do 86 | for (column in columns) { 87 | tbl <- tbl %>% 88 | mutate(!!column := "All") %>% 89 | union_all(tbl) 90 | if (!is.null(collect_fun)) { 91 | tbl <- collect_fun(tbl) 92 | } 93 | } 94 | } 95 | 96 | # Regroup 97 | tbl %>% 98 | group_by_at(vars(g_vars)) %>% 99 | group_by(!!!columns, add = add) 100 | } 101 | 102 | cross_by_dimensions_limited <- function(tbl, column_symbols, max_dimensions, 103 | collect_fun = NULL) { 104 | columns <- purrr::map_chr(column_symbols, quo_name) 105 | 106 | # Get all the combinations of columns with up to n items turned to "All" 107 | num_not_all <- seq(length(columns) - max_dimensions, length(columns)) 108 | 109 | cols_list <- num_not_all %>% 110 | purrr::map(~ utils::combn(columns, .)) %>% 111 | purrr::map(~ lapply(1:ncol(.), function(i) .[, i])) %>% 112 | purrr::reduce(c) 113 | 114 | d <- cols_list %>% 115 | purrr::map(~ mutate_at(tbl, vars(.x), ~ ifelse(TRUE, 'All', NA))) 116 | if (!is.null(collect_fun)) { 117 | d <- d %>% 118 | purrr::map(collect_fun) 119 | } 120 | d %>% 121 | purrr::reduce(union_all) 122 | } 123 | -------------------------------------------------------------------------------- /R/cross-join.R: -------------------------------------------------------------------------------- 1 | #' Cross join two tables together, including all combinations of rows 2 | #' 3 | #' Locally, this is equivalent to tidyr::crossing. 4 | #' 5 | #' @param x,y tbls to join 6 | #' @param ... additional arguments to be passed on to 7 | #' [dplyr::full_join()] or [tidyr::crossing()] 8 | #' @export 9 | #' @examples 10 | #' d1 <- dplyr::tibble(x = 1:3) 11 | #' d2 <- dplyr::tibble(y = 1:2) 12 | #' cross_join(d1, d2) 13 | cross_join <- function(x, y, ...) { 14 | UseMethod("cross_join", x) 15 | } 16 | 17 | #' @export 18 | cross_join.tbl_sql <- function(x, y, ...) { 19 | dplyr::full_join(x, y, by = character()) 20 | } 21 | 22 | #' @export 23 | cross_join.data.frame <- function(x, y, ...) { 24 | tidyr::crossing(x, y, ...) 25 | } 26 | -------------------------------------------------------------------------------- /R/cross-periods.R: -------------------------------------------------------------------------------- 1 | #' Expand a table so that it can be aggregated by a period 2 | #' 3 | #' Cross by any set of calendar periods (like day or week), rolling windows, 4 | #' or recent intervals (like "4 Weeks", or "8 Weeks"). This means that each 5 | #' row in the input will appear potentially multiple times, each time associated 6 | #' with a different period and date. 7 | #' 8 | #' @param tbl A tbl, either local or remote. 9 | #' @param periods A vector of calendar periods. This supports "day", "week", "month", "quarter", 10 | #' and "year". 11 | #' @param windows A vector of windows, each representing a # of days 12 | #' @param intervals Whether a preselected set of intervals starting from today, such as 13 | #' "Last Week", "Last 2 Weeks", or "All Time" should be included. 14 | #' @param remote_date_periods For crossing remote tables, an existing remote table 15 | #' linking dates to their respective periods. By default, use a global accessor function. 16 | #' @param ... Extra arguments, not used 17 | #' 18 | #' @return A tbl (either local or remote, depending on the input), where TODO. It is grouped by 19 | #' any grouping columns that were in the input, as well as by the new `date` and 20 | #' `period` columns. 21 | #' 22 | #' @examples 23 | #' 24 | #' library(dplyr) 25 | #' 26 | #' flights <- nycflights13::flights %>% 27 | #' mutate(date = as.Date(ISOdate(year, month, day))) 28 | #' 29 | #' # find flight delays by week, month, and quarter 30 | #' flight_summary <- flights %>% 31 | #' cross_by_periods() %>% 32 | #' summarize( 33 | #' nb_flights = n(), 34 | #' avg_arr_delay = mean(arr_delay, na.rm = TRUE) 35 | #' ) 36 | #' 37 | #' library(ggplot2) 38 | #' 39 | #' ggplot(flight_summary, aes(date, avg_arr_delay, color = period)) + 40 | #' geom_line() 41 | #' @export 42 | cross_by_periods <- function(tbl, periods, windows, intervals, ...) { 43 | UseMethod("cross_by_periods") 44 | } 45 | 46 | #' @rdname cross_by_periods 47 | #' @export 48 | cross_by_periods.tbl_lazy <- function(tbl, 49 | periods = c("week", "month", "quarter"), 50 | windows = c(), 51 | intervals = FALSE, 52 | remote_date_periods = NULL, 53 | ...) { 54 | check_cross_by_tbl(tbl) 55 | gvars <- group_vars(tbl) 56 | tbl <- tbl %>% 57 | ungroup() 58 | # If user provides a vector of intervals, set intervals to TRUE 59 | # This is required for backward compatibility with the previous version. 60 | if (!is.logical(intervals) && length(intervals) > 0) { 61 | intervals <- TRUE 62 | } 63 | if (is.null(remote_date_periods)) { 64 | opt <- getOption("remote_date_periods") 65 | if (is.null(opt)) { 66 | stop("Can't find option remote_date_periods: have you initialized one for this database?") 67 | } 68 | 69 | remote_date_periods <- opt() 70 | } 71 | 72 | all_periods <- c(periods, paste0("rolling_", windows, "d")) 73 | 74 | remote_periods <- remote_date_periods %>% 75 | filter( 76 | (period %in% all_periods) | 77 | (intervals && (period %LIKE% "%All%" || period %LIKE% "%Last%")) 78 | ) 79 | 80 | ## TODO: check that the periods and dates match what's available in the table 81 | 82 | tbl %>% 83 | rename(date_original = date) %>% 84 | inner_join(remote_periods, by = "date_original") %>% 85 | clip_incomplete_rolling_periods() %>% 86 | group_by_at(c("period", "date", gvars)) 87 | } 88 | 89 | clip_incomplete_rolling_periods <- function(tbl) { 90 | # We need to remove incomplete rolling periods at both ends 91 | # since they could be misleading. 92 | date_range <- tbl %>% 93 | ungroup() %>% 94 | summarize( 95 | min = min(date_original, na.rm = TRUE), 96 | max = max(date_original, na.rm = TRUE) 97 | ) %>% 98 | collect() 99 | 100 | date_thresholds <- date_range$min + c(7, 28, 56) 101 | tbl %>% 102 | mutate(include = case_when( 103 | period == "rolling_7d" ~ date >= !!date_thresholds[1] & date <= !!date_range$max, 104 | period == "rolling_28d" ~ date >= !!date_thresholds[2] & date <= !!date_range$max, 105 | period == "rolling_56d" ~ date >= !!date_thresholds[3] & date <= !!date_range$max, 106 | TRUE ~ TRUE 107 | )) %>% 108 | filter(include) %>% 109 | select(-include) 110 | } 111 | 112 | #' @rdname cross_by_periods 113 | #' @export 114 | cross_by_periods.tbl_df <- function(tbl, 115 | periods = c("week", "month", "quarter"), 116 | windows = c(), 117 | intervals = FALSE, 118 | ...) { 119 | ## TODO: 120 | ## 1. Update the in-memory version of cross-by-periods to 121 | ## follow the same logic as the remote version (clipping, intervals) 122 | check_cross_by_tbl(tbl) 123 | 124 | date_periods <- generate_date_periods(min(tbl$date), 125 | max(tbl$date), 126 | periods = periods, 127 | windows = windows, 128 | intervals = intervals 129 | ) 130 | 131 | tbl %>% 132 | rename(date_original = date) %>% 133 | inner_join(date_periods, by = "date_original") %>% 134 | group_by(period, date, add = TRUE) 135 | } 136 | 137 | check_cross_by_tbl <- function(tbl) { 138 | if (!("date" %in% colnames(tbl))) { 139 | stop( 140 | "tbl must have a column named \"date\" to be used with cross_by_periods. ", 141 | "If you have a datetime column, you should cast it to a date first." 142 | ) 143 | } 144 | } 145 | 146 | 147 | #' @rdname cross_by_periods 148 | #' @export 149 | cross_by_periods_cumulative <- function(tbl, remote_date_periods = NULL) { 150 | gvars <- group_vars(tbl) 151 | tbl <- tbl %>% ungroup() 152 | date_range <- tbl %>% 153 | summarize( 154 | min = min(date, na.rm = TRUE), 155 | max = max(date, na.rm = TRUE) 156 | ) %>% 157 | collect() 158 | tbl %>% 159 | rename(date_original = date) %>% 160 | inner_join(remote_periods_cumulative(remote_date_periods), by = "date_original") %>% 161 | filter(date >= !!date_range$min, date <= !!date_range$max) %>% 162 | group_by_at(c("period", "date", gvars)) 163 | } 164 | 165 | # Create a remote table of cumulative periods 166 | remote_periods_cumulative <- function(remote_date_periods = NULL) { 167 | if (is.null(remote_date_periods)) { 168 | opt <- getOption("remote_date_periods") 169 | if (is.null(opt)) { 170 | stop("Can't find option remote_date_periods: have you initialized one for this database?") 171 | } 172 | 173 | remote_date_periods <- opt() 174 | } 175 | cumulative_periods <- remote_date_periods %>% 176 | filter(period == "day") %>% 177 | select(period, date_original) 178 | cumulative_periods %>% 179 | cross_join( 180 | cumulative_periods %>% 181 | dplyr::transmute(date = date_original) 182 | ) %>% 183 | filter(date_original <= date) 184 | } 185 | -------------------------------------------------------------------------------- /R/dimensions.R: -------------------------------------------------------------------------------- 1 | #' Discard dimensions 2 | #' 3 | #' This function discards specified dimensions summary by filtering only for the 4 | #' attribute value "All". If no dimensions are specified, it discards all of them. 5 | #' If the table is grouped, this ungroups it. 6 | #' 7 | #' @param tbl A metric tbl in wide format, with one or more dimensions. 8 | #' @param ... Dimensions to discard. 9 | #' @param quietly If FALSE (default), display a message about what columns are 10 | #' being discarded. 11 | #' 12 | #' @seealso [keep_dimensions()] 13 | #' 14 | #' @export 15 | #' @examples 16 | #' library(dplyr) 17 | #' 18 | #' mtcars_by_cyl_gear <- mtcars %>% 19 | #' cross_by_dimensions(cyl, gear) %>% 20 | #' summarize(avg_mpg = mean(mpg)) 21 | #' 22 | #' # Discard all dimensions 23 | #' mtcars_by_cyl_gear %>% 24 | #' discard_dimensions() 25 | #' 26 | #' # Remove dimension cyl 27 | #' mtcars_by_cyl_gear %>% 28 | #' discard_dimensions(cyl) 29 | #' 30 | #' # Remove all dimensions except `cyl` 31 | #' mtcars_by_cyl_gear %>% 32 | #' discard_dimensions(-cyl) 33 | #' 34 | #' mtcars_by_cyl_gear %>% 35 | #' discard_dimensions(-cyl, -gear) 36 | #' 37 | #' mtcars_by_cyl_gear %>% 38 | #' discard_dimensions(-one_of("cyl", "gear")) 39 | discard_dimensions <- function(tbl, ..., quietly = FALSE) { 40 | cols <- if (rlang::dots_n(...) == 0) { 41 | x <- var_names_dimensions(tbl) 42 | if (!quietly) { 43 | message("Discarding all dimensions: ", paste(x, collapse = ", ")) 44 | } 45 | vars(x) 46 | } else { 47 | vars_not_dimensions <- var_names_not_dimensions(tbl) 48 | vars(..., -!!vars_not_dimensions) 49 | } 50 | 51 | cols_2 <- tbl %>% 52 | dplyr::ungroup() %>% 53 | dplyr::select(!!!cols) %>% 54 | colnames() 55 | 56 | if (length(cols_2) > 0) { 57 | tbl_1 <- tbl %>% 58 | dplyr::ungroup() %>% 59 | dplyr::filter_at(cols, all_vars((. == "All"))) 60 | 61 | cols_to_remove <- tbl_1 %>% 62 | # HACK: dates cause errors if not converted to character 63 | dplyr::mutate_all(as.character) %>% 64 | dplyr::select_if(~ all(. == "All")) %>% 65 | colnames() 66 | 67 | tbl_1 %>% 68 | dplyr::select_at(vars(-one_of(cols_to_remove))) 69 | } else { 70 | if (!quietly) { 71 | message("No dimensions left to discard") 72 | } 73 | return(tbl) 74 | } 75 | } 76 | 77 | 78 | #' Remove attribute "All" 79 | #' 80 | #' This function removes the aggregate segment "All" for specified dimensions. 81 | #' If no dimensions are specified, it removes the segment "All" from all 82 | #' dimensions 83 | #' 84 | #' @param tbl A metric tbl in wide format, with one or more dimensions. 85 | #' @param ... Dimensions from which "All" should be removed, as bare names 86 | #' or select helpers like `contains()`. 87 | #' 88 | #' @export 89 | #' @examples 90 | #' library(dplyr) 91 | #' 92 | #' mtcars_by_cyl_gear <- mtcars %>% 93 | #' cross_by_dimensions(cyl, gear, vs) %>% 94 | #' summarize(avg_mpg = mean(mpg)) 95 | #' 96 | #' mtcars_by_cyl_gear %>% 97 | #' remove_attribute_all() 98 | #' 99 | #' mtcars_by_cyl_gear %>% 100 | #' remove_attribute_all(cyl) 101 | #' 102 | #' mtcars_by_cyl_gear %>% 103 | #' remove_attribute_all(-cyl, -gear) 104 | remove_attribute_all <- function(tbl, ...) { 105 | cols <- if (rlang::dots_n(...) == 0) { 106 | vars(var_names_dimensions(tbl)) 107 | } else { 108 | vars_not_dimensions <- var_names_not_dimensions(tbl) 109 | vars(..., -!!vars_not_dimensions) 110 | } 111 | tbl %>% 112 | filter_at(cols, all_vars((. != "All"))) 113 | } 114 | 115 | #' Keep dimensions 116 | #' 117 | #' This function keeps specified dimensions from a wide metric tbl and discards the rest. 118 | #' 119 | #' @param tbl A metric tbl in wide format, with one or more dimensions. 120 | #' @param ... Dimensions to keep, as bare names 121 | #' or select helpers like `contains()`. 122 | #' @param keep_attribute_all Whether to remove the "All" level from the dimensions 123 | #' @param quietly If FALSE (default), display a message about what columns are 124 | #' being discarded. 125 | #' 126 | #' @examples 127 | #' library(dplyr) 128 | #' 129 | #' mtcars_by_cyl_gear <- mtcars %>% 130 | #' cross_by_dimensions(cyl, gear) %>% 131 | #' summarize(avg_mpg = mean(mpg)) 132 | #' 133 | #' mtcars_by_cyl_gear %>% 134 | #' keep_dimensions() 135 | #' 136 | #' mtcars_by_cyl_gear %>% 137 | #' keep_dimensions(cyl) 138 | #' 139 | #' mtcars_by_cyl_gear %>% 140 | #' keep_dimensions(-cyl) 141 | #' 142 | #' mtcars_by_cyl_gear %>% 143 | #' keep_dimensions(cyl, keep_attribute_all = TRUE) 144 | #' @seealso [discard_dimensions()] 145 | #' 146 | #' @export 147 | keep_dimensions <- function(tbl, ..., keep_attribute_all = FALSE, 148 | quietly = FALSE) { 149 | to_keep <- tbl %>% 150 | ungroup() %>% 151 | select(...) %>% 152 | colnames() 153 | if (length(to_keep) == 0) { 154 | if (!quietly) { 155 | message("Keeping all dimensions") 156 | } 157 | if (!keep_attribute_all) { 158 | if (!quietly) { 159 | message("Removing the attribute 'All' from dimensions") 160 | } 161 | tbl %>% 162 | remove_attribute_all() 163 | } else { 164 | tbl 165 | } 166 | } else { 167 | tbl_1 <- tbl %>% 168 | discard_dimensions(-one_of(to_keep)) 169 | if (!keep_attribute_all) { 170 | if (!quietly) { 171 | message("Removing the attribute 'All' from dimensions") 172 | } 173 | tbl_1 %>% 174 | remove_attribute_all() 175 | } else { 176 | tbl_1 177 | } 178 | } 179 | } 180 | 181 | #' Get names of columns that are dimensions 182 | #' 183 | #' Any character of factor column not named date, value, period, or metric are 184 | #' considered dimensions, as well as any columns ending in _id 185 | #' 186 | #' @param tbl A tbl_metric 187 | var_names_dimensions <- function(tbl) { 188 | set1 <- tbl %>% 189 | ungroup() %>% 190 | select_if(~ is.character(.x) || is.factor(.x)) %>% 191 | colnames() %>% 192 | setdiff(c("date", "value", "period", "metric")) 193 | 194 | set2 <- stringr::str_subset(colnames(tbl), "_id$") 195 | 196 | union(set1, set2) 197 | } 198 | 199 | #' Get names of columns that are NOT dimensions 200 | #' 201 | #' @param tbl A tbl_metric 202 | var_names_not_dimensions <- function(tbl) { 203 | setdiff(colnames(tbl), var_names_dimensions(tbl)) 204 | } 205 | 206 | 207 | #' Remove dimensions with a constant level (single value) 208 | #' 209 | #' Use `constant_constant_dimensions` instead of `select` so the 210 | #' removed dimension value is added to the metadata attribute. 211 | #' 212 | #' @export 213 | #' @param tbl A metric tbl in wide format, with one or more dimensions. 214 | #' @param quietly If FALSE (default), display a message about what columns are 215 | #' being discarded. 216 | #' @importFrom purrr map keep 217 | #' @examples 218 | #' 219 | #' library(dplyr) 220 | #' 221 | #' flights_nyc_avg_arr_delay %>% 222 | #' filter(origin == "JFK") %>% 223 | #' discard_constant_dimensions() 224 | discard_constant_dimensions <- function(tbl, quietly = FALSE) { 225 | dims <- var_names_dimensions(tbl) 226 | dims_to_remove <- tbl[dims] %>% 227 | purrr::map(n_distinct) %>% 228 | purrr::keep(~ .x == 1) %>% 229 | names() 230 | if (length(dims_to_remove) >= 1) { 231 | if (!quietly) { 232 | message("Removing dimensions ", paste(dims_to_remove, collapse = " , ")) 233 | } 234 | d <- tbl %>% 235 | select(-one_of(!!!dims_to_remove)) 236 | filters <- attr(d, "metadata")$dimensions_filters 237 | attr(d, "metadata")$dimensions_filters <- append( 238 | filters, 239 | tbl %>% 240 | as_tibble() %>% 241 | select(!!!dims_to_remove) %>% 242 | purrr::map_chr(unique) %>% 243 | as.list() 244 | ) 245 | return(d) 246 | } else { 247 | tbl 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /R/examples.R: -------------------------------------------------------------------------------- 1 | #' Average arrival delay of NYC flights 2 | #' 3 | #' A tbl_metric showing the average arrival delay of planes departing from 4 | #' an NYC airport over the course of 2013. This can be used as an 5 | #' example of a metric with metadata, and is based on the nycflights13 6 | #' package. 7 | #' 8 | #' @format A tbl_metric object with dimensions: 9 | #' \describe{ 10 | #' \item{origin}{Airport code, either JFK, LGA, or EWR} 11 | #' \item{carrier}{Two-letter airline code} 12 | #' \item{period}{Either week, month, or quarter} 13 | #' \item{date}{Date} 14 | #' \item{value}{Average arrival delay of a flight in this period} 15 | #' } 16 | #' @source 17 | "flights_nyc_avg_arr_delay" 18 | -------------------------------------------------------------------------------- /R/generate-date-periods.R: -------------------------------------------------------------------------------- 1 | #' Generate a table with pairings of dates and periods 2 | #' 3 | #' @param start Start date 4 | #' @param end End date 5 | #' @param periods A vector of calendar periods. This supports "day", "week", "month", "quarter", 6 | #' and "year". 7 | #' @param windows A vector of windows, each representing a # of days 8 | #' @param intervals Whether a preselected set of intervals starting from today, such as 9 | #' "Last Week", "Last 2 Weeks", or "All Time" should be included. 10 | #' @param today_date Date to count as "today": by default, `lubridate::today()`. 11 | generate_date_periods <- function(start, 12 | end, 13 | periods = c("day", "week", "month", "quarter", "year"), 14 | windows = c(7, 28, 56), 15 | intervals = FALSE, 16 | today_date = lubridate::today()) { 17 | dates <- seq(as.Date(start), as.Date(end), by = 1) 18 | 19 | dates_original <- tibble(date_original = dates) 20 | 21 | ret <- NULL 22 | 23 | if (length(periods) > 0) { 24 | calendar_periods <- dates_original %>% 25 | tidyr::crossing(period = periods) %>% 26 | dplyr::group_by(period) %>% 27 | dplyr::mutate( 28 | date = lubridate::floor_date(date_original, period[1], week_start = 1) 29 | ) %>% 30 | dplyr::ungroup() %>% 31 | dplyr::select(period, date, date_original) 32 | 33 | ret <- bind_rows(ret, calendar_periods) 34 | } 35 | 36 | if (length(windows) > 0) { 37 | window_offsets <- tibble::tibble(window_size = windows) %>% 38 | dplyr::mutate(period = paste0("rolling_", window_size, "d")) %>% 39 | tidyr::unnest(offset = purrr::map(window_size, seq_len)) %>% 40 | dplyr::mutate(offset = offset - 1) 41 | 42 | window_periods <- tibble(date_original = dates) %>% 43 | tidyr::crossing(window_offsets) %>% 44 | dplyr::mutate(date = date_original + offset) %>% 45 | dplyr::select(period, date, date_original) 46 | 47 | ret <- bind_rows(ret, window_periods) 48 | } 49 | 50 | if (intervals) { 51 | interval_periods <- generate_intervals_ago() %>% 52 | tibble::enframe("period", "threshold") %>% 53 | tidyr::crossing(date_original = dates) %>% 54 | dplyr::filter(date_original < today_date) %>% 55 | dplyr::filter(date_original >= threshold) %>% 56 | dplyr::transmute(period, date = NA, date_original) 57 | 58 | ret <- bind_rows(ret, interval_periods) 59 | } 60 | 61 | if (is.null(ret)) { 62 | stop("generate_date_periods must be given at least one of periods, intervals, and windows") 63 | } 64 | 65 | ret 66 | } 67 | 68 | generate_intervals_ago <- function(max_date = NULL, today_date = lubridate::today()) { 69 | weeks_back <- c(1, 2, 4, 8, 12, 26, 365 / 7, 100 * 365 / 7) 70 | 71 | dates <- as.character(today_date - as.integer(weeks_back * 7)) 72 | 73 | names(dates) <- dplyr::case_when( 74 | weeks_back == 1 ~ "Last Week", 75 | weeks_back <= 12 ~ stringr::str_c("Last ", weeks_back, " Weeks"), 76 | weeks_back == 26 ~ "Last 6 Months", 77 | round(weeks_back) == 52 ~ "Last Year", 78 | weeks_back > 52 ~ "All Time" 79 | ) 80 | 81 | 82 | if (!is.null(max_date)) { 83 | dates <- dates[dates <= max_date] 84 | } 85 | 86 | dates 87 | } 88 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | globalVariables(c( 2 | ".", "period", "date", "value", "metric_full", "documentation", 3 | "date_original", "threshold", "dimensions", "attributes", 4 | "metric", "window_size", "offset", "%LIKE%", "include", "nb_all" 5 | )) 6 | -------------------------------------------------------------------------------- /R/tbl-metric-group.R: -------------------------------------------------------------------------------- 1 | #' Create a metric group 2 | #' 3 | #' @param tbl A wide table of metrics 4 | #' @param group_name An optional underscore separated string as group_name. If 5 | #' not specified, the category and subcategory of the metrics in `tbl` 6 | #' are concatenated to form the group_name. Specify a custom group_name only 7 | #' if you are trying to save multiple metric groups in the same Rmd. 8 | #' @param rmd_file The Rmd file that generated the compact metrics, which has 9 | #' documentation for the metrics and dimensions stored in the YAML front matter. 10 | #' If no Rmd file is given, it uses the currently running one. 11 | #' @export 12 | #' @rdname metric_group 13 | create_metric_group <- function(tbl, group_name = NULL, rmd_file = NULL) { 14 | metric_details <- get_metric_docs(rmd_file = rmd_file) 15 | if (is.null(group_name)) { 16 | group_name <- names(metric_details)[1] %>% 17 | stringr::str_split("_") %>% 18 | magrittr::extract2(1) %>% 19 | magrittr::extract(1:2) %>% 20 | paste(collapse = "_") 21 | } 22 | cat_subcat <- stringr::str_split(group_name, "_")[[1]] 23 | category <- cat_subcat[1] 24 | subcategory <- cat_subcat[2] 25 | metric_ids <- var_names_not_dimensions(tbl) %>% 26 | setdiff(c("date", "period")) %>% 27 | paste(category, subcategory, ., sep = "_") 28 | metric_details <- metric_details %>% 29 | rlang::set_names( 30 | stringr::str_split_fixed(names(.), "_", 3)[, 3] 31 | ) 32 | dimension_details <- metric_details[[1]]$dimensions 33 | metadata <- list( 34 | category = category, 35 | subcategory = subcategory, 36 | owner = metric_details[[1]]$owner, 37 | metrics = metric_details, 38 | dimensions = dimension_details 39 | ) 40 | attr(tbl, "metadata") <- metadata 41 | class(tbl) <- c("tbl_metric_group", class(tbl)) 42 | tbl 43 | } 44 | 45 | #' Print method for tbl_metric_group 46 | #' 47 | #' @param x A tbl_metric_group object to `print` 48 | #' @param ... Additional parameters passed to `print` 49 | #' @export 50 | print.tbl_metric_group <- function(x, ...) { 51 | m <- attr(x, "metadata") 52 | 53 | header <- paste0( 54 | "# Metric group\n", 55 | "# Category: ", m$category, "\n", 56 | "# Subcategory: ", m$subcategory, "\n" 57 | ) 58 | 59 | cat(pillar::style_subtle(header)) 60 | 61 | NextMethod("print") 62 | } 63 | 64 | as_tbl_metric_group <- function(x) { 65 | class(x) <- c("tbl_metric_group", class(x)) 66 | x 67 | } 68 | 69 | #' Metric dplyr S3 methods 70 | #' 71 | #' @param .data A `tbl_metric_group` object 72 | #' @param x For `as_tibble`, a `tbl_metric_group` object 73 | #' @param ... Arguments passed on to the appropriate dplyr verb 74 | #' 75 | #' @importFrom dplyr as_data_frame anti_join arrange filter group_by inner_join 76 | #' left_join mutate rename right_join select semi_join summarise 77 | #' transmute 78 | #' 79 | #' @name metric-group-s3 80 | #' @export 81 | as_tibble.tbl_metric_group <- function(x, ...) { 82 | class(x) <- class(x)[class(x) != "tbl_metric_group"] 83 | x 84 | } 85 | 86 | #' @rdname metric-group-s3 87 | #' @export 88 | filter.tbl_metric_group <- function(.data, ...) { 89 | reclass(.data, NextMethod()) 90 | } 91 | 92 | #' @rdname metric-group-s3 93 | #' @export 94 | select.tbl_metric_group <- function(.data, ...) { 95 | reclass(.data, NextMethod()) 96 | } 97 | 98 | #' @rdname metric-group-s3 99 | #' @export 100 | arrange.tbl_metric_group <- function(.data, ...) { 101 | reclass(.data, NextMethod()) 102 | } 103 | 104 | #' @rdname metric-group-s3 105 | #' @export 106 | mutate.tbl_metric_group <- function(.data, ...) { 107 | reclass(.data, NextMethod()) 108 | } 109 | 110 | #' @rdname metric-group-s3 111 | #' @export 112 | group_by.tbl_metric_group <- function(.data, ...) { 113 | reclass(.data, NextMethod()) 114 | } 115 | 116 | #' @rdname metric-group-s3 117 | #' @export 118 | summarise.tbl_metric_group <- function(.data, ...) { 119 | reclass(.data, NextMethod()) 120 | } 121 | 122 | #' @rdname metric-group-s3 123 | #' @export 124 | inner_join.tbl_metric_group <- function(.data, ...) { 125 | reclass(.data, NextMethod()) 126 | } 127 | 128 | #' @rdname metric-group-s3 129 | #' @export 130 | left_join.tbl_metric_group <- function(.data, ...) { 131 | reclass(.data, NextMethod()) 132 | } 133 | 134 | #' @rdname metric-group-s3 135 | #' @export 136 | right_join.tbl_metric_group <- function(.data, ...) { 137 | reclass(.data, NextMethod()) 138 | } 139 | 140 | #' @rdname metric-group-s3 141 | #' @export 142 | semi_join.tbl_metric_group <- function(.data, ...) { 143 | reclass(.data, NextMethod()) 144 | } 145 | 146 | #' @rdname metric-group-s3 147 | #' @export 148 | anti_join.tbl_metric_group <- function(.data, ...) { 149 | reclass(.data, NextMethod()) 150 | } 151 | 152 | #' @rdname metric-group-s3 153 | #' @export 154 | rename.tbl_metric_group <- function(.data, ...) { 155 | reclass(.data, NextMethod()) 156 | } 157 | 158 | #' @rdname metric-group-s3 159 | #' @export 160 | count.tbl_metric_group <- function(.data, ...) { 161 | reclass(.data, NextMethod()) 162 | } 163 | 164 | #' @rdname metric-group-s3 165 | #' @export 166 | transmute.tbl_metric_group <- function(.data, ...) { 167 | reclass(.data, NextMethod()) 168 | } 169 | -------------------------------------------------------------------------------- /R/tbl-metric.R: -------------------------------------------------------------------------------- 1 | # A tbl_metric is an S3 class built around a tbl_df, which generally contains a period, 2 | # a date, some number of dimensions, and a value. It supports almost all dplyr operations. 3 | 4 | #' S3 operators for metrics, including printing and coercing to a data frame 5 | #' 6 | #' @param x A tbl_metric 7 | #' @param ... Extra arguments, not used. 8 | #' 9 | #' @name metric-methods 10 | #' 11 | #' @import dplyr 12 | #' 13 | #' @export 14 | print.tbl_metric <- function(x, ...) { 15 | periods <- unique(x$period) 16 | m <- attr(x, "metadata") 17 | 18 | header <- paste0( 19 | "# Metric: ", m$title, " (", m$metric_full, ")\n", 20 | "# Dimensions: ", paste(var_names_dimensions(x), collapse = ", "), "\n" 21 | ) 22 | 23 | if (!all(is.na(x$date))) { 24 | header <- paste0(header, "# Dates: ", min(x$date, na.rm = TRUE), " to ", max(x$date, na.rm = TRUE), "\n") 25 | } 26 | header <- paste0(header, "# Periods: ", paste(periods, collapse = ", "), "\n") 27 | if (!is.null(m$updated_at)) { 28 | header <- paste0(header, "# Updated At: ", m$updated_at, "\n") 29 | } 30 | 31 | cat(pillar::style_subtle(header)) 32 | 33 | NextMethod() 34 | } 35 | 36 | #' Perform sanity checks on a metric object 37 | #' 38 | #' This function previously worked on metric data tables, but it now works on metric objects 39 | #' (which contain all the metadata, documentation, and everything needed to plot). 40 | #' 41 | #' @param metric A metric table, as found in the data field of a metric object 42 | #' 43 | #' @export 44 | check_metric <- function(metric) { 45 | assertthat::assert_that(inherits(metric, "tbl_metric"), 46 | msg = "Not a 'tbl_metric' object (check_metric parses tbl_metric objects)" 47 | ) 48 | 49 | # Need metric_full to print error messages 50 | metadata <- attr(metric, "metadata") 51 | assertthat::assert_that("metric_full" %in% names(metadata), 52 | msg = "Missing metric_full field in metric object" 53 | ) 54 | 55 | context_name <- metadata$metric_full 56 | 57 | # check the rest 58 | expected_names <- c( 59 | "metric", "title", "description", "category", 60 | "subcategory", "owner" 61 | ) 62 | 63 | for (n in expected_names) { 64 | assertthat::assert_that(n %in% names(metadata), 65 | msg = glue::glue("Missing { n } field ({ context_name })") 66 | ) 67 | } 68 | 69 | ## check the data 70 | assertthat::assert_that(inherits(metric, "tbl_df"), 71 | msg = glue::glue("Metric data should be a tbl_df ({ context_name })") 72 | ) 73 | assertthat::assert_that(nrow(metric) > 0, 74 | msg = glue::glue("Metric data should have at least one row ({ context_name })") 75 | ) 76 | 77 | fields <- colnames(metric) 78 | fields_numeric <- metric %>% 79 | select_if(is.numeric) %>% 80 | colnames() 81 | fields_dimensions <- var_names_dimensions(metric) 82 | 83 | assertthat::assert_that( 84 | "date" %in% fields, 85 | msg = glue::glue("A metric table should have a field named date ({ context_name })") 86 | ) 87 | assertthat::assert_that( 88 | "period" %in% fields, 89 | msg = glue::glue("A metric table should have a field named period ({ context_name })") 90 | ) 91 | assertthat::assert_that( 92 | length(fields_numeric) >= 1, 93 | msg = glue::glue("A metric table should have at least one numeric field ({ context_name })") 94 | ) 95 | 96 | 97 | d <- metadata$dimensions 98 | 99 | # check dimension documentation 100 | for (dn in names(d)) { 101 | assertthat::assert_that("title" %in% names(d[[dn]]), 102 | msg = glue::glue("Missing title in dimension { dn } ({ context_name })") 103 | ) 104 | assertthat::assert_that("description" %in% names(d[[dn]]), 105 | msg = glue::glue("Missing title in dimension { dn } ({ context_name })") 106 | ) 107 | } 108 | } 109 | 110 | #' Condense a metric_tbl object to remove cases with multiple non-All dimensions 111 | #' 112 | #' This reduces the size of a metrics table, by limiting the number of dimensions 113 | #' that can be anything besides All at the same time. If there is a `min_dimensions` 114 | #' field in the metric metadata, it never condenses beyond that (this is useful for some 115 | #' that need multiple dimensions to be interpretable) 116 | #' 117 | #' @param metric A `tbl_metric` object 118 | #' @param max_dimensions The number of (non-All) dimensions that each row 119 | #' can have 120 | #' 121 | #' @export 122 | condense_metric <- function(metric, max_dimensions = 2) { 123 | min_dimensions <- attr(metric, "metadata")$min_dimensions 124 | if (!is.null(min_dimensions)) { 125 | max_dimensions <- max(min_dimensions, max_dimensions) 126 | } 127 | 128 | dims <- var_names_dimensions(metric) 129 | dimensions <- as.matrix(metric[, dims]) 130 | num_not_all <- rowSums(dimensions != "All") 131 | 132 | ret <- metric[num_not_all <= max_dimensions, ] 133 | 134 | # If it's a tbl_metric, keep it that way 135 | class(ret) <- class(metric) 136 | attr(ret, "metadata") <- attr(metric, "metadata") 137 | ret 138 | } 139 | 140 | 141 | ### S3 methods 142 | 143 | as_tbl_metric <- function(x) { 144 | class(x) <- c("tbl_metric", class(x)) 145 | x 146 | } 147 | 148 | #' Metric dplyr S3 methods 149 | #' 150 | #' @param .data A `tbl_metric` object 151 | #' @param x For as_data_frame, the 152 | #' @param ... Arguments passed on to the appropriate dplyr metric 153 | #' 154 | #' @importFrom dplyr as_tibble anti_join arrange filter group_by inner_join 155 | #' left_join mutate rename right_join select semi_join summarise 156 | #' transmute 157 | #' 158 | #' @name metric-s3 159 | #' @export 160 | as_tibble.tbl_metric <- function(x, ...) { 161 | class(x) <- class(x)[class(x) != "tbl_metric"] 162 | x 163 | } 164 | 165 | #' Copy class and attributes from the original version of an object to a modified version. 166 | #' 167 | #' Copied over from https://github.com/tidyverse/dplyr/issues/719 168 | #' @export 169 | #' @param x The original object, which has a class/attributes to copy 170 | #' @param result The modified object, which is / might be missing the 171 | #' class/attributes. 172 | #' 173 | #' @return `result`, now with class/attributes restored. 174 | reclass <- function(x, result) { 175 | UseMethod("reclass") 176 | } 177 | 178 | #' @export 179 | reclass.tbl_metric <- function(x, result) { 180 | class(result) <- unique(c(class(x)[[1]], class(result))) 181 | attr(result, class(x)[[1]]) <- attr(x, class(x)[[1]]) 182 | attr(result, "metadata") <- attr(x, "metadata") 183 | result 184 | } 185 | 186 | #' @export 187 | reclass.tbl_metric_group <- reclass.tbl_metric 188 | 189 | 190 | #' @importFrom dplyr filter 191 | #' @export 192 | dplyr::filter 193 | 194 | #' @rdname metric-s3 195 | #' @export 196 | filter.tbl_metric <- function(.data, ...) { 197 | reclass(.data, NextMethod()) 198 | } 199 | 200 | #' @rdname metric-s3 201 | #' @export 202 | select.tbl_metric <- function(.data, ...) { 203 | reclass(.data, NextMethod()) 204 | } 205 | 206 | #' @rdname metric-s3 207 | #' @export 208 | arrange.tbl_metric <- function(.data, ...) { 209 | reclass(.data, NextMethod()) 210 | } 211 | 212 | #' @rdname metric-s3 213 | #' @export 214 | mutate.tbl_metric <- function(.data, ...) { 215 | reclass(.data, NextMethod()) 216 | } 217 | 218 | #' @rdname metric-s3 219 | #' @export 220 | group_by.tbl_metric <- function(.data, ...) { 221 | reclass(.data, NextMethod()) 222 | } 223 | 224 | #' @rdname metric-s3 225 | #' @export 226 | summarise.tbl_metric <- function(.data, ...) { 227 | reclass(.data, NextMethod()) 228 | } 229 | 230 | #' @rdname metric-s3 231 | #' @export 232 | inner_join.tbl_metric <- function(.data, ...) { 233 | reclass(.data, NextMethod()) 234 | } 235 | 236 | #' @rdname metric-s3 237 | #' @export 238 | left_join.tbl_metric <- function(.data, ...) { 239 | reclass(.data, NextMethod()) 240 | } 241 | 242 | #' @rdname metric-s3 243 | #' @export 244 | right_join.tbl_metric <- function(.data, ...) { 245 | reclass(.data, NextMethod()) 246 | } 247 | 248 | #' @rdname metric-s3 249 | #' @export 250 | semi_join.tbl_metric <- function(.data, ...) { 251 | reclass(.data, NextMethod()) 252 | } 253 | 254 | #' @rdname metric-s3 255 | #' @export 256 | anti_join.tbl_metric <- function(.data, ...) { 257 | reclass(.data, NextMethod()) 258 | } 259 | 260 | #' @rdname metric-s3 261 | #' @export 262 | rename.tbl_metric <- function(.data, ...) { 263 | reclass(.data, NextMethod()) 264 | } 265 | 266 | #' @rdname metric-s3 267 | #' @export 268 | transmute.tbl_metric <- function(.data, ...) { 269 | reclass(.data, NextMethod()) 270 | } 271 | 272 | 273 | #' @rdname metric-s3 274 | #' @export 275 | distinct.tbl_metric <- function(.data, ...) { 276 | .data <- tibble::as_data_frame(.data) 277 | NextMethod() 278 | } 279 | 280 | # utilities 281 | prune_dimensions <- function(metric) { 282 | metadata <- attr(metric, "metadata") 283 | names_dimensions <- intersect( 284 | names(metadata$dimensions), 285 | colnames(metric) 286 | ) 287 | metadata$dimensions <- metadata$dimensions[names_dimensions] 288 | attr(metric, "metadata") <- metadata 289 | return(metric) 290 | } 291 | -------------------------------------------------------------------------------- /R/use-metrics-scaffold.R: -------------------------------------------------------------------------------- 1 | #' Create a scaffold for documenting metrics 2 | #' 3 | #' Use this to generate a YAML scaffold for documenting metrics, just prior to running 4 | #' [create_metrics()]. You can place this into the header of an Rmd and 5 | #' fill in the names and descriptions. 6 | #' 7 | #' @param tbl A wide metric tbl, including `date`, `period`, optionally one or 8 | #' more dimensions, and one or more calculated metrics. 9 | #' 10 | #' @examples 11 | #' 12 | #' library(dplyr) 13 | #' 14 | #' flights <- nycflights13::flights %>% 15 | #' mutate(date = as.Date(ISOdate(year, month, day))) 16 | #' 17 | #' # find flight delays by week, month, and quarter 18 | #' flight_summary <- flights %>% 19 | #' cross_by_dimensions(origin) %>% 20 | #' cross_by_periods() %>% 21 | #' summarize( 22 | #' nb_flights = n(), 23 | #' avg_arr_delay = mean(arr_delay, na.rm = TRUE) 24 | #' ) 25 | #' 26 | #' use_metrics_scaffold(flight_summary) 27 | #' @export 28 | use_metrics_scaffold <- function(tbl) { 29 | tbl <- ungroup(tbl) 30 | 31 | names_dimensions <- var_names_dimensions(tbl) 32 | dimensions <- names_dimensions %>% 33 | purrr::map(~ list(title = "", description = "")) %>% 34 | rlang::set_names(names_dimensions) 35 | 36 | if (length(dimensions) == 0) { 37 | dimensions <- NULL 38 | } 39 | 40 | names_metrics <- tbl %>% 41 | select_if(is.numeric) %>% 42 | colnames() 43 | 44 | metrics <- names_metrics %>% 45 | purrr::map(~ list(title = "", description = "")) %>% 46 | rlang::set_names(names_metrics) 47 | 48 | out <- list(metrics = metrics, dimensions = dimensions) 49 | cat(yaml::as.yaml(out)) 50 | invisible(out) 51 | } 52 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' Gather all the non-dimension columns in a wide metric table 2 | #' 3 | #' @param tbl A table to gather metrics from 4 | #' @param ... A selection of columns to gather. If empty, all numeric columns 5 | #' are selected as a default 6 | #' @param quietly A boolean indicating if diagnostic messages are to be printed. 7 | #' @importFrom tidyr gather 8 | gather_metrics <- function(tbl, ..., quietly = FALSE) { 9 | tbl_c <- tbl %>% 10 | ungroup() %>% 11 | collect() 12 | if (rlang::dots_n(...) == 0) { 13 | cols_numeric <- tbl_c %>% 14 | select_if(is.numeric) %>% 15 | colnames() %>% 16 | grep("\\_id", ., value = TRUE, invert = TRUE) 17 | if (!quietly) { 18 | message("Gathering columns ", paste(cols_numeric, collapse = ", ")) 19 | } 20 | tbl_c %>% 21 | tidyr::gather(metric, value, cols_numeric) 22 | } else { 23 | tbl_c %>% 24 | tidyr::gather(metric, value, ...) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tidymetrics 2 | 3 | **Authors:** Ramnath Vaidyanathan, [David Robinson](http://varianceexplained.org/)
4 | 5 | 6 | [![R build status](https://github.com/datacamp/tidymetrics/workflows/R-CMD-check/badge.svg)](https://github.com/datacamp/tidymetrics/actions) 7 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 8 | [![Codecov test coverage](https://codecov.io/gh/datacamp/tidymetrics/branch/master/graph/badge.svg)](https://codecov.io/gh/datacamp/tidymetrics?branch=master) 9 | 10 | 11 | 17 | 18 | Dimensional modeling done the tidy way! 19 | 20 | ## What the package contains 21 | 22 | The "cross by" family of functions, which prepare data to be aggregated in ways useful for dimensional modeling: 23 | 24 | * `cross_by_periods`, which prepares data with a `date` column to be aggregated by calendar periods (day/week/month), rolling windows, or "X weeks ago" 25 | * `cross_by_dimensions`, which adds an `All` level to each segment 26 | 27 | Methods for annotating aggregated metrics with useful metadata: 28 | 29 | * `create_metrics`, which gathers a table of metrics into a list of `tbl_metric` objects and attaches metadata to it 30 | * `create_metric_group`, which annotates a group of metrics with the same dimensions as a `tbl_metric_group` 31 | 32 | Verbs for working with dimensions in metric tables, including: 33 | 34 | * `discard_dimensions` (and its inverse `keep_dimensions`), which filters for only the `All` segment of dimensions and removes those columns 35 | * `condense_metric`, which retains only observations with one non-All dimension (in order to store a compact version that can still be explored one dimension at a time) 36 | 37 | ## Code of Conduct 38 | 39 | Please note that the 'tidymetrics' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. 40 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | patch: 10 | default: 11 | target: auto 12 | threshold: 1% 13 | -------------------------------------------------------------------------------- /data-raw/nycflight_metrics.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(tidymetrics) 3 | library(nycflights13) 4 | 5 | flight_summary <- flights %>% 6 | mutate(date = as.Date(ISOdate(year, month, day))) %>% 7 | cross_by_dimensions(origin, carrier) %>% 8 | cross_by_periods() %>% 9 | summarize( 10 | nb_flights = n(), 11 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 12 | ) 13 | 14 | rmd <- system.file("extdata", "metrics_flights_nyc.Rmd", package = "tidymetrics") 15 | flights_nyc <- create_metrics(flight_summary, rmd_file = rmd) 16 | 17 | flights_nyc_avg_arr_delay <- nycflight_metrics$flights_nyc_avg_arr_delay 18 | 19 | usethis::use_data(flights_nyc_avg_arr_delay, overwrite = TRUE) 20 | -------------------------------------------------------------------------------- /data/flights_nyc_avg_arr_delay.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacamp/tidymetrics/47f157a529df6114c3c3add2bf00f39c9b97ae2c/data/flights_nyc_avg_arr_delay.rda -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | 5 | db: 6 | image: postgres 7 | restart: always 8 | environment: 9 | POSTGRES_PASSWORD: "" 10 | ports: 11 | - 5433:5432 12 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Codecov 2 | dplyr 3 | EWR 4 | github 5 | https 6 | interpretable 7 | LGA 8 | Lifecycle 9 | nycflights 10 | preselected 11 | Rmd 12 | tbl 13 | tbls 14 | tidyr 15 | tidyverse 16 | ungroups 17 | YAML 18 | -------------------------------------------------------------------------------- /inst/extdata/metrics_flights_nyc.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | name: metrics_flights_nyc 3 | owner: "David Robinson" 4 | metrics: 5 | nb_flights: 6 | title: Number of Flights 7 | description: Total number of flights 8 | avg_arr_delay: 9 | title: Average Arrival Delay 10 | description: Average arrival delay. Note that some data may be missing their average delay. 11 | dimensions: 12 | origin: 13 | title: Origin 14 | description: "Origin airport: JFK, EWR, or LGA." 15 | carrier: 16 | title: Carrier 17 | description: Airline carrier (two-letter code). 18 | --- 19 | 20 | Example metric Rmd used for unit tests. 21 | 22 | ```{r} 23 | library(dplyr) 24 | library(nycflights13) 25 | 26 | # find flight delays by week, month, and quarter 27 | flight_summary <- flights %>% 28 | mutate(date = as.Date(ISOdate(year, month, day))) %>% 29 | cross_by_dimensions(origin, carrier) %>% 30 | cross_by_periods() %>% 31 | summarize(nb_flights = n(), 32 | avg_arr_delay = mean(arr_delay, na.rm = TRUE)) 33 | ``` 34 | -------------------------------------------------------------------------------- /man/check_metric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric.R 3 | \name{check_metric} 4 | \alias{check_metric} 5 | \title{Perform sanity checks on a metric object} 6 | \usage{ 7 | check_metric(metric) 8 | } 9 | \arguments{ 10 | \item{metric}{A metric table, as found in the data field of a metric object} 11 | } 12 | \description{ 13 | This function previously worked on metric data tables, but it now works on metric objects 14 | (which contain all the metadata, documentation, and everything needed to plot). 15 | } 16 | -------------------------------------------------------------------------------- /man/complete_periods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/complete-periods.R 3 | \name{complete_periods} 4 | \alias{complete_periods} 5 | \title{Given a cumulative metric, add dates for the end of each period} 6 | \usage{ 7 | complete_periods( 8 | metric, 9 | periods = c("month"), 10 | add_incomplete = FALSE, 11 | week_start = getOption("lubridate.week.start", 1) 12 | ) 13 | } 14 | \arguments{ 15 | \item{metric}{A metric table in wide format, containing "date" and "period" columns as 16 | well as one or more dimensions and metric values.} 17 | 18 | \item{periods}{Vector of periods to add: one or more of "week", "month", "quarter" or "year".} 19 | 20 | \item{add_incomplete}{If TRUE a value of the running incomplete period will be added.} 21 | 22 | \item{week_start}{when unit is \code{weeks}, specifies the reference day. 7 23 | represents Sunday and 1 represents Monday. Note that we use a default of 1 24 | instead of 7, in order to be consistent with SQL.} 25 | } 26 | \description{ 27 | Some metrics like ARR are measured cumulatively, so in order to create a bar plot 28 | per month or quarter we need to pick the last value from each period. For example, 29 | the ARR for January 2019 would be measured as of 2019-01-31. Analogously 30 | to the tidyr function \code{complete()}, this adds rows representing each period 31 | present in the data. 32 | } 33 | \examples{ 34 | 35 | library(dplyr) 36 | 37 | flights <- nycflights13::flights \%>\% 38 | mutate(date = as.Date(ISOdate(year, month, day))) 39 | 40 | # Include number and cumulative number of flights 41 | cumulative_summary <- flights \%>\% 42 | cross_by_periods(periods = "day") \%>\% 43 | summarize(nb_flights = n()) \%>\% 44 | arrange(date) \%>\% 45 | mutate(cumulative_flights = cumsum(nb_flights)) \%>\% 46 | ungroup() 47 | 48 | # Have periods for week and month as well, representing the end of that period 49 | library(ggplot2) 50 | 51 | cumulative_day_week_month <- cumulative_summary \%>\% 52 | complete_periods(periods = c("week", "month")) 53 | 54 | cumulative_day_week_month \%>\% 55 | ggplot(aes(date, cumulative_flights, color = period)) + 56 | geom_point() 57 | } 58 | -------------------------------------------------------------------------------- /man/condense_metric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric.R 3 | \name{condense_metric} 4 | \alias{condense_metric} 5 | \title{Condense a metric_tbl object to remove cases with multiple non-All dimensions} 6 | \usage{ 7 | condense_metric(metric, max_dimensions = 2) 8 | } 9 | \arguments{ 10 | \item{metric}{A \code{tbl_metric} object} 11 | 12 | \item{max_dimensions}{The number of (non-All) dimensions that each row 13 | can have} 14 | } 15 | \description{ 16 | This reduces the size of a metrics table, by limiting the number of dimensions 17 | that can be anything besides All at the same time. If there is a \code{min_dimensions} 18 | field in the metric metadata, it never condenses beyond that (this is useful for some 19 | that need multiple dimensions to be interpretable) 20 | } 21 | -------------------------------------------------------------------------------- /man/create_metrics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create-metrics.R 3 | \name{create_metrics} 4 | \alias{create_metrics} 5 | \title{Given a metric tbl and an Rmd file, turn into a named list of metric objects} 6 | \usage{ 7 | create_metrics(..., rmd_file = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{One or more metric tables in wide metric format: one column for each metric.} 11 | 12 | \item{rmd_file}{The Rmd file that generated the compact metrics, which has 13 | documentation for the metrics and dimensions stored in the YAML front matter. 14 | If no Rmd file is given, it uses the currently running one.} 15 | } 16 | \value{ 17 | A named list of metric objects. Each of these has both the data and the metadata 18 | (documentation, dimensions, owner, etc) to make an interactive visualization. 19 | } 20 | \description{ 21 | Given a metric tbl and an Rmd file, turn into a named list of metric objects 22 | } 23 | \examples{ 24 | 25 | # TODO 26 | } 27 | -------------------------------------------------------------------------------- /man/cross_by_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cross-dimensions.R 3 | \name{cross_by_dimensions} 4 | \alias{cross_by_dimensions} 5 | \title{Cross by dimensions} 6 | \usage{ 7 | cross_by_dimensions( 8 | tbl, 9 | ..., 10 | add = TRUE, 11 | max_dimensions = NULL, 12 | collect_fun = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{tbl}{A table} 17 | 18 | \item{...}{A selection of columns} 19 | 20 | \item{add}{Whether to leave the existing groups as well instead of replacing 21 | them (by default, yes).} 22 | 23 | \item{max_dimensions}{The number of (non-All) dimensions that each row 24 | can have. This reduces the size of a metrics table, by limiting the number 25 | of dimensions that can be anything besides All at the same time.} 26 | 27 | \item{collect_fun}{A function to collect or materialize intermediate tables. 28 | This is useful when dealing with large tables in which case the resulting 29 | SQL queries can become very complex and expensive to execute. Materializing 30 | intermediate tables as temporary tables can improve the efficiency of 31 | the query.} 32 | } 33 | \description{ 34 | This function stacks an extra copy of the table for each dimension column 35 | specified as an argument, replaces the value of the column with the word 36 | "All", and finally groups by all the columns. It acts as an extended 37 | \code{group_by} that allows complete summaries across each individual 38 | dimension and possible combinations. It works both in-database and in-memory. 39 | } 40 | \examples{ 41 | # Data Frame 42 | library(dplyr) 43 | 44 | mtcars \%>\% 45 | cross_by_dimensions(cyl, am) \%>\% 46 | summarize(avg_mpg = mean(mpg)) 47 | 48 | flights <- nycflights13::flights \%>\% 49 | mutate(date = as.Date(ISOdate(year, month, day))) 50 | 51 | # find flight delays by carrier, origin, and Overall 52 | flight_summary <- nycflights13::flights \%>\% 53 | cross_by_dimensions(carrier, origin) \%>\% 54 | summarize( 55 | nb_flights = n(), 56 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 57 | ) 58 | 59 | flight_summary 60 | 61 | flight_summary <- nycflights13::flights \%>\% 62 | cross_by_dimensions(carrier, origin, max_dimensions = 1) \%>\% 63 | summarize( 64 | nb_flights = n(), 65 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 66 | ) 67 | 68 | flight_summary 69 | 70 | # This works well when combined with discard_dimensions, which filters for 71 | # an All level and removes the column 72 | 73 | # Look just by carrier 74 | flight_summary \%>\% 75 | discard_dimensions(origin) 76 | 77 | flight_summary \%>\% 78 | discard_dimensions(carrier) 79 | } 80 | \seealso{ 81 | \code{\link[=discard_dimensions]{discard_dimensions()}} 82 | } 83 | -------------------------------------------------------------------------------- /man/cross_by_periods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cross-periods.R 3 | \name{cross_by_periods} 4 | \alias{cross_by_periods} 5 | \alias{cross_by_periods.tbl_lazy} 6 | \alias{cross_by_periods.tbl_df} 7 | \alias{cross_by_periods_cumulative} 8 | \title{Expand a table so that it can be aggregated by a period} 9 | \usage{ 10 | cross_by_periods(tbl, periods, windows, intervals, ...) 11 | 12 | \method{cross_by_periods}{tbl_lazy}( 13 | tbl, 14 | periods = c("week", "month", "quarter"), 15 | windows = c(), 16 | intervals = FALSE, 17 | remote_date_periods = NULL, 18 | ... 19 | ) 20 | 21 | \method{cross_by_periods}{tbl_df}( 22 | tbl, 23 | periods = c("week", "month", "quarter"), 24 | windows = c(), 25 | intervals = FALSE, 26 | ... 27 | ) 28 | 29 | cross_by_periods_cumulative(tbl, remote_date_periods = NULL) 30 | } 31 | \arguments{ 32 | \item{tbl}{A tbl, either local or remote.} 33 | 34 | \item{periods}{A vector of calendar periods. This supports "day", "week", "month", "quarter", 35 | and "year".} 36 | 37 | \item{windows}{A vector of windows, each representing a # of days} 38 | 39 | \item{intervals}{Whether a preselected set of intervals starting from today, such as 40 | "Last Week", "Last 2 Weeks", or "All Time" should be included.} 41 | 42 | \item{...}{Extra arguments, not used} 43 | 44 | \item{remote_date_periods}{For crossing remote tables, an existing remote table 45 | linking dates to their respective periods. By default, use a global accessor function.} 46 | } 47 | \value{ 48 | A tbl (either local or remote, depending on the input), where TODO. It is grouped by 49 | any grouping columns that were in the input, as well as by the new \code{date} and 50 | \code{period} columns. 51 | } 52 | \description{ 53 | Cross by any set of calendar periods (like day or week), rolling windows, 54 | or recent intervals (like "4 Weeks", or "8 Weeks"). This means that each 55 | row in the input will appear potentially multiple times, each time associated 56 | with a different period and date. 57 | } 58 | \examples{ 59 | 60 | library(dplyr) 61 | 62 | flights <- nycflights13::flights \%>\% 63 | mutate(date = as.Date(ISOdate(year, month, day))) 64 | 65 | # find flight delays by week, month, and quarter 66 | flight_summary <- flights \%>\% 67 | cross_by_periods() \%>\% 68 | summarize( 69 | nb_flights = n(), 70 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 71 | ) 72 | 73 | library(ggplot2) 74 | 75 | ggplot(flight_summary, aes(date, avg_arr_delay, color = period)) + 76 | geom_line() 77 | } 78 | -------------------------------------------------------------------------------- /man/cross_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cross-join.R 3 | \name{cross_join} 4 | \alias{cross_join} 5 | \title{Cross join two tables together, including all combinations of rows} 6 | \usage{ 7 | cross_join(x, y, ...) 8 | } 9 | \arguments{ 10 | \item{x, y}{tbls to join} 11 | 12 | \item{...}{additional arguments to be passed on to 13 | \code{\link[dplyr:mutate-joins]{dplyr::full_join()}} or \code{\link[tidyr:expand]{tidyr::crossing()}}} 14 | } 15 | \description{ 16 | Locally, this is equivalent to tidyr::crossing. 17 | } 18 | \examples{ 19 | d1 <- dplyr::tibble(x = 1:3) 20 | d2 <- dplyr::tibble(y = 1:2) 21 | cross_join(d1, d2) 22 | } 23 | -------------------------------------------------------------------------------- /man/discard_constant_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensions.R 3 | \name{discard_constant_dimensions} 4 | \alias{discard_constant_dimensions} 5 | \title{Remove dimensions with a constant level (single value)} 6 | \usage{ 7 | discard_constant_dimensions(tbl, quietly = FALSE) 8 | } 9 | \arguments{ 10 | \item{tbl}{A metric tbl in wide format, with one or more dimensions.} 11 | 12 | \item{quietly}{If FALSE (default), display a message about what columns are 13 | being discarded.} 14 | } 15 | \description{ 16 | Use \code{constant_constant_dimensions} instead of \code{select} so the 17 | removed dimension value is added to the metadata attribute. 18 | } 19 | \examples{ 20 | 21 | library(dplyr) 22 | 23 | flights_nyc_avg_arr_delay \%>\% 24 | filter(origin == "JFK") \%>\% 25 | discard_constant_dimensions() 26 | } 27 | -------------------------------------------------------------------------------- /man/discard_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensions.R 3 | \name{discard_dimensions} 4 | \alias{discard_dimensions} 5 | \title{Discard dimensions} 6 | \usage{ 7 | discard_dimensions(tbl, ..., quietly = FALSE) 8 | } 9 | \arguments{ 10 | \item{tbl}{A metric tbl in wide format, with one or more dimensions.} 11 | 12 | \item{...}{Dimensions to discard.} 13 | 14 | \item{quietly}{If FALSE (default), display a message about what columns are 15 | being discarded.} 16 | } 17 | \description{ 18 | This function discards specified dimensions summary by filtering only for the 19 | attribute value "All". If no dimensions are specified, it discards all of them. 20 | If the table is grouped, this ungroups it. 21 | } 22 | \examples{ 23 | library(dplyr) 24 | 25 | mtcars_by_cyl_gear <- mtcars \%>\% 26 | cross_by_dimensions(cyl, gear) \%>\% 27 | summarize(avg_mpg = mean(mpg)) 28 | 29 | # Discard all dimensions 30 | mtcars_by_cyl_gear \%>\% 31 | discard_dimensions() 32 | 33 | # Remove dimension cyl 34 | mtcars_by_cyl_gear \%>\% 35 | discard_dimensions(cyl) 36 | 37 | # Remove all dimensions except `cyl` 38 | mtcars_by_cyl_gear \%>\% 39 | discard_dimensions(-cyl) 40 | 41 | mtcars_by_cyl_gear \%>\% 42 | discard_dimensions(-cyl, -gear) 43 | 44 | mtcars_by_cyl_gear \%>\% 45 | discard_dimensions(-one_of("cyl", "gear")) 46 | } 47 | \seealso{ 48 | \code{\link[=keep_dimensions]{keep_dimensions()}} 49 | } 50 | -------------------------------------------------------------------------------- /man/flights_nyc_avg_arr_delay.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/examples.R 3 | \docType{data} 4 | \name{flights_nyc_avg_arr_delay} 5 | \alias{flights_nyc_avg_arr_delay} 6 | \title{Average arrival delay of NYC flights} 7 | \format{ 8 | A tbl_metric object with dimensions: 9 | \describe{ 10 | \item{origin}{Airport code, either JFK, LGA, or EWR} 11 | \item{carrier}{Two-letter airline code} 12 | \item{period}{Either week, month, or quarter} 13 | \item{date}{Date} 14 | \item{value}{Average arrival delay of a flight in this period} 15 | } 16 | } 17 | \source{ 18 | \url{https://cran.r-project.org/package=nycflights13} 19 | } 20 | \usage{ 21 | flights_nyc_avg_arr_delay 22 | } 23 | \description{ 24 | A tbl_metric showing the average arrival delay of planes departing from 25 | an NYC airport over the course of 2013. This can be used as an 26 | example of a metric with metadata, and is based on the nycflights13 27 | package. 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/gather_metrics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{gather_metrics} 4 | \alias{gather_metrics} 5 | \title{Gather all the non-dimension columns in a wide metric table} 6 | \usage{ 7 | gather_metrics(tbl, ..., quietly = FALSE) 8 | } 9 | \arguments{ 10 | \item{tbl}{A table to gather metrics from} 11 | 12 | \item{...}{A selection of columns to gather. If empty, all numeric columns 13 | are selected as a default} 14 | 15 | \item{quietly}{A boolean indicating if diagnostic messages are to be printed.} 16 | } 17 | \description{ 18 | Gather all the non-dimension columns in a wide metric table 19 | } 20 | -------------------------------------------------------------------------------- /man/generate_date_periods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate-date-periods.R 3 | \name{generate_date_periods} 4 | \alias{generate_date_periods} 5 | \title{Generate a table with pairings of dates and periods} 6 | \usage{ 7 | generate_date_periods( 8 | start, 9 | end, 10 | periods = c("day", "week", "month", "quarter", "year"), 11 | windows = c(7, 28, 56), 12 | intervals = FALSE, 13 | today_date = lubridate::today() 14 | ) 15 | } 16 | \arguments{ 17 | \item{start}{Start date} 18 | 19 | \item{end}{End date} 20 | 21 | \item{periods}{A vector of calendar periods. This supports "day", "week", "month", "quarter", 22 | and "year".} 23 | 24 | \item{windows}{A vector of windows, each representing a # of days} 25 | 26 | \item{intervals}{Whether a preselected set of intervals starting from today, such as 27 | "Last Week", "Last 2 Weeks", or "All Time" should be included.} 28 | 29 | \item{today_date}{Date to count as "today": by default, \code{lubridate::today()}.} 30 | } 31 | \description{ 32 | Generate a table with pairings of dates and periods 33 | } 34 | -------------------------------------------------------------------------------- /man/keep_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensions.R 3 | \name{keep_dimensions} 4 | \alias{keep_dimensions} 5 | \title{Keep dimensions} 6 | \usage{ 7 | keep_dimensions(tbl, ..., keep_attribute_all = FALSE, quietly = FALSE) 8 | } 9 | \arguments{ 10 | \item{tbl}{A metric tbl in wide format, with one or more dimensions.} 11 | 12 | \item{...}{Dimensions to keep, as bare names 13 | or select helpers like \code{contains()}.} 14 | 15 | \item{keep_attribute_all}{Whether to remove the "All" level from the dimensions} 16 | 17 | \item{quietly}{If FALSE (default), display a message about what columns are 18 | being discarded.} 19 | } 20 | \description{ 21 | This function keeps specified dimensions from a wide metric tbl and discards the rest. 22 | } 23 | \examples{ 24 | library(dplyr) 25 | 26 | mtcars_by_cyl_gear <- mtcars \%>\% 27 | cross_by_dimensions(cyl, gear) \%>\% 28 | summarize(avg_mpg = mean(mpg)) 29 | 30 | mtcars_by_cyl_gear \%>\% 31 | keep_dimensions() 32 | 33 | mtcars_by_cyl_gear \%>\% 34 | keep_dimensions(cyl) 35 | 36 | mtcars_by_cyl_gear \%>\% 37 | keep_dimensions(-cyl) 38 | 39 | mtcars_by_cyl_gear \%>\% 40 | keep_dimensions(cyl, keep_attribute_all = TRUE) 41 | } 42 | \seealso{ 43 | \code{\link[=discard_dimensions]{discard_dimensions()}} 44 | } 45 | -------------------------------------------------------------------------------- /man/metric-group-s3.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric-group.R 3 | \name{metric-group-s3} 4 | \alias{metric-group-s3} 5 | \alias{as_tibble.tbl_metric_group} 6 | \alias{filter.tbl_metric_group} 7 | \alias{select.tbl_metric_group} 8 | \alias{arrange.tbl_metric_group} 9 | \alias{mutate.tbl_metric_group} 10 | \alias{group_by.tbl_metric_group} 11 | \alias{summarise.tbl_metric_group} 12 | \alias{inner_join.tbl_metric_group} 13 | \alias{left_join.tbl_metric_group} 14 | \alias{right_join.tbl_metric_group} 15 | \alias{semi_join.tbl_metric_group} 16 | \alias{anti_join.tbl_metric_group} 17 | \alias{rename.tbl_metric_group} 18 | \alias{count.tbl_metric_group} 19 | \alias{transmute.tbl_metric_group} 20 | \title{Metric dplyr S3 methods} 21 | \usage{ 22 | \method{as_tibble}{tbl_metric_group}(x, ...) 23 | 24 | \method{filter}{tbl_metric_group}(.data, ...) 25 | 26 | \method{select}{tbl_metric_group}(.data, ...) 27 | 28 | \method{arrange}{tbl_metric_group}(.data, ...) 29 | 30 | \method{mutate}{tbl_metric_group}(.data, ...) 31 | 32 | \method{group_by}{tbl_metric_group}(.data, ...) 33 | 34 | \method{summarise}{tbl_metric_group}(.data, ...) 35 | 36 | \method{inner_join}{tbl_metric_group}(.data, ...) 37 | 38 | \method{left_join}{tbl_metric_group}(.data, ...) 39 | 40 | \method{right_join}{tbl_metric_group}(.data, ...) 41 | 42 | \method{semi_join}{tbl_metric_group}(.data, ...) 43 | 44 | \method{anti_join}{tbl_metric_group}(.data, ...) 45 | 46 | \method{rename}{tbl_metric_group}(.data, ...) 47 | 48 | \method{count}{tbl_metric_group}(.data, ...) 49 | 50 | \method{transmute}{tbl_metric_group}(.data, ...) 51 | } 52 | \arguments{ 53 | \item{x}{For \code{as_tibble}, a \code{tbl_metric_group} object} 54 | 55 | \item{...}{Arguments passed on to the appropriate dplyr verb} 56 | 57 | \item{.data}{A \code{tbl_metric_group} object} 58 | } 59 | \description{ 60 | Metric dplyr S3 methods 61 | } 62 | -------------------------------------------------------------------------------- /man/metric-methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric.R 3 | \name{metric-methods} 4 | \alias{metric-methods} 5 | \alias{print.tbl_metric} 6 | \title{S3 operators for metrics, including printing and coercing to a data frame} 7 | \usage{ 8 | \method{print}{tbl_metric}(x, ...) 9 | } 10 | \arguments{ 11 | \item{x}{A tbl_metric} 12 | 13 | \item{...}{Extra arguments, not used.} 14 | } 15 | \description{ 16 | S3 operators for metrics, including printing and coercing to a data frame 17 | } 18 | -------------------------------------------------------------------------------- /man/metric-s3.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric.R 3 | \name{metric-s3} 4 | \alias{metric-s3} 5 | \alias{as_tibble.tbl_metric} 6 | \alias{filter.tbl_metric} 7 | \alias{select.tbl_metric} 8 | \alias{arrange.tbl_metric} 9 | \alias{mutate.tbl_metric} 10 | \alias{group_by.tbl_metric} 11 | \alias{summarise.tbl_metric} 12 | \alias{inner_join.tbl_metric} 13 | \alias{left_join.tbl_metric} 14 | \alias{right_join.tbl_metric} 15 | \alias{semi_join.tbl_metric} 16 | \alias{anti_join.tbl_metric} 17 | \alias{rename.tbl_metric} 18 | \alias{transmute.tbl_metric} 19 | \alias{distinct.tbl_metric} 20 | \title{Metric dplyr S3 methods} 21 | \usage{ 22 | \method{as_tibble}{tbl_metric}(x, ...) 23 | 24 | \method{filter}{tbl_metric}(.data, ...) 25 | 26 | \method{select}{tbl_metric}(.data, ...) 27 | 28 | \method{arrange}{tbl_metric}(.data, ...) 29 | 30 | \method{mutate}{tbl_metric}(.data, ...) 31 | 32 | \method{group_by}{tbl_metric}(.data, ...) 33 | 34 | \method{summarise}{tbl_metric}(.data, ...) 35 | 36 | \method{inner_join}{tbl_metric}(.data, ...) 37 | 38 | \method{left_join}{tbl_metric}(.data, ...) 39 | 40 | \method{right_join}{tbl_metric}(.data, ...) 41 | 42 | \method{semi_join}{tbl_metric}(.data, ...) 43 | 44 | \method{anti_join}{tbl_metric}(.data, ...) 45 | 46 | \method{rename}{tbl_metric}(.data, ...) 47 | 48 | \method{transmute}{tbl_metric}(.data, ...) 49 | 50 | \method{distinct}{tbl_metric}(.data, ...) 51 | } 52 | \arguments{ 53 | \item{x}{For as_data_frame, the} 54 | 55 | \item{...}{Arguments passed on to the appropriate dplyr metric} 56 | 57 | \item{.data}{A \code{tbl_metric} object} 58 | } 59 | \description{ 60 | Metric dplyr S3 methods 61 | } 62 | -------------------------------------------------------------------------------- /man/metric_group.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric-group.R 3 | \name{create_metric_group} 4 | \alias{create_metric_group} 5 | \title{Create a metric group} 6 | \usage{ 7 | create_metric_group(tbl, group_name = NULL, rmd_file = NULL) 8 | } 9 | \arguments{ 10 | \item{tbl}{A wide table of metrics} 11 | 12 | \item{group_name}{An optional underscore separated string as group_name. If 13 | not specified, the category and subcategory of the metrics in \code{tbl} 14 | are concatenated to form the group_name. Specify a custom group_name only 15 | if you are trying to save multiple metric groups in the same Rmd.} 16 | 17 | \item{rmd_file}{The Rmd file that generated the compact metrics, which has 18 | documentation for the metrics and dimensions stored in the YAML front matter. 19 | If no Rmd file is given, it uses the currently running one.} 20 | } 21 | \description{ 22 | Create a metric group 23 | } 24 | -------------------------------------------------------------------------------- /man/print.tbl_metric_group.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric-group.R 3 | \name{print.tbl_metric_group} 4 | \alias{print.tbl_metric_group} 5 | \title{Print method for tbl_metric_group} 6 | \usage{ 7 | \method{print}{tbl_metric_group}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A tbl_metric_group object to \code{print}} 11 | 12 | \item{...}{Additional parameters passed to \code{print}} 13 | } 14 | \description{ 15 | Print method for tbl_metric_group 16 | } 17 | -------------------------------------------------------------------------------- /man/reclass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric.R 3 | \name{reclass} 4 | \alias{reclass} 5 | \title{Copy class and attributes from the original version of an object to a modified version.} 6 | \usage{ 7 | reclass(x, result) 8 | } 9 | \arguments{ 10 | \item{x}{The original object, which has a class/attributes to copy} 11 | 12 | \item{result}{The modified object, which is / might be missing the 13 | class/attributes.} 14 | } 15 | \value{ 16 | \code{result}, now with class/attributes restored. 17 | } 18 | \description{ 19 | Copied over from https://github.com/tidyverse/dplyr/issues/719 20 | } 21 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl-metric.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{filter} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 12 | 13 | \describe{ 14 | \item{dplyr}{\code{\link[dplyr]{filter}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /man/remove_attribute_all.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensions.R 3 | \name{remove_attribute_all} 4 | \alias{remove_attribute_all} 5 | \title{Remove attribute "All"} 6 | \usage{ 7 | remove_attribute_all(tbl, ...) 8 | } 9 | \arguments{ 10 | \item{tbl}{A metric tbl in wide format, with one or more dimensions.} 11 | 12 | \item{...}{Dimensions from which "All" should be removed, as bare names 13 | or select helpers like \code{contains()}.} 14 | } 15 | \description{ 16 | This function removes the aggregate segment "All" for specified dimensions. 17 | If no dimensions are specified, it removes the segment "All" from all 18 | dimensions 19 | } 20 | \examples{ 21 | library(dplyr) 22 | 23 | mtcars_by_cyl_gear <- mtcars \%>\% 24 | cross_by_dimensions(cyl, gear, vs) \%>\% 25 | summarize(avg_mpg = mean(mpg)) 26 | 27 | mtcars_by_cyl_gear \%>\% 28 | remove_attribute_all() 29 | 30 | mtcars_by_cyl_gear \%>\% 31 | remove_attribute_all(cyl) 32 | 33 | mtcars_by_cyl_gear \%>\% 34 | remove_attribute_all(-cyl, -gear) 35 | } 36 | -------------------------------------------------------------------------------- /man/use_metrics_scaffold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/use-metrics-scaffold.R 3 | \name{use_metrics_scaffold} 4 | \alias{use_metrics_scaffold} 5 | \title{Create a scaffold for documenting metrics} 6 | \usage{ 7 | use_metrics_scaffold(tbl) 8 | } 9 | \arguments{ 10 | \item{tbl}{A wide metric tbl, including \code{date}, \code{period}, optionally one or 11 | more dimensions, and one or more calculated metrics.} 12 | } 13 | \description{ 14 | Use this to generate a YAML scaffold for documenting metrics, just prior to running 15 | \code{\link[=create_metrics]{create_metrics()}}. You can place this into the header of an Rmd and 16 | fill in the names and descriptions. 17 | } 18 | \examples{ 19 | 20 | library(dplyr) 21 | 22 | flights <- nycflights13::flights \%>\% 23 | mutate(date = as.Date(ISOdate(year, month, day))) 24 | 25 | # find flight delays by week, month, and quarter 26 | flight_summary <- flights \%>\% 27 | cross_by_dimensions(origin) \%>\% 28 | cross_by_periods() \%>\% 29 | summarize( 30 | nb_flights = n(), 31 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 32 | ) 33 | 34 | use_metrics_scaffold(flight_summary) 35 | } 36 | -------------------------------------------------------------------------------- /man/var_names_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensions.R 3 | \name{var_names_dimensions} 4 | \alias{var_names_dimensions} 5 | \title{Get names of columns that are dimensions} 6 | \usage{ 7 | var_names_dimensions(tbl) 8 | } 9 | \arguments{ 10 | \item{tbl}{A tbl_metric} 11 | } 12 | \description{ 13 | Any character of factor column not named date, value, period, or metric are 14 | considered dimensions, as well as any columns ending in _id 15 | } 16 | -------------------------------------------------------------------------------- /man/var_names_not_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensions.R 3 | \name{var_names_not_dimensions} 4 | \alias{var_names_not_dimensions} 5 | \title{Get names of columns that are NOT dimensions} 6 | \usage{ 7 | var_names_not_dimensions(tbl) 8 | } 9 | \arguments{ 10 | \item{tbl}{A tbl_metric} 11 | } 12 | \description{ 13 | Get names of columns that are NOT dimensions 14 | } 15 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if(requireNamespace('spelling', quietly = TRUE)) 2 | spelling::spell_check_test(vignettes = TRUE, error = FALSE, 3 | skip_on_cran = TRUE) 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(tidymetrics) 3 | 4 | test_check("tidymetrics") 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-src.R: -------------------------------------------------------------------------------- 1 | # library(lubridate) 2 | # library(tidyr) 3 | # library(dplyr) 4 | # 5 | # PORT <- if (identical(Sys.getenv("TRAVIS"), "true")) { 6 | # 5432 7 | # } else { 8 | # port <- Sys.getenv("POSTGRES_PORT") 9 | # ifelse(port == "", 5433, port) 10 | # } 11 | # USER <- if (identical(Sys.getenv("TRAVIS"), "true")) { 12 | # "postgres" 13 | # } else { 14 | # Sys.info()[["user"]] 15 | # } 16 | # 17 | # 18 | # # Connect to DB and add small_flights data ---- 19 | # 20 | # dbcon <- DBI::dbConnect(RPostgreSQL::PostgreSQL(), 21 | # dbname = "postgres", 22 | # user = USER, 23 | # password = "", 24 | # host = "localhost", 25 | # port = PORT 26 | # ) 27 | # 28 | # con <- dbplyr::src_dbi(dbcon, auto_disconnect = TRUE) 29 | # 30 | # small_flights <- 31 | # nycflights13::flights %>% 32 | # filter(month < 3) %>% 33 | # mutate(date = as.Date(ISOdate(year, month, day))) %>% 34 | # select(date, carrier, origin, dep_delay, arr_delay, distance) 35 | # 36 | # 37 | # tbl_small_flights <- copy_to(con, small_flights, "small_flights", overwrite = TRUE) 38 | # 39 | # 40 | # # Copy in calendar dates to db ---- 41 | # 42 | # dates <- seq(as.Date("2013-01-01"), 43 | # today() + years(2), 44 | # by = 1 45 | # ) 46 | # calendar_periods <- tibble(date_original = dates) %>% 47 | # crossing(period = c("day", "week", "month", "quarter", "year")) %>% 48 | # group_by(period) %>% 49 | # mutate(date = floor_date(date_original, period[1], week_start = 1)) %>% 50 | # ungroup() %>% 51 | # select(period, date, date_original) 52 | # 53 | # 54 | # tbl_remote_date_periods <- 55 | # copy_to(con, calendar_periods, "remote_date_periods", overwrite = TRUE) 56 | # 57 | # # set global remote_date_periods option 58 | # options(remote_date_periods = function() { 59 | # tbl_remote_date_periods 60 | # }) 61 | -------------------------------------------------------------------------------- /tests/testthat/test-create-metrics.R: -------------------------------------------------------------------------------- 1 | context("test-create-metrics") 2 | 3 | skip_if_not_installed("nycflights13") 4 | 5 | suppressPackageStartupMessages(library(dplyr)) 6 | library(stringr) 7 | library(nycflights13) 8 | 9 | test_that("Can create metrics based on an Rmd, and operate on them", { 10 | # find flight delays by week, month, and quarter 11 | # To make it faster, filter for only flights < 10 AM 12 | flight_summary <- flights %>% 13 | filter(dep_time <= 1000) %>% 14 | mutate(date = as.Date(ISOdate(year, month, day))) %>% 15 | cross_by_dimensions(origin, carrier) %>% 16 | cross_by_periods() %>% 17 | summarize( 18 | nb_flights = n(), 19 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 20 | ) 21 | 22 | rmd <- system.file("extdata", "metrics_flights_nyc.Rmd", package = "tidymetrics") 23 | metrics <- create_metrics(flight_summary, rmd_file = rmd) 24 | 25 | expect_equal(length(metrics), 2) 26 | 27 | for (m in metrics) { 28 | check_metric(m) 29 | 30 | expect_equal(sort(unique(m$period)), c("month", "quarter", "week")) 31 | expect_gt(nrow(m), 1000) 32 | 33 | # print 34 | output <- capture.output(print(m)) 35 | expect_true(any(stringr::str_detect(output, "Dimensions: origin, carrier"))) 36 | expect_true(any(stringr::str_detect(output, "Periods:.*month"))) 37 | expect_true(any(stringr::str_detect(output, "tibble"))) 38 | expect_true(any(stringr::str_detect(output, "more rows"))) 39 | 40 | # dplyr verbs 41 | filtered <- m %>% 42 | filter(period == "month") 43 | 44 | expect_is(filtered, "tbl_metric") 45 | expect_equal(unique(filtered$period), "month") 46 | expect_gt(nrow(filtered), 100) 47 | 48 | counted <- m %>% 49 | count(date, period) 50 | 51 | expect_is(counted, "tbl_metric") 52 | expect_equal(colnames(counted), c("date", "period", "n")) 53 | 54 | arranged <- m %>% 55 | arrange(value) 56 | 57 | expect_is(arranged, "tbl_metric") 58 | expect_equal(arranged$value, sort(m$value)) 59 | 60 | # condense 61 | condensed <- m %>% 62 | condense_metric(max_dimensions = 1) 63 | 64 | expect_false(any(condensed$origin != "All" & condensed$carrier != "All")) 65 | expect_true(any(condensed$origin != "All" & condensed$carrier == "All")) 66 | expect_true(any(condensed$origin == "All" & condensed$carrier != "All")) 67 | } 68 | }) 69 | -------------------------------------------------------------------------------- /tests/testthat/test-cross-dimensions.R: -------------------------------------------------------------------------------- 1 | context("cross by dimensions") 2 | 3 | test_that("cross_by_dimensions works with a local table", { 4 | cyl_am_crossed <- mtcars %>% 5 | cross_by_dimensions(cyl, am) %>% 6 | count() 7 | 8 | expect_equal(nrow(cyl_am_crossed), 12) 9 | expect_equal(group_vars(cyl_am_crossed), c("cyl", "am")) 10 | expect_equal(sort(unique(cyl_am_crossed$cyl)), c("4", "6", "8", "All")) 11 | expect_equal(sort(unique(cyl_am_crossed$am)), c("0", "1", "All")) 12 | 13 | cyl_am_vs_crossed <- mtcars %>% 14 | cross_by_dimensions(cyl, am, vs) %>% 15 | count() 16 | 17 | expect_equal(nrow(cyl_am_vs_crossed), 30) 18 | expect_equal(group_vars(cyl_am_vs_crossed), c("cyl", "am", "vs")) 19 | expect_equal(sort(unique(cyl_am_vs_crossed$cyl)), c("4", "6", "8", "All")) 20 | expect_equal(sort(unique(cyl_am_vs_crossed$am)), c("0", "1", "All")) 21 | expect_equal(sort(unique(cyl_am_vs_crossed$vs)), c("0", "1", "All")) 22 | }) 23 | 24 | test_that("cross_by_dimensions works with max_dimensions", { 25 | cyl_am_crossed_0 <- mtcars %>% 26 | cross_by_dimensions(cyl, am, max_dimensions = 0) %>% 27 | count() 28 | 29 | expect_equal(nrow(cyl_am_crossed_0), 1) 30 | expect_equal(group_vars(cyl_am_crossed_0), c("cyl", "am")) 31 | expect_equal(sort(unique(cyl_am_crossed_0$cyl)), "All") 32 | expect_equal(sort(unique(cyl_am_crossed_0$am)), "All") 33 | 34 | cyl_am_crossed_1 <- mtcars %>% 35 | cross_by_dimensions(cyl, am, max_dimensions = 1) %>% 36 | count() 37 | 38 | expect_equal(nrow(cyl_am_crossed_1), 6) 39 | expect_equal(group_vars(cyl_am_crossed_1), c("cyl", "am")) 40 | expect_equal(sort(unique(cyl_am_crossed_1$cyl)), c("4", "6", "8", "All")) 41 | expect_equal(sort(unique(cyl_am_crossed_1$am)), c("0", "1", "All")) 42 | expect_equal(min((cyl_am_crossed_1$cyl == "All") + (cyl_am_crossed_1$am == "All")), 1) 43 | 44 | cyl_am_vs_crossed_2 <- mtcars %>% 45 | cross_by_dimensions(cyl, am, vs, max_dimensions = 2) %>% 46 | count() 47 | 48 | expect_equal(nrow(cyl_am_vs_crossed_2), 23) 49 | expect_equal(group_vars(cyl_am_vs_crossed_2), c("cyl", "am", "vs")) 50 | expect_equal(sort(unique(cyl_am_vs_crossed_2$cyl)), c("4", "6", "8", "All")) 51 | expect_equal(sort(unique(cyl_am_vs_crossed_2$am)), c("0", "1", "All")) 52 | expect_true(all((cyl_am_vs_crossed_2$cyl == "All") | (cyl_am_vs_crossed_2$am == "All") | (cyl_am_vs_crossed_2$vs == "All"))) 53 | }) 54 | -------------------------------------------------------------------------------- /tests/testthat/test-cross-periods.R: -------------------------------------------------------------------------------- 1 | context("test-cross-periods") 2 | 3 | skip_if_not_installed("nycflights13") 4 | 5 | library(dplyr) 6 | library(lubridate) 7 | 8 | flight_data <- nycflights13::flights %>% 9 | mutate(date = as.Date(ISOdate(year, month, day))) %>% 10 | select(date, carrier, origin, dep_delay, arr_delay, distance) 11 | 12 | test_that("can cross a local table by calendar periods", { 13 | p <- c("day", "week", "month", "quarter", "year") 14 | 15 | crossed <- flight_data %>% 16 | cross_by_periods(periods = p) 17 | 18 | expect_equal(group_vars(crossed), c("period", "date")) 19 | expect_true(all(c("period", "date", "date_original", "carrier") %in% colnames(crossed))) 20 | 21 | # has all the periods 22 | expect_equal(sort(unique(crossed$period)), sort(p)) 23 | 24 | expect_equal(min(crossed$date), as.Date(floor_date(as.Date("2013-01-01"), "week", week_start = 1))) 25 | expect_equal(max(crossed$date), as.Date("2013-12-31")) 26 | }) 27 | 28 | 29 | # test_that("can cross a postgres table by calendar periods", { 30 | # p <- c("day", "week", "month", "quarter", "year") 31 | # 32 | # crossed <- tbl_small_flights %>% 33 | # cross_by_periods(periods = p) %>% 34 | # collect() 35 | # 36 | # expect_equal(group_vars(crossed), c("period", "date")) 37 | # expect_true(all(c("period", "date", "date_original", "carrier") %in% colnames(crossed))) 38 | # 39 | # # has all the periods 40 | # expect_equal(sort(unique(crossed$period)), sort(p)) 41 | # 42 | # expect_equal(min(crossed$date), as.Date(floor_date(as.Date("2013-01-01"), "week", week_start = 1))) 43 | # expect_equal(max(crossed$date), as.Date("2013-02-28")) 44 | # }) 45 | 46 | 47 | test_that("can cross a local table by windows", { 48 | w <- c(7, 14) 49 | 50 | crossed <- flight_data %>% 51 | cross_by_periods(periods = c(), windows = w) 52 | 53 | expect_equal(group_vars(crossed), c("period", "date")) 54 | expect_true(all(c("period", "date", "date_original", "carrier") %in% colnames(crossed))) 55 | 56 | # has all the periods 57 | expect_equal(sort(unique(crossed$period)), c("rolling_14d", "rolling_7d")) 58 | expect_equal(min(crossed$date), as.Date("2013-01-01")) 59 | expect_equal(max(crossed$date), as.Date("2013-12-31") + 13) 60 | }) 61 | -------------------------------------------------------------------------------- /tests/testthat/test-discard-dimensions.R: -------------------------------------------------------------------------------- 1 | context("test-discard-dimensions") 2 | 3 | library(dplyr) 4 | 5 | mtcars_by_cyl_gear_am <- mtcars %>% 6 | cross_by_dimensions(cyl, gear, am) %>% 7 | summarize( 8 | nb_cars = n(), 9 | avg_mpg = mean(mpg) 10 | ) 11 | 12 | test_that("discard_dimensions drops desired dimensions", { 13 | discarded_am <- mtcars_by_cyl_gear_am %>% 14 | discard_dimensions(am) 15 | 16 | expect_equal(colnames(discarded_am), c("cyl", "gear", "nb_cars", "avg_mpg")) 17 | expect_equal(sort(unique(discarded_am$cyl)), c("4", "6", "8", "All")) 18 | 19 | discarded_cyl_am <- mtcars_by_cyl_gear_am %>% 20 | discard_dimensions(cyl, am) 21 | 22 | expect_equal(nrow(discarded_cyl_am), 4) 23 | expect_equal(colnames(discarded_cyl_am), c("gear", "nb_cars", "avg_mpg")) 24 | }) 25 | 26 | test_that("discard_dimensions works with select helpers", { 27 | discard_all_but_am <- mtcars_by_cyl_gear_am %>% 28 | discard_dimensions(-am) 29 | 30 | expect_equal(colnames(discard_all_but_am), c("am", "nb_cars", "avg_mpg")) 31 | 32 | discard_gear_am <- mtcars_by_cyl_gear_am %>% 33 | discard_dimensions(contains("a")) 34 | 35 | expect_equal(colnames(discard_gear_am), c("cyl", "nb_cars", "avg_mpg")) 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test-keep-dimensions.R: -------------------------------------------------------------------------------- 1 | context("test-keep-dimensions") 2 | 3 | library(dplyr) 4 | 5 | mtcars_by_cyl_gear_am <- mtcars %>% 6 | cross_by_dimensions(cyl, gear, am) %>% 7 | summarize( 8 | nb_cars = n(), 9 | avg_mpg = mean(mpg) 10 | ) 11 | 12 | test_that("keep_dimensions retains desired dimensions", { 13 | kept_am <- mtcars_by_cyl_gear_am %>% 14 | keep_dimensions(am) 15 | 16 | expect_equal(nrow(kept_am), 2) 17 | expect_equal(colnames(kept_am), c("am", "nb_cars", "avg_mpg")) 18 | expect_equal(sort(kept_am$am), c("0", "1")) 19 | 20 | kept_cyl_am <- mtcars_by_cyl_gear_am %>% 21 | keep_dimensions(cyl, am) 22 | 23 | expect_equal(nrow(kept_cyl_am), 6) 24 | expect_equal(colnames(kept_cyl_am), c("cyl", "am", "nb_cars", "avg_mpg")) 25 | }) 26 | 27 | test_that("keep_dimensions works with select helpers", { 28 | kept_all_but_am <- mtcars_by_cyl_gear_am %>% 29 | keep_dimensions(-am) 30 | 31 | expect_equal(colnames(kept_all_but_am), c("cyl", "gear", "nb_cars", "avg_mpg")) 32 | 33 | kept_gear_am <- mtcars_by_cyl_gear_am %>% 34 | keep_dimensions(contains("a")) 35 | 36 | expect_equal(colnames(kept_gear_am), c("gear", "am", "nb_cars", "avg_mpg")) 37 | }) 38 | 39 | test_that("keep_dimensions works with keep_attribute_all = TRUE", { 40 | kept_with_all <- mtcars_by_cyl_gear_am %>% 41 | keep_dimensions(am, cyl, keep_attribute_all = TRUE) 42 | 43 | expect_equal(colnames(kept_with_all), c("cyl", "am", "nb_cars", "avg_mpg")) 44 | 45 | expect_equal(sort(unique(kept_with_all$cyl)), c("4", "6", "8", "All")) 46 | expect_equal(sort(unique(kept_with_all$am)), c("0", "1", "All")) 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-metric-group.R: -------------------------------------------------------------------------------- 1 | context("test-metric-group") 2 | 3 | skip_if_not_installed("nycflights13") 4 | 5 | library(dplyr) 6 | library(nycflights13) 7 | 8 | test_that("Can create a metric group", { 9 | # find flight delays by week, month, and quarter 10 | flight_summary <- flights %>% 11 | mutate(date = as.Date(ISOdate(year, month, day))) %>% 12 | cross_by_dimensions(origin, carrier) %>% 13 | cross_by_periods() %>% 14 | summarize( 15 | nb_flights = n(), 16 | avg_arr_delay = mean(arr_delay, na.rm = TRUE) 17 | ) 18 | 19 | rmd <- system.file("extdata", "metrics_flights_nyc.Rmd", package = "tidymetrics") 20 | mg <- create_metric_group(flight_summary, rmd_file = rmd) 21 | 22 | expect_is(mg, "tbl_metric_group") 23 | 24 | mg_metadata <- attr(mg, "metadata") 25 | 26 | expect_equal(mg_metadata$category, "flights") 27 | expect_equal(mg_metadata$subcategory, "nyc") 28 | expect_equal(names(mg_metadata$metrics), c("nb_flights", "avg_arr_delay")) 29 | expect_equal(length(mg_metadata$dimensions), 2) 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test-use-metrics-scaffold.R: -------------------------------------------------------------------------------- 1 | context("test-use-metrics-scaffold") 2 | 3 | library(dplyr) 4 | 5 | test_that("use_metrics_scaffold works", { 6 | car_metrics <- mtcars %>% 7 | cross_by_dimensions(cyl, am) %>% 8 | summarize( 9 | nb_cars = n(), 10 | avg_wt = mean(wt) 11 | ) 12 | 13 | capture.output(scaffold <- use_metrics_scaffold(car_metrics)) 14 | 15 | # It returns YAML, make sure it found the metrics and dimensions appropriately 16 | expect_equal(names(scaffold), c("metrics", "dimensions")) 17 | expect_equal(names(scaffold$metrics), c("nb_cars", "avg_wt")) 18 | expect_equal(names(scaffold$dimensions), c("cyl", "am")) 19 | }) 20 | -------------------------------------------------------------------------------- /tidymetrics.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | --------------------------------------------------------------------------------