├── .Rbuildignore ├── .github ├── .gitignore ├── CONTRIBUTING.md └── workflows │ ├── R-CMD-check.yaml │ ├── draft-pdf.yml │ └── test-coverage.yaml ├── .gitignore ├── CRAN-SUBMISSION ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── dot.R ├── dtrackr-package.R ├── dtrackr.R ├── experimental │ ├── column-tracking.R │ └── group-counting.R └── utils-pipe.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── docs ├── 404.html ├── CONTRIBUTING.html ├── LICENSE-text.html ├── LICENSE.html ├── apple-touch-icon-120x120.png ├── apple-touch-icon-152x152.png ├── apple-touch-icon-180x180.png ├── apple-touch-icon-60x60.png ├── apple-touch-icon-76x76.png ├── apple-touch-icon.png ├── articles │ ├── consort-example.html │ ├── consort-example_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── dtrackr-options.html │ ├── dtrackr-options_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── dtrackr.html │ ├── dtrackr_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── index.html │ ├── joining-pipelines.html │ ├── joining-pipelines_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ ├── tracking-provenance.html │ └── tracking-provenance_files │ │ ├── accessible-code-block-0.0.1 │ │ └── empty-anchor.js │ │ └── anchor-sections-1.0 │ │ ├── anchor-sections.css │ │ └── anchor-sections.js ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── deps │ ├── bootstrap-5.3.1 │ │ ├── bootstrap.bundle.min.js │ │ ├── bootstrap.bundle.min.js.map │ │ └── bootstrap.min.css │ ├── bootstrap-toc-1.0.1 │ │ └── bootstrap-toc.min.js │ ├── clipboard.js-2.0.11 │ │ └── clipboard.min.js │ ├── data-deps.txt │ ├── font-awesome-6.4.2 │ │ ├── css │ │ │ ├── all.css │ │ │ ├── all.min.css │ │ │ ├── v4-shims.css │ │ │ └── v4-shims.min.css │ │ └── webfonts │ │ │ ├── fa-brands-400.ttf │ 
│ │ ├── fa-brands-400.woff2 │ │ │ ├── fa-regular-400.ttf │ │ │ ├── fa-regular-400.woff2 │ │ │ ├── fa-solid-900.ttf │ │ │ ├── fa-solid-900.woff2 │ │ │ ├── fa-v4compatibility.ttf │ │ │ └── fa-v4compatibility.woff2 │ ├── headroom-0.11.0 │ │ ├── headroom.min.js │ │ └── jQuery.headroom.min.js │ ├── jquery-3.6.0 │ │ ├── jquery-3.6.0.js │ │ ├── jquery-3.6.0.min.js │ │ └── jquery-3.6.0.min.map │ └── search-1.0.0 │ │ ├── autocomplete.jquery.min.js │ │ ├── fuse.min.js │ │ └── mark.min.js ├── docsearch.css ├── docsearch.js ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon-48x48.png ├── favicon.ico ├── favicon.svg ├── index.html ├── katex-auto.js ├── lightswitch.js ├── link.svg ├── logo.png ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── ILPD.html │ ├── Rplot001.png │ ├── add_count.html │ ├── add_count.trackr_df.html │ ├── add_tally.html │ ├── anti_join.trackr_df.html │ ├── arrange.trackr_df.html │ ├── bind_cols.html │ ├── bind_rows.html │ ├── capture_exclusions.html │ ├── comment.html │ ├── count_subgroup.html │ ├── distinct.trackr_df.html │ ├── dot2svg.html │ ├── dtrackr-package.html │ ├── dtrackr.html │ ├── exclude_all.html │ ├── excluded.html │ ├── figures │ │ ├── README-flowchart.png │ │ ├── demo.dot │ │ ├── demo.pdf │ │ ├── demo.png │ │ ├── demo.svg │ │ ├── dtrackr.xcf │ │ ├── lifecycle-deprecated.svg │ │ ├── lifecycle-experimental.svg │ │ ├── lifecycle-stable.svg │ │ ├── lifecycle-superseded.svg │ │ └── logo.png │ ├── filter.html │ ├── filter.trackr_df.html │ ├── flowchart.html │ ├── full_join.trackr_df.html │ ├── group_by.trackr_df.html │ ├── group_modify.trackr_df.html │ ├── history.html │ ├── include_any.html │ ├── index.html │ ├── inner_join.trackr_df.html │ ├── intersect.trackr_df.html │ ├── landscape.html │ ├── left_join.trackr_df.html │ ├── mutate.trackr_df.html │ ├── nest_join.trackr_df.html │ ├── p_add_count.html │ ├── p_add_tally.html │ ├── p_anti_join.html │ ├── p_arrange.html │ ├── p_bind_cols.html │ ├── 
p_bind_rows.html │ ├── p_capture_exclusions.html │ ├── p_clear.html │ ├── p_comment.html │ ├── p_copy.html │ ├── p_count_if.html │ ├── p_count_subgroup.html │ ├── p_distinct.html │ ├── p_exclude_all.html │ ├── p_excluded.html │ ├── p_filter.html │ ├── p_flowchart.html │ ├── p_flowcharts.html │ ├── p_full_join.html │ ├── p_get.html │ ├── p_get_as_dot.html │ ├── p_group_by.html │ ├── p_group_modify.html │ ├── p_include_any.html │ ├── p_inner_join.html │ ├── p_intersect.html │ ├── p_left_join.html │ ├── p_mutate.html │ ├── p_nest_join.html │ ├── p_pause.html │ ├── p_pivot_longer.html │ ├── p_pivot_wider.html │ ├── p_reframe.html │ ├── p_relocate.html │ ├── p_rename.html │ ├── p_rename_with.html │ ├── p_resume.html │ ├── p_right_join.html │ ├── p_select.html │ ├── p_semi_join.html │ ├── p_set.html │ ├── p_setdiff.html │ ├── p_slice.html │ ├── p_slice_head.html │ ├── p_slice_max.html │ ├── p_slice_min.html │ ├── p_slice_sample.html │ ├── p_slice_tail.html │ ├── p_status.html │ ├── p_summarise.html │ ├── p_tagged.html │ ├── p_track.html │ ├── p_transmute.html │ ├── p_ungroup.html │ ├── p_union.html │ ├── p_union_all.html │ ├── p_untrack.html │ ├── pause.html │ ├── pipe.html │ ├── pivot_longer.trackr_df.html │ ├── pivot_wider.trackr_df.html │ ├── plot.trackr_graph.html │ ├── print.trackr_graph.html │ ├── reexports.html │ ├── reframe.trackr_df.html │ ├── relocate.trackr_df.html │ ├── rename.trackr_df.html │ ├── rename_with.trackr_df.html │ ├── resume.html │ ├── right_join.trackr_df.html │ ├── save_dot.html │ ├── select.trackr_df.html │ ├── semi_join.trackr_df.html │ ├── setdiff.trackr_df.html │ ├── slice.trackr_df.html │ ├── slice_head.trackr_df.html │ ├── slice_max.trackr_df.html │ ├── slice_min.trackr_df.html │ ├── slice_sample.trackr_df.html │ ├── slice_tail.trackr_df.html │ ├── status.html │ ├── std_size.html │ ├── summarise.trackr_df.html │ ├── tagged.html │ ├── track.html │ ├── transmute.trackr_df.html │ ├── ungroup.trackr_df.html │ ├── union.trackr_df.html │ ├── 
union_all.trackr_df.html │ └── untrack.html ├── search.json ├── site.webmanifest ├── sitemap.xml ├── web-app-manifest-192x192.png └── web-app-manifest-512x512.png ├── inst ├── CITATION ├── WORDLIST ├── examples │ ├── add-count-tally-examples.R │ ├── anti-join-examples.R │ ├── arrange-examples.R │ ├── full-join-examples.R │ ├── inner-join-examples.R │ ├── left-join-examples.R │ ├── mutate-examples.R │ ├── nest-join-examples.R │ ├── relocate-examples.R │ ├── rename-examples.R │ ├── select-examples.R │ ├── semi-join-examples.R │ ├── set-operation-examples.R │ ├── slice-examples.R │ ├── slice-head-tail-examples.R │ ├── slice-max-min-examples.R │ ├── slice-sample-examples.R │ └── transmute-examples.R └── lib │ ├── viz.js │ └── viz_licence.md ├── man ├── add_count.trackr_df.Rd ├── add_tally.Rd ├── anti_join.trackr_df.Rd ├── arrange.trackr_df.Rd ├── bind_cols.Rd ├── bind_rows.Rd ├── capture_exclusions.Rd ├── comment.Rd ├── count_subgroup.Rd ├── distinct.trackr_df.Rd ├── dot2svg.Rd ├── dtrackr-package.Rd ├── exclude_all.Rd ├── excluded.Rd ├── figures │ ├── README-flowchart.png │ ├── dtrackr.xcf │ ├── lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-stable.svg │ ├── lifecycle-superseded.svg │ └── logo.png ├── filter.trackr_df.Rd ├── flowchart.Rd ├── full_join.trackr_df.Rd ├── group_by.trackr_df.Rd ├── group_modify.trackr_df.Rd ├── history.Rd ├── include_any.Rd ├── inner_join.trackr_df.Rd ├── intersect.trackr_df.Rd ├── left_join.trackr_df.Rd ├── mutate.trackr_df.Rd ├── nest_join.trackr_df.Rd ├── p_add_count.Rd ├── p_add_tally.Rd ├── p_anti_join.Rd ├── p_arrange.Rd ├── p_bind_cols.Rd ├── p_bind_rows.Rd ├── p_capture_exclusions.Rd ├── p_clear.Rd ├── p_comment.Rd ├── p_copy.Rd ├── p_count_if.Rd ├── p_count_subgroup.Rd ├── p_distinct.Rd ├── p_exclude_all.Rd ├── p_excluded.Rd ├── p_filter.Rd ├── p_flowchart.Rd ├── p_full_join.Rd ├── p_get.Rd ├── p_get_as_dot.Rd ├── p_group_by.Rd ├── p_group_modify.Rd ├── p_include_any.Rd ├── p_inner_join.Rd ├── 
p_intersect.Rd ├── p_left_join.Rd ├── p_mutate.Rd ├── p_nest_join.Rd ├── p_pause.Rd ├── p_pivot_longer.Rd ├── p_pivot_wider.Rd ├── p_reframe.Rd ├── p_relocate.Rd ├── p_rename.Rd ├── p_rename_with.Rd ├── p_resume.Rd ├── p_right_join.Rd ├── p_select.Rd ├── p_semi_join.Rd ├── p_set.Rd ├── p_setdiff.Rd ├── p_slice.Rd ├── p_slice_head.Rd ├── p_slice_max.Rd ├── p_slice_min.Rd ├── p_slice_sample.Rd ├── p_slice_tail.Rd ├── p_status.Rd ├── p_summarise.Rd ├── p_tagged.Rd ├── p_track.Rd ├── p_transmute.Rd ├── p_ungroup.Rd ├── p_union.Rd ├── p_union_all.Rd ├── p_untrack.Rd ├── pause.Rd ├── pipe.Rd ├── pivot_longer.trackr_df.Rd ├── pivot_wider.trackr_df.Rd ├── plot.trackr_graph.Rd ├── print.trackr_graph.Rd ├── reexports.Rd ├── reframe.trackr_df.Rd ├── relocate.trackr_df.Rd ├── rename.trackr_df.Rd ├── rename_with.trackr_df.Rd ├── resume.Rd ├── right_join.trackr_df.Rd ├── save_dot.Rd ├── select.trackr_df.Rd ├── semi_join.trackr_df.Rd ├── setdiff.trackr_df.Rd ├── slice.trackr_df.Rd ├── slice_head.trackr_df.Rd ├── slice_max.trackr_df.Rd ├── slice_min.trackr_df.Rd ├── slice_sample.trackr_df.Rd ├── slice_tail.trackr_df.Rd ├── status.Rd ├── std_size.Rd ├── summarise.trackr_df.Rd ├── tagged.Rd ├── track.Rd ├── transmute.trackr_df.Rd ├── ungroup.trackr_df.Rd ├── union.trackr_df.Rd ├── union_all.trackr_df.Rd └── untrack.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon-48x48.png │ ├── favicon.ico │ ├── favicon.svg │ ├── site.webmanifest │ ├── web-app-manifest-192x192.png │ └── web-app-manifest-512x512.png ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── test-examples.R │ ├── test-github-issues.R │ ├── test-group_by.R │ ├── test-p_comment.R │ ├── test-p_exclude.R │ ├── test-p_group_modify.R │ ├── test-p_include.R │ ├── test-p_others.R │ ├── 
test-p_status.R │ └── test-rsvg-scaling.R ├── trackr.Rproj └── vignettes ├── .gitignore ├── consort-example.Rmd ├── dtrackr-options.Rmd ├── dtrackr.Rmd ├── dtrackr.bib ├── joining-pipelines.Rmd └── joss ├── figure1-consort.pdf └── paper.md /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^trackr\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^cran-comments\.md$ 5 | ^LICENSE\.md$ 6 | ^_pkgdown\.yml$ 7 | ^docs$ 8 | ^pkgdown$ 9 | ^data-raw$ 10 | ^\.github$ 11 | ^CRAN-SUBMISSION$ 12 | ^codecov\.yml$ 13 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | 14 | runs-on: ${{ matrix.config.os }} 15 | if: "!contains(github.event.head_commit.message, 'minor')" 16 | 17 | continue-on-error: false 18 | 19 | name: ${{ matrix.config.os }} R:(${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | # - {os: ubuntu-20.04, r: '4.1.0'} 26 | # - {os: macOS-11, r: '4.1.0'} 27 | # - {os: windows-2022, r: '4.1.0'} 28 | # - {os: ubuntu-20.04, r: '3.6.1'} 29 | # - {os: ubuntu-20.04, r: '4.2.0'} 30 | # - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} 31 | 32 | - {os: macOS-latest, r: 'release'} 33 | 34 | - {os: windows-latest, r: 'release'} 35 | # Use 3.6 to trigger usage of RTools35 36 | # - {os: windows-latest, r: '3.6'} 37 | 38 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 39 | - {os: ubuntu-latest, r: 'release'} 40 | - {os: ubuntu-latest, r: 'oldrel-1'} 41 | - {os: ubuntu-latest, r: 'oldrel-2'} 42 | # - {os: ubuntu-latest, r: 'oldrel-3'} 43 | # - {os: ubuntu-latest, r: 'oldrel-4'} 44 | 45 | env: 46 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 47 | R_KEEP_PKG_SOURCE: yes 48 | 49 | steps: 50 | - uses: actions/checkout@v3 51 | 52 | - uses: r-lib/actions/setup-r@v2 53 | with: 54 | r-version: ${{ matrix.config.r }} 55 | http-user-agent: ${{ matrix.config.http-user-agent }} 56 | use-public-rspm: true 57 | 58 | - uses: r-lib/actions/setup-pandoc@v2 59 | 60 | - uses: r-lib/actions/setup-r-dependencies@v2 61 | with: 62 | extra-packages: any::rcmdcheck 63 | # work around for bug in pak 64 | needs: check 65 | 66 | - uses: r-lib/actions/check-r-package@v2 67 | with: 68 | args: 'c("--no-manual", "--no-multiarch", "--as-cran")' 69 | upload-snapshots: true 70 | 71 | -------------------------------------------------------------------------------- 
/.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | jobs: 4 | paper: 5 | runs-on: ubuntu-latest 6 | name: Paper Draft 7 | steps: 8 | - name: Checkout 9 | uses: actions/checkout@v4 10 | - name: Build draft PDF 11 | uses: openjournals/openjournals-draft-action@master 12 | with: 13 | journal: joss 14 | # This should be the path to the paper within your repo. 15 | paper-path: vignettes/joss/paper.md 16 | - name: Upload 17 | uses: actions/upload-artifact@v4 18 | with: 19 | name: paper 20 | # This is the output path where Pandoc will write the compiled 21 | # PDF. Note, this should be the same directory as the input 22 | # paper.md 23 | path: vignettes/joss/paper.pdf 24 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | if: "!contains(github.event.head_commit.message, 'minor')" 15 | 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v3 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::covr 29 | needs: coverage 30 | 31 | - uses: r-lib/actions/setup-pandoc@v2 32 | 33 | - name: Test coverage 34 | run: | 35 | covr::codecov( 36 | quiet = FALSE, 37 | clean = FALSE, 38 | type = "all", 39 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 40 | ) 41 | shell: Rscript {0} 42 | 43 | - name: Show testthat output 44 | if: always() 45 | run: | 46 | ## -------------------------------------------------------------------- 47 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 48 | shell: bash 49 | 50 | - name: Upload test results 51 | if: failure() 52 | uses: actions/upload-artifact@v4 # v3 has been retired by GitHub; v4 accepts the same name/path inputs 53 | with: 54 | name: coverage-test-failures 55 | path: ${{ runner.temp }}/package 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # User-specific files 9 | .Ruserdata 10 | 11 | # Example code in package build process 12 | *-Ex.R 13 | 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | 17 | # Output files from R CMD check 18 | /*.Rcheck/ 19 | 20 | # RStudio files 21 | .Rproj.user/ 22 | 23 | # produced vignettes 24 | vignettes/*.html 25 | vignettes/*.pdf 26 | 27 | # OAuth2 token, see 
https://github.com/hadley/httr/releases/tag/v0.3 28 | .httr-oauth 29 | 30 | # knitr and R markdown default cache directories 31 | *_cache/ 32 | /cache/ 33 | 34 | # Temporary files created by R markdown 35 | *.utf8.md 36 | *.knit.md 37 | 38 | # R Environment Variables 39 | .Renviron 40 | .Rproj.user 41 | inst/doc 42 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.4.4 2 | Date: 2023-09-04 11:52:14 UTC 3 | SHA: 84e3f953625e8b94f665bd82b7d680188ccd0e7b 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dtrackr 2 | Title: Track your Data Pipelines 3 | Version: 0.4.6 4 | Authors@R: 5 | person(given = "Robert", 6 | family = "Challen", 7 | role = c("aut", "cre"), 8 | email = "rob.challen@bristol.ac.uk", 9 | comment = c(ORCID = "0000-0002-5504-7768")) 10 | Description: Track and 11 | document 'dplyr' data pipelines. As you filter, mutate, and join your 12 | way through a data set, 'dtrackr' seamlessly keeps track of your data 13 | flow and makes publication ready documentation of a data pipeline simple. 
14 | License: MIT + file LICENSE 15 | Language: en-GB 16 | Imports: 17 | dplyr (>= 1.1.0), 18 | glue, 19 | htmltools, 20 | magrittr, 21 | rlang, 22 | rsvg, 23 | stringr, 24 | tibble, 25 | tidyr, 26 | utils, 27 | V8, 28 | fs, 29 | purrr, 30 | base64enc, 31 | pdftools, 32 | png, 33 | lifecycle 34 | Suggests: 35 | spelling, 36 | here, 37 | knitr, 38 | rmarkdown, 39 | tidyselect, 40 | devtools, 41 | testthat (>= 2.1.0), 42 | rstudioapi, 43 | survival, 44 | ggplot2, 45 | covr 46 | VignetteBuilder: 47 | knitr 48 | Encoding: UTF-8 49 | LazyData: true 50 | Roxygen: list(markdown = TRUE) 51 | RoxygenNote: 7.3.2.9003 52 | Depends: 53 | R (>= 2.10) 54 | URL: https://terminological.github.io/dtrackr/index.html, 55 | https://github.com/terminological/dtrackr 56 | BugReports: https://github.com/terminological/dtrackr/issues 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021 2 | COPYRIGHT HOLDER: Robert Challen 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2022 Robert Challen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/dtrackr-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | ## usethis namespace: start 5 | #' @importFrom lifecycle deprecated 6 | ## usethis namespace: end 7 | NULL 8 | -------------------------------------------------------------------------------- /R/experimental/group-counting.R: -------------------------------------------------------------------------------- 1 | #TODO: 2 | # Configure a countable items named list of expressions 3 | # or maybe better a formula like the exclusion spec 4 | # Use a n_distinct call to generate a set of additional .count type columns 5 | # maybe this is able to use status. 6 | 7 | # iris %>% dplyr::group_by(Species) %>% summarise(n = n_distinct(Petal.Length+Sepal.Length)) 8 | # iris %>% dplyr::group_by(Species) %>% dplyr::summarise(n = dplyr::n_distinct(Petal.Length+Sepal.Length)) 9 | # iris %>% dplyr::group_by(Species) %>% dplyr::summarise(n = dplyr::n_distinct(Petal.Length+Sepal.Length, Sepal.Width+Petal.Width)) 10 | # x = function(...) 
{dots = rlang::enexprs(...); browser()} 11 | # x(a = c(Petal.Length+Sepal.Length, Sepal.Wdith), b= c(asdasda) ) 12 | # class(dots$a) 13 | # as.expression(dots$a) 14 | # as.expression(unlist(dots$a)) 15 | # x(a = c(Petal.Length+Sepal.Length, Sepal.Wdith), b= c(asdasda) ) 16 | # x(a = c(Petal.Length+Sepal.Length, Sepal.Width), b= c(asdasda) ) 17 | # iris %>% dplyr::summarise(n = !!dots$a) 18 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(!!dots$a)) 19 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(!!dots$b)) 20 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(Petal.Length + Sepal.Length, Sepal.Width)) 21 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(c(Petal.Length + Sepal.Length, Sepal.Width))) 22 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | #' @param lhs A value or the magrittr placeholder. 12 | #' @param rhs A function call using the magrittr semantics. 13 | #' @return The result of calling `rhs(lhs)`. 
14 | NULL 15 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | 3 | GitHub Actions environments 4 | 5 | * os: macOS-latest, r: 'release' 6 | * os: windows-latest, r: 'release' 7 | * os: ubuntu-latest, r: 'devel' 8 | * os: ubuntu-latest, r: 'release' 9 | * os: ubuntu-latest, r: 'oldrel-1' 10 | * os: ubuntu-latest, r: 'oldrel-2' 11 | 12 | ## R CMD check results 13 | There were no ERRORs or WARNINGs or NOTEs. 14 | (see https://github.com/terminological/dtrackr/actions/ for details) 15 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ 16 | 17 | ## Downstream dependencies 18 | There are currently no downstream dependencies for this package on CRAN. 19 | 20 | ## Other info 21 | 22 | This version addresses some CRAN check NOTEs that have appeared due to empty 23 | sections in `.Rd` files, and some un-escaped braces. The empty sections were due 24 | to documentation generated by `roxygen2` and have been addressed by using a 25 | patched version of `roxygen2` with transitive documentation inheritance. 26 | Regenerating Rd files with vanilla `roxygen2` will reintroduce the empty sections. 27 | The patch has been submitted as a PR to `roxygen2`. 28 | 29 | There is no new functionality. 30 | 31 | Detailed changes are described in NEWS.md. 32 | 33 | Many thanks. 
34 | -------------------------------------------------------------------------------- /docs/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /docs/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon.png 
-------------------------------------------------------------------------------- /docs/articles/consort-example_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/dtrackr-options_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/dtrackr_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/joining-pipelines_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/joining-pipelines_files/anchor-sections-1.0/anchor-sections.css: -------------------------------------------------------------------------------- 1 | /* Styles for section anchors */ 2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} 3 | a.anchor-section::before {content: '#';} 4 | .hasAnchor:hover a.anchor-section {visibility: visible;} 5 | -------------------------------------------------------------------------------- /docs/articles/joining-pipelines_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? 
x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/tracking-provenance_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/tracking-provenance_files/anchor-sections-1.0/anchor-sections.css: -------------------------------------------------------------------------------- 1 | /* Styles for section anchors */ 2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} 3 | a.anchor-section::before {content: '#';} 4 | .hasAnchor:hover a.anchor-section {visibility: visible;} 5 | -------------------------------------------------------------------------------- /docs/articles/tracking-provenance_files/anchor-sections-1.0/anchor-sections.js: 
// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020.
//
// After the DOM has loaded, appends an <a class="anchor-section"> link to
// every heading (h1-h6) that has an id, or whose parent section has one, and
// tags the heading with the "hasAnchor" class.
document.addEventListener('DOMContentLoaded', function() {
  // Do nothing if AnchorJS is used.
  // `typeof null` is also 'object', so null is excluded explicitly before
  // calling a method on the value.
  const anchorjs = window.anchors;
  if (typeof anchorjs === 'object' && anchorjs !== null &&
      Object.prototype.hasOwnProperty.call(anchorjs, 'hasAnchorJSLink')) {
    return;
  }

  const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6');

  // Do nothing if sections are already anchored
  if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) {
    return;
  }

  // Use section id when pandoc runs with --section-divs:
  // returns the element's id when it is a section wrapper, '' otherwise.
  const section_id = function(x) {
    return ((x.classList.contains('section') || (x.tagName === 'SECTION'))
            ? x.id : '');
  };

  // Add anchors
  h.forEach(function(x) {
    const id = x.id || section_id(x.parentElement);
    if (id === '') {
      // Nothing to link to.
      return;
    }
    const anchor = document.createElement('a');
    anchor.href = '#' + id;
    // Use the DOMTokenList API rather than assigning an array to classList.
    anchor.classList.add('anchor-section');
    x.classList.add('hasAnchor');
    x.appendChild(anchor);
  });
});
/*!
 * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
 * Copyright 2015 Aidan Feldman
 * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */

/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */

/* All levels of nav: base appearance of every table-of-contents link.
   The 20px horizontal padding is the reference the hover/active rules below
   subtract their border width from. */
nav[data-toggle='toc'] .nav > li > a {
  display: block;
  padding: 4px 20px;
  font-size: 13px;
  font-weight: 500;
  color: #767676;
}
/* Hover/focus: 19px padding + 1px left border = 20px, so the text does not
   shift when the border appears. */
nav[data-toggle='toc'] .nav > li > a:hover,
nav[data-toggle='toc'] .nav > li > a:focus {
  padding-left: 19px;
  color: #563d7c;
  text-decoration: none;
  background-color: transparent;
  border-left: 1px solid #563d7c;
}
/* Active entry: 18px padding + 2px left border = 20px; bold text. */
nav[data-toggle='toc'] .nav > .active > a,
nav[data-toggle='toc'] .nav > .active:hover > a,
nav[data-toggle='toc'] .nav > .active:focus > a {
  padding-left: 18px;
  font-weight: bold;
  color: #563d7c;
  background-color: transparent;
  border-left: 2px solid #563d7c;
}

/* Nav: second level (shown on .active) */
nav[data-toggle='toc'] .nav .nav {
  display: none; /* Hidden by default. NOTE(review): the upstream comment mentions showing it at >768px, but this file contains no media query; the only rule that shows it is the `.active > ul` rule at the end. */
  padding-bottom: 10px;
}
/* Second-level links: deeper indent (30px) and smaller, lighter text. */
nav[data-toggle='toc'] .nav .nav > li > a {
  padding-top: 1px;
  padding-bottom: 1px;
  padding-left: 30px;
  font-size: 12px;
  font-weight: normal;
}
/* 29px padding + the 1px hover border from the first-level rule = 30px. */
nav[data-toggle='toc'] .nav .nav > li > a:hover,
nav[data-toggle='toc'] .nav .nav > li > a:focus {
  padding-left: 29px;
}
/* 28px padding + the 2px active border from the first-level rule = 30px. */
nav[data-toggle='toc'] .nav .nav > .active > a,
nav[data-toggle='toc'] .nav .nav > .active:hover > a,
nav[data-toggle='toc'] .nav .nav > .active:focus > a {
  padding-left: 28px;
  font-weight: 500;
}

/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
/* Expand the nested list of whichever first-level item is active. */
nav[data-toggle='toc'] .nav > .active > ul {
  display: block;
}
  • ");return r.append(n),r},generateNavItem:function(e){var t=this.generateAnchor(e),n=a(e),r=n.data("toc-text")||n.text();return this.generateNavEl(t,r)},getTopLevel:function(e){for(var t=1;t<=6;t++){if(1 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.ttf -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.woff2 -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.ttf -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.woff2 -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.ttf -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.woff2 -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.ttf -------------------------------------------------------------------------------- /docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.woff2 -------------------------------------------------------------------------------- /docs/deps/headroom-0.11.0/jQuery.headroom.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * headroom.js v0.9.4 - Give your page some headroom. 
$(function() {

  // Shift + "/" (i.e. "?") moves keyboard focus to the search box.
  $(document).on('keydown', function(event) {
    if (!event.shiftKey || event.keyCode != 191) {
      return;
    }
    event.preventDefault();
    $("#search-input").focus();
  });

  $(document).ready(function() {
    // Keyword highlighting: read the "q" parameter from the page URL and
    // highlight it inside ".contents".
    /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
    var highlightQuery = function() {
      var pageUrl = document.URL;
      var queryStart = pageUrl.indexOf("?");
      if (queryStart === -1) {
        return;
      }

      // Everything after the first "?" up to (not including) any "#".
      var pairs = pageUrl.slice(queryStart + 1).split('#')[0].split('&');
      var keyword = "";

      pairs.forEach(function(pair) {
        var parts = pair.split('=');
        // Only plain key=value pairs are considered; the last "q" wins.
        if (parts.length === 2 && parts[0] == "q") {
          keyword = decodeURIComponent(parts[1].replace(/\+/g, "%20"));
        }
      });

      if (keyword === "") {
        return;
      }

      // Clear existing marks first, then mark the keyword.
      $(".contents").unmark({
        done: function() {
          $(".contents").mark(keyword);
        }
      });
    };

    highlightQuery();
  });
});
/* Search term highlighting ------------------------------*/

/**
 * Collect the distinct matched words of a search hit.
 *
 * Words are gathered from every entry of `hit._highlightResult.hierarchy`
 * (lvl0, lvl1, ...) and then from `hit._highlightResult.content`, in that
 * order. Entries without a `matchedWords` list are skipped.
 *
 * @param {Object} hit - search hit carrying a `_highlightResult` object.
 * @returns {Array} unique matched words, in first-seen order.
 */
function matchedWords(hit) {
  var words = [];
  var highlight = hit._highlightResult || {};

  // loop to fetch from lvl0, lvl1, etc.
  var hierarchy = highlight.hierarchy || {};
  for (var idx in hierarchy) {
    var level = hierarchy[idx];
    if (level && level.matchedWords) {
      words = words.concat(level.matchedWords);
    }
  }

  var content = highlight.content;
  if (content && content.matchedWords) {
    words = words.concat(content.matchedWords);
  }

  // return unique words
  return [...new Set(words)];
}

/**
 * Build the URL for a search hit with the matched words appended as a `q`
 * query parameter, placed before the anchor when the hit has one.
 *
 * The words are encoded with `encodeURIComponent`, so the page that reads the
 * parameter back with `decodeURIComponent` recovers them exactly. The
 * deprecated `escape()` emits `%uXXXX` sequences that `decodeURIComponent`
 * rejects, and it leaves "+" unescaped.
 *
 * @param {Object} hit - search hit with `url`, and optionally `anchor` and
 *   `url_without_anchor`.
 * @returns {string} the hit URL including the `?q=` parameter.
 */
function updateHitURL(hit) {
  var query = '?q=' + encodeURIComponent(matchedWords(hit).join(" "));

  if (hit.anchor) {
    return hit.url_without_anchor + query + '#' + hit.anchor;
  }
  return hit.url + query;
}
// https://github.com/jgm/pandoc/blob/29fa97ab96b8e2d62d48326e1b949a71dc41f47a/src/Text/Pandoc/Writers/HTML.hs#L332-L345
//
// After the DOM has loaded, renders every <span class="math"> with KaTeX,
// using the text of the span's first child node as the TeX source.
document.addEventListener("DOMContentLoaded", function () {
  var mathElements = document.getElementsByClassName("math");
  // One macro table is passed to every render call. KaTeX documents `macros`
  // as a plain object, so an object literal is used instead of an array.
  var macros = {};
  for (var i = 0; i < mathElements.length; i++) {
    var element = mathElements[i];
    if (element.tagName != "SPAN") {
      continue;
    }
    var texText = element.firstChild;
    if (texText === null) {
      // An empty span has nothing to render. Skipping it keeps one empty
      // element from aborting the rendering of all later ones.
      continue;
    }
    katex.render(texText.data, element, {
      // Spans that also carry the "display" class are rendered in display mode.
      displayMode: element.classList.contains("display"),
      throwOnError: false,
      macros: macros,
      fleqn: false
    });
  }
});
/**
 * Wire up the colour-mode switcher.
 *
 * Registers three things:
 *  - a listener on the `prefers-color-scheme: dark` media query that
 *    re-applies the preferred theme unless 'light' or 'dark' is stored;
 *  - on DOMContentLoaded, a sync of the switcher UI with the preferred theme;
 *  - a click handler on every `[data-bs-theme-value]` element that applies the
 *    chosen theme, stores it, and refreshes the UI.
 */
function bsSetupThemeToggle () {
  'use strict'

  // Mark the button whose data-bs-theme-value equals `theme` as active and
  // copy its label and icon classes onto the #dropdown-lightswitch control.
  const showActiveTheme = (theme, focus = false) => {
    let activeButton = null

    document.querySelectorAll('[data-bs-theme-value]').forEach(element => {
      const buttonTheme = element.getAttribute('data-bs-theme-value')
      const isActive = buttonTheme == theme

      element.classList.toggle('active', isActive)
      element.setAttribute('aria-pressed', isActive)

      if (isActive) {
        // If several buttons share the value, the last one wins.
        activeButton = element
      }
    })

    const themeSwitcher = document.querySelector('#dropdown-lightswitch')
    if (!themeSwitcher) {
      return
    }

    // Only touch the label/icon when a matching button exists. Otherwise the
    // switcher would be labelled with the literal string "undefined".
    if (activeButton) {
      themeSwitcher.setAttribute('aria-label', activeButton.textContent)

      const activeIcon = activeButton.querySelector('span')
      const switcherIcon = themeSwitcher.querySelector('span')
      // Either span may be absent; skip the icon copy rather than throw.
      if (activeIcon && switcherIcon) {
        switcherIcon.classList.value = activeIcon.classList.value
      }
    }

    if (focus) {
      themeSwitcher.focus()
    }
  }

  // Follow OS-level changes unless the user explicitly stored light or dark.
  window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => {
    const storedTheme = getStoredTheme()
    if (storedTheme !== 'light' && storedTheme !== 'dark') {
      setTheme(getPreferredTheme())
    }
  })

  window.addEventListener('DOMContentLoaded', () => {
    showActiveTheme(getPreferredTheme())

    document
      .querySelectorAll('[data-bs-theme-value]')
      .forEach(toggle => {
        toggle.addEventListener('click', () => {
          const theme = toggle.getAttribute('data-bs-theme-value')
          setTheme(theme)
          setStoredTheme(theme)
          showActiveTheme(theme, true)
        })
      })
  })
}

// Apply the theme immediately, then set up the toggle.
setTheme(getPreferredTheme());
bsSetupThemeToggle();
-------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/logo.png -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 3.1.13 2 | pkgdown: 2.1.1 3 | pkgdown_sha: ~ 4 | articles: 5 | consort-example: consort-example.html 6 | dtrackr-options: dtrackr-options.html 7 | dtrackr: dtrackr.html 8 | joining-pipelines: joining-pipelines.html 9 | last_built: 2024-10-19T15:48Z 10 | urls: 11 | reference: https://terminological.github.io/dtrackr/index.html/reference 12 | article: https://terminological.github.io/dtrackr/index.html/articles 13 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/dtrackr.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /docs/reference/figures/README-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/README-flowchart.png -------------------------------------------------------------------------------- /docs/reference/figures/demo.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/demo.pdf -------------------------------------------------------------------------------- /docs/reference/figures/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/demo.png -------------------------------------------------------------------------------- /docs/reference/figures/dtrackr.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/dtrackr.xcf -------------------------------------------------------------------------------- /docs/reference/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/reference/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/reference/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- 
/docs/reference/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/reference/filter.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /docs/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } -------------------------------------------------------------------------------- /docs/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /docs/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- 
# Citation entry for the dtrackr paper in the Journal of Open Source Software.
c(
  bibentry(
    bibtype = "Article",
    title   = "dtrackr: An R package for tracking the provenance of data",
    year    = 2022,
    note    = "R package version 0.2.5",
    url     = "https://joss.theoj.org/papers/10.21105/joss.04707",
    doi     = "10.21105/joss.04707",
    journal = "Journal of Open Source Software",
    author  = c(
      person(
        given   = "Robert",
        family  = "Challen",
        email   = "rob.challen@bristol.ac.uk",
        # named character vector: the name "ORCID" labels the identifier
        comment = c(ORCID = "0000-0002-5504-7768"),
        role    = c("aut", "cre")
      )
    )
  )
)
library(dplyr)
library(dtrackr)
# Joins across data sets

# Example data: the `starwars` data set from dplyr, split into a table of
# characters and a long table with one row per (name, film) pair.
characters <- starwars %>%
  select(-films, -vehicles, -starships)
appearances <- starwars %>%
  select(name, films) %>%
  tidyr::unnest(cols = c(films))

# Start tracking both sides and annotate each with a comment.
lhs <- characters %>%
  track() %>%
  comment("People df {.total}")
rhs <- appearances %>%
  track() %>%
  comment("Films df {.total}") %>%
  comment("a test comment")

# Anti join
joined <- lhs %>%
  anti_join(rhs, by = "name") %>%
  comment("joined {.total}")
# Print the history recorded for the joined data:
joined %>%
  history() %>%
  print()
nrow(joined)
# Display the tracked graph (not run in examples)
# joined %>% flowchart()
library(dplyr)
library(dtrackr)
# Joins across data sets

# Example data: the `starwars` data set from dplyr, split into a table of
# characters and a long table with one row per (name, film) pair.
characters <- starwars %>%
  select(-films, -vehicles, -starships)
appearances <- starwars %>%
  select(name, films) %>%
  tidyr::unnest(cols = c(films))

# Start tracking both sides and annotate each with a comment.
lhs <- characters %>%
  track() %>%
  comment("People df {.total}")
rhs <- appearances %>%
  track() %>%
  comment("Films df {.total}") %>%
  comment("a test comment")

# Full join
joined <- lhs %>%
  full_join(rhs, by = "name", multiple = "all") %>%
  comment("joined {.total}")
# Show the history recorded for the joined data:
joined %>%
  history()
nrow(joined)
# Display the tracked graph (not run in examples)
# joined %>% flowchart()
library(dplyr)
library(dtrackr)
# Joins across data sets

# Example data: the `starwars` data set from dplyr, split into a table of
# characters and a long table with one row per (name, film) pair.
characters <- starwars %>%
  select(-films, -vehicles, -starships)
appearances <- starwars %>%
  select(name, films) %>%
  tidyr::unnest(cols = c(films))

# Start tracking both sides and annotate each with a comment.
lhs <- characters %>%
  track() %>%
  comment("People df {.total}")
rhs <- appearances %>%
  track() %>%
  comment("Films df {.total}") %>%
  comment("a test comment")

# Left join
joined <- lhs %>%
  left_join(rhs, by = "name", multiple = "all") %>%
  comment("joined {.total}")
# Show the history recorded for the joined data:
joined %>%
  history()
nrow(joined)
# Display the tracked graph (not run in examples)
# joined %>% flowchart()
library(dplyr)
library(dtrackr)
# Joins across data sets

# Example data: the `starwars` data set from dplyr, split into a table of
# characters and a long table with one row per (name, film) pair.
characters <- starwars %>%
  select(-films, -vehicles, -starships)
appearances <- starwars %>%
  select(name, films) %>%
  tidyr::unnest(cols = c(films))

# Start tracking both sides and annotate each with a comment.
lhs <- characters %>%
  track() %>%
  comment("People df {.total}")
rhs <- appearances %>%
  track() %>%
  comment("Films df {.total}") %>%
  comment("a test comment")

# Nest join
joined <- lhs %>%
  nest_join(rhs, by = "name") %>%
  comment("joined {.total}")
# Print the history recorded for the joined data:
joined %>%
  history() %>%
  print()
nrow(joined)
# Display the tracked graph (not run in examples)
# joined %>% flowchart()
library(dplyr)
library(dtrackr)

# mutate and other functions are unitary operations that generally change
# the structure but not the size of a dataframe. In dtrackr these are ignored
# by default, but we can change that so that their behaviour is obvious.

# rename can show us which columns are new (.new_cols) and which have been
# removed (.dropped_cols)
tracked_iris <- iris %>%
  track() %>%
  group_by(Species)

tracked_iris %>%
  rename(
    Stamen.Width = Sepal.Width,
    Stamen.Length = Sepal.Length,
    .messages = c("added {.new_cols}", "dropped {.dropped_cols}"),
    .headline = "Renamed columns:"
  ) %>%
  history()
11 | iris %>% 12 | track() %>% 13 | group_by(Species) %>% 14 | select( 15 | tidyselect::starts_with("Sepal"), 16 | .messages="{.cols}", 17 | .headline="Output columns from select:") %>% 18 | history() 19 | -------------------------------------------------------------------------------- /inst/examples/semi-join-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # Joins across data sets 4 | 5 | # example data uses the dplyr starwars data 6 | people = starwars %>% select(-films, -vehicles, -starships) 7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films)) 8 | 9 | lhs = people %>% track() %>% comment("People df {.total}") 10 | rhs = films %>% track() %>% comment("Films df {.total}") %>% 11 | comment("a test comment") 12 | 13 | # Semi join 14 | join = lhs %>% semi_join(rhs, by="name") %>% comment("joined {.total}") 15 | # See what the history of the graph is: 16 | join %>% history() %>% print() 17 | nrow(join) 18 | # Display the tracked graph (not run in examples) 19 | # join %>% flowchart() 20 | 21 | -------------------------------------------------------------------------------- /inst/examples/set-operation-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | # Set operations 5 | people = starwars %>% select(-films, -vehicles, -starships) 6 | chrs = people %>% track("start") 7 | 8 | lhs = chrs %>% include_any( 9 | species == "Human" ~ "{.included} humans", 10 | species == "Droid" ~ "{.included} droids" 11 | ) 12 | 13 | # these are different subsets of the same data 14 | rhs = chrs %>% include_any( 15 | species == "Human" ~ "{.included} humans", 16 | species == "Gungan" ~ "{.included} gungans" 17 | ) %>% comment("{.count} gungans & humans") 18 | 19 | 20 | # Unions 21 | set = bind_rows(lhs,rhs) %>% comment("{.count} 2*human,droids and gungans") 22 | # display the history of
the result: 23 | set %>% history() 24 | nrow(set) 25 | # not run - display the flowchart: 26 | # set %>% flowchart() 27 | 28 | set = union(lhs,rhs) %>% comment("{.count} human,droids and gungans") 29 | # display the history of the result: 30 | set %>% history() 31 | nrow(set) 32 | # not run - display the flowchart: 33 | # set %>% flowchart() 34 | 35 | set = union_all(lhs,rhs) %>% comment("{.count} 2*human,droids and gungans") 36 | # display the history of the result: 37 | set %>% history() 38 | nrow(set) 39 | # not run - display the flowchart: 40 | # set %>% flowchart() 41 | 42 | # Intersections and differences 43 | 44 | set = setdiff(lhs,rhs) %>% comment("{.count} droids and gungans") 45 | # display the history of the result: 46 | set %>% history() 47 | nrow(set) 48 | # not run - display the flowchart: 49 | # set %>% flowchart() 50 | 51 | set = intersect(lhs,rhs) %>% comment("{.count} humans") 52 | # display the history of the result: 53 | set %>% history() 54 | nrow(set) 55 | # not run - display the flowchart: 56 | # set %>% flowchart() 57 | -------------------------------------------------------------------------------- /inst/examples/slice-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | # an arbitrary 50 items from the iris dataframe is selected. 
The 5 | # history is tracked 6 | iris %>% track() %>% slice(51:100) %>% history() 7 | 8 | -------------------------------------------------------------------------------- /inst/examples/slice-head-tail-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | # the first 50% of the data frame, is taken and the history tracked 5 | iris %>% track() %>% group_by(Species) %>% 6 | slice_head(prop=0.5,.messages="{.count.out} / {.count.in}", 7 | .headline="First {sprintf('%1.0f',prop*100)}%") %>% 8 | history() 9 | 10 | # The last 100 items: 11 | iris %>% track() %>% group_by(Species) %>% 12 | slice_tail(n=100,.messages="{.count.out} / {.count.in}", 13 | .headline="Last 100") %>% 14 | history() 15 | -------------------------------------------------------------------------------- /inst/examples/slice-max-min-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | 5 | # Subset the data by the maximum of a given value 6 | iris %>% track() %>% group_by(Species) %>% 7 | slice_max(prop=0.5, order_by = Sepal.Width, 8 | .messages="{.count.out} / {.count.in} = {prop} (with ties)", 9 | .headline="Widest 50% Sepals") %>% 10 | history() 11 | 12 | 13 | # The narrowest 25% of the iris data set by group can be calculated in the 14 | # slice_min() function. Recording this is a matter of tracking and 15 | # using glue specs. 
16 | iris %>% 17 | track() %>% 18 | group_by(Species) %>% 19 | slice_min(prop=0.25, order_by = Sepal.Width, 20 | .messages="{.count.out} / {.count.in} (with ties)", 21 | .headline="narrowest {sprintf('%1.0f',prop*100)}% {Species}") %>% 22 | history() 23 | 24 | -------------------------------------------------------------------------------- /inst/examples/slice-sample-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | # In this example 100 rows are sampled with replacement from the iris dataframe 5 | # within each group and the history is tracked 6 | iris %>% 7 | track() %>% 8 | group_by(Species) %>% 9 | slice_sample(n=100, replace=TRUE, 10 | .messages="{.count.out} / {.count.in} = {n}", 11 | .headline="100 {Species}") %>% 12 | history() 13 | -------------------------------------------------------------------------------- /inst/examples/transmute-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | # mutate and other functions are unitary operations that generally change 5 | # the structure but not size of a dataframe. In dtrackr these are ignored 6 | # by default but we can change that so that their behaviour is obvious.
7 | 8 | # In this example we compare the column names of the input and the 9 | # output to identify the new columns created by the transmute operation as 10 | # the `.new_cols` variable 11 | # Here we do the same for a transmute() 12 | iris %>% 13 | track() %>% 14 | group_by(Species, .add=TRUE) %>% 15 | transmute( 16 | sepal.w = Sepal.Width-1, 17 | sepal.l = Sepal.Length+1, 18 | .messages="{.new_cols}", 19 | .headline="New columns from transmute:") %>% 20 | history() 21 | 22 | -------------------------------------------------------------------------------- /inst/lib/viz_licence.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2018 Michael Daines 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /man/anti_join.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{anti_join.trackr_df} 4 | \alias{anti_join.trackr_df} 5 | \title{Anti join} 6 | \usage{ 7 | \method{anti_join}{trackr_df}( 8 | x, 9 | y, 10 | ..., 11 | .messages = c("{.count.lhs} on LHS", "{.count.rhs} on RHS", "{.count.out} not matched"), 12 | .headline = "Semi join by {.keys}" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or 17 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 18 | more details.} 19 | 20 | \item{...}{Other parameters passed onto methods.} 21 | 22 | \item{.messages}{a set of glue specs. The glue code can use any global 23 | variable, \{.keys\} for the joining columns, \{.count.lhs\}, 24 | \{.count.rhs\}, \{.count.out\} for the input and output dataframes sizes 25 | respectively} 26 | 27 | \item{.headline}{a glue spec. The glue code can use any global variable, 28 | \{.keys\} for the joining columns, \{.count.lhs\}, \{.count.rhs\}, 29 | \{.count.out\} for the input and output dataframes sizes respectively} 30 | } 31 | \value{ 32 | the join of the two dataframes with the history graph updated. 33 | } 34 | \description{ 35 | Mutating joins behave as \code{dplyr} joins, except the history graph of the two 36 | sides of the joins is merged resulting in a tracked dataframe with the 37 | history of both input dataframes. See \code{\link[dplyr:filter-joins]{dplyr::anti_join()}} for more details 38 | on the underlying functions. 
39 | } 40 | \examples{ 41 | library(dplyr) 42 | library(dtrackr) 43 | # Joins across data sets 44 | 45 | # example data uses the dplyr starwars data 46 | people = starwars \%>\% select(-films, -vehicles, -starships) 47 | films = starwars \%>\% select(name,films) \%>\% tidyr::unnest(cols = c(films)) 48 | 49 | lhs = people \%>\% track() \%>\% comment("People df {.total}") 50 | rhs = films \%>\% track() \%>\% comment("Films df {.total}") \%>\% 51 | comment("a test comment") 52 | 53 | # Anti join 54 | join = lhs \%>\% anti_join(rhs, by="name") \%>\% comment("joined {.total}") 55 | # See what the history of the graph is: 56 | join \%>\% history() \%>\% print() 57 | nrow(join) 58 | # Display the tracked graph (not run in examples) 59 | # join \%>\% flowchart() 60 | } 61 | \seealso{ 62 | dplyr::anti_join() 63 | } 64 | -------------------------------------------------------------------------------- /man/capture_exclusions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{capture_exclusions} 4 | \alias{capture_exclusions} 5 | \title{Start capturing exclusions on a tracked dataframe.} 6 | \usage{ 7 | capture_exclusions(.data, .capture = TRUE) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | 12 | \item{.capture}{Should we capture exclusions (things removed from the data 13 | set). This is useful for debugging data issues but comes at a significant 14 | cost. In this function the default is 15 | \code{TRUE}.} 16 | } 17 | \value{ 18 | the .data dataframe with the exclusions flag set (or cleared if 19 | \code{.capture=FALSE}). 20 | } 21 | \description{ 22 | Start capturing exclusions on a tracked dataframe.
23 | } 24 | \examples{ 25 | library(dplyr) 26 | library(dtrackr) 27 | tmp = iris \%>\% track() \%>\% capture_exclusions() 28 | tmp \%>\% filter(Species!="versicolor") \%>\% history() 29 | } 30 | -------------------------------------------------------------------------------- /man/comment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{comment} 4 | \alias{comment} 5 | \title{Add a generic comment to the dtrackr history graph} 6 | \usage{ 7 | comment( 8 | .data, 9 | .messages = .defaultMessage(), 10 | .headline = .defaultHeadline(), 11 | .type = "info", 12 | .asOffshoot = (.type == "exclusion"), 13 | .tag = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{a dataframe which may be grouped} 18 | 19 | \item{.messages}{a character vector of glue specifications. A glue 20 | specification can refer to any grouping variables of .data, or any 21 | variables defined in the calling environment, the \{.total\} of all rows, 22 | the \{.count\} variable which is the count in each group and \{.strata\} a 23 | description of the group} 24 | 25 | \item{.headline}{a glue specification which can refer to grouping variables 26 | of .data, or any variables defined in the calling environment, or the 27 | \{.total\} variable (which is \code{nrow(.data)}) and \{.strata\} which is a 28 | description of the grouping} 29 | 30 | \item{.type}{one of "info", ..., "exclusion": used to define formatting} 31 | 32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main 33 | flow (defaults to TRUE only when \code{.type} is "exclusion").} 34 | 35 | \item{.tag}{if you want the summary data from this step in the future then 36 | give it a name with .tag.} 37 | } 38 | \value{ 39 | the same .data dataframe with the history graph updated with the comment 40 | } 41 | \description{ 42 | A comment can be any kind of note and is added once for every current 43 | grouping as defined by
the \code{.messages} field. It can be made context specific 44 | by including variables such as \{.count\} and \{.total\} in \code{.messages} which 45 | refer to the grouped and ungrouped counts at this current stage of the 46 | pipeline respectively. It can also pull in any global variable. 47 | } 48 | \examples{ 49 | library(dplyr) 50 | library(dtrackr) 51 | iris \%>\% track() \%>\% comment("hello {.total} rows") \%>\% history() 52 | } 53 | -------------------------------------------------------------------------------- /man/count_subgroup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{count_subgroup} 4 | \alias{count_subgroup} 5 | \title{Add a subgroup count to the dtrackr history graph} 6 | \usage{ 7 | count_subgroup( 8 | .data, 9 | .subgroup, 10 | ..., 11 | .messages = .defaultCountSubgroup(), 12 | .headline = .defaultHeadline(), 13 | .type = "info", 14 | .asOffshoot = FALSE, 15 | .tag = NULL, 16 | .maxsubgroups = .defaultMaxSupportedGroupings() 17 | ) 18 | } 19 | \arguments{ 20 | \item{.data}{a dataframe which may be grouped} 21 | 22 | \item{.subgroup}{a column with a small number of levels (e.g. a factor)} 23 | 24 | \item{...}{passed to \verb{base::factor(subgroup values, ...)} to allow reordering 25 | of levels etc.} 26 | 27 | \item{.messages}{a character vector of glue specifications.
A glue 28 | specification can refer to anything from the calling environment, 29 | \{.subgroup\} for the subgroup column name and \{.name\} for the subgroup 30 | column value, \{.count\} for the subgroup column count, \{.subtotal\} for 31 | the current stratification grouping count and \{.total\} for the whole 32 | dataset count} 33 | 34 | \item{.headline}{a glue specification which can refer to grouping variables 35 | of .data, \{.subtotal\} for the current grouping count, or any variables 36 | defined in the calling environment} 37 | 38 | \item{.type}{one of "info","exclusion": used to define formatting} 39 | 40 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main 41 | flow (default = FALSE).} 42 | 43 | \item{.tag}{if you want to use the summary data from this step in the future 44 | then give it a name with .tag.} 45 | 46 | \item{.maxsubgroups}{the maximum number of discrete values allowed in 47 | .subgroup is configurable with 48 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. Large 49 | values produce unwieldy flow charts.} 50 | } 51 | \value{ 52 | the same .data dataframe with the history graph updated with a 53 | subgroup count as a new stage 54 | } 55 | \description{ 56 | A frequent use case for more detailed description is to have a subgroup count 57 | within a flowchart. This works best for factor subgroup columns but other 58 | data will be converted to a factor automatically. The count of the items in 59 | each subgroup is added as a new stage in the flowchart. 
60 | } 61 | \examples{ 62 | library(dplyr) 63 | library(dtrackr) 64 | survival::cgd \%>\% track() \%>\% group_by(treat) \%>\% 65 | count_subgroup(center) \%>\% history() 66 | } 67 | -------------------------------------------------------------------------------- /man/distinct.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{distinct.trackr_df} 4 | \alias{distinct.trackr_df} 5 | \title{Distinct values of data} 6 | \usage{ 7 | \method{distinct}{trackr_df}( 8 | .data, 9 | ..., 10 | .messages = "removing {.count.in-.count.out} duplicates", 11 | .headline = .defaultHeadline(), 12 | .tag = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 17 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 18 | more details.} 19 | 20 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to 21 | use when determining uniqueness. If there are multiple rows for a given 22 | combination of inputs, only the first row will be preserved. If omitted, 23 | will use all variables in the data frame. 24 | Named arguments passed on to \code{\link[dplyr:distinct]{dplyr::distinct}}\describe{ 25 | \item{\code{.keep_all}}{If \code{TRUE}, keep all variables in \code{.data}. 26 | If a combination of \code{...} is not distinct, this keeps the 27 | first row of values.} 28 | }} 29 | 30 | \item{.messages}{a set of glue specs. The glue code can use any global 31 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 32 | 33 | \item{.headline}{a headline glue spec. 
The glue code can use any global 34 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 35 | 36 | \item{.tag}{if you want the summary data from this step in the future then 37 | give it a name with .tag.} 38 | } 39 | \value{ 40 | the .data dataframe with distinct values and history graph updated. 41 | } 42 | \description{ 43 | Distinct acts in the same way as in \code{dplyr::distinct}. Prior to the operation 44 | the size of the group is calculated \{.count.in\} and after the operation the 45 | output size \{.count.out\} The group \{.strata\} is also available (if 46 | grouped) for reporting. See \code{\link[dplyr:distinct]{dplyr::distinct()}}. 47 | } 48 | \examples{ 49 | library(dplyr) 50 | library(dtrackr) 51 | 52 | tmp = bind_rows(iris \%>\% track(), iris \%>\% track() \%>\% filter(Petal.Length > 5)) 53 | tmp \%>\% group_by(Species) \%>\% distinct() \%>\% history() 54 | } 55 | \seealso{ 56 | dplyr::distinct() 57 | } 58 | -------------------------------------------------------------------------------- /man/dot2svg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dot.R 3 | \name{dot2svg} 4 | \alias{dot2svg} 5 | \title{Convert \code{Graphviz} dot content to a SVG} 6 | \usage{ 7 | dot2svg(dot) 8 | } 9 | \arguments{ 10 | \item{dot}{a \code{graphviz} dot string} 11 | } 12 | \value{ 13 | the SVG as a string 14 | } 15 | \description{ 16 | Convert a \code{graphviz} dot digraph as string to \code{SVG} as string 17 | } 18 | \examples{ 19 | dot2svg("digraph { A->B }") 20 | } 21 | -------------------------------------------------------------------------------- /man/dtrackr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr-package.R 3 | \docType{package} 4 | \name{dtrackr-package} 5 | \alias{dtrackr} 6 | 
\alias{dtrackr-package} 7 | \title{dtrackr: Track your Data Pipelines} 8 | \description{ 9 | Track and document 'dplyr' data pipelines. As you filter, mutate, and join your way through a data set, 'dtrackr' seamlessly keeps track of your data flow and makes publication ready documentation of a data pipeline simple. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://terminological.github.io/dtrackr/index.html} 15 | \item \url{https://github.com/terminological/dtrackr} 16 | \item Report bugs at \url{https://github.com/terminological/dtrackr/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Robert Challen \email{rob.challen@bristol.ac.uk} (\href{https://orcid.org/0000-0002-5504-7768}{ORCID}) 22 | 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/excluded.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{excluded} 4 | \alias{excluded} 5 | \title{Get the dtrackr excluded data record} 6 | \usage{ 7 | excluded(.data, simplify = TRUE) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | 12 | \item{simplify}{return a single summary dataframe of all exclusions.} 13 | } 14 | \value{ 15 | a new dataframe of the excluded data up to this point in the workflow. This dataframe is by default flattened, but if \code{simplify=FALSE} it has a nested structure containing records excluded at each part of the pipeline.
16 | } 17 | \description{ 18 | Get the dtrackr excluded data record 19 | } 20 | \examples{ 21 | library(dplyr) 22 | library(dtrackr) 23 | tmp = iris \%>\% track() \%>\% capture_exclusions() 24 | tmp \%>\% exclude_all( 25 | Petal.Length > 5.8 ~ "{.excluded} long ones", 26 | Petal.Length < 1.3 ~ "{.excluded} short ones", 27 | .stage = "petal length exclusion" 28 | ) \%>\% excluded() 29 | } 30 | -------------------------------------------------------------------------------- /man/figures/README-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/man/figures/README-flowchart.png -------------------------------------------------------------------------------- /man/figures/dtrackr.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/man/figures/dtrackr.xcf -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 
| 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/man/figures/logo.png -------------------------------------------------------------------------------- /man/filter.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{filter.trackr_df} 4 | \alias{filter.trackr_df} 5 | \title{Filtering data} 6 | \usage{ 7 | \method{filter}{trackr_df}( 8 | .data, 9 | ..., 10 | .messages = "excluded {.excluded} items", 11 | .headline = .defaultHeadline(), 12 | .type = "exclusion", 13 | .asOffshoot = (.type == "exclusion"), 14 | .stage = (if (is.null(.tag)) "" else .tag), 15 | .tag = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 20 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 21 | more details.} 22 | 23 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that 24 | return a logical value, and are defined in terms of the variables in 25 | \code{.data}. If multiple expressions are included, they are combined with the 26 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are 27 | kept. 
28 | Named arguments passed on to \code{\link[dplyr:filter]{dplyr::filter}}\describe{ 29 | \item{\code{.by}}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 30 | 31 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 32 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 33 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 34 | \item{\code{.preserve}}{Relevant when the \code{.data} input is grouped. 35 | If \code{.preserve = FALSE} (the default), the grouping structure 36 | is recalculated based on the resulting data, otherwise the grouping is kept as is.} 37 | }} 38 | 39 | \item{.messages}{a set of glue specs. The glue code can use any global 40 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 41 | 42 | \item{.headline}{a headline glue spec. The glue code can use any global 43 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 44 | 45 | \item{.type}{the format type of the action typically an exclusion} 46 | 47 | \item{.asOffshoot}{if the type is exclusion, \code{.asOffshoot} places the 48 | information box outside of the main flow, as an exclusion.} 49 | 50 | \item{.stage}{a name for this step in the pathway} 51 | 52 | \item{.tag}{if you want the summary data from this step in the future then 53 | give it a name with \code{.tag}.} 54 | } 55 | \value{ 56 | the filtered \code{.data} dataframe with history graph updated 57 | } 58 | \description{ 59 | Filter acts in the same way as in \code{dplyr} where predicates which evaluate to 60 | TRUE act to select items to include, and items for which the predicate cannot 61 | be evaluated are excluded. 
For tracking prior to the filter operation the 62 | size of each group is calculated \{.count.in\} and after the operation the 63 | output size of each group \{.count.out\}. The grouping \{.strata\} is also 64 | available (if grouped) for reporting. See \code{\link[dplyr:filter]{dplyr::filter()}}. 65 | } 66 | \examples{ 67 | library(dplyr) 68 | library(dtrackr) 69 | 70 | tmp = iris \%>\% track() \%>\% group_by(Species) 71 | tmp \%>\% filter(Petal.Length > 5) \%>\% history() 72 | } 73 | \seealso{ 74 | dplyr::filter() 75 | } 76 | -------------------------------------------------------------------------------- /man/flowchart.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{flowchart} 4 | \alias{flowchart} 5 | \title{Flowchart output} 6 | \usage{ 7 | flowchart( 8 | .data, 9 | filename = NULL, 10 | size = std_size$full, 11 | maxWidth = size$width, 12 | maxHeight = size$height, 13 | formats = c("dot", "png", "pdf", "svg"), 14 | defaultToHTML = TRUE, 15 | landscape = size$rot != 0, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{.data}{the tracked dataframe(s) either as a single dataframe or as a 21 | list of dataframes.} 22 | 23 | \item{filename}{a file name which will be where the formatted flowcharts are 24 | saved. If no extension is specified the output formats are determined by 25 | the \code{formats} parameter.} 26 | 27 | \item{size}{a named list with 3 elements, \code{width} and \code{height} in inches and 28 | \code{rot}, the rotation.
A predefined set of standard sizes are available in the 29 | \link{std_size} object.} 30 | 31 | \item{maxWidth}{a width (on the paper) in inches if \code{size} is not defined} 32 | 33 | \item{maxHeight}{a height (on the paper) in inches if \code{size} is not defined} 34 | 35 | \item{formats}{some of \code{pdf},\code{dot},\code{svg},\code{png},\code{ps}} 36 | 37 | \item{defaultToHTML}{if the correct output format is not easy to determine 38 | from the context, default providing \code{HTML} (TRUE) or to embedding the \code{PNG} (FALSE)} 39 | 40 | \item{landscape}{rotate the output by 270 degrees into a landscape format. 41 | \code{maxWidth} and \code{maxHeight} still apply and refer to the paper width to fit 42 | the flowchart into after rotation. (you might need to flip width and height)} 43 | 44 | \item{...}{other parameters passed onto either \code{p_get_as_dot()}, notable ones are 45 | \code{fill} (background colour e.g. \code{lightgrey}), \code{fontsize} (in points), 46 | \code{colour} (font colour)} 47 | } 48 | \value{ 49 | the nature of the flowchart output depends on the context in which 50 | the function is called. It will be some form of browse-able html output if 51 | called from an interactive session or a \code{PNG}/\code{PDF} link if in \code{knitr} and 52 | knitting latex or word type outputs, if file name is specified the output 53 | will also be saved at the given location. 54 | } 55 | \description{ 56 | Generate a flowchart of the history of the dataframe(s), with all the tracked 57 | data pipeline as stages in the flowchart. Multiple dataframes can be plotted 58 | together in which case an attempt is made to determine which parts are 59 | common. 
60 | } 61 | \examples{ 62 | library(dplyr) 63 | library(dtrackr) 64 | 65 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") \%>\% filter(Species!="versicolor") 66 | tmp \%>\% group_by(Species) \%>\% comment(.tag="step2") \%>\% flowchart() 67 | } 68 | -------------------------------------------------------------------------------- /man/group_by.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{group_by.trackr_df} 4 | \alias{group_by.trackr_df} 5 | \title{Stratifying your analysis} 6 | \usage{ 7 | \method{group_by}{trackr_df}( 8 | .data, 9 | ..., 10 | .messages = "stratify by {.cols}", 11 | .headline = NULL, 12 | .tag = NULL, 13 | .maxgroups = .defaultMaxSupportedGroupings() 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 18 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 19 | more details.} 20 | 21 | \item{...}{In \code{group_by()}, variables or computations to group by. 22 | Computations are always done on the ungrouped data frame. 23 | To perform computations on the grouped data, you need to use 24 | a separate \code{mutate()} step before the \code{group_by()}. 25 | Computations are not allowed in \code{nest_by()}. 26 | In \code{ungroup()}, variables to remove from the grouping. 27 | Named arguments passed on to \code{\link[dplyr:group_by]{dplyr::group_by}}\describe{ 28 | \item{\code{.add}}{When \code{FALSE}, the default, \code{group_by()} will 29 | override existing groups. To add to the existing groups, use 30 | \code{.add = TRUE}. 31 | 32 | This argument was previously called \code{add}, but that prevented 33 | creating a new grouping variable called \code{add}, and conflicts with 34 | our naming conventions.} 35 | \item{\code{.drop}}{Drop groups formed by factor levels that don't appear in the 36 | data? 
The default is \code{TRUE} except when \code{.data} has been previously 37 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.} 38 | \item{\code{x}}{A \code{\link[dplyr:tbl]{tbl()}}} 39 | }} 40 | 41 | \item{.messages}{a set of glue specs. The glue code can use any global 42 | variable, or \{.cols\} which is the columns that are being grouped by.} 43 | 44 | \item{.headline}{a headline glue spec. The glue code can use any global 45 | variable, or \{.cols\}.} 46 | 47 | \item{.tag}{if you want the summary data from this step in the future then 48 | give it a name with .tag.} 49 | 50 | \item{.maxgroups}{the maximum number of subgroups allowed before the tracking 51 | is paused.} 52 | } 53 | \value{ 54 | the .data but grouped. 55 | } 56 | \description{ 57 | Grouping a data set acts in the normal way. When tracking a dataframe 58 | sometimes a \code{group_by()} operation will create a lot of groups. This happens 59 | for example if you are doing a \code{group_by()}, \code{summarise()} step that is 60 | aggregating data on a fine scale, e.g. by day in a time-series. This is 61 | generally a terrible idea when tracking a dataframe as the resulting 62 | flowchart will have many many branches and be illegible. \code{dtrackr} will detect this issue and 63 | pause tracking the dataframe with a warning. It is up to the user to 64 | \code{resume()} tracking when the large number of groups has been resolved e.g. 65 | using a \code{dplyr::ungroup()}. This limit is configurable with 66 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. See 67 | \code{\link[dplyr:group_by]{dplyr::group_by()}}. 
68 | } 69 | \examples{ 70 | library(dplyr) 71 | library(dtrackr) 72 | 73 | tmp = iris \%>\% track() \%>\% group_by(Species, .messages="stratify by {.cols}") 74 | tmp \%>\% comment("{.strata}") \%>\% history() 75 | } 76 | \seealso{ 77 | dplyr::group_by() 78 | } 79 | -------------------------------------------------------------------------------- /man/group_modify.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{group_modify.trackr_df} 4 | \alias{group_modify.trackr_df} 5 | \title{Group-wise modification of data and complex operations} 6 | \usage{ 7 | \method{group_modify}{trackr_df}( 8 | .data, 9 | ..., 10 | .messages = NULL, 11 | .headline = .defaultHeadline(), 12 | .type = "modify", 13 | .tag = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{A grouped tibble} 18 | 19 | \item{...}{Additional arguments passed on to \code{.f} 20 | Named arguments passed on to \code{\link[dplyr:group_map]{dplyr::group_modify}}\describe{ 21 | \item{\code{.f}}{A function or formula to apply to each group. 22 | 23 | If a \strong{function}, it is used as is. It should have at least 2 formal arguments. 24 | 25 | If a \strong{formula}, e.g. \code{~ head(.x)}, it is converted to a function. 26 | 27 | In the formula, you can use 28 | \itemize{ 29 | \item \code{.} or \code{.x} to refer to the subset of rows of \code{.tbl} 30 | for the given group 31 | \item \code{.y} to refer to the key, a one row tibble with one column per grouping variable 32 | that identifies the group 33 | }} 34 | \item{\code{.keep}}{are the grouping variables kept in \code{.x}} 35 | }} 36 | 37 | \item{.messages}{a set of glue specs. The glue code can use any global 38 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 39 | 40 | \item{.headline}{a headline glue spec. 
The glue code can use any global 41 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 42 | 43 | \item{.type}{default "modify": used to define formatting} 44 | 45 | \item{.tag}{if you want the summary data from this step in the future then 46 | give it a name with .tag.} 47 | } 48 | \value{ 49 | the transformed .data dataframe with the history graph updated. 50 | } 51 | \description{ 52 | Group modifying a data set acts in the normal way. The internal mechanics of 53 | the modify function are opaque to the history. This means these can be used 54 | to wrap any unsupported operation without losing the history (e.g. \code{df \%>\% track() \%>\% group_modify(function(d,...) { d \%>\% unsupported_operation() })} 55 | ). Prior to the operation the size of the group is calculated as \{.count.in\} 56 | and after the operation the output size as \{.count.out\}. The group \{.strata\} 57 | is also available (if grouped) for reporting. See \code{\link[dplyr:group_map]{dplyr::group_modify()}}. 58 | } 59 | \examples{ 60 | library(dplyr) 61 | library(dtrackr) 62 | 63 | tmp = iris \%>\% track() \%>\% group_by(Species) 64 | tmp \%>\% group_modify( 65 | function(d,g,...) { return(tibble::tibble(x=runif(10))) }, 66 | .messages="{.count.in} in, {.count.out} out" 67 | ) \%>\% history() 68 | } 69 | \seealso{ 70 | dplyr::group_modify() 71 | } 72 | -------------------------------------------------------------------------------- /man/history.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{history} 4 | \alias{history} 5 | \title{Get the dtrackr history graph} 6 | \usage{ 7 | history(.data) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | } 12 | \value{ 13 | the history graph. 
This is a list, of class \code{trackr_graph}, containing the following named items: 14 | \itemize{ 15 | \item excluded - the data items that have been excluded thus far as a nested dataframe 16 | \item tags - a dataframe of tag-value pairs containing the summary of the data at named points in the data flow (see \code{\link[=tagged]{tagged()}}) 17 | \item nodes - a dataframe of the nodes of the flow chart 18 | \item edges - an edge list (as a dataframe) of the relationships between the nodes in the flow chart 19 | \item head - the current most recent nodes added into the graph as a dataframe. 20 | } 21 | 22 | The format of this data may grow over time but these fields are unlikely to be changed. 23 | } 24 | \description{ 25 | This provides the raw history graph and is not really intended for mainstream use. 26 | The internal structure of the graph is explained below. print and plot S3 methods exist for 27 | the dtrackr history graph. 28 | } 29 | \examples{ 30 | library(dplyr) 31 | library(dtrackr) 32 | graph = iris \%>\% track() \%>\% comment("A comment") \%>\% history() 33 | print(graph) 34 | } 35 | -------------------------------------------------------------------------------- /man/intersect.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{intersect.trackr_df} 4 | \alias{intersect.trackr_df} 5 | \title{Set operations} 6 | \usage{ 7 | \method{intersect}{trackr_df}( 8 | x, 9 | y, 10 | ..., 11 | .messages = "{.count.out} in intersection", 12 | .headline = "Intersection" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{Vectors to combine.} 17 | 18 | \item{...}{a collection of tracked data frames to combine} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count.out\}} 22 | 23 | \item{.headline}{a glue spec. 
The glue code can use any global variable, or 24 | \{.count.out\}} 25 | } 26 | \value{ 27 | the dplyr output with the history graph updated. 28 | } 29 | \description{ 30 | These perform set operations on tracked dataframes. It merges the history 31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of 32 | resulting rows as \{.count.out\} in other terms it performs exactly the same 33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 35 | \code{\link[dplyr:setops]{dplyr::setdiff()}},\code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 36 | underlying function details. 37 | } 38 | \examples{ 39 | library(dplyr) 40 | library(dtrackr) 41 | 42 | # Set operations 43 | people = starwars \%>\% select(-films, -vehicles, -starships) 44 | chrs = people \%>\% track("start") 45 | 46 | lhs = chrs \%>\% include_any( 47 | species == "Human" ~ "{.included} humans", 48 | species == "Droid" ~ "{.included} droids" 49 | ) 50 | 51 | # these are different subsets of the same data 52 | rhs = chrs \%>\% include_any( 53 | species == "Human" ~ "{.included} humans", 54 | species == "Gungan" ~ "{.included} gungans" 55 | ) \%>\% comment("{.count} gungans & humans") 56 | 57 | 58 | # Unions 59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 60 | # display the history of the result: 61 | set \%>\% history() 62 | nrow(set) 63 | # not run - display the flowchart: 64 | # set \%>\% flowchart() 65 | 66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 67 | # display the history of the result: 68 | set \%>\% history() 69 | nrow(set) 70 | # not run - display the flowchart: 71 | # set \%>\% flowchart() 72 | 73 | set = union_all(lhs,rhs) \%>\% 
comment("{.count} 2*human,droids and gungans") 74 | # display the history of the result: 75 | set \%>\% history() 76 | nrow(set) 77 | # not run - display the flowchart: 78 | # set \%>\% flowchart() 79 | 80 | # Intersections and differences 81 | 82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 83 | # display the history of the result: 84 | set \%>\% history() 85 | nrow(set) 86 | # not run - display the flowchart: 87 | # set \%>\% flowchart() 88 | 89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans") 90 | # display the history of the result: 91 | set \%>\% history() 92 | nrow(set) 93 | # not run - display the flowchart: 94 | # set \%>\% flowchart() 95 | } 96 | \seealso{ 97 | generics::intersect() 98 | } 99 | -------------------------------------------------------------------------------- /man/p_add_tally.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_add_tally} 4 | \alias{p_add_tally} 5 | \title{dplyr modifying operations} 6 | \usage{ 7 | p_add_tally(x, ..., .messages = "", .headline = "", .tag = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{A data frame, data frame extension (e.g. a tibble), or a 11 | lazy data frame (e.g. from dbplyr or dtplyr).} 12 | 13 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group 14 | by.} 15 | 16 | \item{.messages}{a set of glue specs. The glue code can use any global 17 | variable, grouping variable, \{.new_cols\} or \{.dropped_cols\} for changes to 18 | columns, \{.cols\} for the output column names, or \{.strata\}. Defaults to nothing.} 19 | 20 | \item{.headline}{a headline glue spec. The glue code can use any global 21 | variable, grouping variable, \{.new_cols\}, \{.dropped_cols\}, \{.cols\} or \{.strata\}. 
22 | Defaults to nothing.} 23 | 24 | \item{.tag}{if you want the summary data from this step in the future then 25 | give it a name with .tag.} 26 | } 27 | \value{ 28 | the \code{.data} dataframe after being modified by the \code{dplyr} equivalent 29 | function, but with the history graph updated with a new stage if the 30 | \code{.messages} or \code{.headline} parameter is not empty. 31 | } 32 | \description{ 33 | See \code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[dplyr:count]{dplyr::add_count()}}, \code{\link[dplyr:count]{dplyr::add_tally()}}, 34 | \code{\link[dplyr:transmute]{dplyr::transmute()}}, \code{\link[dplyr:select]{dplyr::select()}}, \code{\link[dplyr:relocate]{dplyr::relocate()}}, 35 | \code{\link[dplyr:rename]{dplyr::rename()}}, \code{\link[dplyr:rename]{dplyr::rename_with()}}, \code{\link[dplyr:arrange]{dplyr::arrange()}} for more details 36 | on underlying functions. \code{dtrackr} provides equivalent functions for 37 | mutating, selecting and renaming a data set which act in the same way as 38 | \code{dplyr}. \code{mutate} / \code{select} / \code{rename} generally don't add anything in terms 39 | of provenance of data so the default behaviour is to miss these out of the 40 | \code{dtrackr} history. This can be overridden with the \code{.messages}, or 41 | \code{.headline} values in which case they behave just like a \code{comment()}. 42 | } 43 | \examples{ 44 | library(dplyr) 45 | library(dtrackr) 46 | 47 | # mutate and other functions are unitary operations that generally change 48 | # the structure but not size of a dataframe. In dtrackr these are ignored 49 | # by default but we can change that so that their behaviour is obvious. 
50 | 51 | # add_count 52 | # adding in a count or tally column as a new column 53 | iris \%>\% 54 | track() \%>\% 55 | add_count(Species, name="new_count_total", 56 | .messages="{.new_cols}", 57 | # .messages="{.cols}", 58 | .headline="New columns from add_count:") \%>\% 59 | history() 60 | 61 | # add_tally 62 | iris \%>\% 63 | track() \%>\% 64 | group_by(Species) \%>\% 65 | dtrackr::add_tally(wt=Petal.Length, name="new_tally_total", 66 | .messages="{.new_cols}", 67 | .headline="New columns from add_tally:") \%>\% 68 | history() 69 | 70 | 71 | 72 | } 73 | \seealso{ 74 | dplyr::add_tally() 75 | } 76 | -------------------------------------------------------------------------------- /man/p_bind_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_bind_cols} 4 | \alias{p_bind_cols} 5 | \title{Set operations} 6 | \usage{ 7 | p_bind_cols( 8 | ..., 9 | .messages = "{.count.out} in combined set", 10 | .headline = "Bind columns" 11 | ) 12 | } 13 | \arguments{ 14 | \item{...}{a collection of tracked data frames to combine} 15 | 16 | \item{.messages}{a set of glue specs. The glue code can use any global 17 | variable, or \{.count.out\}} 18 | 19 | \item{.headline}{a glue spec. The glue code can use any global variable, or 20 | \{.count.out\}} 21 | } 22 | \value{ 23 | the dplyr output with the history graph updated. 24 | } 25 | \description{ 26 | These perform set operations on tracked dataframes. It merges the history 27 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of 28 | resulting rows as \{.count.out\} in other terms it performs exactly the same 29 | operation as the equivalent \code{dplyr} operation. 
See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 30 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 31 | \code{\link[dplyr:setops]{dplyr::setdiff()}},\code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 32 | underlying function details. 33 | } 34 | \examples{ 35 | library(dplyr) 36 | library(dtrackr) 37 | 38 | # Set operations 39 | people = starwars \%>\% select(-films, -vehicles, -starships) 40 | chrs = people \%>\% track("start") 41 | 42 | lhs = chrs \%>\% include_any( 43 | species == "Human" ~ "{.included} humans", 44 | species == "Droid" ~ "{.included} droids" 45 | ) 46 | 47 | # these are different subsets of the same data 48 | rhs = chrs \%>\% include_any( 49 | species == "Human" ~ "{.included} humans", 50 | species == "Gungan" ~ "{.included} gungans" 51 | ) \%>\% comment("{.count} gungans & humans") 52 | 53 | 54 | # Unions 55 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 56 | # display the history of the result: 57 | set \%>\% history() 58 | nrow(set) 59 | # not run - display the flowchart: 60 | # set \%>\% flowchart() 61 | 62 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 63 | # display the history of the result: 64 | set \%>\% history() 65 | nrow(set) 66 | # not run - display the flowchart: 67 | # set \%>\% flowchart() 68 | 69 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 70 | # display the history of the result: 71 | set \%>\% history() 72 | nrow(set) 73 | # not run - display the flowchart: 74 | # set \%>\% flowchart() 75 | 76 | # Intersections and differences 77 | 78 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 79 | # display the history of the result: 80 | set \%>\% history() 81 | nrow(set) 82 | # not run - display the flowchart: 83 | # set \%>\% flowchart() 84 | 85 | set = 
intersect(lhs,rhs) \%>\% comment("{.count} humans") 86 | # display the history of the result: 87 | set \%>\% history() 88 | nrow(set) 89 | # not run - display the flowchart: 90 | # set \%>\% flowchart() 91 | } 92 | \seealso{ 93 | dplyr::bind_cols() 94 | } 95 | -------------------------------------------------------------------------------- /man/p_bind_rows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_bind_rows} 4 | \alias{p_bind_rows} 5 | \title{Set operations} 6 | \usage{ 7 | p_bind_rows(..., .messages = "{.count.out} in union", .headline = "Union") 8 | } 9 | \arguments{ 10 | \item{...}{a collection of tracked data frames to combine} 11 | 12 | \item{.messages}{a set of glue specs. The glue code can use any global 13 | variable, or \{.count.out\}} 14 | 15 | \item{.headline}{a glue spec. The glue code can use any global variable, or 16 | \{.count.out\}} 17 | } 18 | \value{ 19 | the dplyr output with the history graph updated. 20 | } 21 | \description{ 22 | These perform set operations on tracked dataframes. It merges the history 23 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of 24 | resulting rows as \{.count.out\} in other terms it performs exactly the same 25 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 26 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 27 | \code{\link[dplyr:setops]{dplyr::setdiff()}},\code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 28 | underlying function details. 
29 | } 30 | \examples{ 31 | library(dplyr) 32 | library(dtrackr) 33 | 34 | # Set operations 35 | people = starwars \%>\% select(-films, -vehicles, -starships) 36 | chrs = people \%>\% track("start") 37 | 38 | lhs = chrs \%>\% include_any( 39 | species == "Human" ~ "{.included} humans", 40 | species == "Droid" ~ "{.included} droids" 41 | ) 42 | 43 | # these are different subsets of the same data 44 | rhs = chrs \%>\% include_any( 45 | species == "Human" ~ "{.included} humans", 46 | species == "Gungan" ~ "{.included} gungans" 47 | ) \%>\% comment("{.count} gungans & humans") 48 | 49 | 50 | # Unions 51 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 52 | # display the history of the result: 53 | set \%>\% history() 54 | nrow(set) 55 | # not run - display the flowchart: 56 | # set \%>\% flowchart() 57 | 58 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 59 | # display the history of the result: 60 | set \%>\% history() 61 | nrow(set) 62 | # not run - display the flowchart: 63 | # set \%>\% flowchart() 64 | 65 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 66 | # display the history of the result: 67 | set \%>\% history() 68 | nrow(set) 69 | # not run - display the flowchart: 70 | # set \%>\% flowchart() 71 | 72 | # Intersections and differences 73 | 74 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 75 | # display the history of the result: 76 | set \%>\% history() 77 | nrow(set) 78 | # not run - display the flowchart: 79 | # set \%>\% flowchart() 80 | 81 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans") 82 | # display the history of the result: 83 | set \%>\% history() 84 | nrow(set) 85 | # not run - display the flowchart: 86 | # set \%>\% flowchart() 87 | } 88 | \seealso{ 89 | dplyr::bind_rows() 90 | } 91 | -------------------------------------------------------------------------------- /man/p_capture_exclusions.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_capture_exclusions} 4 | \alias{p_capture_exclusions} 5 | \title{Start capturing exclusions on a tracked dataframe.} 6 | \usage{ 7 | p_capture_exclusions(.data, .capture = TRUE) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | 12 | \item{.capture}{Should we capture exclusions (things removed from the data 13 | set). This is useful for debugging data issues but comes at a significant 14 | cost. Defaults to the value of \code{getOption("dtrackr.exclusions")} or 15 | \code{FALSE}.} 16 | } 17 | \value{ 18 | the .data dataframe with the exclusions flag set (or cleared if 19 | \code{.capture=FALSE}). 20 | } 21 | \description{ 22 | Start capturing exclusions on a tracked dataframe. 23 | } 24 | \examples{ 25 | library(dplyr) 26 | library(dtrackr) 27 | tmp = iris \%>\% track() \%>\% capture_exclusions() 28 | tmp \%>\% filter(Species!="versicolor") \%>\% history() 29 | } 30 | -------------------------------------------------------------------------------- /man/p_clear.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_clear} 4 | \alias{p_clear} 5 | \title{Clear the dtrackr history graph} 6 | \usage{ 7 | p_clear(.data) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | } 12 | \value{ 13 | the .data dataframe with the history graph removed 14 | } 15 | \description{ 16 | This is unlikely to be needed directly and is mostly an internal function. 17 | } 18 | \examples{ 19 | library(dplyr) 20 | library(dtrackr) 21 | mtcars \%>\% track() \%>\% comment("A comment") \%>\% p_clear() \%>\% history() 22 | } 23 | -------------------------------------------------------------------------------- /man/p_comment.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_comment} 4 | \alias{p_comment} 5 | \title{Add a generic comment to the dtrackr history graph} 6 | \usage{ 7 | p_comment( 8 | .data, 9 | .messages = .defaultMessage(), 10 | .headline = .defaultHeadline(), 11 | .type = "info", 12 | .asOffshoot = (.type == "exclusion"), 13 | .tag = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{a dataframe which may be grouped} 18 | 19 | \item{.messages}{a character vector of glue specifications. A glue 20 | specification can refer to any grouping variables of .data, or any 21 | variables defined in the calling environment, the \{.total\} of all rows, 22 | the \{.count\} variable which is the count in each group and \{.strata\} a 23 | description of the group} 24 | 25 | \item{.headline}{a glue specification which can refer to grouping variables 26 | of .data, or any variables defined in the calling environment, or the 27 | \{.total\} variable (which is \code{nrow(.data)}) and \{.strata\} which is a 28 | description of the grouping} 29 | 30 | \item{.type}{one of "info", ..., "exclusion": used to define formatting} 31 | 32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main 33 | flow (default = TRUE only when \code{.type} is "exclusion", otherwise FALSE).} 34 | 35 | \item{.tag}{if you want the summary data from this step in the future then 36 | give it a name with .tag.} 37 | } 38 | \value{ 39 | the same .data dataframe with the history graph updated with the comment 40 | } 41 | \description{ 42 | A comment can be any kind of note and is added once for every current 43 | grouping as defined by the \code{.messages} field. It can be made context specific 44 | by including variables such as \{.count\} and \{.total\} in \code{.messages} which 45 | refer to the grouped and ungrouped counts at this current stage of the 46 | pipeline respectively. 
It can also pull in any global variable. 47 | } 48 | \examples{ 49 | library(dplyr) 50 | library(dtrackr) 51 | iris \%>\% track() \%>\% comment("hello {.total} rows") \%>\% history() 52 | } 53 | -------------------------------------------------------------------------------- /man/p_copy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_copy} 4 | \alias{p_copy} 5 | \title{Copy the dtrackr history graph from one dataframe to another} 6 | \usage{ 7 | p_copy(.data, from) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | 12 | \item{from}{the dataframe to copy the history graph from} 13 | } 14 | \value{ 15 | the .data dataframe with the history graph of "from" 16 | } 17 | \description{ 18 | Copy the dtrackr history graph from one dataframe to another 19 | } 20 | \examples{ 21 | mtcars \%>\% p_copy(iris \%>\% comment("A comment")) \%>\% history() 22 | } 23 | -------------------------------------------------------------------------------- /man/p_count_if.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_count_if} 4 | \alias{p_count_if} 5 | \title{Simple count_if dplyr summary function} 6 | \usage{ 7 | p_count_if(..., na.rm = TRUE) 8 | } 9 | \arguments{ 10 | \item{...}{expression to be evaluated} 11 | 12 | \item{na.rm}{ignore NA values?} 13 | } 14 | \value{ 15 | a count of the number of times the expression evaluated to true, in the current context 16 | } 17 | \description{ 18 | Simple count_if dplyr summary function 19 | } 20 | \examples{ 21 | library(dplyr) 22 | library(dtrackr) 23 | tmp = iris \%>\% dplyr::group_by(Species) 24 | tmp \%>\% dplyr::summarise(long_ones = p_count_if(Petal.Length > 4)) 25 | } 26 | 
-------------------------------------------------------------------------------- /man/p_count_subgroup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_count_subgroup} 4 | \alias{p_count_subgroup} 5 | \title{Add a subgroup count to the dtrackr history graph} 6 | \usage{ 7 | p_count_subgroup( 8 | .data, 9 | .subgroup, 10 | ..., 11 | .messages = .defaultCountSubgroup(), 12 | .headline = .defaultHeadline(), 13 | .type = "info", 14 | .asOffshoot = FALSE, 15 | .tag = NULL, 16 | .maxsubgroups = .defaultMaxSupportedGroupings() 17 | ) 18 | } 19 | \arguments{ 20 | \item{.data}{a dataframe which may be grouped} 21 | 22 | \item{.subgroup}{a column with a small number of levels (e.g. a factor)} 23 | 24 | \item{...}{passed to \verb{base::factor(subgroup values, ...)} to allow reordering 25 | of levels etc.} 26 | 27 | \item{.messages}{a character vector of glue specifications. 
A glue 28 | specification can refer to anything from the calling environment, 29 | \{.subgroup\} for the subgroup column name and \{.name\} for the subgroup 30 | column value, \{.count\} for the subgroup column count, \{.subtotal\} for 31 | the current stratification grouping count and \{.total\} for the whole 32 | dataset count} 33 | 34 | \item{.headline}{a glue specification which can refer to grouping variables 35 | of .data, \{.subtotal\} for the current grouping count, or any variables 36 | defined in the calling environment} 37 | 38 | \item{.type}{one of "info","exclusion": used to define formatting} 39 | 40 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main 41 | flow (default = FALSE).} 42 | 43 | \item{.tag}{if you want to use the summary data from this step in the future 44 | then give it a name with .tag.} 45 | 46 | \item{.maxsubgroups}{the maximum number of discrete values allowed in 47 | .subgroup is configurable with 48 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. Large 49 | values produce unwieldy flow charts.} 50 | } 51 | \value{ 52 | the same .data dataframe with the history graph updated with a 53 | subgroup count as a new stage 54 | } 55 | \description{ 56 | A frequent use case for more detailed description is to have a subgroup count 57 | within a flowchart. This works best for factor subgroup columns but other 58 | data will be converted to a factor automatically. The count of the items in 59 | each subgroup is added as a new stage in the flowchart. 
60 | } 61 | \examples{ 62 | library(dplyr) 63 | library(dtrackr) 64 | survival::cgd \%>\% track() \%>\% group_by(treat) \%>\% 65 | count_subgroup(center) \%>\% history() 66 | } 67 | -------------------------------------------------------------------------------- /man/p_distinct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_distinct} 4 | \alias{p_distinct} 5 | \title{Distinct values of data} 6 | \usage{ 7 | p_distinct( 8 | .data, 9 | ..., 10 | .messages = "removing {.count.in-.count.out} duplicates", 11 | .headline = .defaultHeadline(), 12 | .tag = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 17 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 18 | more details.} 19 | 20 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to 21 | use when determining uniqueness. If there are multiple rows for a given 22 | combination of inputs, only the first row will be preserved. If omitted, 23 | will use all variables in the data frame. 24 | Named arguments passed on to \code{\link[dplyr:distinct]{dplyr::distinct}}\describe{ 25 | \item{\code{.keep_all}}{If \code{TRUE}, keep all variables in \code{.data}. 26 | If a combination of \code{...} is not distinct, this keeps the 27 | first row of values.} 28 | }} 29 | 30 | \item{.messages}{a set of glue specs. The glue code can use any global 31 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 32 | 33 | \item{.headline}{a headline glue spec. The glue code can use any global 34 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 35 | 36 | \item{.tag}{if you want the summary data from this step in the future then 37 | give it a name with .tag.} 38 | } 39 | \value{ 40 | the .data dataframe with distinct values and history graph updated. 
41 | } 42 | \description{ 43 | Distinct acts in the same way as in \code{dplyr::distinct}. Prior to the operation 44 | the size of the group is calculated as \{.count.in\} and after the operation the 45 | output size as \{.count.out\}. The group \{.strata\} is also available (if 46 | grouped) for reporting. See \code{\link[dplyr:distinct]{dplyr::distinct()}}. 47 | } 48 | \examples{ 49 | library(dplyr) 50 | library(dtrackr) 51 | 52 | tmp = bind_rows(iris \%>\% track(), iris \%>\% track() \%>\% filter(Petal.Length > 5)) 53 | tmp \%>\% group_by(Species) \%>\% distinct() \%>\% history() 54 | } 55 | \seealso{ 56 | dplyr::distinct() 57 | } 58 | -------------------------------------------------------------------------------- /man/p_excluded.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_excluded} 4 | \alias{p_excluded} 5 | \title{Get the dtrackr excluded data record} 6 | \usage{ 7 | p_excluded(.data, simplify = TRUE) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | 12 | \item{simplify}{return a single summary dataframe of all exclusions.} 13 | } 14 | \value{ 15 | a new dataframe of the excluded data up to this point in the workflow. This dataframe is by default flattened, but if \code{simplify=FALSE} has a nested structure containing records excluded at each part of the pipeline. 
16 | } 17 | \description{ 18 | Get the dtrackr excluded data record 19 | } 20 | \examples{ 21 | library(dplyr) 22 | library(dtrackr) 23 | tmp = iris \%>\% track() \%>\% capture_exclusions() 24 | tmp \%>\% exclude_all( 25 | Petal.Length > 5.8 ~ "{.excluded} long ones", 26 | Petal.Length < 1.3 ~ "{.excluded} short ones", 27 | .stage = "petal length exclusion" 28 | ) \%>\% excluded() 29 | } 30 | -------------------------------------------------------------------------------- /man/p_filter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_filter} 4 | \alias{p_filter} 5 | \title{Filtering data} 6 | \usage{ 7 | p_filter( 8 | .data, 9 | ..., 10 | .messages = "excluded {.excluded} items", 11 | .headline = .defaultHeadline(), 12 | .type = "exclusion", 13 | .asOffshoot = (.type == "exclusion"), 14 | .stage = (if (is.null(.tag)) "" else .tag), 15 | .tag = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 20 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 21 | more details.} 22 | 23 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that 24 | return a logical value, and are defined in terms of the variables in 25 | \code{.data}. If multiple expressions are included, they are combined with the 26 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are 27 | kept. 
28 | Named arguments passed on to \code{\link[dplyr:filter]{dplyr::filter}}\describe{ 29 | \item{\code{.by}}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 30 | 31 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 32 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 33 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 34 | \item{\code{.preserve}}{Relevant when the \code{.data} input is grouped. 35 | If \code{.preserve = FALSE} (the default), the grouping structure 36 | is recalculated based on the resulting data, otherwise the grouping is kept as is.} 37 | }} 38 | 39 | \item{.messages}{a set of glue specs. The glue code can use any global 40 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 41 | 42 | \item{.headline}{a headline glue spec. The glue code can use any global 43 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 44 | 45 | \item{.type}{the format type of the action typically an exclusion} 46 | 47 | \item{.asOffshoot}{if the type is exclusion, \code{.asOffshoot} places the 48 | information box outside of the main flow, as an exclusion.} 49 | 50 | \item{.stage}{a name for this step in the pathway} 51 | 52 | \item{.tag}{if you want the summary data from this step in the future then 53 | give it a name with \code{.tag}.} 54 | } 55 | \value{ 56 | the filtered \code{.data} dataframe with history graph updated 57 | } 58 | \description{ 59 | Filter acts in the same way as in \code{dplyr} where predicates which evaluate to 60 | TRUE act to select items to include, and items for which the predicate cannot 61 | be evaluated are excluded. 
For tracking prior to the filter operation the 62 | size of each group is calculated \{.count.in\} and after the operation the 63 | output size of each group \{.count.out\}. The grouping \{.strata\} is also 64 | available (if grouped) for reporting. See \code{\link[dplyr:filter]{dplyr::filter()}}. 65 | } 66 | \examples{ 67 | library(dplyr) 68 | library(dtrackr) 69 | 70 | tmp = iris \%>\% track() \%>\% group_by(Species) 71 | tmp \%>\% filter(Petal.Length > 5) \%>\% history() 72 | } 73 | \seealso{ 74 | dplyr::filter() 75 | } 76 | -------------------------------------------------------------------------------- /man/p_flowchart.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_flowchart} 4 | \alias{p_flowchart} 5 | \title{Flowchart output} 6 | \usage{ 7 | p_flowchart( 8 | .data, 9 | filename = NULL, 10 | size = std_size$full, 11 | maxWidth = size$width, 12 | maxHeight = size$height, 13 | formats = c("dot", "png", "pdf", "svg"), 14 | defaultToHTML = TRUE, 15 | landscape = size$rot != 0, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{.data}{the tracked dataframe(s) either as a single dataframe or as a 21 | list of dataframes.} 22 | 23 | \item{filename}{a file name which will be where the formatted flowcharts are 24 | saved. If no extension is specified the output formats are determined by 25 | the \code{formats} parameter.} 26 | 27 | \item{size}{a named list with 3 elements: \code{width} and \code{height} in inches, and 28 | rotation (\code{rot}).
A predefined set of standard sizes is available in the 29 | \link{std_size} object.} 30 | 31 | \item{maxWidth}{a width (on the paper) in inches if \code{size} is not defined} 32 | 33 | \item{maxHeight}{a height (on the paper) in inches if \code{size} is not defined} 34 | 35 | \item{formats}{some of \code{pdf},\code{dot},\code{svg},\code{png},\code{ps}} 36 | 37 | \item{defaultToHTML}{if the correct output format is not easy to determine 38 | from the context, default to providing \code{HTML} (TRUE) or to embedding the \code{PNG} (FALSE)} 39 | 40 | \item{landscape}{rotate the output by 270 degrees into a landscape format. 41 | \code{maxWidth} and \code{maxHeight} still apply and refer to the paper width to fit 42 | the flowchart into after rotation. (you might need to flip width and height)} 43 | 44 | \item{...}{other parameters passed on to \code{p_get_as_dot()}; notable ones are 45 | \code{fill} (background colour e.g. \code{lightgrey}), \code{fontsize} (in points), 46 | \code{colour} (font colour)} 47 | } 48 | \value{ 49 | the nature of the flowchart output depends on the context in which 50 | the function is called. It will be some form of browse-able html output if 51 | called from an interactive session or a \code{PNG}/\code{PDF} link if in \code{knitr} and 52 | knitting latex or word type outputs. If a file name is specified the output 53 | will also be saved at the given location. 54 | } 55 | \description{ 56 | Generate a flowchart of the history of the dataframe(s), with all the tracked 57 | data pipeline as stages in the flowchart. Multiple dataframes can be plotted 58 | together in which case an attempt is made to determine which parts are 59 | common.
60 | } 61 | \examples{ 62 | library(dplyr) 63 | library(dtrackr) 64 | 65 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") \%>\% filter(Species!="versicolor") 66 | tmp \%>\% group_by(Species) \%>\% comment(.tag="step2") \%>\% flowchart() 67 | } 68 | -------------------------------------------------------------------------------- /man/p_get.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_get} 4 | \alias{p_get} 5 | \title{Get the dtrackr history graph} 6 | \usage{ 7 | p_get(.data) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | } 12 | \value{ 13 | the history graph. This is a list, of class \code{trackr_graph}, containing the following named items: 14 | \itemize{ 15 | \item excluded - the data items that have been excluded thus far as a nested dataframe 16 | \item tags - a dataframe of tag-value pairs containing the summary of the data at named points in the data flow (see \code{\link[=tagged]{tagged()}}) 17 | \item nodes - a dataframe of the nodes of the flow chart 18 | \item edges - an edge list (as a dataframe) of the relationships between the nodes in the flow chart 19 | \item head - the current most recent nodes added into the graph as a dataframe. 20 | } 21 | 22 | The format of this data may grow over time but these fields are unlikely to be changed. 23 | } 24 | \description{ 25 | This provides the raw history graph and is not really intended for mainstream use. 26 | The internal structure of the graph is explained below. print and plot S3 methods exist for 27 | the dtrackr history graph. 
28 | } 29 | \examples{ 30 | library(dplyr) 31 | library(dtrackr) 32 | graph = iris \%>\% track() \%>\% comment("A comment") \%>\% history() 33 | print(graph) 34 | } 35 | -------------------------------------------------------------------------------- /man/p_get_as_dot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_get_as_dot} 4 | \alias{p_get_as_dot} 5 | \title{DOT output} 6 | \usage{ 7 | p_get_as_dot(.data, fill = "lightgrey", fontsize = "8", colour = "black", ...) 8 | } 9 | \arguments{ 10 | \item{.data}{the tracked dataframe} 11 | 12 | \item{fill}{the default node fill colour} 13 | 14 | \item{fontsize}{the default font size} 15 | 16 | \item{colour}{the default font colour} 17 | 18 | \item{...}{not used} 19 | } 20 | \value{ 21 | a representation of the history graph in \code{Graphviz} dot format. 22 | } 23 | \description{ 24 | (advanced usage) Outputs a \code{dtrackr} history graph as a DOT string for rendering with \code{Graphviz}. 25 | } 26 | \examples{ 27 | library(dplyr) 28 | library(dtrackr) 29 | 30 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") \%>\% filter(Species!="versicolor") 31 | dot = tmp \%>\% group_by(Species) \%>\% comment(.tag="step2") \%>\% p_get_as_dot() 32 | cat(dot) 33 | } 34 | -------------------------------------------------------------------------------- /man/p_group_by.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_group_by} 4 | \alias{p_group_by} 5 | \title{Stratifying your analysis} 6 | \usage{ 7 | p_group_by( 8 | .data, 9 | ..., 10 | .messages = "stratify by {.cols}", 11 | .headline = NULL, 12 | .tag = NULL, 13 | .maxgroups = .defaultMaxSupportedGroupings() 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{A data frame, data frame extension
(e.g. a tibble), or a 18 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 19 | more details.} 20 | 21 | \item{...}{In \code{group_by()}, variables or computations to group by. 22 | Computations are always done on the ungrouped data frame. 23 | To perform computations on the grouped data, you need to use 24 | a separate \code{mutate()} step before the \code{group_by()}. 25 | Computations are not allowed in \code{nest_by()}. 26 | In \code{ungroup()}, variables to remove from the grouping. 27 | Named arguments passed on to \code{\link[dplyr:group_by]{dplyr::group_by}}\describe{ 28 | \item{\code{.add}}{When \code{FALSE}, the default, \code{group_by()} will 29 | override existing groups. To add to the existing groups, use 30 | \code{.add = TRUE}. 31 | 32 | This argument was previously called \code{add}, but that prevented 33 | creating a new grouping variable called \code{add}, and conflicts with 34 | our naming conventions.} 35 | \item{\code{.drop}}{Drop groups formed by factor levels that don't appear in the 36 | data? The default is \code{TRUE} except when \code{.data} has been previously 37 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.} 38 | \item{\code{x}}{A \code{\link[dplyr:tbl]{tbl()}}} 39 | }} 40 | 41 | \item{.messages}{a set of glue specs. The glue code can use any global 42 | variable, or \{.cols\} which is the columns that are being grouped by.} 43 | 44 | \item{.headline}{a headline glue spec. The glue code can use any global 45 | variable, or \{.cols\}.} 46 | 47 | \item{.tag}{if you want the summary data from this step in the future then 48 | give it a name with .tag.} 49 | 50 | \item{.maxgroups}{the maximum number of subgroups allowed before the tracking 51 | is paused.} 52 | } 53 | \value{ 54 | the .data but grouped. 55 | } 56 | \description{ 57 | Grouping a data set acts in the normal way. 
When tracking a dataframe 58 | sometimes a \code{group_by()} operation will create a lot of groups. This happens 59 | for example if you are doing a \code{group_by()}, \code{summarise()} step that is 60 | aggregating data on a fine scale, e.g. by day in a time-series. This is 61 | generally a terrible idea when tracking a dataframe as the resulting 62 | flowchart will have many branches and be illegible. \code{dtrackr} will detect this issue and 63 | pause tracking the dataframe with a warning. It is up to the user to 64 | \code{resume()} tracking when the large number of groups has been resolved, e.g. 65 | using a \code{dplyr::ungroup()}. This limit is configurable with 66 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. See 67 | \code{\link[dplyr:group_by]{dplyr::group_by()}}. 68 | } 69 | \examples{ 70 | library(dplyr) 71 | library(dtrackr) 72 | 73 | tmp = iris \%>\% track() \%>\% group_by(Species, .messages="stratify by {.cols}") 74 | tmp \%>\% comment("{.strata}") \%>\% history() 75 | } 76 | \seealso{ 77 | dplyr::group_by() 78 | } 79 | -------------------------------------------------------------------------------- /man/p_group_modify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_group_modify} 4 | \alias{p_group_modify} 5 | \title{Group-wise modification of data and complex operations} 6 | \usage{ 7 | p_group_modify( 8 | .data, 9 | ..., 10 | .messages = NULL, 11 | .headline = .defaultHeadline(), 12 | .type = "modify", 13 | .tag = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{A grouped tibble} 18 | 19 | \item{...}{Additional arguments passed on to \code{.f} 20 | Named arguments passed on to \code{\link[dplyr:group_map]{dplyr::group_modify}}\describe{ 21 | \item{\code{.f}}{A function or formula to apply to each group. 22 | 23 | If a \strong{function}, it is used as is.
It should have at least 2 formal arguments. 24 | 25 | If a \strong{formula}, e.g. \code{~ head(.x)}, it is converted to a function. 26 | 27 | In the formula, you can use 28 | \itemize{ 29 | \item \code{.} or \code{.x} to refer to the subset of rows of \code{.tbl} 30 | for the given group 31 | \item \code{.y} to refer to the key, a one row tibble with one column per grouping variable 32 | that identifies the group 33 | }} 34 | \item{\code{.keep}}{are the grouping variables kept in \code{.x}} 35 | }} 36 | 37 | \item{.messages}{a set of glue specs. The glue code can use any global 38 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 39 | 40 | \item{.headline}{a headline glue spec. The glue code can use any global 41 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}} 42 | 43 | \item{.type}{default "modify": used to define formatting} 44 | 45 | \item{.tag}{if you want the summary data from this step in the future then 46 | give it a name with .tag.} 47 | } 48 | \value{ 49 | the transformed .data dataframe with the history graph updated. 50 | } 51 | \description{ 52 | Group modifying a data set acts in the normal way. The internal mechanics of 53 | the modify function are opaque to the history. This means these can be used 54 | to wrap any unsupported operation without losing the history (e.g. \code{df \%>\% track() \%>\% group_modify(function(d,...) { d \%>\% unsupported_operation() })} 55 | ). Prior to the operation the size of the group is calculated \{.count.in\} 56 | and after the operation the output size \{.count.out\}. The group \{.strata\} 57 | is also available (if grouped) for reporting. See \code{\link[dplyr:group_map]{dplyr::group_modify()}}. 58 | } 59 | \examples{ 60 | library(dplyr) 61 | library(dtrackr) 62 | 63 | tmp = iris \%>\% track() \%>\% group_by(Species) 64 | tmp \%>\% group_modify( 65 | function(d,g,...)
{ return(tibble::tibble(x=runif(10))) }, 66 | .messages="{.count.in} in, {.count.out} out" 67 | ) \%>\% history() 68 | } 69 | \seealso{ 70 | dplyr::group_modify() 71 | } 72 | -------------------------------------------------------------------------------- /man/p_intersect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_intersect} 4 | \alias{p_intersect} 5 | \title{Set operations} 6 | \usage{ 7 | p_intersect( 8 | x, 9 | y, 10 | ..., 11 | .messages = "{.count.out} in intersection", 12 | .headline = "Intersection" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{Vectors to combine.} 17 | 18 | \item{...}{a collection of tracked data frames to combine} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count.out\}} 22 | 23 | \item{.headline}{a glue spec. The glue code can use any global variable, or 24 | \{.count.out\}} 25 | } 26 | \value{ 27 | the dplyr output with the history graph updated. 28 | } 29 | \description{ 30 | These perform set operations on tracked dataframes. They merge the history 31 | of 2 (or more) dataframes and combine the rows (or columns). The total number of 32 | resulting rows is calculated as \{.count.out\}; in other respects each performs exactly the same 33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 36 | underlying function details.
37 | } 38 | \examples{ 39 | library(dplyr) 40 | library(dtrackr) 41 | 42 | # Set operations 43 | people = starwars \%>\% select(-films, -vehicles, -starships) 44 | chrs = people \%>\% track("start") 45 | 46 | lhs = chrs \%>\% include_any( 47 | species == "Human" ~ "{.included} humans", 48 | species == "Droid" ~ "{.included} droids" 49 | ) 50 | 51 | # these are different subsets of the same data 52 | rhs = chrs \%>\% include_any( 53 | species == "Human" ~ "{.included} humans", 54 | species == "Gungan" ~ "{.included} gungans" 55 | ) \%>\% comment("{.count} gungans & humans") 56 | 57 | 58 | # Unions 59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 60 | # display the history of the result: 61 | set \%>\% history() 62 | nrow(set) 63 | # not run - display the flowchart: 64 | # set \%>\% flowchart() 65 | 66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 67 | # display the history of the result: 68 | set \%>\% history() 69 | nrow(set) 70 | # not run - display the flowchart: 71 | # set \%>\% flowchart() 72 | 73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 74 | # display the history of the result: 75 | set \%>\% history() 76 | nrow(set) 77 | # not run - display the flowchart: 78 | # set \%>\% flowchart() 79 | 80 | # Intersections and differences 81 | 82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 83 | # display the history of the result: 84 | set \%>\% history() 85 | nrow(set) 86 | # not run - display the flowchart: 87 | # set \%>\% flowchart() 88 | 89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans") 90 | # display the history of the result: 91 | set \%>\% history() 92 | nrow(set) 93 | # not run - display the flowchart: 94 | # set \%>\% flowchart() 95 | } 96 | \seealso{ 97 | generics::intersect() 98 | } 99 | -------------------------------------------------------------------------------- /man/p_pause.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_pause} 4 | \alias{p_pause} 5 | \title{Pause tracking the data frame.} 6 | \usage{ 7 | p_pause(.data, auto = FALSE) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | 12 | \item{auto}{if \code{TRUE} the tracking will resume automatically when the 13 | number of groups has fallen to a sensible level (default is \code{FALSE}).} 14 | } 15 | \value{ 16 | the .data dataframe with history graph tracking paused 17 | } 18 | \description{ 19 | Pausing tracking of a data frame may be required if an operation is about to 20 | be performed that creates a lot of groupings or that you otherwise don't 21 | want to pollute the history graph (e.g. maybe selecting something using 22 | an anti-join). Once paused the history is not updated until a \code{resume()} is 23 | called, or when the data frame is ungrouped (if \code{auto} is enabled). 24 | } 25 | \examples{ 26 | iris \%>\% track() \%>\% pause() \%>\% history() 27 | } 28 | -------------------------------------------------------------------------------- /man/p_reframe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_reframe} 4 | \alias{p_reframe} 5 | \title{Summarise a data set} 6 | \usage{ 7 | p_reframe(.data, ..., .messages = "", .headline = "", .tag = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 12 | more details.} 13 | 14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of 15 | summary functions. The name will be the name of the variable in the result.
16 | 17 | The value can be: 18 | \itemize{ 19 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. 20 | \item A data frame, to add multiple columns from a single expression. 21 | } 22 | 23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was 24 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead. 25 | Named arguments passed on to \code{\link[dplyr:reframe]{dplyr::reframe}}\describe{ 26 | \item{\code{.by}}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 27 | 28 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 29 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 30 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 31 | }} 32 | 33 | \item{.messages}{a set of glue specs. The glue code can use any summary 34 | variable defined in the ... parameter, or any global variable, or 35 | \{.strata\}} 36 | 37 | \item{.headline}{a headline glue spec. The glue code can use any summary 38 | variable defined in the ... parameter, or any global variable, or 39 | \{.strata\}} 40 | 41 | \item{.tag}{if you want the summary data from this step in the future then 42 | give it a name with .tag.} 43 | } 44 | \value{ 45 | the .data dataframe summarised with the history graph updated showing 46 | the summarise operation as a new stage 47 | } 48 | \description{ 49 | Summarising a data set acts in the normal \code{dplyr} manner to collapse groups 50 | to individual rows. Any columns resulting from the summary can be added to 51 | the history graph. 
In the history this also joins any stratified branches and 52 | allows you to generate some summary statistics about the un-grouped data. See 53 | \code{\link[dplyr:summarise]{dplyr::summarise()}}. 54 | } 55 | \examples{ 56 | library(dplyr) 57 | library(dtrackr) 58 | 59 | tmp = iris \%>\% group_by(Species) \%>\% track() 60 | tmp \%>\% reframe(tibble( 61 | param = c("mean","min","max"), 62 | value = c(mean(Petal.Length), min(Petal.Length), max(Petal.Length)) 63 | ), .messages="length {param}: {value}") \%>\% history() 64 | } 65 | \seealso{ 66 | dplyr::reframe() 67 | } 68 | -------------------------------------------------------------------------------- /man/p_resume.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_resume} 4 | \alias{p_resume} 5 | \title{Resume tracking the data frame.} 6 | \usage{ 7 | p_resume(.data, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | 12 | \item{...}{ 13 | Named arguments passed on to \code{\link[=p_group_by]{p_group_by}}\describe{ 14 | \item{\code{.messages}}{a set of glue specs. The glue code can use any global 15 | variable, or \{.cols\} which is the columns that are being grouped by.} 16 | \item{\code{.headline}}{a headline glue spec. The glue code can use any global 17 | variable, or \{.cols\}.} 18 | \item{\code{.tag}}{if you want the summary data from this step in the future then 19 | give it a name with .tag.} 20 | \item{\code{.maxgroups}}{the maximum number of subgroups allowed before the tracking 21 | is paused.} 22 | \item{\code{...}}{In \code{group_by()}, variables or computations to group by. 23 | Computations are always done on the ungrouped data frame. 24 | To perform computations on the grouped data, you need to use 25 | a separate \code{mutate()} step before the \code{group_by()}. 26 | Computations are not allowed in \code{nest_by()}. 
27 | In \code{ungroup()}, variables to remove from the grouping.} 28 | }} 29 | } 30 | \value{ 31 | the .data data frame with history graph tracking resumed 32 | } 33 | \description{ 34 | This may reset the grouping of the tracked data if the grouping structure 35 | has changed since the data frame was paused. If you try and resume tracking a 36 | data frame with too many groups (as defined by \code{options("dtrackr.max_supported_groupings"=XX)}) 37 | then the resume will fail and the data frame will still be paused. This can 38 | be overridden by specifying a value for the \code{.maxgroups} parameter. 39 | } 40 | \examples{ 41 | library(dplyr) 42 | library(dtrackr) 43 | iris \%>\% track() \%>\% pause() \%>\% resume() \%>\% history() 44 | } 45 | -------------------------------------------------------------------------------- /man/p_select.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_select} 4 | \alias{p_select} 5 | \title{dplyr modifying operations} 6 | \usage{ 7 | p_select(.data, ..., .messages = "", .headline = "", .tag = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 12 | more details.} 13 | 14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. 15 | The name gives the name of the column in the output. 16 | 17 | The value can be: 18 | \itemize{ 19 | \item A vector of length 1, which will be recycled to the correct length. 20 | \item A vector the same length as the current group (or the whole data frame 21 | if ungrouped). 22 | \item \code{NULL}, to remove the column. 23 | \item A data frame or tibble, to create multiple columns in the output. 24 | }} 25 | 26 | \item{.messages}{a set of glue specs. 
The glue code can use any global 27 | variable, grouping variable, \{.new_cols\} or \{.dropped_cols\} for changes to 28 | columns, \{.cols\} for the output column names, or \{.strata\}. Defaults to nothing.} 29 | 30 | \item{.headline}{a headline glue spec. The glue code can use any global 31 | variable, grouping variable, \{.new_cols\}, \{.dropped_cols\}, \{.cols\} or \{.strata\}. 32 | Defaults to nothing.} 33 | 34 | \item{.tag}{if you want the summary data from this step in the future then 35 | give it a name with .tag.} 36 | } 37 | \value{ 38 | the \code{.data} dataframe after being modified by the \code{dplyr} equivalent 39 | function, but with the history graph updated with a new stage if the 40 | \code{.messages} or \code{.headline} parameter is not empty. 41 | } 42 | \description{ 43 | See \code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[dplyr:count]{dplyr::add_count()}}, \code{\link[dplyr:count]{dplyr::add_tally()}}, 44 | \code{\link[dplyr:transmute]{dplyr::transmute()}}, \code{\link[dplyr:select]{dplyr::select()}}, \code{\link[dplyr:relocate]{dplyr::relocate()}}, 45 | \code{\link[dplyr:rename]{dplyr::rename()}} \code{\link[dplyr:rename]{dplyr::rename_with()}}, \code{\link[dplyr:arrange]{dplyr::arrange()}} for more details 46 | on underlying functions. \code{dtrackr} provides equivalent functions for 47 | mutating, selecting and renaming a data set which act in the same way as 48 | \code{dplyr}. \code{mutate} / \code{select} / \code{rename} generally don't add anything in terms 49 | of provenance of data so the default behaviour is to miss these out of the 50 | \code{dtrackr} history. This can be overridden with the \code{.messages}, or 51 | \code{.headline} values in which case they behave just like a \code{comment()}. 52 | } 53 | \examples{ 54 | library(dplyr) 55 | library(dtrackr) 56 | 57 | # mutate and other functions are unitary operations that generally change 58 | # the structure but not size of a dataframe. 
In dtrackr these are ignored 59 | # by default but we can change that so that their behaviour is obvious. 60 | 61 | # select 62 | # The output of the select verb (here using tidyselect syntax) can be captured 63 | # and here all column names are being reported with the .cols variable. 64 | iris \%>\% 65 | track() \%>\% 66 | group_by(Species) \%>\% 67 | select( 68 | tidyselect::starts_with("Sepal"), 69 | .messages="{.cols}", 70 | .headline="Output columns from select:") \%>\% 71 | history() 72 | } 73 | \seealso{ 74 | dplyr::select() 75 | } 76 | -------------------------------------------------------------------------------- /man/p_set.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_set} 4 | \alias{p_set} 5 | \title{Set the dtrackr history graph} 6 | \usage{ 7 | p_set(.data, .graph) 8 | } 9 | \arguments{ 10 | \item{.data}{a dataframe which may be grouped} 11 | 12 | \item{.graph}{a history graph list (consisting of nodes, edges, and head) see examples} 13 | } 14 | \value{ 15 | the .data dataframe with the history graph metadata set to the provided value 16 | } 17 | \description{ 18 | This is unlikely to be useful to an end user and is called automatically by many of the other 19 | functions here.
On the off chance you need to copy history metadata from one dataframe to another, \code{p_set()} can be used to do so. 20 | } 21 | \examples{ 22 | library(dplyr) 23 | library(dtrackr) 24 | mtcars \%>\% p_set(iris \%>\% comment("A comment") \%>\% p_get()) \%>\% history() 25 | } 26 | -------------------------------------------------------------------------------- /man/p_setdiff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_setdiff} 4 | \alias{p_setdiff} 5 | \title{Set operations} 6 | \usage{ 7 | p_setdiff( 8 | x, 9 | y, 10 | ..., 11 | .messages = "{.count.out} items in difference", 12 | .headline = "Difference" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{Vectors to combine.} 17 | 18 | \item{...}{a collection of tracked data frames to combine} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count.out\}} 22 | 23 | \item{.headline}{a glue spec. The glue code can use any global variable, or 24 | \{.count.out\}} 25 | } 26 | \value{ 27 | the dplyr output with the history graph updated. 28 | } 29 | \description{ 30 | These perform set operations on tracked dataframes. They merge the history 31 | of 2 (or more) dataframes and combine the rows (or columns). The total number of 32 | resulting rows is calculated as \{.count.out\}; in other respects each performs exactly the same 33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 36 | underlying function details.
37 | } 38 | \examples{ 39 | library(dplyr) 40 | library(dtrackr) 41 | 42 | # Set operations 43 | people = starwars \%>\% select(-films, -vehicles, -starships) 44 | chrs = people \%>\% track("start") 45 | 46 | lhs = chrs \%>\% include_any( 47 | species == "Human" ~ "{.included} humans", 48 | species == "Droid" ~ "{.included} droids" 49 | ) 50 | 51 | # these are different subsets of the same data 52 | rhs = chrs \%>\% include_any( 53 | species == "Human" ~ "{.included} humans", 54 | species == "Gungan" ~ "{.included} gungans" 55 | ) \%>\% comment("{.count} gungans & humans") 56 | 57 | 58 | # Unions 59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 60 | # display the history of the result: 61 | set \%>\% history() 62 | nrow(set) 63 | # not run - display the flowchart: 64 | # set \%>\% flowchart() 65 | 66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 67 | # display the history of the result: 68 | set \%>\% history() 69 | nrow(set) 70 | # not run - display the flowchart: 71 | # set \%>\% flowchart() 72 | 73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 74 | # display the history of the result: 75 | set \%>\% history() 76 | nrow(set) 77 | # not run - display the flowchart: 78 | # set \%>\% flowchart() 79 | 80 | # Intersections and differences 81 | 82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 83 | # display the history of the result: 84 | set \%>\% history() 85 | nrow(set) 86 | # not run - display the flowchart: 87 | # set \%>\% flowchart() 88 | 89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans") 90 | # display the history of the result: 91 | set \%>\% history() 92 | nrow(set) 93 | # not run - display the flowchart: 94 | # set \%>\% flowchart() 95 | } 96 | \seealso{ 97 | dplyr::setdiff() 98 | } 99 | -------------------------------------------------------------------------------- /man/p_status.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_status} 4 | \alias{p_status} 5 | \title{Add a summary to the dtrackr history graph} 6 | \usage{ 7 | p_status( 8 | .data, 9 | ..., 10 | .messages = .defaultMessage(), 11 | .headline = .defaultHeadline(), 12 | .type = "info", 13 | .asOffshoot = FALSE, 14 | .tag = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{.data}{a dataframe which may be grouped} 19 | 20 | \item{...}{any normal dplyr::summarise specification, e.g. \code{count=n()} or 21 | \code{av=mean(x)}, etcetera.} 22 | 23 | \item{.messages}{a character vector of glue specifications. A glue 24 | specification can refer to the summary outputs, any grouping variables of 25 | .data, the \{.strata\}, or any variables defined in the calling environment} 26 | 27 | \item{.headline}{a glue specification which can refer to grouping variables 28 | of .data, or any variables defined in the calling environment} 29 | 30 | \item{.type}{one of "info","exclusion": used to define formatting} 31 | 32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main 33 | flow (default = FALSE).} 34 | 35 | \item{.tag}{if you want the summary data from this step in the future then 36 | give it a name with .tag.} 37 | } 38 | \value{ 39 | the same .data dataframe with the history metadata updated with the 40 | status inserted as a new stage 41 | } 42 | \description{ 43 | In the middle of a pipeline you may wish to document something about the data 44 | that is more complex than the simple counts. \code{status} is essentially a 45 | \code{dplyr} summarisation step which is connected to a \code{glue} specification 46 | output, that is recorded in the data frame history. This means you can do an 47 | arbitrary interim summarisation and put the result into the flowchart without 48 | disrupting the pipeline flow. 
49 | } 50 | \details{ 51 | Because of the ... summary specification parameters MUST BE NAMED. 52 | } 53 | \examples{ 54 | library(dplyr) 55 | library(dtrackr) 56 | tmp = iris \%>\% track() \%>\% group_by(Species) 57 | tmp \%>\% status( 58 | long = p_count_if(Petal.Length>5), 59 | short = p_count_if(Petal.Length<2), 60 | .messages="{Species}: {long} long ones & {short} short ones" 61 | ) \%>\% history() 62 | } 63 | -------------------------------------------------------------------------------- /man/p_tagged.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_tagged} 4 | \alias{p_tagged} 5 | \title{Retrieve tagged data in the history graph} 6 | \usage{ 7 | p_tagged(.data, .tag = NULL, .strata = NULL, .glue = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{the tracked dataframe.} 11 | 12 | \item{.tag}{(optional) the tag to retrieve.} 13 | 14 | \item{.strata}{(optional) filter the tagged data by the strata. set to "" to filter just the top level ungrouped data.} 15 | 16 | \item{.glue}{(optional) a glue specification which will be applied to the tagged content to generate a \code{.label} for the tagged content.} 17 | 18 | \item{...}{(optional) any other named parameters will be passed to \code{glue::glue} and can be used to generate a label.} 19 | } 20 | \value{ 21 | various things depending on what is requested. 22 | 23 | By default a tibble with a \code{.tag} column and all associated summary values in a nested \code{.content} column. 24 | 25 | If a \code{.strata} column is specified the results are filtered to just those that match a given \code{.strata} grouping (i.e. this will be the grouping label on the flowchart). 
Ungrouped content will have an empty "" as \code{.strata} 26 | 27 | If \code{.tag} is specified the result will be for a single tag and \code{.content} will be automatically un-nested to give a single un-nested dataframe of the content captured at the \code{.tag} tagged step. 28 | This could be single or multiple rows depending on whether the original data was grouped at the point of tagging. 29 | 30 | If both the \code{.tag} and \code{.glue} are specified a \code{.label} column will be computed from \code{.glue} and the tagged content. If the result of this is a single row then just the string value of \code{.label} is returned. 31 | 32 | If just the \code{.glue} is specified, the result is an un-nested dataframe with \code{.tag}, \code{.strata} and \code{.label} columns with a label for each tag in each strata. 33 | 34 | If this seems complex then the best thing is to experiment until you get the output you want, leaving any \code{.glue} options until you think you know what you are doing. It made sense at the time. 35 | } 36 | \description{ 37 | Any counts at the individual stages that were stored with a \code{.tag} option in a pipeline step can be recovered here. The idea here is to provide a quick way to access a single value 38 | for the counts or other details tagged in a pipeline in a format that can be reported in the text of a document (e.g. for a results section). The consort statement vignette 39 | has further examples of use. 
40 | } 41 | \examples{ 42 | library(dplyr) 43 | library(dtrackr) 44 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") 45 | tmp = tmp \%>\% filter(Species!="versicolor") \%>\% group_by(Species) 46 | tmp \%>\% comment(.tag="step2") \%>\% tagged(.glue = "{.count}/{.total}") 47 | } 48 | -------------------------------------------------------------------------------- /man/p_track.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_track} 4 | \alias{p_track} 5 | \title{Start tracking the dtrackr history graph} 6 | \usage{ 7 | p_track( 8 | .data, 9 | .messages = .defaultMessage(), 10 | .headline = .defaultHeadline(), 11 | .tag = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{.data}{a dataframe which may be grouped} 16 | 17 | \item{.messages}{a character vector of glue specifications. A glue 18 | specification can refer to any grouping variables of .data, or any 19 | variables defined in the calling environment, the \{.total\} variable which 20 | is the count of all rows, the \{.count\} variable which is the count of 21 | rows in the current group and the \{.strata\} which describes the current 22 | group. Defaults to the value of \code{getOption("dtrackr.default_message")}.} 23 | 24 | \item{.headline}{a glue specification which can refer to grouping variables 25 | of .data, or any variables defined in the calling environment, or the 26 | \{.total\} variable which is \code{nrow(.data)}, or \{.strata\} a summary of the 27 | current group. Defaults to the value of \code{getOption("dtrackr.default_headline")}.} 28 | 29 | \item{.tag}{if you want the summary data from this step in the future then 30 | give it a name with .tag.} 31 | } 32 | \value{ 33 | the .data dataframe with additional history graph metadata, to allow 34 | tracking. 
35 | } 36 | \description{ 37 | Start tracking the dtrackr history graph 38 | } 39 | \examples{ 40 | library(dplyr) 41 | library(dtrackr) 42 | iris \%>\% track() \%>\% history() 43 | } 44 | -------------------------------------------------------------------------------- /man/p_ungroup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_ungroup} 4 | \alias{p_ungroup} 5 | \title{Remove a stratification from a data set} 6 | \usage{ 7 | p_ungroup( 8 | x, 9 | ..., 10 | .messages = .defaultMessage(), 11 | .headline = .defaultHeadline(), 12 | .tag = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{A \code{\link[dplyr:tbl]{tbl()}}} 17 | 18 | \item{...}{variables to remove from the grouping.} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count\}. The default is "total \{.count\} items"} 22 | 23 | \item{.headline}{a headline glue spec. The glue code can use \{.count\} and 24 | \{.strata\}.} 25 | 26 | \item{.tag}{if you want the summary data from this step in the future then 27 | give it a name with .tag.} 28 | } 29 | \value{ 30 | the .data dataframe but ungrouped with the history graph 31 | updated showing the ungroup operation as a new stage. 32 | } 33 | \description{ 34 | Un-grouping a data set logically combines the different arms. In the history 35 | this joins any stratified branches and acts as a specific type of \code{\link[=status]{status()}}, 36 | allowing you to generate some summary statistics about the un-grouped data. 37 | See \code{\link[dplyr:group_by]{dplyr::ungroup()}}. 
38 | } 39 | \examples{ 40 | library(dplyr) 41 | library(dtrackr) 42 | 43 | tmp = iris \%>\% group_by(Species) \%>\% comment("A test") 44 | tmp \%>\% ungroup(.messages="{.count} items in combined") \%>\% history() 45 | } 46 | \seealso{ 47 | dplyr::ungroup() 48 | } 49 | -------------------------------------------------------------------------------- /man/p_union.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_union} 4 | \alias{p_union} 5 | \title{Set operations} 6 | \usage{ 7 | p_union( 8 | x, 9 | y, 10 | ..., 11 | .messages = "{.count.out} unique items in union", 12 | .headline = "Distinct union" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{Vectors to combine.} 17 | 18 | \item{...}{a collection of tracked data frames to combine} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count.out\}} 22 | 23 | \item{.headline}{a glue spec. The glue code can use any global variable, or 24 | \{.count.out\}} 25 | } 26 | \value{ 27 | the dplyr output with the history graph updated. 28 | } 29 | \description{ 30 | These perform set operations on tracked dataframes. It merges the history 31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of 32 | resulting rows as \{.count.out\} in other terms it performs exactly the same 33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 35 | \code{\link[dplyr:setops]{dplyr::setdiff()}},\code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 36 | underlying function details. 
37 | } 38 | \examples{ 39 | library(dplyr) 40 | library(dtrackr) 41 | 42 | # Set operations 43 | people = starwars \%>\% select(-films, -vehicles, -starships) 44 | chrs = people \%>\% track("start") 45 | 46 | lhs = chrs \%>\% include_any( 47 | species == "Human" ~ "{.included} humans", 48 | species == "Droid" ~ "{.included} droids" 49 | ) 50 | 51 | # these are different subsets of the same data 52 | rhs = chrs \%>\% include_any( 53 | species == "Human" ~ "{.included} humans", 54 | species == "Gungan" ~ "{.included} gungans" 55 | ) \%>\% comment("{.count} gungans & humans") 56 | 57 | 58 | # Unions 59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 60 | # display the history of the result: 61 | set \%>\% history() 62 | nrow(set) 63 | # not run - display the flowchart: 64 | # set \%>\% flowchart() 65 | 66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 67 | # display the history of the result: 68 | set \%>\% history() 69 | nrow(set) 70 | # not run - display the flowchart: 71 | # set \%>\% flowchart() 72 | 73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 74 | # display the history of the result: 75 | set \%>\% history() 76 | nrow(set) 77 | # not run - display the flowchart: 78 | # set \%>\% flowchart() 79 | 80 | # Intersections and differences 81 | 82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 83 | # display the history of the result: 84 | set \%>\% history() 85 | nrow(set) 86 | # not run - display the flowchart: 87 | # set \%>\% flowchart() 88 | 89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans") 90 | # display the history of the result: 91 | set \%>\% history() 92 | nrow(set) 93 | # not run - display the flowchart: 94 | # set \%>\% flowchart() 95 | } 96 | \seealso{ 97 | generics::union() 98 | } 99 | -------------------------------------------------------------------------------- /man/p_untrack.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{p_untrack} 4 | \alias{p_untrack} 5 | \title{Remove tracking from the dataframe} 6 | \usage{ 7 | p_untrack(.data) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | } 12 | \value{ 13 | the .data dataframe with history graph metadata removed. 14 | } 15 | \description{ 16 | Remove tracking from the dataframe 17 | } 18 | \examples{ 19 | library(dplyr) 20 | library(dtrackr) 21 | iris \%>\% track() \%>\% untrack() \%>\% class() 22 | } 23 | -------------------------------------------------------------------------------- /man/pause.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{pause} 4 | \alias{pause} 5 | \title{Pause tracking the data frame.} 6 | \usage{ 7 | pause(.data, auto = FALSE) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | 12 | \item{auto}{if \code{TRUE} the tracking will resume automatically when the 13 | number of groups has fallen to a sensible level (default is \code{FALSE}).} 14 | } 15 | \value{ 16 | the .data dataframe with history graph tracking paused 17 | } 18 | \description{ 19 | Pausing tracking of a data frame may be required if an operation is about to 20 | be performed that creates a lot of groupings or that you otherwise don't 21 | want polluting the history graph (e.g. maybe selecting something using 22 | an anti-join). Once paused the history is not updated until a \code{resume()} is 23 | called, or when the data frame is ungrouped (if \code{auto} is enabled). 
24 | } 25 | \examples{ 26 | iris \%>\% track() \%>\% pause() \%>\% history() 27 | } 28 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \arguments{ 10 | \item{lhs}{A value or the magrittr placeholder.} 11 | 12 | \item{rhs}{A function call using the magrittr semantics.} 13 | } 14 | \value{ 15 | The result of calling \code{rhs(lhs)}. 16 | } 17 | \description{ 18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/plot.trackr_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{plot.trackr_graph} 4 | \alias{plot.trackr_graph} 5 | \title{Plots a history graph as html} 6 | \usage{ 7 | \method{plot}{trackr_graph}(x, fill = "lightgrey", fontsize = "8", colour = "black", ...) 8 | } 9 | \arguments{ 10 | \item{x}{a dtrackr history graph (e.g. 
output from \code{\link[=history]{history()}})} 11 | 12 | \item{fill}{the default node fill colour} 13 | 14 | \item{fontsize}{the default font size} 15 | 16 | \item{colour}{the default font colour} 17 | 18 | \item{...}{not used} 19 | } 20 | \value{ 21 | HTML displayed 22 | } 23 | \description{ 24 | Plots a history graph as html 25 | } 26 | \examples{ 27 | library(dplyr) 28 | library(dtrackr) 29 | iris \%>\% comment("hello {.total} rows") \%>\% history() \%>\% plot() 30 | } 31 | -------------------------------------------------------------------------------- /man/print.trackr_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{print.trackr_graph} 4 | \alias{print.trackr_graph} 5 | \title{Print a history graph to the console} 6 | \usage{ 7 | \method{print}{trackr_graph}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a dtrackr history graph (e.g. output from \code{\link[=p_get]{p_get()}})} 11 | 12 | \item{...}{not used} 13 | } 14 | \value{ 15 | nothing 16 | } 17 | \description{ 18 | Print a history graph to the console 19 | } 20 | \examples{ 21 | library(dplyr) 22 | library(dtrackr) 23 | iris \%>\% comment("hello {.total} rows") \%>\% history() \%>\% print() 24 | } 25 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{filter} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 
12 | 13 | \describe{ 14 | \item{dplyr}{\code{\link[dplyr]{filter}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /man/reframe.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{reframe.trackr_df} 4 | \alias{reframe.trackr_df} 5 | \title{Summarise a data set} 6 | \usage{ 7 | \method{reframe}{trackr_df}(.data, ..., .messages = "", .headline = "", .tag = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 12 | more details.} 13 | 14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of 15 | summary functions. The name will be the name of the variable in the result. 16 | 17 | The value can be: 18 | \itemize{ 19 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. 20 | \item A data frame, to add multiple columns from a single expression. 21 | } 22 | 23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was 24 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.} 25 | 26 | \item{.messages}{a set of glue specs. The glue code can use any summary 27 | variable defined in the ... parameter, or any global variable, or 28 | \{.strata\}} 29 | 30 | \item{.headline}{a headline glue spec. The glue code can use any summary 31 | variable defined in the ... 
parameter, or any global variable, or 32 | \{.strata\}} 33 | 34 | \item{.tag}{if you want the summary data from this step in the future then 35 | give it a name with .tag.} 36 | } 37 | \value{ 38 | the .data dataframe summarised with the history graph updated showing 39 | the summarise operation as a new stage 40 | } 41 | \description{ 42 | Summarising a data set acts in the normal \code{dplyr} manner to collapse groups 43 | to individual rows. Any columns resulting from the summary can be added to 44 | the history graph. In the history this also joins any stratified branches and 45 | allows you to generate some summary statistics about the un-grouped data. See 46 | \code{\link[dplyr:summarise]{dplyr::summarise()}}. 47 | } 48 | \examples{ 49 | library(dplyr) 50 | library(dtrackr) 51 | 52 | tmp = iris \%>\% group_by(Species) \%>\% track() 53 | tmp \%>\% reframe(tibble( 54 | param = c("mean","min","max"), 55 | value = c(mean(Petal.Length), min(Petal.Length), max(Petal.Length)) 56 | ), .messages="length {param}: {value}") \%>\% history() 57 | } 58 | \seealso{ 59 | dplyr::reframe() 60 | } 61 | -------------------------------------------------------------------------------- /man/resume.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{resume} 4 | \alias{resume} 5 | \title{Resume tracking the data frame.} 6 | \usage{ 7 | resume(.data, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | 12 | \item{...}{ 13 | Named arguments passed on to \code{\link[=p_group_by]{p_group_by}}\describe{ 14 | \item{\code{.messages}}{a set of glue specs. The glue code can use any global 15 | variable, or \{.cols\} which is the columns that are being grouped by.} 16 | \item{\code{.headline}}{a headline glue spec. 
The glue code can use any global 17 | variable, or \{.cols\}.} 18 | \item{\code{.tag}}{if you want the summary data from this step in the future then 19 | give it a name with .tag.} 20 | \item{\code{.maxgroups}}{the maximum number of subgroups allowed before the tracking 21 | is paused.} 22 | \item{\code{...}}{In \code{group_by()}, variables or computations to group by. 23 | Computations are always done on the ungrouped data frame. 24 | To perform computations on the grouped data, you need to use 25 | a separate \code{mutate()} step before the \code{group_by()}. 26 | Computations are not allowed in \code{nest_by()}. 27 | In \code{ungroup()}, variables to remove from the grouping. 28 | Named arguments passed on to \code{\link[dplyr:group_by]{dplyr::group_by}}\describe{ 29 | \item{\code{.add}}{When \code{FALSE}, the default, \code{group_by()} will 30 | override existing groups. To add to the existing groups, use 31 | \code{.add = TRUE}. 32 | 33 | This argument was previously called \code{add}, but that prevented 34 | creating a new grouping variable called \code{add}, and conflicts with 35 | our naming conventions.} 36 | \item{\code{.drop}}{Drop groups formed by factor levels that don't appear in the 37 | data? The default is \code{TRUE} except when \code{.data} has been previously 38 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.} 39 | \item{\code{x}}{A \code{\link[dplyr:tbl]{tbl()}}} 40 | }} 41 | }} 42 | } 43 | \value{ 44 | the .data data frame with history graph tracking resumed 45 | } 46 | \description{ 47 | This may reset the grouping of the tracked data if the grouping structure 48 | has changed since the data frame was paused. If you try and resume tracking a 49 | data frame with too many groups (as defined by \code{options("dtrackr.max_supported_groupings"=XX)}) 50 | then the resume will fail and the data frame will still be paused. 
This can 51 | be overridden by specifying a value for the \code{.maxgroups} parameter. 52 | } 53 | \examples{ 54 | library(dplyr) 55 | library(dtrackr) 56 | iris \%>\% track() \%>\% pause() \%>\% resume() \%>\% history() 57 | } 58 | -------------------------------------------------------------------------------- /man/save_dot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dot.R 3 | \name{save_dot} 4 | \alias{save_dot} 5 | \title{Save DOT content to a file} 6 | \usage{ 7 | save_dot( 8 | dot, 9 | filename, 10 | size = std_size$half, 11 | maxWidth = size$width, 12 | maxHeight = size$height, 13 | formats = c("dot", "png", "pdf", "svg"), 14 | landscape = size$rot != 0, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{dot}{a \code{graphviz} dot string} 20 | 21 | \item{filename}{the full path of the file name (minus extension for multiple 22 | formats)} 23 | 24 | \item{size}{a named list with 3 elements, \code{width} and \code{height} in inches and 25 | \code{rot} (rotation). A predefined set of standard sizes is available in the 26 | \link{std_size} object.} 27 | 28 | \item{maxWidth}{a width (on the paper) in inches if \code{size} is not defined} 29 | 30 | \item{maxHeight}{a height (on the paper) in inches if \code{size} is not defined} 31 | 32 | \item{formats}{some of \code{pdf},\code{dot},\code{svg},\code{png},\code{ps}} 33 | 34 | \item{landscape}{rotate the output by 270 degrees into a landscape format. 35 | \code{maxWidth} and \code{maxHeight} still apply and refer to the paper width to fit 36 | the flowchart into after rotation. (you might need to flip width and height)} 37 | 38 | \item{...}{ignored} 39 | } 40 | \value{ 41 | a list with items \code{paths} with the absolute paths of the saved files 42 | as a named list, and \code{svg} as the SVG string of the rendered dot file. 
43 | } 44 | \description{ 45 | Convert a digraph in dot format to SVG and save it to a range of output file types 46 | } 47 | \examples{ 48 | save_dot("digraph {A->B}",tempfile()) 49 | } 50 | -------------------------------------------------------------------------------- /man/setdiff.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{setdiff.trackr_df} 4 | \alias{setdiff.trackr_df} 5 | \title{Set operations} 6 | \usage{ 7 | \method{setdiff}{trackr_df}( 8 | x, 9 | y, 10 | ..., 11 | .messages = "{.count.out} items in difference", 12 | .headline = "Difference" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{Vectors to combine.} 17 | 18 | \item{...}{a collection of tracked data frames to combine} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count.out\}} 22 | 23 | \item{.headline}{a glue spec. The glue code can use any global variable, or 24 | \{.count.out\}} 25 | } 26 | \value{ 27 | the dplyr output with the history graph updated. 28 | } 29 | \description{ 30 | These perform set operations on tracked dataframes. It merges the history 31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of 32 | resulting rows as \{.count.out\} in other terms it performs exactly the same 33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 35 | \code{\link[dplyr:setops]{dplyr::setdiff()}},\code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 36 | underlying function details. 
37 | } 38 | \examples{ 39 | library(dplyr) 40 | library(dtrackr) 41 | 42 | # Set operations 43 | people = starwars \%>\% select(-films, -vehicles, -starships) 44 | chrs = people \%>\% track("start") 45 | 46 | lhs = chrs \%>\% include_any( 47 | species == "Human" ~ "{.included} humans", 48 | species == "Droid" ~ "{.included} droids" 49 | ) 50 | 51 | # these are different subsets of the same data 52 | rhs = chrs \%>\% include_any( 53 | species == "Human" ~ "{.included} humans", 54 | species == "Gungan" ~ "{.included} gungans" 55 | ) \%>\% comment("{.count} gungans & humans") 56 | 57 | 58 | # Unions 59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 60 | # display the history of the result: 61 | set \%>\% history() 62 | nrow(set) 63 | # not run - display the flowchart: 64 | # set \%>\% flowchart() 65 | 66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 67 | # display the history of the result: 68 | set \%>\% history() 69 | nrow(set) 70 | # not run - display the flowchart: 71 | # set \%>\% flowchart() 72 | 73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 74 | # display the history of the result: 75 | set \%>\% history() 76 | nrow(set) 77 | # not run - display the flowchart: 78 | # set \%>\% flowchart() 79 | 80 | # Intersections and differences 81 | 82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 83 | # display the history of the result: 84 | set \%>\% history() 85 | nrow(set) 86 | # not run - display the flowchart: 87 | # set \%>\% flowchart() 88 | 89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans") 90 | # display the history of the result: 91 | set \%>\% history() 92 | nrow(set) 93 | # not run - display the flowchart: 94 | # set \%>\% flowchart() 95 | } 96 | \seealso{ 97 | dplyr::setdiff() 98 | } 99 | -------------------------------------------------------------------------------- /man/status.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{status} 4 | \alias{status} 5 | \title{Add a summary to the dtrackr history graph} 6 | \usage{ 7 | status( 8 | .data, 9 | ..., 10 | .messages = .defaultMessage(), 11 | .headline = .defaultHeadline(), 12 | .type = "info", 13 | .asOffshoot = FALSE, 14 | .tag = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{.data}{a dataframe which may be grouped} 19 | 20 | \item{...}{any normal dplyr::summarise specification, e.g. \code{count=n()} or 21 | \code{av=mean(x)}, etcetera.} 22 | 23 | \item{.messages}{a character vector of glue specifications. A glue 24 | specification can refer to the summary outputs, any grouping variables of 25 | .data, the \{.strata\}, or any variables defined in the calling environment} 26 | 27 | \item{.headline}{a glue specification which can refer to grouping variables 28 | of .data, or any variables defined in the calling environment} 29 | 30 | \item{.type}{one of "info","exclusion": used to define formatting} 31 | 32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main 33 | flow (default = FALSE).} 34 | 35 | \item{.tag}{if you want the summary data from this step in the future then 36 | give it a name with .tag.} 37 | } 38 | \value{ 39 | the same .data dataframe with the history metadata updated with the 40 | status inserted as a new stage 41 | } 42 | \description{ 43 | In the middle of a pipeline you may wish to document something about the data 44 | that is more complex than the simple counts. \code{status} is essentially a 45 | \code{dplyr} summarisation step which is connected to a \code{glue} specification 46 | output, that is recorded in the data frame history. This means you can do an 47 | arbitrary interim summarisation and put the result into the flowchart without 48 | disrupting the pipeline flow. 
49 | } 50 | \details{ 51 | Because of the ... summary specification parameters MUST BE NAMED. 52 | } 53 | \examples{ 54 | library(dplyr) 55 | library(dtrackr) 56 | tmp = iris \%>\% track() \%>\% group_by(Species) 57 | tmp \%>\% status( 58 | long = p_count_if(Petal.Length>5), 59 | short = p_count_if(Petal.Length<2), 60 | .messages="{Species}: {long} long ones & {short} short ones" 61 | ) \%>\% history() 62 | } 63 | -------------------------------------------------------------------------------- /man/std_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dot.R 3 | \docType{data} 4 | \name{std_size} 5 | \alias{std_size} 6 | \title{Standard paper sizes} 7 | \format{ 8 | An object of class \code{list} of length 12. 9 | } 10 | \usage{ 11 | std_size 12 | } 13 | \description{ 14 | A list of standard paper sizes for outputting flowcharts or other dot 15 | graphs. These include width and height dimensions in inches and can be 16 | used as one way to specify the output size of a dot graph, including 17 | flowcharts (see the \code{size} parameter of \code{\link[=flowchart]{flowchart()}}). 18 | } 19 | \details{ 20 | The sizes available are \code{A4}, \code{A5}, \code{full} (fits a portrait A4 with margins), \code{half} (half an 21 | A4 with margins), \code{third}, \code{two_third}, \code{quarter}, \code{sixth} (all with reference to 22 | an A4 page with margins). There are 2 landscape sizes \code{A4_landscape} and \code{full_landscape} which 23 | fit an A4 page with or without margins. There are also 2 slide dimensions, 24 | to fit with standard presentation software dimensions. 25 | 26 | This is just a convenience. Similar effects can be achieved by providing \code{width} and \code{height} 27 | parameters to \code{\link[=flowchart]{flowchart()}} directly. 
28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/summarise.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{summarise.trackr_df} 4 | \alias{summarise.trackr_df} 5 | \title{Summarise a data set} 6 | \usage{ 7 | \method{summarise}{trackr_df}(.data, ..., .messages = "", .headline = "", .tag = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 12 | more details.} 13 | 14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of 15 | summary functions. The name will be the name of the variable in the result. 16 | 17 | The value can be: 18 | \itemize{ 19 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. 20 | \item A data frame, to add multiple columns from a single expression. 21 | } 22 | 23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was 24 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.} 25 | 26 | \item{.messages}{a set of glue specs. The glue code can use any summary 27 | variable defined in the ... parameter, or any global variable, or 28 | \{.strata\}} 29 | 30 | \item{.headline}{a headline glue spec. The glue code can use any summary 31 | variable defined in the ... 
parameter, or any global variable, or 32 | \{.strata\}} 33 | 34 | \item{.tag}{if you want the summary data from this step in the future then 35 | give it a name with .tag.} 36 | } 37 | \value{ 38 | the .data dataframe summarised with the history graph updated showing 39 | the summarise operation as a new stage 40 | } 41 | \description{ 42 | Summarising a data set acts in the normal \code{dplyr} manner to collapse groups 43 | to individual rows. Any columns resulting from the summary can be added to 44 | the history graph. In the history this also joins any stratified branches and 45 | allows you to generate some summary statistics about the un-grouped data. See 46 | \code{\link[dplyr:summarise]{dplyr::summarise()}}. 47 | } 48 | \examples{ 49 | library(dplyr) 50 | library(dtrackr) 51 | 52 | tmp = iris \%>\% group_by(Species) \%>\% track() 53 | tmp \%>\% summarise(avg = mean(Petal.Length), .messages="{avg} length") \%>\% history() 54 | } 55 | \seealso{ 56 | dplyr::summarise() 57 | } 58 | -------------------------------------------------------------------------------- /man/tagged.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{tagged} 4 | \alias{tagged} 5 | \title{Retrieve tagged data in the history graph} 6 | \usage{ 7 | tagged(.data, .tag = NULL, .strata = NULL, .glue = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{the tracked dataframe.} 11 | 12 | \item{.tag}{(optional) the tag to retrieve.} 13 | 14 | \item{.strata}{(optional) filter the tagged data by the strata. 
Set to "" to filter just the top level ungrouped data.} 15 | 16 | \item{.glue}{(optional) a glue specification which will be applied to the tagged content to generate a \code{.label} for the tagged content.} 17 | 18 | \item{...}{(optional) any other named parameters will be passed to \code{glue::glue} and can be used to generate a label.} 19 | } 20 | \value{ 21 | various things depending on what is requested. 22 | 23 | By default a tibble with a \code{.tag} column and all associated summary values in a nested \code{.content} column. 24 | 25 | If \code{.strata} is specified the results are filtered to just those that match a given \code{.strata} grouping (i.e. this will be the grouping label on the flowchart). Ungrouped content will have an empty "" as \code{.strata}. 26 | 27 | If \code{.tag} is specified the result will be for a single tag and \code{.content} will be automatically un-nested to give a single un-nested dataframe of the content captured at the \code{.tag} tagged step. 28 | This could be single or multiple rows depending on whether the original data was grouped at the point of tagging. 29 | 30 | If both the \code{.tag} and \code{.glue} are specified a \code{.label} column will be computed from \code{.glue} and the tagged content. If the result of this is a single row then just the string value of \code{.label} is returned. 31 | 32 | If just the \code{.glue} is specified, the result is an un-nested dataframe with \code{.tag}, \code{.strata} and \code{.label} columns with a label for each tag in each strata. 33 | 34 | If this seems complex then the best thing is to experiment until you get the output you want, leaving any \code{.glue} options until you think you know what you are doing. It made sense at the time. 35 | } 36 | \description{ 37 | Any counts at the individual stages that were stored with a \code{.tag} option in a pipeline step can be recovered here.
The idea here is to provide a quick way to access a single value 38 | for the counts or other details tagged in a pipeline into a format that can be reported in text of a document. (e.g. for a results section). For more examples the consort statement vignette 39 | has some examples of use. 40 | } 41 | \examples{ 42 | library(dplyr) 43 | library(dtrackr) 44 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") 45 | tmp = tmp \%>\% filter(Species!="versicolor") \%>\% group_by(Species) 46 | tmp \%>\% comment(.tag="step2") \%>\% tagged(.glue = "{.count}/{.total}") 47 | } 48 | -------------------------------------------------------------------------------- /man/track.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{track} 4 | \alias{track} 5 | \title{Start tracking the dtrackr history graph} 6 | \usage{ 7 | track( 8 | .data, 9 | .messages = .defaultMessage(), 10 | .headline = .defaultHeadline(), 11 | .tag = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{.data}{a dataframe which may be grouped} 16 | 17 | \item{.messages}{a character vector of glue specifications. A glue 18 | specification can refer to any grouping variables of .data, or any 19 | variables defined in the calling environment, the \{.total\} variable which 20 | is the count of all rows, the \{.count\} variable which is the count of 21 | rows in the current group and the \{.strata\} which describes the current 22 | group. Defaults to the value of \code{getOption("dtrackr.default_message")}.} 23 | 24 | \item{.headline}{a glue specification which can refer to grouping variables 25 | of .data, or any variables defined in the calling environment, or the 26 | \{.total\} variable which is \code{nrow(.data)}, or \{.strata\} a summary of the 27 | current group. 
Defaults to the value of \code{getOption("dtrackr.default_headline")}.} 28 | 29 | \item{.tag}{if you want the summary data from this step in the future then 30 | give it a name with .tag.} 31 | } 32 | \value{ 33 | the .data dataframe with additional history graph metadata, to allow 34 | tracking. 35 | } 36 | \description{ 37 | Start tracking the dtrackr history graph 38 | } 39 | \examples{ 40 | library(dplyr) 41 | library(dtrackr) 42 | iris \%>\% track() \%>\% history() 43 | } 44 | -------------------------------------------------------------------------------- /man/ungroup.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{ungroup.trackr_df} 4 | \alias{ungroup.trackr_df} 5 | \title{Remove a stratification from a data set} 6 | \usage{ 7 | \method{ungroup}{trackr_df}( 8 | x, 9 | ..., 10 | .messages = .defaultMessage(), 11 | .headline = .defaultHeadline(), 12 | .tag = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{A \code{\link[dplyr:tbl]{tbl()}}} 17 | 18 | \item{...}{variables to remove from the grouping.} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count\}. The default is "total \{.count\} items"} 22 | 23 | \item{.headline}{a headline glue spec. The glue code can use \{.count\} and 24 | \{.strata\}.} 25 | 26 | \item{.tag}{if you want the summary data from this step in the future then 27 | give it a name with .tag.} 28 | } 29 | \value{ 30 | the .data dataframe but ungrouped with the history graph 31 | updated showing the ungroup operation as a new stage. 32 | } 33 | \description{ 34 | Un-grouping a data set logically combines the different arms. In the history 35 | this joins any stratified branches and acts as a specific type of \code{\link[=status]{status()}}, 36 | allowing you to generate some summary statistics about the un-grouped data.
37 | See \code{\link[dplyr:group_by]{dplyr::ungroup()}}. 38 | } 39 | \examples{ 40 | library(dplyr) 41 | library(dtrackr) 42 | 43 | tmp = iris \%>\% group_by(Species) \%>\% comment("A test") 44 | tmp \%>\% ungroup(.messages="{.count} items in combined") \%>\% history() 45 | } 46 | \seealso{ 47 | dplyr::ungroup() 48 | } 49 | -------------------------------------------------------------------------------- /man/union.trackr_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{union.trackr_df} 4 | \alias{union.trackr_df} 5 | \title{Set operations} 6 | \usage{ 7 | \method{union}{trackr_df}( 8 | x, 9 | y, 10 | ..., 11 | .messages = "{.count.out} unique items in union", 12 | .headline = "Distinct union" 13 | ) 14 | } 15 | \arguments{ 16 | \item{x, y}{Vectors to combine.} 17 | 18 | \item{...}{a collection of tracked data frames to combine} 19 | 20 | \item{.messages}{a set of glue specs. The glue code can use any global 21 | variable, or \{.count.out\}} 22 | 23 | \item{.headline}{a glue spec. The glue code can use any global variable, or 24 | \{.count.out\}} 25 | } 26 | \value{ 27 | the dplyr output with the history graph updated. 28 | } 29 | \description{ 30 | These perform set operations on tracked dataframes. It merges the history 31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of 32 | resulting rows as \{.count.out\} in other terms it performs exactly the same 33 | operation as the equivalent \code{dplyr} operation. 
See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}}, 34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}}, 35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the 36 | underlying function details. 37 | } 38 | \examples{ 39 | library(dplyr) 40 | library(dtrackr) 41 | 42 | # Set operations 43 | people = starwars \%>\% select(-films, -vehicles, -starships) 44 | chrs = people \%>\% track("start") 45 | 46 | lhs = chrs \%>\% include_any( 47 | species == "Human" ~ "{.included} humans", 48 | species == "Droid" ~ "{.included} droids" 49 | ) 50 | 51 | # these are different subsets of the same data 52 | rhs = chrs \%>\% include_any( 53 | species == "Human" ~ "{.included} humans", 54 | species == "Gungan" ~ "{.included} gungans" 55 | ) \%>\% comment("{.count} gungans & humans") 56 | 57 | 58 | # Unions 59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 60 | # display the history of the result: 61 | set \%>\% history() 62 | nrow(set) 63 | # not run - display the flowchart: 64 | # set \%>\% flowchart() 65 | 66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans") 67 | # display the history of the result: 68 | set \%>\% history() 69 | nrow(set) 70 | # not run - display the flowchart: 71 | # set \%>\% flowchart() 72 | 73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans") 74 | # display the history of the result: 75 | set \%>\% history() 76 | nrow(set) 77 | # not run - display the flowchart: 78 | # set \%>\% flowchart() 79 | 80 | # Intersections and differences 81 | 82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans") 83 | # display the history of the result: 84 | set \%>\% history() 85 | nrow(set) 86 | # not run - display the flowchart: 87 | # set \%>\% flowchart() 88 | 89 | set =
intersect(lhs,rhs) \%>\% comment("{.count} humans") 90 | # display the history of the result: 91 | set \%>\% history() 92 | nrow(set) 93 | # not run - display the flowchart: 94 | # set \%>\% flowchart() 95 | } 96 | \seealso{ 97 | generics::union() 98 | } 99 | -------------------------------------------------------------------------------- /man/untrack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtrackr.R 3 | \name{untrack} 4 | \alias{untrack} 5 | \title{Remove tracking from the dataframe} 6 | \usage{ 7 | untrack(.data) 8 | } 9 | \arguments{ 10 | \item{.data}{a tracked dataframe} 11 | } 12 | \value{ 13 | the .data dataframe with history graph metadata removed. 14 | } 15 | \description{ 16 | Remove tracking from the dataframe 17 | } 18 | \examples{ 19 | library(dplyr) 20 | library(dtrackr) 21 | iris \%>\% track() \%>\% untrack() \%>\% class() 22 | } 23 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-48x48.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon-48x48.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /tests/spelling.R: 
-------------------------------------------------------------------------------- 1 | if(requireNamespace('spelling', quietly = TRUE)) 2 | spelling::spell_check_test(vignettes = TRUE, error = FALSE, 3 | skip_on_cran = TRUE) 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(dplyr) 3 | library(dtrackr) 4 | 5 | test_check("dtrackr") 6 | 7 | -------------------------------------------------------------------------------- /tests/testthat/test-examples.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | 4 | # test_that("examples run to completion", { 5 | # devtools::run_examples() 6 | # 7 | # }) 8 | 9 | if (identical(Sys.getenv("NOT_CRAN"), "true")) { 10 | test_examples(path = "../..") 11 | } 12 | -------------------------------------------------------------------------------- /tests/testthat/test-github-issues.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # Test helper: TRUE when at least one row of g$nodes has a .label containing .message preceded by the start of the string or ">" and followed by "<". 4 | graphContains = function(g, .strata, .message) { # NOTE(review): inside filter() below, .strata==.strata presumably resolves both sides to a .strata column of g$nodes (dplyr data masking), so this argument would not restrict the match - confirm (.env$.strata would name the argument) 5 | return(g$nodes %>% filter(.strata==.strata, .label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker is presumably dropped and .message is matched as a regex - confirm 6 | } 7 | # Same matching as graphContains, but TRUE only when the number of matching rows equals .count; the NOTE(review) caveats on graphContains presumably apply here too. 8 | graphContainsExactly = function(g, .strata, .message, .count) { 9 | return(g$nodes %>% filter(.strata==.strata, .label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() == .count) 10 | } 11 | 12 | 13 | test_that("Issue #25 fixed", { 14 | tibble(id = 1:20, x = rnorm(20)) %>% track() %>% inner_join( 15 | tibble(id = 1:20, y = runif(20)) %>% track() 16 | ) %>% 17 | p_get() %>% 18 | graphContains("", "Inner join by id") %>% 19 | testthat::expect_true() 20 | # pre fixing this would output "Inner join by " without `id` 21 | }) 22 | 23 | 24 | test_that("Issue #26
fixed", { 25 | expected = iris %>% track() %>% group_by(Species) %>% filter(Species == "setosa") %>% untrack() 26 | # pre fixing this throws error. 27 | actual = try(iris %>% track() %>% group_by(Species) %>% include_any(Species == "setosa" ~ "{.included}") %>% untrack()) 28 | actual2 = try(iris %>% track() %>% group_by(Species) %>% exclude_all(Species != "setosa" ~ "{.excluded}") %>% untrack()) 29 | testthat::expect_equal(actual2,expected) 30 | testthat::expect_equal(actual,expected) 31 | }) 32 | 33 | 34 | test_that("Issue #33 fixed", { 35 | # distinct was using the wrong function signature and not passing it on properly 36 | correct = mtcars %>% dplyr::distinct(carb) %>% dim() 37 | was_wrong = mtcars %>% dtrackr::track() %>% dtrackr::p_distinct(carb) %>% dim() 38 | testthat::expect_equal(correct, was_wrong) 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test-p_exclude.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # Test helper: TRUE when at least one row of g$nodes has a .label containing .message preceded by the start of the string or ">" and followed by "<". 4 | graphContains = function(g, .strata, .message) { # NOTE(review): inside filter() below, .strata==.strata presumably resolves both sides to a .strata column of g$nodes (dplyr data masking), so this argument would not restrict the match - confirm (.env$.strata would name the argument) 5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker is presumably dropped and .message is matched as a regex - confirm 6 | } 7 | 8 | 9 | test_that("exclusions works", { 10 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 11 | 12 | h = df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c%%2==0 ~ "removed {.excluded} even values") %>% p_comment("test2") 13 | g = h %>% p_get() 14 | testthat::expect_true(g %>% graphContains("a:1","removed 1 even values")) 15 | testthat::expect_true(g %>% graphContains("a:2","removed 2 even values")) 16 | testthat::expect_true(nrow(h) == 3) 17 | }) 18 | 19 | test_that("exclusions works groupwise", { 20 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 21 | 22 | h = df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c==max(c) ~
"removed {.excluded} max values") %>% p_comment("test2") 23 | g = h %>% p_get() 24 | testthat::expect_true( 25 | g %>% graphContains("a:1","removed 1 max values") & 26 | g %>% graphContains("a:2","removed 1 max values") & 27 | nrow(h) == 4 28 | ) 29 | 30 | }) 31 | 32 | 33 | test_that("exclusions works groupwise when nothing is excluded", { 34 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 35 | 36 | h = df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c>=5 ~ "removed {.excluded} c values lt 5") %>% p_comment("test2") 37 | g = h %>% p_get() 38 | testthat::expect_true( 39 | g %>% graphContains("a:1","removed 0 c values lt 5") & 40 | g %>% graphContains("a:2","removed 2 c values lt 5") & 41 | nrow(h) == 4 42 | ) 43 | }) 44 | 45 | test_that("exclusions can reference variable in function", { 46 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 47 | 48 | fn = function(.df) { 49 | someVar = "xyz" 50 | h = .df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c==max(c) ~ "removed {.excluded} {someVar} values") %>% p_comment("test2") 51 | return(h) 52 | } 53 | k = fn(df) 54 | g = k %>% p_get() 55 | testthat::expect_true( 56 | g %>% graphContains("a:1","removed 1 xyz values") & 57 | g %>% graphContains("a:2","removed 1 xyz values") & 58 | nrow(k) == 4 59 | ) 60 | 61 | }) 62 | 63 | test_that("missing values exclusions works", { 64 | dfNa = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,NA,4,5,6)) %>% group_by(a) 65 | 66 | h = dfNa %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c%%2==0 ~ "removed {.matched} even values and {.missing} missing", na.rm = TRUE) %>% p_comment("test2") 67 | g = h %>% p_get() 68 | testthat::expect_true( 69 | g %>% graphContains("a:1","removed 1 even values and 1 missing") & 70 | g %>% graphContains("a:2","removed 2 even values and 0 missing") & 71 | nrow(h) == 2 72 | ) 73 | }) 74 | 
-------------------------------------------------------------------------------- /tests/testthat/test-p_group_modify.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # Test helper: TRUE when at least one row of g$nodes has a .label containing .message preceded by the start of the string or ">" and followed by "<". 4 | graphContains = function(g, .strata, .message) { # NOTE(review): inside filter() below, .strata==.strata presumably resolves both sides to a .strata column of g$nodes (dplyr data masking), so this argument would not restrict the match - confirm (.env$.strata would name the argument) 5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker is presumably dropped and .message is matched as a regex - confirm 6 | } 7 | 8 | 9 | test_that("default mutate is not recorded", { # NOTE(review): the description mentions mutate but the body exercises p_group_modify - looks copy-pasted, confirm 10 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 11 | 12 | h = df %>% p_clear() %>% p_comment("test") %>% p_group_modify( 13 | function(d,g,...) { tibble(e=c(4,8)*g$a,f=c(4,8)+g$a) }, 14 | .messages="was {.count.in}, now {.count.out}" 15 | ) %>% p_comment("test2") 16 | g = h %>% p_get() 17 | testthat::expect_true( 18 | g %>% graphContains("a:1","test") & 19 | g %>% graphContains("a:1","was 3, now 2") 20 | ) 21 | 22 | }) 23 | 24 | # df %>% p_clear() %>% p_modify(function(d) { d %>% filter(c==2) }, .message="was {.count.in}, now {.count.out}") %>% p_get() 25 | # TODO: FAILS: df %>% p_clear() %>% p_modify(function(d) { d %>% filter(c==2) }, .headline="was {nrow(df)}") %>% p_get() 26 | # df %>% p_clear() %>% p_modify(function(d) { d %>% filter(c==2) }) %>% p_get() # NULL 27 | 28 | test_that("use of global expressions in headline", { 29 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 30 | 31 | h = df %>% p_clear() %>% p_comment("test") %>% p_group_modify( 32 | function(d,g,...)
{ d %>% filter(c==2) }, 33 | .messages="was {.count.in}, now {.count.out}", 34 | .headline="was {nrow(df)}" 35 | ) %>% p_comment("test2") 36 | g = h %>% p_get() 37 | testthat::expect_true( 38 | g %>% graphContains("a:1","test") & 39 | g %>% graphContains("a:1","was 6") 40 | ) 41 | 42 | }) 43 | 44 | 45 | 46 | test_that("distinct works", { 47 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 48 | df = bind_rows(df,df) 49 | 50 | h = df %>% p_clear() %>% p_comment("test") %>% p_distinct() %>% p_comment("test2") 51 | g = h %>% p_get() 52 | testthat::expect_true(g %>% graphContains("a:1","removing 3 duplicates")) 53 | testthat::expect_true(nrow(g$head) == 2) 54 | testthat::expect_true(groups(h) %>% sapply(as_label) %>% as.character() == groups(df) %>% sapply(as_label) %>% as.character()) 55 | 56 | 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-p_include.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # Test helper: TRUE when at least one row of g$nodes has a .label containing .message preceded by the start of the string or ">" and followed by "<". 4 | graphContains = function(g, .strata, .message) { # NOTE(review): inside filter() below, .strata==.strata presumably resolves both sides to a .strata column of g$nodes (dplyr data masking), so this argument would not restrict the match - confirm (.env$.strata would name the argument) 5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker is presumably dropped and .message is matched as a regex - confirm 6 | } 7 | 8 | test_that("inclusions works", { 9 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 10 | 11 | h = df %>% p_clear() %>% p_comment("test") %>% p_include_any(c%%2==0 ~ "including {.included} even values") %>% p_comment("test2") 12 | g = h %>% p_get() 13 | testthat::expect_true( 14 | g %>% graphContains("a:1","including 1 even values") & 15 | g %>% graphContains("a:2","including 2 even values") & 16 | nrow(h) == 3 17 | ) 18 | 19 | }) 20 | 21 | test_that("inclusions works groupwise", { 22 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 23 | 24 | h = df %>% p_clear() %>% p_comment("test") %>% p_include_any(c==max(c) ~ "kept
{.included} max values") %>% p_comment("test2") 25 | g = h %>% p_get() 26 | testthat::expect_true( 27 | g %>% graphContains("a:1","kept 1 max values") & 28 | g %>% graphContains("a:2","kept 1 max values") & 29 | nrow(h) == 2 30 | ) 31 | 32 | }) 33 | 34 | 35 | test_that("inclusions works groupwise when nothing is included", { 36 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 37 | 38 | h = df %>% p_clear() %>% p_comment("test") %>% p_include_any(c>=5 ~ "kept {.included} c values gte 5") %>% p_comment("test2") 39 | g = h %>% p_get() 40 | testthat::expect_true( 41 | g %>% graphContains("a:1","kept 0 c values gte 5") & 42 | g %>% graphContains("a:2","kept 2 c values gte 5") & 43 | nrow(h) == 2 44 | ) 45 | }) 46 | 47 | test_that("inclusions can reference variable in function", { 48 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 49 | 50 | fn = function(.df) { 51 | someVar = "xyz" 52 | h = .df %>% p_clear() %>% p_comment("test") %>% p_include_any(c==max(c) ~ "kept {.included} {someVar} values") %>% p_comment("test2") 53 | return(h) 54 | } 55 | k = fn(df) 56 | g = k %>% p_get() 57 | testthat::expect_true( 58 | g %>% graphContains("a:1","kept 1 xyz values") & 59 | g %>% graphContains("a:2","kept 1 xyz values") & 60 | nrow(k) == 2 61 | ) 62 | 63 | }) 64 | 65 | test_that("missing values inclusions works", { 66 | dfNa = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,NA,4,5,6)) %>% group_by(a) 67 | 68 | h = dfNa %>% p_clear() %>% p_comment("test") %>% p_include_any(c%%2==0 ~ "kept {.matched} even values and {.missing} missing", na.rm = FALSE) %>% p_comment("test2") 69 | g = h %>% p_get() 70 | testthat::expect_true( 71 | g %>% graphContains("a:1","kept 1 even values and 1 missing") & 72 | g %>% graphContains("a:2","kept 2 even values and 0 missing") & 73 | nrow(h) == 4 74 | ) 75 | }) 76 | -------------------------------------------------------------------------------- /tests/testthat/test-p_others.R: 
-------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # Test helper: TRUE when at least one row of g$nodes has a .label containing .message preceded by the start of the string or ">" and followed by "<". 4 | graphContains = function(g, .strata, .message) { # NOTE(review): inside filter() below, .strata==.strata presumably resolves both sides to a .strata column of g$nodes (dplyr data masking), so this argument would not restrict the match - confirm (.env$.strata would name the argument) 5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker is presumably dropped and .message is matched as a regex - confirm 6 | } 7 | 8 | test_that("ungrouping works", { 9 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 10 | 11 | h = df %>% p_clear() %>% p_comment("test") %>% p_ungroup(.messages="{.count} items") %>% p_comment("test2") 12 | g = h %>% p_get() 13 | testthat::expect_true( 14 | g %>% graphContains("","6 items") & 15 | g %>% graphContains("","test2") & 16 | nrow(h) == 6 17 | ) 18 | 19 | }) 20 | 21 | 22 | test_that("summarise works", { 23 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 24 | 25 | h = df %>% p_clear() %>% p_comment("test") %>% p_summarise(mean_c=mean(c), count=n(), .messages=c("{mean_c} average c","{count} items")) %>% p_comment("test2") 26 | g = h %>% p_get() 27 | testthat::expect_true( 28 | g %>% graphContains("a:1","3 items") & 29 | g %>% graphContains("","test2") & 30 | nrow(h) == 2 31 | ) 32 | 33 | }) 34 | 35 | test_that("default mutate is not recorded", { 36 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 37 | 38 | h = df %>% p_clear() %>% p_comment("test") %>% p_mutate(x="hello") %>% p_comment("test2") 39 | g = h %>% p_get() 40 | testthat::expect_true( 41 | g %>% graphContains("a:1","test") & 42 | g %>% graphContains("a:1","test2") & 43 | nrow(g$nodes) == 4 44 | ) 45 | 46 | }) 47 | 48 | 49 | test_that("filter works", { 50 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 51 | 52 | h = df %>% p_clear() %>% p_comment("test") %>% p_filter(b!=3) %>% p_comment("test2") 53 | g = h %>% p_get() 54 | testthat::expect_true( 55 | g %>% graphContains("a:1","excluded 1 items") & 56 | g %>% graphContains("","test2")
& 57 | nrow(h) == 4 58 | ) 59 | 60 | }) 61 | 62 | 63 | # df %>% p_clear() %>% p_status(c%%2==0 ~ "consisting of {count} even items",c%%2!=0 ~ "and {count} odd items") %>% p_ungroup() %>% p_get() 64 | -------------------------------------------------------------------------------- /tests/testthat/test-p_status.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(dtrackr) 3 | # graphContains(g, .strata, .message): TRUE if any row of g$nodes has a .label containing .message as literal text, preceded by the start of the label or '>' and followed by '<'. The message is wrapped in \Q...\E because paste0() discards a stringr::fixed() marker, which would leave the message interpreted as a regex. NOTE(review): `.strata==.strata` compares a value with itself, so the .strata argument does not restrict the rows - confirm the intended strata check (and the expectations below) before tightening it. 4 | graphContains = function(g, .strata, .message) { 5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)\\Q",.message,"\\E<"))) %>% nrow() > 0) 6 | } 7 | 8 | test_that("basic status works", { 9 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 10 | 11 | g = df %>% p_clear() %>% p_comment("test2") %>% p_status(count=n(),.messages = "{count} items") %>% p_comment("test2") %>% p_get() 12 | testthat::expect_true( 13 | g %>% graphContains("a:1","3 items") 14 | ) 15 | 16 | }) 17 | 18 | test_that("status on empty df does not crash", { 19 | dfempty = tibble(a=integer(), b=integer(), c=integer()) %>% group_by(a) 20 | 21 | g = dfempty %>% p_clear() %>% p_comment("test") %>% p_status() %>% p_get() 22 | testthat::expect_true( 23 | nrow(g$nodes)==0 24 | ) 25 | }) 26 | 27 | test_that("no args status works", { 28 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 29 | 30 | g = df %>% p_clear() %>% p_comment("test2") %>% p_status() %>% p_comment("test2") %>% p_get() 31 | testthat::expect_true( 32 | g %>% graphContains("a:1","3 items") 33 | ) 34 | 35 | }) 36 | 37 | 38 | test_that("more complex status works", { 39 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a) 40 | 41 | g = df %>% p_clear() %>% p_comment("test2") %>% p_status(count=n(),m=mean(b),z=max(c),.messages = c("{count} items","{m} mean b","{z} max c")) %>% p_comment("test2") %>% p_get() 42 | testthat::expect_true( 43 | g %>% 
graphContains("a:1","3 items") & 44 | g %>% graphContains("a:1","3 max c") & 45 | g %>% graphContains("a:1","2 mean b") 46 | ) 47 | 48 | }) 49 | 50 | # Checks that p_count_subgroup() writes per-subgroup count messages into the tracked graph. Skipped when the 'survival' package (source of the cgd data) is not installed. NOTE(review): graphContains() in this file does not use its .strata argument, so the headline strings passed as the first argument below are not actually verified. 51 | test_that("subgroup counts work", { 52 | testthat::skip_if_not_installed("survival") 53 | g = survival::cgd %>% 54 | p_track() %>% 55 | p_comment() %>% 56 | p_group_by(treat) %>% 57 | p_comment() %>% 58 | p_count_subgroup( 59 | .subgroup = sex, 60 | .messages="{.name}: {.count}/{.subtotal}", 61 | .headline="{treat}: {.subtotal}/{.total}" 62 | ) %>% 63 | p_comment() %>% 64 | p_get() 65 | testthat::expect_true( 66 | g %>% graphContains("placebo: 120/203","female: 20/120") & 67 | g %>% graphContains("rIFN-g: 83/203","male: 68/83") 68 | ) 69 | }) 70 | -------------------------------------------------------------------------------- /trackr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/joining-pipelines.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "dtrackr - Joining data pipelines" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{dtrackr - Joining data pipelines} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r setup, include=FALSE} 
11 | knitr::opts_chunk$set(echo = TRUE) 12 | library(dplyr) 13 | library(tidyr) 14 | library(dtrackr) 15 | ``` 16 | 17 | # Joins across data sets 18 | 19 | Joining `dtrackr` tracked data is supported and allows us to combine linked data 20 | sets. In this toy example, the data sets describe characters from a popular film from 21 | my youth. 22 | 23 | ```{r} 24 | 25 | # here we create a set of linked data from the starwars data 26 | # in a real example these data sets would have come from different places 27 | people = starwars %>% select(-films, -vehicles, -starships) 28 | vehicles = starwars %>% select(name,vehicles) %>% unnest(cols = c(vehicles)) 29 | starships = starwars %>% select(name,starships) %>% unnest(cols = c(starships)) 30 | films = starwars %>% select(name,films) %>% unnest(cols = c(films)) 31 | # these 4 data frames are linked together by the name attribute 32 | 33 | # we track both input data sets: 34 | tmp1 = people %>% track() %>% comment("People df {.total}") 35 | tmp2 = films %>% track() %>% comment("Films df {.total}") %>% comment("a test comment") 36 | 37 | # and here we (re)join the two data sets: 38 | tmp1 %>% 39 | inner_join(tmp2, by="name") %>% 40 | comment("joined {.total}") %>% 41 | flowchart() 42 | 43 | # The join message used by inner_join here is configurable but defaults to 44 | # {.count.lhs} on LHS 45 | # {.count.rhs} on RHS 46 | # {.count.out} in linked set 47 | 48 | ``` 49 | 50 | All `dplyr` join types are supported by `dtrackr`, which allows us to report on the 51 | numbers on either side of the join and on the resulting total. This can help 52 | detect if any data items are lost during the join. However, we do not yet capture 53 | data that becomes excluded during joins, as the interpretation depends on the 54 | type of join employed. 55 | 56 | # Unions 57 | 58 | Another type of binary operator is a union. This is a simpler problem and works 59 | as expected. 
In this example the early part of the pipeline is detected to be 60 | the same on both branches of the data flow. This therefore results in a flow 61 | that splits and then rejoins at the union (`bind_rows`) 62 | operation. 63 | 64 | ```{r} 65 | 66 | tmp = people %>% comment("start") 67 | 68 | tmp1 = tmp %>% include_any( 69 | species == "Human" ~ "{.included} humans", 70 | species == "Droid" ~ "{.included} droids" 71 | ) 72 | 73 | tmp2 = tmp %>% include_any( 74 | species == "Gungan" ~ "{.included} gungans" 75 | ) %>% comment("{.count} gungans") 76 | 77 | tmp3 = bind_rows(tmp1,tmp2) %>% comment("{.count} human,droids and gungans") 78 | tmp3 %>% flowchart() 79 | 80 | ``` 81 | 82 | Other `dplyr` set operations, such as `setdiff()`, `union()`, 83 | `union_all()` and `intersect()`, are also supported; they are described in the function 84 | documentation. 85 | -------------------------------------------------------------------------------- /vignettes/joss/figure1-consort.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/vignettes/joss/figure1-consort.pdf --------------------------------------------------------------------------------