├── .Rbuildignore
├── .github
├── .gitignore
├── CONTRIBUTING.md
└── workflows
│ ├── R-CMD-check.yaml
│ ├── draft-pdf.yml
│ └── test-coverage.yaml
├── .gitignore
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── dot.R
├── dtrackr-package.R
├── dtrackr.R
├── experimental
│ ├── column-tracking.R
│ └── group-counting.R
└── utils-pipe.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── cran-comments.md
├── docs
├── 404.html
├── CONTRIBUTING.html
├── LICENSE-text.html
├── LICENSE.html
├── apple-touch-icon-120x120.png
├── apple-touch-icon-152x152.png
├── apple-touch-icon-180x180.png
├── apple-touch-icon-60x60.png
├── apple-touch-icon-76x76.png
├── apple-touch-icon.png
├── articles
│ ├── consort-example.html
│ ├── consort-example_files
│ │ └── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ ├── dtrackr-options.html
│ ├── dtrackr-options_files
│ │ └── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ ├── dtrackr.html
│ ├── dtrackr_files
│ │ └── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ ├── index.html
│ ├── joining-pipelines.html
│ ├── joining-pipelines_files
│ │ ├── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ │ └── anchor-sections-1.0
│ │ │ ├── anchor-sections.css
│ │ │ └── anchor-sections.js
│ ├── tracking-provenance.html
│ └── tracking-provenance_files
│ │ ├── accessible-code-block-0.0.1
│ │ └── empty-anchor.js
│ │ └── anchor-sections-1.0
│ │ ├── anchor-sections.css
│ │ └── anchor-sections.js
├── authors.html
├── bootstrap-toc.css
├── bootstrap-toc.js
├── deps
│ ├── bootstrap-5.3.1
│ │ ├── bootstrap.bundle.min.js
│ │ ├── bootstrap.bundle.min.js.map
│ │ └── bootstrap.min.css
│ ├── bootstrap-toc-1.0.1
│ │ └── bootstrap-toc.min.js
│ ├── clipboard.js-2.0.11
│ │ └── clipboard.min.js
│ ├── data-deps.txt
│ ├── font-awesome-6.4.2
│ │ ├── css
│ │ │ ├── all.css
│ │ │ ├── all.min.css
│ │ │ ├── v4-shims.css
│ │ │ └── v4-shims.min.css
│ │ └── webfonts
│ │ │ ├── fa-brands-400.ttf
│ │ │ ├── fa-brands-400.woff2
│ │ │ ├── fa-regular-400.ttf
│ │ │ ├── fa-regular-400.woff2
│ │ │ ├── fa-solid-900.ttf
│ │ │ ├── fa-solid-900.woff2
│ │ │ ├── fa-v4compatibility.ttf
│ │ │ └── fa-v4compatibility.woff2
│ ├── headroom-0.11.0
│ │ ├── headroom.min.js
│ │ └── jQuery.headroom.min.js
│ ├── jquery-3.6.0
│ │ ├── jquery-3.6.0.js
│ │ ├── jquery-3.6.0.min.js
│ │ └── jquery-3.6.0.min.map
│ └── search-1.0.0
│ │ ├── autocomplete.jquery.min.js
│ │ ├── fuse.min.js
│ │ └── mark.min.js
├── docsearch.css
├── docsearch.js
├── favicon-16x16.png
├── favicon-32x32.png
├── favicon-48x48.png
├── favicon.ico
├── favicon.svg
├── index.html
├── katex-auto.js
├── lightswitch.js
├── link.svg
├── logo.png
├── news
│ └── index.html
├── pkgdown.css
├── pkgdown.js
├── pkgdown.yml
├── reference
│ ├── ILPD.html
│ ├── Rplot001.png
│ ├── add_count.html
│ ├── add_count.trackr_df.html
│ ├── add_tally.html
│ ├── anti_join.trackr_df.html
│ ├── arrange.trackr_df.html
│ ├── bind_cols.html
│ ├── bind_rows.html
│ ├── capture_exclusions.html
│ ├── comment.html
│ ├── count_subgroup.html
│ ├── distinct.trackr_df.html
│ ├── dot2svg.html
│ ├── dtrackr-package.html
│ ├── dtrackr.html
│ ├── exclude_all.html
│ ├── excluded.html
│ ├── figures
│ │ ├── README-flowchart.png
│ │ ├── demo.dot
│ │ ├── demo.pdf
│ │ ├── demo.png
│ │ ├── demo.svg
│ │ ├── dtrackr.xcf
│ │ ├── lifecycle-deprecated.svg
│ │ ├── lifecycle-experimental.svg
│ │ ├── lifecycle-stable.svg
│ │ ├── lifecycle-superseded.svg
│ │ └── logo.png
│ ├── filter.html
│ ├── filter.trackr_df.html
│ ├── flowchart.html
│ ├── full_join.trackr_df.html
│ ├── group_by.trackr_df.html
│ ├── group_modify.trackr_df.html
│ ├── history.html
│ ├── include_any.html
│ ├── index.html
│ ├── inner_join.trackr_df.html
│ ├── intersect.trackr_df.html
│ ├── landscape.html
│ ├── left_join.trackr_df.html
│ ├── mutate.trackr_df.html
│ ├── nest_join.trackr_df.html
│ ├── p_add_count.html
│ ├── p_add_tally.html
│ ├── p_anti_join.html
│ ├── p_arrange.html
│ ├── p_bind_cols.html
│ ├── p_bind_rows.html
│ ├── p_capture_exclusions.html
│ ├── p_clear.html
│ ├── p_comment.html
│ ├── p_copy.html
│ ├── p_count_if.html
│ ├── p_count_subgroup.html
│ ├── p_distinct.html
│ ├── p_exclude_all.html
│ ├── p_excluded.html
│ ├── p_filter.html
│ ├── p_flowchart.html
│ ├── p_flowcharts.html
│ ├── p_full_join.html
│ ├── p_get.html
│ ├── p_get_as_dot.html
│ ├── p_group_by.html
│ ├── p_group_modify.html
│ ├── p_include_any.html
│ ├── p_inner_join.html
│ ├── p_intersect.html
│ ├── p_left_join.html
│ ├── p_mutate.html
│ ├── p_nest_join.html
│ ├── p_pause.html
│ ├── p_pivot_longer.html
│ ├── p_pivot_wider.html
│ ├── p_reframe.html
│ ├── p_relocate.html
│ ├── p_rename.html
│ ├── p_rename_with.html
│ ├── p_resume.html
│ ├── p_right_join.html
│ ├── p_select.html
│ ├── p_semi_join.html
│ ├── p_set.html
│ ├── p_setdiff.html
│ ├── p_slice.html
│ ├── p_slice_head.html
│ ├── p_slice_max.html
│ ├── p_slice_min.html
│ ├── p_slice_sample.html
│ ├── p_slice_tail.html
│ ├── p_status.html
│ ├── p_summarise.html
│ ├── p_tagged.html
│ ├── p_track.html
│ ├── p_transmute.html
│ ├── p_ungroup.html
│ ├── p_union.html
│ ├── p_union_all.html
│ ├── p_untrack.html
│ ├── pause.html
│ ├── pipe.html
│ ├── pivot_longer.trackr_df.html
│ ├── pivot_wider.trackr_df.html
│ ├── plot.trackr_graph.html
│ ├── print.trackr_graph.html
│ ├── reexports.html
│ ├── reframe.trackr_df.html
│ ├── relocate.trackr_df.html
│ ├── rename.trackr_df.html
│ ├── rename_with.trackr_df.html
│ ├── resume.html
│ ├── right_join.trackr_df.html
│ ├── save_dot.html
│ ├── select.trackr_df.html
│ ├── semi_join.trackr_df.html
│ ├── setdiff.trackr_df.html
│ ├── slice.trackr_df.html
│ ├── slice_head.trackr_df.html
│ ├── slice_max.trackr_df.html
│ ├── slice_min.trackr_df.html
│ ├── slice_sample.trackr_df.html
│ ├── slice_tail.trackr_df.html
│ ├── status.html
│ ├── std_size.html
│ ├── summarise.trackr_df.html
│ ├── tagged.html
│ ├── track.html
│ ├── transmute.trackr_df.html
│ ├── ungroup.trackr_df.html
│ ├── union.trackr_df.html
│ ├── union_all.trackr_df.html
│ └── untrack.html
├── search.json
├── site.webmanifest
├── sitemap.xml
├── web-app-manifest-192x192.png
└── web-app-manifest-512x512.png
├── inst
├── CITATION
├── WORDLIST
├── examples
│ ├── add-count-tally-examples.R
│ ├── anti-join-examples.R
│ ├── arrange-examples.R
│ ├── full-join-examples.R
│ ├── inner-join-examples.R
│ ├── left-join-examples.R
│ ├── mutate-examples.R
│ ├── nest-join-examples.R
│ ├── relocate-examples.R
│ ├── rename-examples.R
│ ├── select-examples.R
│ ├── semi-join-examples.R
│ ├── set-operation-examples.R
│ ├── slice-examples.R
│ ├── slice-head-tail-examples.R
│ ├── slice-max-min-examples.R
│ ├── slice-sample-examples.R
│ └── transmute-examples.R
└── lib
│ ├── viz.js
│ └── viz_licence.md
├── man
├── add_count.trackr_df.Rd
├── add_tally.Rd
├── anti_join.trackr_df.Rd
├── arrange.trackr_df.Rd
├── bind_cols.Rd
├── bind_rows.Rd
├── capture_exclusions.Rd
├── comment.Rd
├── count_subgroup.Rd
├── distinct.trackr_df.Rd
├── dot2svg.Rd
├── dtrackr-package.Rd
├── exclude_all.Rd
├── excluded.Rd
├── figures
│ ├── README-flowchart.png
│ ├── dtrackr.xcf
│ ├── lifecycle-deprecated.svg
│ ├── lifecycle-experimental.svg
│ ├── lifecycle-stable.svg
│ ├── lifecycle-superseded.svg
│ └── logo.png
├── filter.trackr_df.Rd
├── flowchart.Rd
├── full_join.trackr_df.Rd
├── group_by.trackr_df.Rd
├── group_modify.trackr_df.Rd
├── history.Rd
├── include_any.Rd
├── inner_join.trackr_df.Rd
├── intersect.trackr_df.Rd
├── left_join.trackr_df.Rd
├── mutate.trackr_df.Rd
├── nest_join.trackr_df.Rd
├── p_add_count.Rd
├── p_add_tally.Rd
├── p_anti_join.Rd
├── p_arrange.Rd
├── p_bind_cols.Rd
├── p_bind_rows.Rd
├── p_capture_exclusions.Rd
├── p_clear.Rd
├── p_comment.Rd
├── p_copy.Rd
├── p_count_if.Rd
├── p_count_subgroup.Rd
├── p_distinct.Rd
├── p_exclude_all.Rd
├── p_excluded.Rd
├── p_filter.Rd
├── p_flowchart.Rd
├── p_full_join.Rd
├── p_get.Rd
├── p_get_as_dot.Rd
├── p_group_by.Rd
├── p_group_modify.Rd
├── p_include_any.Rd
├── p_inner_join.Rd
├── p_intersect.Rd
├── p_left_join.Rd
├── p_mutate.Rd
├── p_nest_join.Rd
├── p_pause.Rd
├── p_pivot_longer.Rd
├── p_pivot_wider.Rd
├── p_reframe.Rd
├── p_relocate.Rd
├── p_rename.Rd
├── p_rename_with.Rd
├── p_resume.Rd
├── p_right_join.Rd
├── p_select.Rd
├── p_semi_join.Rd
├── p_set.Rd
├── p_setdiff.Rd
├── p_slice.Rd
├── p_slice_head.Rd
├── p_slice_max.Rd
├── p_slice_min.Rd
├── p_slice_sample.Rd
├── p_slice_tail.Rd
├── p_status.Rd
├── p_summarise.Rd
├── p_tagged.Rd
├── p_track.Rd
├── p_transmute.Rd
├── p_ungroup.Rd
├── p_union.Rd
├── p_union_all.Rd
├── p_untrack.Rd
├── pause.Rd
├── pipe.Rd
├── pivot_longer.trackr_df.Rd
├── pivot_wider.trackr_df.Rd
├── plot.trackr_graph.Rd
├── print.trackr_graph.Rd
├── reexports.Rd
├── reframe.trackr_df.Rd
├── relocate.trackr_df.Rd
├── rename.trackr_df.Rd
├── rename_with.trackr_df.Rd
├── resume.Rd
├── right_join.trackr_df.Rd
├── save_dot.Rd
├── select.trackr_df.Rd
├── semi_join.trackr_df.Rd
├── setdiff.trackr_df.Rd
├── slice.trackr_df.Rd
├── slice_head.trackr_df.Rd
├── slice_max.trackr_df.Rd
├── slice_min.trackr_df.Rd
├── slice_sample.trackr_df.Rd
├── slice_tail.trackr_df.Rd
├── status.Rd
├── std_size.Rd
├── summarise.trackr_df.Rd
├── tagged.Rd
├── track.Rd
├── transmute.trackr_df.Rd
├── ungroup.trackr_df.Rd
├── union.trackr_df.Rd
├── union_all.trackr_df.Rd
└── untrack.Rd
├── pkgdown
└── favicon
│ ├── apple-touch-icon-120x120.png
│ ├── apple-touch-icon-152x152.png
│ ├── apple-touch-icon-180x180.png
│ ├── apple-touch-icon-60x60.png
│ ├── apple-touch-icon-76x76.png
│ ├── apple-touch-icon.png
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ ├── favicon-48x48.png
│ ├── favicon.ico
│ ├── favicon.svg
│ ├── site.webmanifest
│ ├── web-app-manifest-192x192.png
│ └── web-app-manifest-512x512.png
├── tests
├── spelling.R
├── testthat.R
└── testthat
│ ├── test-examples.R
│ ├── test-github-issues.R
│ ├── test-group_by.R
│ ├── test-p_comment.R
│ ├── test-p_exclude.R
│ ├── test-p_group_modify.R
│ ├── test-p_include.R
│ ├── test-p_others.R
│ ├── test-p_status.R
│ └── test-rsvg-scaling.R
├── trackr.Rproj
└── vignettes
├── .gitignore
├── consort-example.Rmd
├── dtrackr-options.Rmd
├── dtrackr.Rmd
├── dtrackr.bib
├── joining-pipelines.Rmd
└── joss
├── figure1-consort.pdf
└── paper.md
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^trackr\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | ^cran-comments\.md$
5 | ^LICENSE\.md$
6 | ^_pkgdown\.yml$
7 | ^docs$
8 | ^pkgdown$
9 | ^data-raw$
10 | ^\.github$
11 | ^CRAN-SUBMISSION$
12 | ^codecov\.yml$
13 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: R-CMD-check
10 |
11 | jobs:
12 | R-CMD-check:
13 |
14 | runs-on: ${{ matrix.config.os }}
15 | if: "!contains(github.event.head_commit.message, 'minor')"
16 |
17 | continue-on-error: false
18 |
19 | name: ${{ matrix.config.os }} R:(${{ matrix.config.r }})
20 |
21 | strategy:
22 | fail-fast: false
23 | matrix:
24 | config:
25 | # - {os: ubuntu-20.04, r: '4.1.0'}
26 | # - {os: macOS-11, r: '4.1.0'}
27 | # - {os: windows-2022, r: '4.1.0'}
28 | # - {os: ubuntu-20.04, r: '3.6.1'}
29 | # - {os: ubuntu-20.04, r: '4.2.0'}
30 | # - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'}
31 |
32 | - {os: macOS-latest, r: 'release'}
33 |
34 | - {os: windows-latest, r: 'release'}
35 | # Use 3.6 to trigger usage of RTools35
36 | # - {os: windows-latest, r: '3.6'}
37 |
38 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
39 | - {os: ubuntu-latest, r: 'release'}
40 | - {os: ubuntu-latest, r: 'oldrel-1'}
41 | - {os: ubuntu-latest, r: 'oldrel-2'}
42 | # - {os: ubuntu-latest, r: 'oldrel-3'}
43 | # - {os: ubuntu-latest, r: 'oldrel-4'}
44 |
45 | env:
46 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
47 | R_KEEP_PKG_SOURCE: yes
48 |
49 | steps:
50 | - uses: actions/checkout@v3
51 |
52 | - uses: r-lib/actions/setup-r@v2
53 | with:
54 | r-version: ${{ matrix.config.r }}
55 | http-user-agent: ${{ matrix.config.http-user-agent }}
56 | use-public-rspm: true
57 |
58 | - uses: r-lib/actions/setup-pandoc@v2
59 |
60 | - uses: r-lib/actions/setup-r-dependencies@v2
61 | with:
62 | extra-packages: any::rcmdcheck
63 | # work around for bug in pak
64 | needs: check
65 |
66 | - uses: r-lib/actions/check-r-package@v2
67 | with:
68 | args: 'c("--no-manual", "--no-multiarch", "--as-cran")'
69 | upload-snapshots: true
70 |
71 |
--------------------------------------------------------------------------------
/.github/workflows/draft-pdf.yml:
--------------------------------------------------------------------------------
1 | on: [push]
2 |
3 | jobs:
4 | paper:
5 | runs-on: ubuntu-latest
6 | name: Paper Draft
7 | steps:
8 | - name: Checkout
9 | uses: actions/checkout@v4
10 | - name: Build draft PDF
11 | uses: openjournals/openjournals-draft-action@master
12 | with:
13 | journal: joss
14 | # This should be the path to the paper within your repo.
15 | paper-path: vignettes/joss/paper.md
16 | - name: Upload
17 | uses: actions/upload-artifact@v4
18 | with:
19 | name: paper
20 | # This is the output path where Pandoc will write the compiled
21 | # PDF. Note, this should be the same directory as the input
22 | # paper.md
23 | path: vignettes/joss/paper.pdf
24 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: test-coverage
10 |
11 | jobs:
12 | test-coverage:
13 | runs-on: ubuntu-latest
14 | if: "!contains(github.event.head_commit.message, 'minor')"
15 |
16 | env:
17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |
19 | steps:
20 | - uses: actions/checkout@v3
21 |
22 | - uses: r-lib/actions/setup-r@v2
23 | with:
24 | use-public-rspm: true
25 |
26 | - uses: r-lib/actions/setup-r-dependencies@v2
27 | with:
28 | extra-packages: any::covr
29 | needs: coverage
30 |
31 | - uses: r-lib/actions/setup-pandoc@v2
32 |
33 | - name: Test coverage
34 | run: |
35 | covr::codecov(
36 | quiet = FALSE,
37 | clean = FALSE,
38 | type = "all",
39 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
40 | )
41 | shell: Rscript {0}
42 |
43 | - name: Show testthat output
44 | if: always()
45 | run: |
46 | ## --------------------------------------------------------------------
47 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
48 | shell: bash
49 |
50 | - name: Upload test results
51 | if: failure() # only keep the installed package directory when an earlier step failed
52 | uses: actions/upload-artifact@v4 # v3 of this action is deprecated and no longer runs on github.com
53 | with:
54 | name: coverage-test-failures
55 | path: ${{ runner.temp }}/package
56 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # User-specific files
9 | .Ruserdata
10 |
11 | # Example code in package build process
12 | *-Ex.R
13 |
14 | # Output files from R CMD build
15 | /*.tar.gz
16 |
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 |
20 | # RStudio files
21 | .Rproj.user/
22 |
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.pdf
26 |
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 |
30 | # knitr and R markdown default cache directories
31 | *_cache/
32 | /cache/
33 |
34 | # Temporary files created by R markdown
35 | *.utf8.md
36 | *.knit.md
37 |
38 | # R Environment Variables
39 | .Renviron
40 | .Rproj.user
41 | inst/doc
42 |
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.4.4
2 | Date: 2023-09-04 11:52:14 UTC
3 | SHA: 84e3f953625e8b94f665bd82b7d680188ccd0e7b
4 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: dtrackr
2 | Title: Track your Data Pipelines
3 | Version: 0.4.6
4 | Authors@R:
5 | person(given = "Robert",
6 | family = "Challen",
7 | role = c("aut", "cre"),
8 | email = "rob.challen@bristol.ac.uk",
9 | comment = c(ORCID = "0000-0002-5504-7768"))
10 | Description: Track and
11 | document 'dplyr' data pipelines. As you filter, mutate, and join your
12 | way through a data set, 'dtrackr' seamlessly keeps track of your data
13 | flow and makes publication ready documentation of a data pipeline simple.
14 | License: MIT + file LICENSE
15 | Language: en-GB
16 | Imports:
17 | dplyr (>= 1.1.0),
18 | glue,
19 | htmltools,
20 | magrittr,
21 | rlang,
22 | rsvg,
23 | stringr,
24 | tibble,
25 | tidyr,
26 | utils,
27 | V8,
28 | fs,
29 | purrr,
30 | base64enc,
31 | pdftools,
32 | png,
33 | lifecycle
34 | Suggests:
35 | spelling,
36 | here,
37 | knitr,
38 | rmarkdown,
39 | tidyselect,
40 | devtools,
41 | testthat (>= 2.1.0),
42 | rstudioapi,
43 | survival,
44 | ggplot2,
45 | covr
46 | VignetteBuilder:
47 | knitr
48 | Encoding: UTF-8
49 | LazyData: true
50 | Roxygen: list(markdown = TRUE)
51 | RoxygenNote: 7.3.2.9003
52 | Depends:
53 | R (>= 2.10)
54 | URL: https://terminological.github.io/dtrackr/index.html,
55 | https://github.com/terminological/dtrackr
56 | BugReports: https://github.com/terminological/dtrackr/issues
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2021
2 | COPYRIGHT HOLDER: Robert Challen
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2022 Robert Challen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/R/dtrackr-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | ## usethis namespace: start
5 | #' @importFrom lifecycle deprecated
6 | ## usethis namespace: end
7 | NULL
8 |
--------------------------------------------------------------------------------
/R/experimental/group-counting.R:
--------------------------------------------------------------------------------
1 | #TODO:
2 | # Configure the countable items as a named list of expressions,
3 | # or perhaps better as a formula, like the exclusion spec.
4 | # Use an n_distinct call to generate a set of additional .count-type columns;
5 | # this may be able to make use of status.
6 |
7 | # iris %>% dplyr::group_by(Species) %>% summarise(n = n_distinct(Petal.Length+Sepal.Length))
8 | # iris %>% dplyr::group_by(Species) %>% dplyr::summarise(n = dplyr::n_distinct(Petal.Length+Sepal.Length))
9 | # iris %>% dplyr::group_by(Species) %>% dplyr::summarise(n = dplyr::n_distinct(Petal.Length+Sepal.Length, Sepal.Width+Petal.Width))
10 | # x = function(...) {dots = rlang::enexprs(...); browser()}
11 | # x(a = c(Petal.Length+Sepal.Length, Sepal.Wdith), b= c(asdasda) )
12 | # class(dots$a)
13 | # as.expression(dots$a)
14 | # as.expression(unlist(dots$a))
15 | # x(a = c(Petal.Length+Sepal.Length, Sepal.Wdith), b= c(asdasda) )
16 | # x(a = c(Petal.Length+Sepal.Length, Sepal.Width), b= c(asdasda) )
17 | # iris %>% dplyr::summarise(n = !!dots$a)
18 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(!!dots$a))
19 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(!!dots$b))
20 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(Petal.Length + Sepal.Length, Sepal.Width))
21 | # iris %>% dplyr::summarise(n = dplyr::n_distinct(c(Petal.Length + Sepal.Length, Sepal.Width)))
22 |
--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
1 | #' Pipe operator
2 | #'
3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #'
5 | #' @name %>%
6 | #' @rdname pipe
7 | #' @keywords internal
8 | #' @export
9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | #' @param lhs A value or the magrittr placeholder.
12 | #' @param rhs A function call using the magrittr semantics.
13 | #' @return The result of calling `rhs(lhs)`.
14 | NULL
15 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## Test environments
2 |
3 | GitHub Actions environments
4 |
5 | * os: macOS-latest, r: 'release'
6 | * os: windows-latest, r: 'release'
7 | * os: ubuntu-latest, r: 'devel'
8 | * os: ubuntu-latest, r: 'release'
9 | * os: ubuntu-latest, r: 'oldrel-1'
10 | * os: ubuntu-latest, r: 'oldrel-2'
11 |
12 | ## R CMD check results
13 | There were no ERRORs or WARNINGs or NOTEs.
14 | (see https://github.com/terminological/dtrackr/actions/ for details)
15 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔
16 |
17 | ## Downstream dependencies
18 | There are currently no downstream dependencies for this package in CRAN.
19 |
20 | ## Other info
21 |
22 | This version addresses some CRAN check NOTES that have appeared due to empty
23 | sections in `.Rd` files, and some un-escaped braces. The empty sections were due
24 | to documentation generated by `roxygen2` and have been addressed by using a
25 | patched version of `roxygen2` with transitive documentation inheritance.
26 | Regenerating the Rd files with vanilla `roxygen2` will reintroduce these NOTEs.
27 | The patch has been submitted as a PR to `roxygen2`.
28 |
29 | There is no new functionality.
30 |
31 | Detailed changes are described in NEWS.md.
32 |
33 | Many thanks.
34 |
--------------------------------------------------------------------------------
/docs/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/apple-touch-icon.png
--------------------------------------------------------------------------------
/docs/articles/consort-example_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
1 | // Hide empty <a> tags within highlighted code blocks for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
2 | // v0.0.1
3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
4 |
5 | document.addEventListener('DOMContentLoaded', function() { // run once the DOM has been parsed
6 | const codeList = document.getElementsByClassName("sourceCode"); // every element carrying the "sourceCode" class
7 | for (var i = 0; i < codeList.length; i++) {
8 | var linkList = codeList[i].getElementsByTagName('a'); // all <a> descendants of this element
9 | for (var j = 0; j < linkList.length; j++) {
10 | if (linkList[j].innerHTML === "") { // the anchor has no content at all
11 | linkList[j].setAttribute('aria-hidden', 'true'); // mark it aria-hidden so it is not announced
12 | }
13 | }
14 | }
15 | });
16 |
--------------------------------------------------------------------------------
/docs/articles/dtrackr-options_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
1 | // Hide empty <a> tags within highlighted code blocks for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
2 | // v0.0.1
3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
4 |
5 | document.addEventListener('DOMContentLoaded', function() { // run once the DOM has been parsed
6 | const codeList = document.getElementsByClassName("sourceCode"); // every element carrying the "sourceCode" class
7 | for (var i = 0; i < codeList.length; i++) {
8 | var linkList = codeList[i].getElementsByTagName('a'); // all <a> descendants of this element
9 | for (var j = 0; j < linkList.length; j++) {
10 | if (linkList[j].innerHTML === "") { // the anchor has no content at all
11 | linkList[j].setAttribute('aria-hidden', 'true'); // mark it aria-hidden so it is not announced
12 | }
13 | }
14 | }
15 | });
16 |
--------------------------------------------------------------------------------
/docs/articles/dtrackr_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
1 | // Hide empty <a> tags within highlighted code blocks for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
2 | // v0.0.1
3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
4 |
5 | document.addEventListener('DOMContentLoaded', function() { // run once the DOM has been parsed
6 | const codeList = document.getElementsByClassName("sourceCode"); // every element carrying the "sourceCode" class
7 | for (var i = 0; i < codeList.length; i++) {
8 | var linkList = codeList[i].getElementsByTagName('a'); // all <a> descendants of this element
9 | for (var j = 0; j < linkList.length; j++) {
10 | if (linkList[j].innerHTML === "") { // the anchor has no content at all
11 | linkList[j].setAttribute('aria-hidden', 'true'); // mark it aria-hidden so it is not announced
12 | }
13 | }
14 | }
15 | });
16 |
--------------------------------------------------------------------------------
/docs/articles/joining-pipelines_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
1 | // Hide empty <a> tags within highlighted code blocks for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
2 | // v0.0.1
3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
4 |
5 | document.addEventListener('DOMContentLoaded', function() { // run once the DOM has been parsed
6 | const codeList = document.getElementsByClassName("sourceCode"); // every element carrying the "sourceCode" class
7 | for (var i = 0; i < codeList.length; i++) {
8 | var linkList = codeList[i].getElementsByTagName('a'); // all <a> descendants of this element
9 | for (var j = 0; j < linkList.length; j++) {
10 | if (linkList[j].innerHTML === "") { // the anchor has no content at all
11 | linkList[j].setAttribute('aria-hidden', 'true'); // mark it aria-hidden so it is not announced
12 | }
13 | }
14 | }
15 | });
16 |
--------------------------------------------------------------------------------
/docs/articles/joining-pipelines_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 |
--------------------------------------------------------------------------------
/docs/articles/joining-pipelines_files/anchor-sections-1.0/anchor-sections.js:
--------------------------------------------------------------------------------
1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020.
2 | document.addEventListener('DOMContentLoaded', function() { // run once the DOM has been parsed
3 | // Do nothing if AnchorJS is used
4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { // NOTE(review): typeof null is also 'object', so a null window.anchors would throw here — confirm this cannot occur
5 | return;
6 | }
7 |
8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); // every heading element in the document
9 |
10 | // Do nothing if sections are already anchored
11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { // one heading with 'hasAnchor' is enough to bail out
12 | return null;
13 | }
14 |
15 | // Use section id when pandoc runs with --section-divs
16 | const section_id = function(x) { // yields x.id for a 'section'-classed or <section> element, otherwise ''
17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION'))
18 | ? x.id : '');
19 | };
20 |
21 | // Add anchors
22 | h.forEach(function(x) {
23 | const id = x.id || section_id(x.parentElement); // prefer the heading's own id, else fall back to its parent element
24 | if (id === '') { // nothing to link to, so this heading is left untouched
25 | return null;
26 | }
27 | let anchor = document.createElement('a');
28 | anchor.href = '#' + id;
29 | anchor.classList = ['anchor-section']; // NOTE(review): assigns an array to classList; presumably relies on it being stringified to 'anchor-section' — confirm
30 | x.classList.add('hasAnchor'); // the class that the "already anchored" check above looks for
31 | x.appendChild(anchor);
32 | });
33 | });
34 |
--------------------------------------------------------------------------------
/docs/articles/tracking-provenance_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) -->
2 | // v0.0.1
3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
4 |
// After the DOM has loaded, walk every element with the "sourceCode" class and
// flag each <a> inside it whose innerHTML is the empty string as
// aria-hidden="true".
document.addEventListener('DOMContentLoaded', function() {
  const isEmptyLink = function(link) {
    return link.innerHTML === "";
  };

  Array.from(document.getElementsByClassName("sourceCode")).forEach(function(block) {
    Array.from(block.getElementsByTagName('a'))
      .filter(isEmptyLink)
      .forEach(function(link) {
        link.setAttribute('aria-hidden', 'true');
      });
  });
});
16 |
--------------------------------------------------------------------------------
/docs/articles/tracking-provenance_files/anchor-sections-1.0/anchor-sections.css:
--------------------------------------------------------------------------------
1 | /* Styles for section anchors */
2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
3 | a.anchor-section::before {content: '#';}
4 | .hasAnchor:hover a.anchor-section {visibility: visible;}
5 |
--------------------------------------------------------------------------------
/docs/articles/tracking-provenance_files/anchor-sections-1.0/anchor-sections.js:
--------------------------------------------------------------------------------
1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020.
/**
 * Once the DOM is ready, appends an `a.anchor-section` link to every heading
 * (h1-h6) that has an id of its own, or whose parent element is a section
 * carrying an id, and tags the heading with the `hasAnchor` class.
 *
 * Nothing is done when AnchorJS is present or when any heading already has
 * the `hasAnchor` class.
 */
document.addEventListener('DOMContentLoaded', function() {
  // Do nothing if AnchorJS is used. `typeof null` is also 'object', so a null
  // `window.anchors` must be excluded before probing its properties.
  const anchorJs = window.anchors;
  if (typeof anchorJs === 'object' && anchorJs !== null &&
      Object.prototype.hasOwnProperty.call(anchorJs, 'hasAnchorJSLink')) {
    return;
  }

  const headings = document.querySelectorAll('h1, h2, h3, h4, h5, h6');

  // Do nothing if sections are already anchored
  if (Array.from(headings).some(x => x.classList.contains('hasAnchor'))) {
    return;
  }

  // Use section id when pandoc runs with --section-divs
  const section_id = function(x) {
    return ((x.classList.contains('section') || (x.tagName === 'SECTION'))
            ? x.id : '');
  };

  // Add anchors
  headings.forEach(function(x) {
    const id = x.id || section_id(x.parentElement);
    if (id === '') {
      // No usable id on the heading or its section: leave it without a link.
      return;
    }
    const anchor = document.createElement('a');
    anchor.href = '#' + id;
    // Use the DOMTokenList API rather than assigning an array to `classList`.
    anchor.classList.add('anchor-section');
    x.classList.add('hasAnchor');
    x.appendChild(anchor);
  });
});
34 |
--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
3 | * Copyright 2015 Aidan Feldman
4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
5 |
6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
7 |
8 | /* All levels of nav */
9 | nav[data-toggle='toc'] .nav > li > a {
10 | display: block;
11 | padding: 4px 20px;
12 | font-size: 13px;
13 | font-weight: 500;
14 | color: #767676;
15 | }
16 | nav[data-toggle='toc'] .nav > li > a:hover,
17 | nav[data-toggle='toc'] .nav > li > a:focus {
18 | padding-left: 19px;
19 | color: #563d7c;
20 | text-decoration: none;
21 | background-color: transparent;
22 | border-left: 1px solid #563d7c;
23 | }
24 | nav[data-toggle='toc'] .nav > .active > a,
25 | nav[data-toggle='toc'] .nav > .active:hover > a,
26 | nav[data-toggle='toc'] .nav > .active:focus > a {
27 | padding-left: 18px;
28 | font-weight: bold;
29 | color: #563d7c;
30 | background-color: transparent;
31 | border-left: 2px solid #563d7c;
32 | }
33 |
34 | /* Nav: second level (shown on .active) */
35 | nav[data-toggle='toc'] .nav .nav {
36 | display: none; /* Hide by default, but at >768px, show it */
37 | padding-bottom: 10px;
38 | }
39 | nav[data-toggle='toc'] .nav .nav > li > a {
40 | padding-top: 1px;
41 | padding-bottom: 1px;
42 | padding-left: 30px;
43 | font-size: 12px;
44 | font-weight: normal;
45 | }
46 | nav[data-toggle='toc'] .nav .nav > li > a:hover,
47 | nav[data-toggle='toc'] .nav .nav > li > a:focus {
48 | padding-left: 29px;
49 | }
50 | nav[data-toggle='toc'] .nav .nav > .active > a,
51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a,
52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a {
53 | padding-left: 28px;
54 | font-weight: 500;
55 | }
56 |
57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
58 | nav[data-toggle='toc'] .nav > .active > ul {
59 | display: block;
60 | }
61 |
--------------------------------------------------------------------------------
/docs/deps/bootstrap-toc-1.0.1/bootstrap-toc.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 | * Bootstrap Table of Contents v1.0.1 (http://afeld.github.io/bootstrap-toc/)
3 | * Copyright 2015 Aidan Feldman
4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
5 | !function(a){"use strict";window.Toc={helpers:{findOrFilter:function(e,t){var n=e.find(t);return e.filter(t).add(n).filter(":not([data-toc-skip])")},generateUniqueIdBase:function(e){return a(e).text().trim().replace(/\'/gi,"").replace(/[& +$,:;=?@"#{}|^~[`%!'<>\]\.\/\(\)\*\\\n\t\b\v]/g,"-").replace(/-{2,}/g,"-").substring(0,64).replace(/^-+|-+$/gm,"").toLowerCase()||e.tagName.toLowerCase()},generateUniqueId:function(e){for(var t=this.generateUniqueIdBase(e),n=0;;n++){var r=t;if(0')},createChildNavList:function(e){var t=this.createNavList();return e.append(t),t},generateNavEl:function(e,t){var n=a('');n.attr("href","#"+e),n.text(t);var r=a("");return r.append(n),r},generateNavItem:function(e){var t=this.generateAnchor(e),n=a(e),r=n.data("toc-text")||n.text();return this.generateNavEl(t,r)},getTopLevel:function(e){for(var t=1;t<=6;t++){if(1
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.ttf
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-brands-400.woff2
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.ttf
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-regular-400.woff2
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.ttf
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-solid-900.woff2
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.ttf
--------------------------------------------------------------------------------
/docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/deps/font-awesome-6.4.2/webfonts/fa-v4compatibility.woff2
--------------------------------------------------------------------------------
/docs/deps/headroom-0.11.0/jQuery.headroom.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 | * headroom.js v0.9.4 - Give your page some headroom. Hide your header until you need it
3 | * Copyright (c) 2017 Nick Williams - http://wicky.nillia.ms/headroom.js
4 | * License: MIT
5 | */
6 |
// jQuery/Zepto adapter for Headroom: registers `$.fn.headroom` and
// auto-initialises every element carrying a `data-headroom` attribute.
// Does nothing when neither Zepto nor jQuery is present on `window`.
!function ($) {
  if (!$) {
    return;
  }

  $.fn.headroom = function (option) {
    return this.each(function () {
      var $element = $(this);
      var instance = $element.data("headroom");
      // Only an object argument contributes settings; a string is a method name.
      var settings = "object" == typeof option && option;

      settings = $.extend(true, {}, Headroom.options, settings);

      // Create and cache one Headroom instance per element.
      if (!instance) {
        instance = new Headroom(this, settings);
        instance.init();
        $element.data("headroom", instance);
      }

      // A string argument invokes the method of that name on the instance.
      if ("string" == typeof option) {
        instance[option]();
        if ("destroy" === option) {
          $element.removeData("headroom");
        }
      }
    });
  };

  $("[data-headroom]").each(function () {
    var $element = $(this);
    $element.headroom($element.data());
  });
}(window.Zepto || window.jQuery);
--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
$(function() {

  // register a handler to move the focus to the search bar
  // upon pressing shift + "/" (i.e. "?")
  $(document).on('keydown', function(e) {
    if (e.shiftKey && e.keyCode == 191) {
      e.preventDefault();
      $("#search-input").focus();
    }
  });

  $(document).ready(function() {
    // do keyword highlighting: read the "q" query-string parameter from the
    // current URL and mark its value inside ".contents"
    /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
    var mark = function() {

      var referrer = document.URL;
      var paramKey = "q";
      var queryStart = referrer.indexOf("?");

      if (queryStart !== -1) {
        // slice() replaces the deprecated substr(); identical result here.
        var qs = referrer.slice(queryStart + 1);
        var qs_noanchor = qs.split('#')[0];
        var qsa = qs_noanchor.split('&');
        var keyword = "";

        for (var i = 0; i < qsa.length; i++) {
          var currentParam = qsa[i].split('=');

          if (currentParam.length !== 2) {
            continue;
          }

          if (currentParam[0] == paramKey) {
            try {
              keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
            } catch (err) {
              // decodeURIComponent throws URIError on malformed escapes such
              // as "%E9" or "%u00E9" (what escape() emits for non-ASCII
              // text). Ignore this parameter rather than aborting the handler.
            }
          }
        }

        if (keyword !== "") {
          $(".contents").unmark({
            done: function() {
              $(".contents").mark(keyword);
            }
          });
        }
      }
    };

    mark();
  });
});
51 |
52 | /* Search term highlighting ------------------------------*/
53 |
/**
 * Collects the `matchedWords` of every level in
 * `hit._highlightResult.hierarchy` and, when present, of
 * `hit._highlightResult.content`, and returns them without duplicates in
 * first-seen order.
 */
function matchedWords(hit) {
  const highlight = hit._highlightResult;
  const groups = [];

  // hierarchy levels (lvl0, lvl1, etc.)
  for (const level in highlight.hierarchy) {
    groups.push(highlight.hierarchy[level].matchedWords);
  }

  if (highlight.content) {
    groups.push(highlight.content.matchedWords);
  }

  // flatten the groups, then drop repeated words
  return Array.from(new Set([].concat(...groups)));
}
72 |
/**
 * Builds the URL for a search hit, with the hit's matched words appended as
 * a "q" query parameter.
 *
 * When the hit has an anchor, the query string is inserted between
 * `hit.url_without_anchor` and the `#anchor` fragment; otherwise it is
 * appended to `hit.url`.
 */
function updateHitURL(hit) {

  var words = matchedWords(hit);
  // encodeURIComponent (not the deprecated escape) so the value is UTF-8
  // percent-encoded and can be read back with decodeURIComponent; escape()
  // emits "%E9"/"%uXXXX" sequences that decodeURIComponent rejects.
  var query = '?q=' + encodeURIComponent(words.join(" "));
  var url = "";

  if (hit.anchor) {
    url = hit.url_without_anchor + query + '#' + hit.anchor;
  } else {
    url = hit.url + query;
  }

  return url;
}
86 |
--------------------------------------------------------------------------------
/docs/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/favicon-16x16.png
--------------------------------------------------------------------------------
/docs/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/favicon-32x32.png
--------------------------------------------------------------------------------
/docs/favicon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/favicon-48x48.png
--------------------------------------------------------------------------------
/docs/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/favicon.ico
--------------------------------------------------------------------------------
/docs/katex-auto.js:
--------------------------------------------------------------------------------
1 | // https://github.com/jgm/pandoc/blob/29fa97ab96b8e2d62d48326e1b949a71dc41f47a/src/Text/Pandoc/Writers/HTML.hs#L332-L345
// Once the DOM is ready, render every <span class="math"> with KaTeX, using
// the data of the span's first child node as the TeX source. Spans that also
// carry the "display" class are rendered in display mode.
document.addEventListener("DOMContentLoaded", function () {
  var mathElements = document.getElementsByClassName("math");
  var macros = [];
  for (var i = 0; i < mathElements.length; i++) {
    var element = mathElements[i];
    var texText = element.firstChild;
    // An empty span has no first child; skip it instead of throwing on
    // `.data`, which would stop every later element from being rendered.
    if (element.tagName == "SPAN" && texText) {
      katex.render(texText.data, element, {
        displayMode: element.classList.contains("display"),
        throwOnError: false,
        macros: macros,
        fleqn: false
      });
    }
  }
});
15 |
--------------------------------------------------------------------------------
/docs/lightswitch.js:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | * Color mode toggler for Bootstrap's docs (https://getbootstrap.com/)
4 | * Copyright 2011-2023 The Bootstrap Authors
5 | * Licensed under the Creative Commons Attribution 3.0 Unported License.
6 | * Updates for {pkgdown} by the {bslib} authors, also licensed under CC-BY-3.0.
7 | */
8 |
// Read the value saved under the 'theme' key in localStorage.
const getStoredTheme = () => {
  return localStorage.getItem('theme')
}

// Save `theme` under the 'theme' key in localStorage.
const setStoredTheme = (theme) => {
  return localStorage.setItem('theme', theme)
}
11 |
// Return the stored theme when one is set; otherwise 'dark' or 'light'
// according to the `prefers-color-scheme: dark` media query.
const getPreferredTheme = () => {
  const saved = getStoredTheme()
  if (!saved) {
    const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches
    return prefersDark ? 'dark' : 'light'
  }

  return saved
}
20 |
// Set the `data-bs-theme` attribute on the root element. The value 'auto' is
// resolved to 'dark' or 'light' from the `prefers-color-scheme: dark` media
// query; any other value is written as given.
const setTheme = theme => {
  let applied = theme
  if (theme === 'auto') {
    const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches
    applied = prefersDark ? 'dark' : 'light'
  }
  document.documentElement.setAttribute('data-bs-theme', applied)
}
28 |
/**
 * Wires up the colour-mode toggle:
 *  - re-applies the preferred theme when the system colour scheme changes and
 *    the stored theme is neither 'light' nor 'dark';
 *  - on DOMContentLoaded, reflects the preferred theme in the toggle UI and
 *    attaches click handlers to every `[data-bs-theme-value]` element that
 *    apply, store and display the chosen theme.
 */
function bsSetupThemeToggle () {
  'use strict'

  // Mark the button matching `theme` as active and mirror its label and icon
  // classes onto the `#dropdown-lightswitch` control, optionally focusing it.
  const showActiveTheme = (theme, focus = false) => {
    var activeLabel, activeIcon;

    document.querySelectorAll('[data-bs-theme-value]').forEach(element => {
      const buttonTheme = element.getAttribute('data-bs-theme-value')
      const isActive = buttonTheme == theme

      element.classList.toggle('active', isActive)
      element.setAttribute('aria-pressed', isActive)

      if (isActive) {
        activeLabel = element.textContent;
        // A button without an icon <span> must not abort the whole update.
        const buttonIcon = element.querySelector('span')
        activeIcon = buttonIcon ? buttonIcon.classList.value : undefined;
      }
    })

    const themeSwitcher = document.querySelector('#dropdown-lightswitch')
    if (!themeSwitcher) {
      return
    }

    // Only overwrite the label/icon when an active button supplied them;
    // otherwise the literal string "undefined" would be written.
    if (activeLabel !== undefined) {
      themeSwitcher.setAttribute('aria-label', activeLabel)
    }
    const switcherIcon = themeSwitcher.querySelector('span')
    if (switcherIcon && activeIcon !== undefined) {
      switcherIcon.classList.value = activeIcon;
    }

    if (focus) {
      themeSwitcher.focus()
    }
  }

  window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => {
    const storedTheme = getStoredTheme()
    if (storedTheme !== 'light' && storedTheme !== 'dark') {
      setTheme(getPreferredTheme())
    }
  })

  window.addEventListener('DOMContentLoaded', () => {
    showActiveTheme(getPreferredTheme())

    document
      .querySelectorAll('[data-bs-theme-value]')
      .forEach(toggle => {
        toggle.addEventListener('click', () => {
          const theme = toggle.getAttribute('data-bs-theme-value')
          setTheme(theme)
          setStoredTheme(theme)
          showActiveTheme(theme, true)
        })
      })
  })
}
83 |
84 | setTheme(getPreferredTheme());
85 | bsSetupThemeToggle();
86 |
--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/logo.png
--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: 3.1.13
2 | pkgdown: 2.1.1
3 | pkgdown_sha: ~
4 | articles:
5 | consort-example: consort-example.html
6 | dtrackr-options: dtrackr-options.html
7 | dtrackr: dtrackr.html
8 | joining-pipelines: joining-pipelines.html
9 | last_built: 2024-10-19T15:48Z
10 | urls:
11 | reference: https://terminological.github.io/dtrackr/reference
12 | article: https://terminological.github.io/dtrackr/articles
13 |
--------------------------------------------------------------------------------
/docs/reference/Rplot001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/Rplot001.png
--------------------------------------------------------------------------------
/docs/reference/dtrackr.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/docs/reference/figures/README-flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/README-flowchart.png
--------------------------------------------------------------------------------
/docs/reference/figures/demo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/demo.pdf
--------------------------------------------------------------------------------
/docs/reference/figures/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/demo.png
--------------------------------------------------------------------------------
/docs/reference/figures/dtrackr.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/dtrackr.xcf
--------------------------------------------------------------------------------
/docs/reference/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/docs/reference/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/docs/reference/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
1 |
30 |
--------------------------------------------------------------------------------
/docs/reference/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/docs/reference/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/reference/figures/logo.png
--------------------------------------------------------------------------------
/docs/reference/filter.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/docs/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "short_name": "",
4 | "icons": [
5 | {
6 | "src": "/web-app-manifest-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png",
9 | "purpose": "maskable"
10 | },
11 | {
12 | "src": "/web-app-manifest-512x512.png",
13 | "sizes": "512x512",
14 | "type": "image/png",
15 | "purpose": "maskable"
16 | }
17 | ],
18 | "theme_color": "#ffffff",
19 | "background_color": "#ffffff",
20 | "display": "standalone"
21 | }
--------------------------------------------------------------------------------
/docs/web-app-manifest-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/web-app-manifest-192x192.png
--------------------------------------------------------------------------------
/docs/web-app-manifest-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/docs/web-app-manifest-512x512.png
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
# Citation entry for the Journal of Open Source Software article on dtrackr.
c(
  bibentry(
    bibtype = "Article",
    title = "dtrackr: An R package for tracking the provenance of data",
    year = 2022,
    note = "R package version 0.2.5",
    url = "https://joss.theoj.org/papers/10.21105/joss.04707",
    doi = "10.21105/joss.04707",
    journal = "Journal of Open Source Software",
    author = c(
      person(
        given = "Robert",
        family = "Challen",
        email = "rob.challen@bristol.ac.uk",
        # named character vector, equivalent to structure(..., .Names = "ORCID")
        comment = c(ORCID = "0000-0002-5504-7768"),
        role = c("aut", "cre")
      )
    )
  )
)
13 |
--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
1 | CMD
2 | CoV
3 | EPSRC
4 | MRC
5 | REPL
6 | RMarkdown
7 | RSVG
8 | SARS
9 | TJ
10 | Un
11 | Zenodo
12 | autoref
13 | codecov
14 | dbplyr
15 | dplyr
16 | dtplyr
17 | etc
18 | favor
19 | github
20 | magrittr
21 | md
22 | metacran
23 | ptype
24 | rlang
25 | rowwise
26 | setdiff
27 | src
28 | srcs
29 | tibble
30 | tidyr
31 | tidyselect
32 | un
33 |
--------------------------------------------------------------------------------
/inst/examples/add-count-tally-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # add_count
9 | # adding in a count or tally column as a new column
10 | iris %>%
11 | track() %>%
12 | add_count(Species, name="new_count_total",
13 | .messages="{.new_cols}",
14 | # .messages="{.cols}",
15 | .headline="New columns from add_count:") %>%
16 | history()
17 |
18 | # add_tally
19 | iris %>%
20 | track() %>%
21 | group_by(Species) %>%
22 | dtrackr::add_tally(wt=Petal.Length, name="new_tally_total",
23 | .messages="{.new_cols}",
24 | .headline="New columns from add_tally:") %>%
25 | history()
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/inst/examples/anti-join-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Joins across data sets
4 |
5 | # example data uses the dplyr starwars data
6 | people = starwars %>% select(-films, -vehicles, -starships)
7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films))
8 |
9 | lhs = people %>% track() %>% comment("People df {.total}")
10 | rhs = films %>% track() %>% comment("Films df {.total}") %>%
11 | comment("a test comment")
12 |
13 | # Anti join
14 | join = lhs %>% anti_join(rhs, by="name") %>% comment("joined {.total}")
15 | # See what the history of the graph is:
16 | join %>% history() %>% print()
17 | nrow(join)
18 | # Display the tracked graph (not run in examples)
19 | # join %>% flowchart()
20 |
--------------------------------------------------------------------------------
/inst/examples/arrange-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # arrange
9 | # In this case we sort the data descending and show the first value
10 | # is the same as the maximum value.
11 | iris %>%
12 | track() %>%
13 | arrange(
14 | desc(Petal.Width),
15 | .messages="{.count} items, columns: {.cols}",
16 | .headline="Reordered dataframe:") %>%
17 | history()
18 |
--------------------------------------------------------------------------------
/inst/examples/full-join-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Joins across data sets
4 |
5 | # example data uses the dplyr starwars data
6 | people = starwars %>% select(-films, -vehicles, -starships)
7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films))
8 |
9 | lhs = people %>% track() %>% comment("People df {.total}")
10 | rhs = films %>% track() %>% comment("Films df {.total}") %>%
11 | comment("a test comment")
12 |
13 | # Full join
14 | join = lhs %>% full_join(rhs, by="name", multiple = "all") %>% comment("joined {.total}")
15 | # See what the history of the graph is:
16 | join %>% history()
17 | nrow(join)
18 | # Display the tracked graph (not run in examples)
19 | # join %>% flowchart()
20 |
21 |
--------------------------------------------------------------------------------
/inst/examples/inner-join-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Joins across data sets
4 |
5 | # example data uses the dplyr starwars data
6 | people = starwars %>% select(-films, -vehicles, -starships)
7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films))
8 |
9 | lhs = people %>% track() %>% comment("People df {.total}")
10 | rhs = films %>% track() %>% comment("Films df {.total}") %>%
11 | comment("a test comment")
12 |
13 | # Inner join
14 | join = lhs %>% inner_join(rhs, by="name", multiple = "all") %>% comment("joined {.total}")
15 | # See what the history of the graph is:
16 | join %>% history() %>% print()
17 | nrow(join)
18 | # Display the tracked graph (not run in examples)
19 | # join %>% flowchart()
20 |
--------------------------------------------------------------------------------
/inst/examples/left-join-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Joins across data sets
4 |
5 | # example data uses the dplyr starwars data
6 | people = starwars %>% select(-films, -vehicles, -starships)
7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films))
8 |
9 | lhs = people %>% track() %>% comment("People df {.total}")
10 | rhs = films %>% track() %>% comment("Films df {.total}") %>%
11 | comment("a test comment")
12 |
13 | # Left join
14 | join = lhs %>% left_join(rhs, by="name", multiple = "all") %>% comment("joined {.total}")
15 | # See what the history of the graph is:
16 | join %>% history()
17 | nrow(join)
18 | # Display the tracked graph (not run in examples)
19 | # join %>% flowchart()
20 |
21 |
--------------------------------------------------------------------------------
/inst/examples/mutate-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # mutate
9 | # In this example we compare the column names of the input and the
10 | # output to identify the new columns created by the mutate operation as
11 | # the `.new_cols` variable
12 | iris %>%
13 | track() %>%
14 | mutate(extra_col = NA_real_,
15 | .messages="{.new_cols}",
16 | .headline="Extra columns from mutate:") %>%
17 | history()
18 |
19 |
--------------------------------------------------------------------------------
/inst/examples/nest-join-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Joins across data sets
4 |
5 | # example data uses the dplyr starwars data
6 | people = starwars %>% select(-films, -vehicles, -starships)
7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films))
8 |
9 | lhs = people %>% track() %>% comment("People df {.total}")
10 | rhs = films %>% track() %>% comment("Films df {.total}") %>%
11 | comment("a test comment")
12 |
13 | # Nest join
14 | join = lhs %>% nest_join(rhs, by="name") %>% comment("joined {.total}")
15 | # See what the history of the graph is:
16 | join %>% history() %>% print()
17 | nrow(join)
18 | # Display the tracked graph (not run in examples)
19 | # join %>% flowchart()
20 |
--------------------------------------------------------------------------------
/inst/examples/relocate-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # relocate, this shows how the columns can be reordered
9 | iris %>%
10 | track() %>%
11 | group_by(Species) %>%
12 | relocate(
13 | tidyselect::starts_with("Sepal"),
14 | .after=Species,
15 | .messages="{.cols}",
16 | .headline="Order of columns from relocate:") %>%
17 | history()
18 |
--------------------------------------------------------------------------------
/inst/examples/rename-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # rename can show us which columns are new and which have been
9 | # removed (with .dropped_cols)
10 | iris %>%
11 | track() %>%
12 | group_by(Species) %>%
13 | rename(
14 | Stamen.Width = Sepal.Width,
15 | Stamen.Length = Sepal.Length,
16 | .messages=c("added {.new_cols}","dropped {.dropped_cols}"),
17 | .headline="Renamed columns:") %>%
18 | history()
19 |
--------------------------------------------------------------------------------
/inst/examples/select-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # select
9 | # The output of the select verb (here using tidyselect syntax) can be captured
10 | # and here all column names are being reported with the .cols variable.
11 | iris %>%
12 | track() %>%
13 | group_by(Species) %>%
14 | select(
15 | tidyselect::starts_with("Sepal"),
16 | .messages="{.cols}",
17 | .headline="Output columns from select:") %>%
18 | history()
19 |
--------------------------------------------------------------------------------
/inst/examples/semi-join-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Joins across data sets
4 |
5 | # example data uses the dplyr starwars data
6 | people = starwars %>% select(-films, -vehicles, -starships)
7 | films = starwars %>% select(name,films) %>% tidyr::unnest(cols = c(films))
8 |
9 | lhs = people %>% track() %>% comment("People df {.total}")
10 | rhs = films %>% track() %>% comment("Films df {.total}") %>%
11 | comment("a test comment")
12 |
13 | # Semi join
14 | join = lhs %>% semi_join(rhs, by="name") %>% comment("joined {.total}")
15 | # See what the history of the graph is:
16 | join %>% history() %>% print()
17 | nrow(join)
18 | # Display the tracked graph (not run in examples)
19 | # join %>% flowchart()
20 |
21 |
--------------------------------------------------------------------------------
/inst/examples/set-operation-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # Set operations
5 | people = starwars %>% select(-films, -vehicles, -starships)
6 | chrs = people %>% track("start")
7 |
8 | lhs = chrs %>% include_any(
9 | species == "Human" ~ "{.included} humans",
10 | species == "Droid" ~ "{.included} droids"
11 | )
12 |
13 | # these are different subsets of the same data
14 | rhs = chrs %>% include_any(
15 | species == "Human" ~ "{.included} humans",
16 | species == "Gungan" ~ "{.included} gungans"
17 | ) %>% comment("{.count} gungans & humans")
18 |
19 |
20 | # Unions
21 | set = bind_rows(lhs,rhs) %>% comment("{.count} 2*human,droids and gungans")
22 | # display the history of the result:
23 | set %>% history()
24 | nrow(set)
25 | # not run - display the flowchart:
26 | # set %>% flowchart()
27 |
28 | set = union(lhs,rhs) %>% comment("{.count} human,droids and gungans")
29 | # display the history of the result:
30 | set %>% history()
31 | nrow(set)
32 | # not run - display the flowchart:
33 | # set %>% flowchart()
34 |
35 | set = union_all(lhs,rhs) %>% comment("{.count} 2*human,droids and gungans")
36 | # display the history of the result:
37 | set %>% history()
38 | nrow(set)
39 | # not run - display the flowchart:
40 | # set %>% flowchart()
41 |
42 | # Intersections and differences
43 |
44 | set = setdiff(lhs,rhs) %>% comment("{.count} droids")
45 | # display the history of the result (rows in lhs that are not in rhs):
46 | set %>% history()
47 | nrow(set)
48 | # not run - display the flowchart:
49 | # set %>% flowchart()
50 |
51 | set = intersect(lhs,rhs) %>% comment("{.count} humans")
52 | # display the history of the result:
53 | set %>% history()
54 | nrow(set)
55 | # not run - display the flowchart:
56 | # set %>% flowchart()
57 |
--------------------------------------------------------------------------------
/inst/examples/slice-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # an arbitrary 50 items from the iris dataframe is selected. The
5 | # history is tracked
6 | iris %>% track() %>% slice(51:100) %>% history()
7 |
8 |
--------------------------------------------------------------------------------
/inst/examples/slice-head-tail-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # the first 50% of each group in the data frame is taken and the history tracked
5 | iris %>% track() %>% group_by(Species) %>%
6 | slice_head(prop=0.5,.messages="{.count.out} / {.count.in}",
7 | .headline="First {sprintf('%1.0f',prop*100)}%") %>%
8 | history()
9 |
10 | # The last 100 items:
11 | iris %>% track() %>% group_by(Species) %>%
12 | slice_tail(n=100,.messages="{.count.out} / {.count.in}",
13 | .headline="Last 100") %>%
14 | history()
15 |
--------------------------------------------------------------------------------
/inst/examples/slice-max-min-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 |
5 | # Subset the data by the maximum of a given value
6 | iris %>% track() %>% group_by(Species) %>%
7 | slice_max(prop=0.5, order_by = Sepal.Width,
8 | .messages="{.count.out} / {.count.in} = {prop} (with ties)",
9 | .headline="Widest 50% Sepals") %>%
10 | history()
11 |
12 |
13 | # The narrowest 25% of the iris data set by group can be calculated in the
14 | # slice_min() function. Recording this is a matter of tracking and
15 | # using glue specs.
16 | iris %>%
17 | track() %>%
18 | group_by(Species) %>%
19 | slice_min(prop=0.25, order_by = Sepal.Width,
20 | .messages="{.count.out} / {.count.in} (with ties)",
21 | .headline="narrowest {sprintf('%1.0f',prop*100)}% {Species}") %>%
22 | history()
23 |
24 |
--------------------------------------------------------------------------------
/inst/examples/slice-sample-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # In this example the iris dataframe is resampled 100 times with replacement
5 | # within each group and the history is tracked
6 | iris %>%
7 | track() %>%
8 | group_by(Species) %>%
9 | slice_sample(n=100, replace=TRUE,
10 | .messages="{.count.out} / {.count.in} = {n}",
11 | .headline="100 {Species}") %>%
12 | history()
13 |
--------------------------------------------------------------------------------
/inst/examples/transmute-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # mutate and other functions are unitary operations that generally change
5 | # the structure but not size of a dataframe. In dtrackr these are ignored
6 | # by default but we can change that so that their behaviour is obvious.
7 |
8 | # In this example we compare the column names of the input and the
9 | # output to identify the new columns created by the transmute operation as
10 | # the `.new_cols` variable
11 | # Here we do the same for a transmute()
12 | iris %>%
13 | track() %>%
14 | group_by(Species, .add=TRUE) %>%
15 | transmute(
16 | sepal.w = Sepal.Width-1,
17 | sepal.l = Sepal.Length+1,
18 | .messages="{.new_cols}",
19 | .headline="New columns from transmute:") %>%
20 | history()
21 |
22 |
--------------------------------------------------------------------------------
/inst/lib/viz_licence.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014-2018 Michael Daines
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software is furnished to do so,
8 | subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 |
--------------------------------------------------------------------------------
/man/anti_join.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{anti_join.trackr_df}
4 | \alias{anti_join.trackr_df}
5 | \title{Anti join}
6 | \usage{
7 | \method{anti_join}{trackr_df}(
8 | x,
9 | y,
10 | ...,
11 | .messages = c("{.count.lhs} on LHS", "{.count.rhs} on RHS", "{.count.out} not matched"),
12 | .headline = "Semi join by {.keys}"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
17 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
18 | more details.}
19 |
20 | \item{...}{Other parameters passed onto methods.}
21 |
22 | \item{.messages}{a set of glue specs. The glue code can use any global
23 | variable, \{.keys\} for the joining columns, \{.count.lhs\},
24 | \{.count.rhs\}, \{.count.out\} for the input and output dataframes sizes
25 | respectively}
26 |
27 | \item{.headline}{a glue spec. The glue code can use any global variable,
28 | \{.keys\} for the joining columns, \{.count.lhs\}, \{.count.rhs\},
29 | \{.count.out\} for the input and output dataframes sizes respectively}
30 | }
31 | \value{
32 | the join of the two dataframes with the history graph updated.
33 | }
34 | \description{
35 | Filtering joins behave as \code{dplyr} joins, except the history graph of the two
36 | sides of the joins is merged resulting in a tracked dataframe with the
37 | history of both input dataframes. See \code{\link[dplyr:filter-joins]{dplyr::anti_join()}} for more details
38 | on the underlying functions.
39 | }
40 | \examples{
41 | library(dplyr)
42 | library(dtrackr)
43 | # Joins across data sets
44 |
45 | # example data uses the dplyr starwars data
46 | people = starwars \%>\% select(-films, -vehicles, -starships)
47 | films = starwars \%>\% select(name,films) \%>\% tidyr::unnest(cols = c(films))
48 |
49 | lhs = people \%>\% track() \%>\% comment("People df {.total}")
50 | rhs = films \%>\% track() \%>\% comment("Films df {.total}") \%>\%
51 | comment("a test comment")
52 |
53 | # Anti join
54 | join = lhs \%>\% anti_join(rhs, by="name") \%>\% comment("joined {.total}")
55 | # See what the history of the graph is:
56 | join \%>\% history() \%>\% print()
57 | nrow(join)
58 | # Display the tracked graph (not run in examples)
59 | # join \%>\% flowchart()
60 | }
61 | \seealso{
62 | dplyr::anti_join()
63 | }
64 |
--------------------------------------------------------------------------------
/man/capture_exclusions.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{capture_exclusions}
4 | \alias{capture_exclusions}
5 | \title{Start capturing exclusions on a tracked dataframe.}
6 | \usage{
7 | capture_exclusions(.data, .capture = TRUE)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 |
12 | \item{.capture}{Should we capture exclusions (things removed from the data
13 | set). This is useful for debugging data issues but comes at a significant
14 | cost. Defaults to \code{TRUE}; pass \code{FALSE} to stop capturing
15 | exclusions.}
16 | }
17 | \value{
18 | the .data dataframe with the exclusions flag set (or cleared if
19 | \code{.capture=FALSE}).
20 | }
21 | \description{
22 | Start capturing exclusions on a tracked dataframe.
23 | }
24 | \examples{
25 | library(dplyr)
26 | library(dtrackr)
27 | tmp = iris \%>\% track() \%>\% capture_exclusions()
28 | tmp \%>\% filter(Species!="versicolor") \%>\% history()
29 | }
30 |
--------------------------------------------------------------------------------
/man/comment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{comment}
4 | \alias{comment}
5 | \title{Add a generic comment to the dtrackr history graph}
6 | \usage{
7 | comment(
8 | .data,
9 | .messages = .defaultMessage(),
10 | .headline = .defaultHeadline(),
11 | .type = "info",
12 | .asOffshoot = (.type == "exclusion"),
13 | .tag = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{a dataframe which may be grouped}
18 |
19 | \item{.messages}{a character vector of glue specifications. A glue
20 | specification can refer to any grouping variables of .data, or any
21 | variables defined in the calling environment, the \{.total\} of all rows,
22 | the \{.count\} variable which is the count in each group and \{.strata\} a
23 | description of the group}
24 |
25 | \item{.headline}{a glue specification which can refer to grouping variables
26 | of .data, or any variables defined in the calling environment, or the
27 | \{.total\} variable (which is \code{nrow(.data)}) and \{.strata\} which is a
28 | description of the grouping}
29 |
30 | \item{.type}{one of "info", ..., "exclusion": used to define formatting}
31 |
32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main
33 | flow (default = FALSE, unless \code{.type} is "exclusion").}
34 |
35 | \item{.tag}{if you want the summary data from this step in the future then
36 | give it a name with .tag.}
37 | }
38 | \value{
39 | the same .data dataframe with the history graph updated with the comment
40 | }
41 | \description{
42 | A comment can be any kind of note and is added once for every current
43 | grouping as defined by the \code{.messages} field. It can be made context specific
44 | by including variables such as \{.count\} and \{.total\} in \code{.messages} which
45 | refer to the grouped and ungrouped counts at this current stage of the
46 | pipeline respectively. It can also pull in any global variable.
47 | }
48 | \examples{
49 | library(dplyr)
50 | library(dtrackr)
51 | iris \%>\% track() \%>\% comment("hello {.total} rows") \%>\% history()
52 | }
53 |
--------------------------------------------------------------------------------
/man/count_subgroup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{count_subgroup}
4 | \alias{count_subgroup}
5 | \title{Add a subgroup count to the dtrackr history graph}
6 | \usage{
7 | count_subgroup(
8 | .data,
9 | .subgroup,
10 | ...,
11 | .messages = .defaultCountSubgroup(),
12 | .headline = .defaultHeadline(),
13 | .type = "info",
14 | .asOffshoot = FALSE,
15 | .tag = NULL,
16 | .maxsubgroups = .defaultMaxSupportedGroupings()
17 | )
18 | }
19 | \arguments{
20 | \item{.data}{a dataframe which may be grouped}
21 |
22 | \item{.subgroup}{a column with a small number of levels (e.g. a factor)}
23 |
24 | \item{...}{passed to \verb{base::factor(subgroup values, ...)} to allow reordering
25 | of levels etc.}
26 |
27 | \item{.messages}{a character vector of glue specifications. A glue
28 | specification can refer to anything from the calling environment,
29 | \{.subgroup\} for the subgroup column name and \{.name\} for the subgroup
30 | column value, \{.count\} for the subgroup column count, \{.subtotal\} for
31 | the current stratification grouping count and \{.total\} for the whole
32 | dataset count}
33 |
34 | \item{.headline}{a glue specification which can refer to grouping variables
35 | of .data, \{.subtotal\} for the current grouping count, or any variables
36 | defined in the calling environment}
37 |
38 | \item{.type}{one of "info","exclusion": used to define formatting}
39 |
40 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main
41 | flow (default = FALSE).}
42 |
43 | \item{.tag}{if you want to use the summary data from this step in the future
44 | then give it a name with .tag.}
45 |
46 | \item{.maxsubgroups}{the maximum number of discrete values allowed in
47 | .subgroup is configurable with
48 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. Large
49 | values produce unwieldy flow charts.}
50 | }
51 | \value{
52 | the same .data dataframe with the history graph updated with a
53 | subgroup count as a new stage
54 | }
55 | \description{
56 | A frequent use case for more detailed description is to have a subgroup count
57 | within a flowchart. This works best for factor subgroup columns but other
58 | data will be converted to a factor automatically. The count of the items in
59 | each subgroup is added as a new stage in the flowchart.
60 | }
61 | \examples{
62 | library(dplyr)
63 | library(dtrackr)
64 | survival::cgd \%>\% track() \%>\% group_by(treat) \%>\%
65 | count_subgroup(center) \%>\% history()
66 | }
67 |
--------------------------------------------------------------------------------
/man/distinct.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{distinct.trackr_df}
4 | \alias{distinct.trackr_df}
5 | \title{Distinct values of data}
6 | \usage{
7 | \method{distinct}{trackr_df}(
8 | .data,
9 | ...,
10 | .messages = "removing {.count.in-.count.out} duplicates",
11 | .headline = .defaultHeadline(),
12 | .tag = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
17 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
18 | more details.}
19 |
20 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to
21 | use when determining uniqueness. If there are multiple rows for a given
22 | combination of inputs, only the first row will be preserved. If omitted,
23 | will use all variables in the data frame.
24 | Named arguments passed on to \code{\link[dplyr:distinct]{dplyr::distinct}}\describe{
25 | \item{\code{.keep_all}}{If \code{TRUE}, keep all variables in \code{.data}.
26 | If a combination of \code{...} is not distinct, this keeps the
27 | first row of values.}
28 | }}
29 |
30 | \item{.messages}{a set of glue specs. The glue code can use any global
31 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
32 |
33 | \item{.headline}{a headline glue spec. The glue code can use any global
34 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
35 |
36 | \item{.tag}{if you want the summary data from this step in the future then
37 | give it a name with .tag.}
38 | }
39 | \value{
40 | the .data dataframe with distinct values and history graph updated.
41 | }
42 | \description{
43 | Distinct acts in the same way as in \code{dplyr::distinct}. Prior to the operation
44 | the size of the group is calculated \{.count.in\} and after the operation the
45 | output size \{.count.out\}. The group \{.strata\} is also available (if
46 | grouped) for reporting. See \code{\link[dplyr:distinct]{dplyr::distinct()}}.
47 | }
48 | \examples{
49 | library(dplyr)
50 | library(dtrackr)
51 |
52 | tmp = bind_rows(iris \%>\% track(), iris \%>\% track() \%>\% filter(Petal.Length > 5))
53 | tmp \%>\% group_by(Species) \%>\% distinct() \%>\% history()
54 | }
55 | \seealso{
56 | dplyr::distinct()
57 | }
58 |
--------------------------------------------------------------------------------
/man/dot2svg.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dot.R
3 | \name{dot2svg}
4 | \alias{dot2svg}
5 | \title{Convert \code{Graphviz} dot content to a SVG}
6 | \usage{
7 | dot2svg(dot)
8 | }
9 | \arguments{
10 | \item{dot}{a \code{graphviz} dot string}
11 | }
12 | \value{
13 | the SVG as a string
14 | }
15 | \description{
16 | Convert a \code{graphviz} dot digraph as string to \code{SVG} as string
17 | }
18 | \examples{
19 | dot2svg("digraph { A->B }")
20 | }
21 |
--------------------------------------------------------------------------------
/man/dtrackr-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr-package.R
3 | \docType{package}
4 | \name{dtrackr-package}
5 | \alias{dtrackr}
6 | \alias{dtrackr-package}
7 | \title{dtrackr: Track your Data Pipelines}
8 | \description{
9 | Track and document 'dplyr' data pipelines. As you filter, mutate, and join your way through a data set, 'dtrackr' seamlessly keeps track of your data flow and makes publication ready documentation of a data pipeline simple.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 | \item \url{https://terminological.github.io/dtrackr/index.html}
15 | \item \url{https://github.com/terminological/dtrackr}
16 | \item Report bugs at \url{https://github.com/terminological/dtrackr/issues}
17 | }
18 |
19 | }
20 | \author{
21 | \strong{Maintainer}: Robert Challen \email{rob.challen@bristol.ac.uk} (\href{https://orcid.org/0000-0002-5504-7768}{ORCID})
22 |
23 | }
24 | \keyword{internal}
25 |
--------------------------------------------------------------------------------
/man/excluded.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{excluded}
4 | \alias{excluded}
5 | \title{Get the dtrackr excluded data record}
6 | \usage{
7 | excluded(.data, simplify = TRUE)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 |
12 | \item{simplify}{return a single summary dataframe of all exclusions.}
13 | }
14 | \value{
15 | a new dataframe of the excluded data up to this point in the workflow. This dataframe is by default flattened, but if \code{simplify=FALSE} it has a nested structure containing records excluded at each part of the pipeline.
16 | }
17 | \description{
18 | Get the dtrackr excluded data record
19 | }
20 | \examples{
21 | library(dplyr)
22 | library(dtrackr)
23 | tmp = iris \%>\% track() \%>\% capture_exclusions()
24 | tmp \%>\% exclude_all(
25 | Petal.Length > 5.8 ~ "{.excluded} long ones",
26 | Petal.Length < 1.3 ~ "{.excluded} short ones",
27 | .stage = "petal length exclusion"
28 | ) \%>\% excluded()
29 | }
30 |
--------------------------------------------------------------------------------
/man/figures/README-flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/man/figures/README-flowchart.png
--------------------------------------------------------------------------------
/man/figures/dtrackr.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/man/figures/dtrackr.xcf
--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
1 |
30 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/man/figures/logo.png
--------------------------------------------------------------------------------
/man/filter.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{filter.trackr_df}
4 | \alias{filter.trackr_df}
5 | \title{Filtering data}
6 | \usage{
7 | \method{filter}{trackr_df}(
8 | .data,
9 | ...,
10 | .messages = "excluded {.excluded} items",
11 | .headline = .defaultHeadline(),
12 | .type = "exclusion",
13 | .asOffshoot = (.type == "exclusion"),
14 | .stage = (if (is.null(.tag)) "" else .tag),
15 | .tag = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
20 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
21 | more details.}
22 |
23 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that
24 | return a logical value, and are defined in terms of the variables in
25 | \code{.data}. If multiple expressions are included, they are combined with the
26 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are
27 | kept.
28 | Named arguments passed on to \code{\link[dplyr:filter]{dplyr::filter}}\describe{
29 | \item{\code{.by}}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
30 |
31 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
32 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
33 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
34 | \item{\code{.preserve}}{Relevant when the \code{.data} input is grouped.
35 | If \code{.preserve = FALSE} (the default), the grouping structure
36 | is recalculated based on the resulting data, otherwise the grouping is kept as is.}
37 | }}
38 |
39 | \item{.messages}{a set of glue specs. The glue code can use any global
40 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
41 |
42 | \item{.headline}{a headline glue spec. The glue code can use any global
43 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
44 |
45 | \item{.type}{the format type of the action typically an exclusion}
46 |
47 | \item{.asOffshoot}{if the type is exclusion, \code{.asOffshoot} places the
48 | information box outside of the main flow, as an exclusion.}
49 |
50 | \item{.stage}{a name for this step in the pathway}
51 |
52 | \item{.tag}{if you want the summary data from this step in the future then
53 | give it a name with \code{.tag}.}
54 | }
55 | \value{
56 | the filtered \code{.data} dataframe with history graph updated
57 | }
58 | \description{
59 | Filter acts in the same way as in \code{dplyr} where predicates which evaluate to
60 | TRUE act to select items to include, and items for which the predicate cannot
61 | be evaluated are excluded. For tracking prior to the filter operation the
62 | size of each group is calculated \{.count.in\} and after the operation the
63 | output size of each group \{.count.out\}. The grouping \{.strata\} is also
64 | available (if grouped) for reporting. See \code{\link[dplyr:filter]{dplyr::filter()}}.
65 | }
66 | \examples{
67 | library(dplyr)
68 | library(dtrackr)
69 |
70 | tmp = iris \%>\% track() \%>\% group_by(Species)
71 | tmp \%>\% filter(Petal.Length > 5) \%>\% history()
72 | }
73 | \seealso{
74 | dplyr::filter()
75 | }
76 |
--------------------------------------------------------------------------------
/man/flowchart.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{flowchart}
4 | \alias{flowchart}
5 | \title{Flowchart output}
6 | \usage{
7 | flowchart(
8 | .data,
9 | filename = NULL,
10 | size = std_size$full,
11 | maxWidth = size$width,
12 | maxHeight = size$height,
13 | formats = c("dot", "png", "pdf", "svg"),
14 | defaultToHTML = TRUE,
15 | landscape = size$rot != 0,
16 | ...
17 | )
18 | }
19 | \arguments{
20 | \item{.data}{the tracked dataframe(s) either as a single dataframe or as a
21 | list of dataframes.}
22 |
23 | \item{filename}{a file name which will be where the formatted flowcharts are
24 | saved. If no extension is specified the output formats are determined by
25 | the \code{formats} parameter.}
26 |
27 | \item{size}{a named list with 3 elements: \code{width} and \code{height} in inches and
28 | \code{rot}, the rotation. A predefined set of standard sizes are available in the
29 | \link{std_size} object.}
30 |
31 | \item{maxWidth}{a width (on the paper) in inches if \code{size} is not defined}
32 |
33 | \item{maxHeight}{a height (on the paper) in inches if \code{size} is not defined}
34 |
35 | \item{formats}{some of \code{pdf},\code{dot},\code{svg},\code{png},\code{ps}}
36 |
37 | \item{defaultToHTML}{if the correct output format is not easy to determine
38 | from the context, default to providing \code{HTML} (TRUE) or to embedding the \code{PNG} (FALSE)}
39 |
40 | \item{landscape}{rotate the output by 270 degrees into a landscape format.
41 | \code{maxWidth} and \code{maxHeight} still apply and refer to the paper width to fit
42 | the flowchart into after rotation. (you might need to flip width and height)}
43 |
44 | \item{...}{other parameters passed onto \code{p_get_as_dot()}; notable ones are
45 | \code{fill} (background colour e.g. \code{lightgrey}), \code{fontsize} (in points),
46 | \code{colour} (font colour)}
47 | }
48 | \value{
49 | the nature of the flowchart output depends on the context in which
50 | the function is called. It will be some form of browse-able html output if
51 | called from an interactive session or a \code{PNG}/\code{PDF} link if in \code{knitr} and
52 | knitting latex or word type outputs, if file name is specified the output
53 | will also be saved at the given location.
54 | }
55 | \description{
56 | Generate a flowchart of the history of the dataframe(s), with all the tracked
57 | data pipeline as stages in the flowchart. Multiple dataframes can be plotted
58 | together in which case an attempt is made to determine which parts are
59 | common.
60 | }
61 | \examples{
62 | library(dplyr)
63 | library(dtrackr)
64 |
65 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") \%>\% filter(Species!="versicolor")
66 | tmp \%>\% group_by(Species) \%>\% comment(.tag="step2") \%>\% flowchart()
67 | }
68 |
--------------------------------------------------------------------------------
/man/group_by.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{group_by.trackr_df}
4 | \alias{group_by.trackr_df}
5 | \title{Stratifying your analysis}
6 | \usage{
7 | \method{group_by}{trackr_df}(
8 | .data,
9 | ...,
10 | .messages = "stratify by {.cols}",
11 | .headline = NULL,
12 | .tag = NULL,
13 | .maxgroups = .defaultMaxSupportedGroupings()
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
18 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
19 | more details.}
20 |
21 | \item{...}{In \code{group_by()}, variables or computations to group by.
22 | Computations are always done on the ungrouped data frame.
23 | To perform computations on the grouped data, you need to use
24 | a separate \code{mutate()} step before the \code{group_by()}.
25 | Computations are not allowed in \code{nest_by()}.
26 | In \code{ungroup()}, variables to remove from the grouping.
27 | Named arguments passed on to \code{\link[dplyr:group_by]{dplyr::group_by}}\describe{
28 | \item{\code{.add}}{When \code{FALSE}, the default, \code{group_by()} will
29 | override existing groups. To add to the existing groups, use
30 | \code{.add = TRUE}.
31 |
32 | This argument was previously called \code{add}, but that prevented
33 | creating a new grouping variable called \code{add}, and conflicts with
34 | our naming conventions.}
35 | \item{\code{.drop}}{Drop groups formed by factor levels that don't appear in the
36 | data? The default is \code{TRUE} except when \code{.data} has been previously
37 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.}
38 | \item{\code{x}}{A \code{\link[dplyr:tbl]{tbl()}}}
39 | }}
40 |
41 | \item{.messages}{a set of glue specs. The glue code can use any global
42 | variable, or \{.cols\} which is the columns that are being grouped by.}
43 |
44 | \item{.headline}{a headline glue spec. The glue code can use any global
45 | variable, or \{.cols\}.}
46 |
47 | \item{.tag}{if you want the summary data from this step in the future then
48 | give it a name with .tag.}
49 |
50 | \item{.maxgroups}{the maximum number of subgroups allowed before the tracking
51 | is paused.}
52 | }
53 | \value{
54 | the .data but grouped.
55 | }
56 | \description{
57 | Grouping a data set acts in the normal way. When tracking a dataframe
58 | sometimes a \code{group_by()} operation will create a lot of groups. This happens
59 | for example if you are doing a \code{group_by()}, \code{summarise()} step that is
60 | aggregating data on a fine scale, e.g. by day in a time-series. This is
61 | generally a terrible idea when tracking a dataframe as the resulting
62 | flowchart will have many many branches and be illegible. \code{dtrackr} will detect this issue and
63 | pause tracking the dataframe with a warning. It is up to the user to
64 | \code{resume()} tracking when the large number of groups has been resolved e.g.
65 | using a \code{dplyr::ungroup()}. This limit is configurable with
66 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. See
67 | \code{\link[dplyr:group_by]{dplyr::group_by()}}.
68 | }
69 | \examples{
70 | library(dplyr)
71 | library(dtrackr)
72 |
73 | tmp = iris \%>\% track() \%>\% group_by(Species, .messages="stratify by {.cols}")
74 | tmp \%>\% comment("{.strata}") \%>\% history()
75 | }
76 | \seealso{
77 | dplyr::group_by()
78 | }
79 |
--------------------------------------------------------------------------------
/man/group_modify.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{group_modify.trackr_df}
4 | \alias{group_modify.trackr_df}
5 | \title{Group-wise modification of data and complex operations}
6 | \usage{
7 | \method{group_modify}{trackr_df}(
8 | .data,
9 | ...,
10 | .messages = NULL,
11 | .headline = .defaultHeadline(),
12 | .type = "modify",
13 | .tag = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{A grouped tibble}
18 |
19 | \item{...}{Additional arguments passed on to \code{.f}
20 | Named arguments passed on to \code{\link[dplyr:group_map]{dplyr::group_modify}}\describe{
21 | \item{\code{.f}}{A function or formula to apply to each group.
22 |
23 | If a \strong{function}, it is used as is. It should have at least 2 formal arguments.
24 |
25 | If a \strong{formula}, e.g. \code{~ head(.x)}, it is converted to a function.
26 |
27 | In the formula, you can use
28 | \itemize{
29 | \item \code{.} or \code{.x} to refer to the subset of rows of \code{.tbl}
30 | for the given group
31 | \item \code{.y} to refer to the key, a one row tibble with one column per grouping variable
32 | that identifies the group
33 | }}
34 | \item{\code{.keep}}{are the grouping variables kept in \code{.x}}
35 | }}
36 |
37 | \item{.messages}{a set of glue specs. The glue code can use any global
38 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
39 |
40 | \item{.headline}{a headline glue spec. The glue code can use any global
41 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
42 |
43 | \item{.type}{default "modify": used to define formatting}
44 |
45 | \item{.tag}{if you want the summary data from this step in the future then
46 | give it a name with .tag.}
47 | }
48 | \value{
49 | the transformed .data dataframe with the history graph updated.
50 | }
51 | \description{
52 | Group modifying a data set acts in the normal way. The internal mechanics of
53 | the modify function are opaque to the history. This means these can be used
54 | to wrap any unsupported operation without losing the history (e.g. \code{df \%>\% track() \%>\% group_modify(function(d,...) { d \%>\% unsupported_operation() })}).
55 | Prior to the operation the size of the group is calculated as \{.count.in\}
56 | and after the operation the output size as \{.count.out\}. The group \{.strata\}
57 | is also available (if grouped) for reporting. See \code{\link[dplyr:group_map]{dplyr::group_modify()}}.
58 | }
59 | \examples{
60 | library(dplyr)
61 | library(dtrackr)
62 |
63 | tmp = iris \%>\% track() \%>\% group_by(Species)
64 | tmp \%>\% group_modify(
65 | function(d,g,...) { return(tibble::tibble(x=runif(10))) },
66 | .messages="{.count.in} in, {.count.out} out"
67 | ) \%>\% history()
68 | }
69 | \seealso{
70 | dplyr::group_modify()
71 | }
72 |
--------------------------------------------------------------------------------
/man/history.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{history}
4 | \alias{history}
5 | \title{Get the dtrackr history graph}
6 | \usage{
7 | history(.data)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 | }
12 | \value{
13 | the history graph. This is a list, of class \code{trackr_graph}, containing the following named items:
14 | \itemize{
15 | \item excluded - the data items that have been excluded thus far as a nested dataframe
16 | \item tags - a dataframe of tag-value pairs containing the summary of the data at named points in the data flow (see \code{\link[=tagged]{tagged()}})
17 | \item nodes - a dataframe of the nodes of the flow chart
18 | \item edges - an edge list (as a dataframe) of the relationships between the nodes in the flow chart
19 | \item head - the current most recent nodes added into the graph as a dataframe.
20 | }
21 |
22 | The format of this data may grow over time but these fields are unlikely to be changed.
23 | }
24 | \description{
25 | This provides the raw history graph and is not really intended for mainstream use.
26 | The internal structure of the graph is explained below. print and plot S3 methods exist for
27 | the dtrackr history graph.
28 | }
29 | \examples{
30 | library(dplyr)
31 | library(dtrackr)
32 | graph = iris \%>\% track() \%>\% comment("A comment") \%>\% history()
33 | print(graph)
34 | }
35 |
--------------------------------------------------------------------------------
/man/intersect.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{intersect.trackr_df}
4 | \alias{intersect.trackr_df}
5 | \title{Set operations}
6 | \usage{
7 | \method{intersect}{trackr_df}(
8 | x,
9 | y,
10 | ...,
11 | .messages = "{.count.out} in intersection",
12 | .headline = "Intersection"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{Vectors to combine.}
17 |
18 | \item{...}{a collection of tracked data frames to combine}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count.out\}}
22 |
23 | \item{.headline}{a glue spec. The glue code can use any global variable, or
24 | \{.count.out\}}
25 | }
26 | \value{
27 | the dplyr output with the history graph updated.
28 | }
29 | \description{
30 | These perform set operations on tracked dataframes. They merge the history
31 | of 2 (or more) dataframes and combine the rows (or columns). The total number of
32 | resulting rows is available as \{.count.out\}. In all other respects they perform exactly the same
33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
36 | underlying function details.
37 | }
38 | \examples{
39 | library(dplyr)
40 | library(dtrackr)
41 |
42 | # Set operations
43 | people = starwars \%>\% select(-films, -vehicles, -starships)
44 | chrs = people \%>\% track("start")
45 |
46 | lhs = chrs \%>\% include_any(
47 | species == "Human" ~ "{.included} humans",
48 | species == "Droid" ~ "{.included} droids"
49 | )
50 |
51 | # these are different subsets of the same data
52 | rhs = chrs \%>\% include_any(
53 | species == "Human" ~ "{.included} humans",
54 | species == "Gungan" ~ "{.included} gungans"
55 | ) \%>\% comment("{.count} gungans & humans")
56 |
57 |
58 | # Unions
59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
60 | # display the history of the result:
61 | set \%>\% history()
62 | nrow(set)
63 | # not run - display the flowchart:
64 | # set \%>\% flowchart()
65 |
66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
67 | # display the history of the result:
68 | set \%>\% history()
69 | nrow(set)
70 | # not run - display the flowchart:
71 | # set \%>\% flowchart()
72 |
73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
74 | # display the history of the result:
75 | set \%>\% history()
76 | nrow(set)
77 | # not run - display the flowchart:
78 | # set \%>\% flowchart()
79 |
80 | # Intersections and differences
81 |
82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
83 | # display the history of the result:
84 | set \%>\% history()
85 | nrow(set)
86 | # not run - display the flowchart:
87 | # set \%>\% flowchart()
88 |
89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
90 | # display the history of the result:
91 | set \%>\% history()
92 | nrow(set)
93 | # not run - display the flowchart:
94 | # set \%>\% flowchart()
95 | }
96 | \seealso{
97 | generics::intersect()
98 | }
99 |
--------------------------------------------------------------------------------
/man/p_add_tally.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_add_tally}
4 | \alias{p_add_tally}
5 | \title{dplyr modifying operations}
6 | \usage{
7 | p_add_tally(x, ..., .messages = "", .headline = "", .tag = NULL)
8 | }
9 | \arguments{
10 | \item{x}{A data frame, data frame extension (e.g. a tibble), or a
11 | lazy data frame (e.g. from dbplyr or dtplyr).}
12 |
13 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group
14 | by.}
15 |
16 | \item{.messages}{a set of glue specs. The glue code can use any global
17 | variable, grouping variable, \{.new_cols\} or \{.dropped_cols\} for changes to
18 | columns, \{.cols\} for the output column names, or \{.strata\}. Defaults to nothing.}
19 |
20 | \item{.headline}{a headline glue spec. The glue code can use any global
21 | variable, grouping variable, \{.new_cols\}, \{.dropped_cols\}, \{.cols\} or \{.strata\}.
22 | Defaults to nothing.}
23 |
24 | \item{.tag}{if you want the summary data from this step in the future then
25 | give it a name with .tag.}
26 | }
27 | \value{
28 | the \code{.data} dataframe after being modified by the \code{dplyr} equivalent
29 | function, but with the history graph updated with a new stage if the
30 | \code{.messages} or \code{.headline} parameter is not empty.
31 | }
32 | \description{
33 | See \code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[dplyr:count]{dplyr::add_count()}}, \code{\link[dplyr:count]{dplyr::add_tally()}},
34 | \code{\link[dplyr:transmute]{dplyr::transmute()}}, \code{\link[dplyr:select]{dplyr::select()}}, \code{\link[dplyr:relocate]{dplyr::relocate()}},
35 | \code{\link[dplyr:rename]{dplyr::rename()}} \code{\link[dplyr:rename]{dplyr::rename_with()}}, \code{\link[dplyr:arrange]{dplyr::arrange()}} for more details
36 | on underlying functions. \code{dtrackr} provides equivalent functions for
37 | mutating, selecting and renaming a data set which act in the same way as
38 | \code{dplyr}. \code{mutate} / \code{select} / \code{rename} generally don't add anything in terms
39 | of provenance of data so the default behaviour is to miss these out of the
40 | \code{dtrackr} history. This can be overridden with the \code{.messages}, or
41 | \code{.headline} values in which case they behave just like a \code{comment()}.
42 | }
43 | \examples{
44 | library(dplyr)
45 | library(dtrackr)
46 |
47 | # mutate and other functions are unitary operations that generally change
48 | # the structure but not size of a dataframe. In dtrackr these are ignored
49 | # by default but we can change that so that their behaviour is obvious.
50 |
51 | # add_count
52 | # adding in a count or tally column as a new column
53 | iris \%>\%
54 | track() \%>\%
55 | add_count(Species, name="new_count_total",
56 | .messages="{.new_cols}",
57 | # .messages="{.cols}",
58 | .headline="New columns from add_count:") \%>\%
59 | history()
60 |
61 | # add_tally
62 | iris \%>\%
63 | track() \%>\%
64 | group_by(Species) \%>\%
65 | dtrackr::add_tally(wt=Petal.Length, name="new_tally_total",
66 | .messages="{.new_cols}",
67 | .headline="New columns from add_tally:") \%>\%
68 | history()
69 |
70 |
71 |
72 | }
73 | \seealso{
74 | dplyr::add_tally()
75 | }
76 |
--------------------------------------------------------------------------------
/man/p_bind_cols.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_bind_cols}
4 | \alias{p_bind_cols}
5 | \title{Set operations}
6 | \usage{
7 | p_bind_cols(
8 | ...,
9 | .messages = "{.count.out} in combined set",
10 | .headline = "Bind columns"
11 | )
12 | }
13 | \arguments{
14 | \item{...}{a collection of tracked data frames to combine}
15 |
16 | \item{.messages}{a set of glue specs. The glue code can use any global
17 | variable, or \{.count.out\}}
18 |
19 | \item{.headline}{a glue spec. The glue code can use any global variable, or
20 | \{.count.out\}}
21 | }
22 | \value{
23 | the dplyr output with the history graph updated.
24 | }
25 | \description{
26 | These perform set operations on tracked dataframes. They merge the history
27 | of 2 (or more) dataframes and combine the rows (or columns). The total number of
28 | resulting rows is available as \{.count.out\}. In all other respects they perform exactly the same
29 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
30 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
31 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
32 | underlying function details.
33 | }
34 | \examples{
35 | library(dplyr)
36 | library(dtrackr)
37 |
38 | # Set operations
39 | people = starwars \%>\% select(-films, -vehicles, -starships)
40 | chrs = people \%>\% track("start")
41 |
42 | lhs = chrs \%>\% include_any(
43 | species == "Human" ~ "{.included} humans",
44 | species == "Droid" ~ "{.included} droids"
45 | )
46 |
47 | # these are different subsets of the same data
48 | rhs = chrs \%>\% include_any(
49 | species == "Human" ~ "{.included} humans",
50 | species == "Gungan" ~ "{.included} gungans"
51 | ) \%>\% comment("{.count} gungans & humans")
52 |
53 |
54 | # Unions
55 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
56 | # display the history of the result:
57 | set \%>\% history()
58 | nrow(set)
59 | # not run - display the flowchart:
60 | # set \%>\% flowchart()
61 |
62 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
63 | # display the history of the result:
64 | set \%>\% history()
65 | nrow(set)
66 | # not run - display the flowchart:
67 | # set \%>\% flowchart()
68 |
69 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
70 | # display the history of the result:
71 | set \%>\% history()
72 | nrow(set)
73 | # not run - display the flowchart:
74 | # set \%>\% flowchart()
75 |
76 | # Intersections and differences
77 |
78 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
79 | # display the history of the result:
80 | set \%>\% history()
81 | nrow(set)
82 | # not run - display the flowchart:
83 | # set \%>\% flowchart()
84 |
85 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
86 | # display the history of the result:
87 | set \%>\% history()
88 | nrow(set)
89 | # not run - display the flowchart:
90 | # set \%>\% flowchart()
91 | }
92 | \seealso{
93 | dplyr::bind_cols()
94 | }
95 |
--------------------------------------------------------------------------------
/man/p_bind_rows.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_bind_rows}
4 | \alias{p_bind_rows}
5 | \title{Set operations}
6 | \usage{
7 | p_bind_rows(..., .messages = "{.count.out} in union", .headline = "Union")
8 | }
9 | \arguments{
10 | \item{...}{a collection of tracked data frames to combine}
11 |
12 | \item{.messages}{a set of glue specs. The glue code can use any global
13 | variable, or \{.count.out\}}
14 |
15 | \item{.headline}{a glue spec. The glue code can use any global variable, or
16 | \{.count.out\}}
17 | }
18 | \value{
19 | the dplyr output with the history graph updated.
20 | }
21 | \description{
22 | These perform set operations on tracked dataframes. They merge the history
23 | of 2 (or more) dataframes and combine the rows (or columns). The total number of
24 | resulting rows is available as \{.count.out\}. In all other respects they perform exactly the same
25 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
26 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
27 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
28 | underlying function details.
29 | }
30 | \examples{
31 | library(dplyr)
32 | library(dtrackr)
33 |
34 | # Set operations
35 | people = starwars \%>\% select(-films, -vehicles, -starships)
36 | chrs = people \%>\% track("start")
37 |
38 | lhs = chrs \%>\% include_any(
39 | species == "Human" ~ "{.included} humans",
40 | species == "Droid" ~ "{.included} droids"
41 | )
42 |
43 | # these are different subsets of the same data
44 | rhs = chrs \%>\% include_any(
45 | species == "Human" ~ "{.included} humans",
46 | species == "Gungan" ~ "{.included} gungans"
47 | ) \%>\% comment("{.count} gungans & humans")
48 |
49 |
50 | # Unions
51 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
52 | # display the history of the result:
53 | set \%>\% history()
54 | nrow(set)
55 | # not run - display the flowchart:
56 | # set \%>\% flowchart()
57 |
58 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
59 | # display the history of the result:
60 | set \%>\% history()
61 | nrow(set)
62 | # not run - display the flowchart:
63 | # set \%>\% flowchart()
64 |
65 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
66 | # display the history of the result:
67 | set \%>\% history()
68 | nrow(set)
69 | # not run - display the flowchart:
70 | # set \%>\% flowchart()
71 |
72 | # Intersections and differences
73 |
74 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
75 | # display the history of the result:
76 | set \%>\% history()
77 | nrow(set)
78 | # not run - display the flowchart:
79 | # set \%>\% flowchart()
80 |
81 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
82 | # display the history of the result:
83 | set \%>\% history()
84 | nrow(set)
85 | # not run - display the flowchart:
86 | # set \%>\% flowchart()
87 | }
88 | \seealso{
89 | dplyr::bind_rows()
90 | }
91 |
--------------------------------------------------------------------------------
/man/p_capture_exclusions.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_capture_exclusions}
4 | \alias{p_capture_exclusions}
5 | \title{Start capturing exclusions on a tracked dataframe.}
6 | \usage{
7 | p_capture_exclusions(.data, .capture = TRUE)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 |
12 | \item{.capture}{Should we capture exclusions (things removed from the data
13 | set). This is useful for debugging data issues but comes at a significant
14 | cost. Defaults to \code{TRUE} in this function (the usage above shows
15 | \code{.capture = TRUE}).}
16 | }
17 | \value{
18 | the .data dataframe with the exclusions flag set (or cleared if
19 | \code{.capture=FALSE}).
20 | }
21 | \description{
22 | Start capturing exclusions on a tracked dataframe.
23 | }
24 | \examples{
25 | library(dplyr)
26 | library(dtrackr)
27 | tmp = iris \%>\% track() \%>\% capture_exclusions()
28 | tmp \%>\% filter(Species!="versicolor") \%>\% history()
29 | }
30 |
--------------------------------------------------------------------------------
/man/p_clear.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_clear}
4 | \alias{p_clear}
5 | \title{Clear the dtrackr history graph}
6 | \usage{
7 | p_clear(.data)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 | }
12 | \value{
13 | the .data dataframe with the history graph removed
14 | }
15 | \description{
16 | This is unlikely to be needed directly and is mostly an internal function.
17 | }
18 | \examples{
19 | library(dplyr)
20 | library(dtrackr)
21 | mtcars \%>\% track() \%>\% comment("A comment") \%>\% p_clear() \%>\% history()
22 | }
23 |
--------------------------------------------------------------------------------
/man/p_comment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_comment}
4 | \alias{p_comment}
5 | \title{Add a generic comment to the dtrackr history graph}
6 | \usage{
7 | p_comment(
8 | .data,
9 | .messages = .defaultMessage(),
10 | .headline = .defaultHeadline(),
11 | .type = "info",
12 | .asOffshoot = (.type == "exclusion"),
13 | .tag = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{a dataframe which may be grouped}
18 |
19 | \item{.messages}{a character vector of glue specifications. A glue
20 | specification can refer to any grouping variables of .data, or any
21 | variables defined in the calling environment, the \{.total\} of all rows,
22 | the \{.count\} variable which is the count in each group and \{.strata\} a
23 | description of the group}
24 |
25 | \item{.headline}{a glue specification which can refer to grouping variables
26 | of .data, or any variables defined in the calling environment, or the
27 | \{.total\} variable (which is \code{nrow(.data)}) and \{.strata\} which is a
28 | description of the grouping}
29 |
30 | \item{.type}{one of "info",...,"exclusion": used to define formatting}
31 |
32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main
33 | flow (defaults to TRUE when \code{.type} is "exclusion", otherwise FALSE).}
34 |
35 | \item{.tag}{if you want the summary data from this step in the future then
36 | give it a name with .tag.}
37 | }
38 | \value{
39 | the same .data dataframe with the history graph updated with the comment
40 | }
41 | \description{
42 | A comment can be any kind of note and is added once for every current
43 | grouping as defined by the \code{.messages} field. It can be made context specific
44 | by including variables such as \{.count\} and \{.total\} in \code{.messages} which
45 | refer to the grouped and ungrouped counts at this current stage of the
46 | pipeline respectively. It can also pull in any global variable.
47 | }
48 | \examples{
49 | library(dplyr)
50 | library(dtrackr)
51 | iris \%>\% track() \%>\% comment("hello {.total} rows") \%>\% history()
52 | }
53 |
--------------------------------------------------------------------------------
/man/p_copy.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_copy}
4 | \alias{p_copy}
5 | \title{Copy the dtrackr history graph from one dataframe to another}
6 | \usage{
7 | p_copy(.data, from)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 |
12 | \item{from}{the dataframe to copy the history graph from}
13 | }
14 | \value{
15 | the .data dataframe with the history graph of "from"
16 | }
17 | \description{
18 | Copy the dtrackr history graph from one dataframe to another
19 | }
20 | \examples{
21 | mtcars \%>\% p_copy(iris \%>\% comment("A comment")) \%>\% history()
22 | }
23 |
--------------------------------------------------------------------------------
/man/p_count_if.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_count_if}
4 | \alias{p_count_if}
5 | \title{Simple count_if dplyr summary function}
6 | \usage{
7 | p_count_if(..., na.rm = TRUE)
8 | }
9 | \arguments{
10 | \item{...}{expression to be evaluated}
11 |
12 | \item{na.rm}{ignore NA values?}
13 | }
14 | \value{
15 | a count of the number of times the expression evaluated to true, in the current context
16 | }
17 | \description{
18 | Simple count_if dplyr summary function
19 | }
20 | \examples{
21 | library(dplyr)
22 | library(dtrackr)
23 | tmp = iris \%>\% dplyr::group_by(Species)
24 | tmp \%>\% dplyr::summarise(long_ones = p_count_if(Petal.Length > 4))
25 | }
26 |
--------------------------------------------------------------------------------
/man/p_count_subgroup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_count_subgroup}
4 | \alias{p_count_subgroup}
5 | \title{Add a subgroup count to the dtrackr history graph}
6 | \usage{
7 | p_count_subgroup(
8 | .data,
9 | .subgroup,
10 | ...,
11 | .messages = .defaultCountSubgroup(),
12 | .headline = .defaultHeadline(),
13 | .type = "info",
14 | .asOffshoot = FALSE,
15 | .tag = NULL,
16 | .maxsubgroups = .defaultMaxSupportedGroupings()
17 | )
18 | }
19 | \arguments{
20 | \item{.data}{a dataframe which may be grouped}
21 |
22 | \item{.subgroup}{a column with a small number of levels (e.g. a factor)}
23 |
24 | \item{...}{passed to \verb{base::factor(subgroup values, ...)} to allow reordering
25 | of levels etc.}
26 |
27 | \item{.messages}{a character vector of glue specifications. A glue
28 | specification can refer to anything from the calling environment,
29 | \{.subgroup\} for the subgroup column name and \{.name\} for the subgroup
30 | column value, \{.count\} for the subgroup column count, \{.subtotal\} for
31 | the current stratification grouping count and \{.total\} for the whole
32 | dataset count}
33 |
34 | \item{.headline}{a glue specification which can refer to grouping variables
35 | of .data, \{.subtotal\} for the current grouping count, or any variables
36 | defined in the calling environment}
37 |
38 | \item{.type}{one of "info","exclusion": used to define formatting}
39 |
40 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main
41 | flow (default = FALSE).}
42 |
43 | \item{.tag}{if you want to use the summary data from this step in the future
44 | then give it a name with .tag.}
45 |
46 | \item{.maxsubgroups}{the maximum number of discrete values allowed in
47 | .subgroup is configurable with
48 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. Large
49 | values produce unwieldy flow charts.}
50 | }
51 | \value{
52 | the same .data dataframe with the history graph updated with a
53 | subgroup count as a new stage
54 | }
55 | \description{
56 | A frequent use case for more detailed description is to have a subgroup count
57 | within a flowchart. This works best for factor subgroup columns but other
58 | data will be converted to a factor automatically. The count of the items in
59 | each subgroup is added as a new stage in the flowchart.
60 | }
61 | \examples{
62 | library(dplyr)
63 | library(dtrackr)
64 | survival::cgd \%>\% track() \%>\% group_by(treat) \%>\%
65 | count_subgroup(center) \%>\% history()
66 | }
67 |
--------------------------------------------------------------------------------
/man/p_distinct.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_distinct}
4 | \alias{p_distinct}
5 | \title{Distinct values of data}
6 | \usage{
7 | p_distinct(
8 | .data,
9 | ...,
10 | .messages = "removing {.count.in-.count.out} duplicates",
11 | .headline = .defaultHeadline(),
12 | .tag = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
17 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
18 | more details.}
19 |
20 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to
21 | use when determining uniqueness. If there are multiple rows for a given
22 | combination of inputs, only the first row will be preserved. If omitted,
23 | will use all variables in the data frame.
24 | Named arguments passed on to \code{\link[dplyr:distinct]{dplyr::distinct}}\describe{
25 | \item{\code{.keep_all}}{If \code{TRUE}, keep all variables in \code{.data}.
26 | If a combination of \code{...} is not distinct, this keeps the
27 | first row of values.}
28 | }}
29 |
30 | \item{.messages}{a set of glue specs. The glue code can use any global
31 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
32 |
33 | \item{.headline}{a headline glue spec. The glue code can use any global
34 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
35 |
36 | \item{.tag}{if you want the summary data from this step in the future then
37 | give it a name with .tag.}
38 | }
39 | \value{
40 | the .data dataframe with distinct values and history graph updated.
41 | }
42 | \description{
43 | Distinct acts in the same way as in \code{dplyr::distinct}. Prior to the operation
44 | the size of the group is calculated \{.count.in\} and after the operation the
45 | output size \{.count.out\}. The group \{.strata\} is also available (if
46 | grouped) for reporting. See \code{\link[dplyr:distinct]{dplyr::distinct()}}.
47 | }
48 | \examples{
49 | library(dplyr)
50 | library(dtrackr)
51 |
52 | tmp = bind_rows(iris \%>\% track(), iris \%>\% track() \%>\% filter(Petal.Length > 5))
53 | tmp \%>\% group_by(Species) \%>\% distinct() \%>\% history()
54 | }
55 | \seealso{
56 | dplyr::distinct()
57 | }
58 |
--------------------------------------------------------------------------------
/man/p_excluded.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_excluded}
4 | \alias{p_excluded}
5 | \title{Get the dtrackr excluded data record}
6 | \usage{
7 | p_excluded(.data, simplify = TRUE)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 |
12 | \item{simplify}{return a single summary dataframe of all exclusions.}
13 | }
14 | \value{
15 | a new dataframe of the excluded data up to this point in the workflow. This dataframe is by default flattened, but if \code{simplify=FALSE} it has a nested structure containing records excluded at each part of the pipeline.
16 | }
17 | \description{
18 | Get the dtrackr excluded data record
19 | }
20 | \examples{
21 | library(dplyr)
22 | library(dtrackr)
23 | tmp = iris \%>\% track() \%>\% capture_exclusions()
24 | tmp \%>\% exclude_all(
25 | Petal.Length > 5.8 ~ "{.excluded} long ones",
26 | Petal.Length < 1.3 ~ "{.excluded} short ones",
27 | .stage = "petal length exclusion"
28 | ) \%>\% excluded()
29 | }
30 |
--------------------------------------------------------------------------------
/man/p_filter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_filter}
4 | \alias{p_filter}
5 | \title{Filtering data}
6 | \usage{
7 | p_filter(
8 | .data,
9 | ...,
10 | .messages = "excluded {.excluded} items",
11 | .headline = .defaultHeadline(),
12 | .type = "exclusion",
13 | .asOffshoot = (.type == "exclusion"),
14 | .stage = (if (is.null(.tag)) "" else .tag),
15 | .tag = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
20 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
21 | more details.}
22 |
23 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that
24 | return a logical value, and are defined in terms of the variables in
25 | \code{.data}. If multiple expressions are included, they are combined with the
26 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are
27 | kept.
28 | Named arguments passed on to \code{\link[dplyr:filter]{dplyr::filter}}\describe{
29 | \item{\code{.by}}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
30 |
31 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
32 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
33 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
34 | \item{\code{.preserve}}{Relevant when the \code{.data} input is grouped.
35 | If \code{.preserve = FALSE} (the default), the grouping structure
36 | is recalculated based on the resulting data, otherwise the grouping is kept as is.}
37 | }}
38 |
39 | \item{.messages}{a set of glue specs. The glue code can use any global
40 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
41 |
42 | \item{.headline}{a headline glue spec. The glue code can use any global
43 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
44 |
45 | \item{.type}{the format type of the action, typically an exclusion}
46 |
47 | \item{.asOffshoot}{if the type is exclusion, \code{.asOffshoot} places the
48 | information box outside of the main flow, as an exclusion.}
49 |
50 | \item{.stage}{a name for this step in the pathway}
51 |
52 | \item{.tag}{if you want the summary data from this step in the future then
53 | give it a name with \code{.tag}.}
54 | }
55 | \value{
56 | the filtered \code{.data} dataframe with history graph updated
57 | }
58 | \description{
59 | Filter acts in the same way as in \code{dplyr} where predicates which evaluate to
60 | TRUE act to select items to include, and items for which the predicate cannot
61 | be evaluated are excluded. For tracking prior to the filter operation the
62 | size of each group is calculated \{.count.in\} and after the operation the
63 | output size of each group \{.count.out\}. The grouping \{.strata\} is also
64 | available (if grouped) for reporting. See \code{\link[dplyr:filter]{dplyr::filter()}}.
65 | }
66 | \examples{
67 | library(dplyr)
68 | library(dtrackr)
69 |
70 | tmp = iris \%>\% track() \%>\% group_by(Species)
71 | tmp \%>\% filter(Petal.Length > 5) \%>\% history()
72 | }
73 | \seealso{
74 | dplyr::filter()
75 | }
76 |
--------------------------------------------------------------------------------
/man/p_flowchart.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_flowchart}
4 | \alias{p_flowchart}
5 | \title{Flowchart output}
6 | \usage{
7 | p_flowchart(
8 | .data,
9 | filename = NULL,
10 | size = std_size$full,
11 | maxWidth = size$width,
12 | maxHeight = size$height,
13 | formats = c("dot", "png", "pdf", "svg"),
14 | defaultToHTML = TRUE,
15 | landscape = size$rot != 0,
16 | ...
17 | )
18 | }
19 | \arguments{
20 | \item{.data}{the tracked dataframe(s) either as a single dataframe or as a
21 | list of dataframes.}
22 |
23 | \item{filename}{a file name which will be where the formatted flowcharts are
24 | saved. If no extension is specified the output formats are determined by
25 | the \code{formats} parameter.}
26 |
27 | \item{size}{a named list with 3 elements, width and height in inches and
28 | rotation. A predefined set of standard sizes are available in the
29 | \link{std_size} object.}
30 |
31 | \item{maxWidth}{a width (on the paper) in inches if \code{size} is not defined}
32 |
33 | \item{maxHeight}{a height (on the paper) in inches if \code{size} is not defined}
34 |
35 | \item{formats}{some of \code{pdf},\code{dot},\code{svg},\code{png},\code{ps}}
36 |
37 | \item{defaultToHTML}{if the correct output format is not easy to determine
38 | from the context, default to providing \code{HTML} (TRUE) or to embedding the \code{PNG} (FALSE)}
39 |
40 | \item{landscape}{rotate the output by 270 degrees into a landscape format.
41 | \code{maxWidth} and \code{maxHeight} still apply and refer to the paper width to fit
42 | the flowchart into after rotation. (you might need to flip width and height)}
43 |
44 | \item{...}{other parameters passed on to \code{p_get_as_dot()}; notable ones are
45 | \code{fill} (background colour e.g. \code{lightgrey}), \code{fontsize} (in points),
46 | \code{colour} (font colour)}
47 | }
48 | \value{
49 | the nature of the flowchart output depends on the context in which
50 | the function is called. It will be some form of browse-able html output if
51 | called from an interactive session or a \code{PNG}/\code{PDF} link if in \code{knitr} and
52 | knitting latex or word type outputs. If a file name is specified the output
53 | will also be saved at the given location.
54 | }
55 | \description{
56 | Generate a flowchart of the history of the dataframe(s), with all the tracked
57 | data pipeline as stages in the flowchart. Multiple dataframes can be plotted
58 | together in which case an attempt is made to determine which parts are
59 | common.
60 | }
61 | \examples{
62 | library(dplyr)
63 | library(dtrackr)
64 |
65 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") \%>\% filter(Species!="versicolor")
66 | tmp \%>\% group_by(Species) \%>\% comment(.tag="step2") \%>\% flowchart()
67 | }
68 |
--------------------------------------------------------------------------------
/man/p_get.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_get}
4 | \alias{p_get}
5 | \title{Get the dtrackr history graph}
6 | \usage{
7 | p_get(.data)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 | }
12 | \value{
13 | the history graph. This is a list, of class \code{trackr_graph}, containing the following named items:
14 | \itemize{
15 | \item excluded - the data items that have been excluded thus far as a nested dataframe
16 | \item tags - a dataframe of tag-value pairs containing the summary of the data at named points in the data flow (see \code{\link[=tagged]{tagged()}})
17 | \item nodes - a dataframe of the nodes of the flow chart
18 | \item edges - an edge list (as a dataframe) of the relationships between the nodes in the flow chart
19 | \item head - the current most recent nodes added into the graph as a dataframe.
20 | }
21 |
22 | The format of this data may grow over time but these fields are unlikely to be changed.
23 | }
24 | \description{
25 | This provides the raw history graph and is not really intended for mainstream use.
26 | The internal structure of the graph is explained below. print and plot S3 methods exist for
27 | the dtrackr history graph.
28 | }
29 | \examples{
30 | library(dplyr)
31 | library(dtrackr)
32 | graph = iris \%>\% track() \%>\% comment("A comment") \%>\% history()
33 | print(graph)
34 | }
35 |
--------------------------------------------------------------------------------
/man/p_get_as_dot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_get_as_dot}
4 | \alias{p_get_as_dot}
5 | \title{DOT output}
6 | \usage{
7 | p_get_as_dot(.data, fill = "lightgrey", fontsize = "8", colour = "black", ...)
8 | }
9 | \arguments{
10 | \item{.data}{the tracked dataframe}
11 |
12 | \item{fill}{the default node fill colour}
13 |
14 | \item{fontsize}{the default font size}
15 |
16 | \item{colour}{the default font colour}
17 |
18 | \item{...}{not used}
19 | }
20 | \value{
21 | a representation of the history graph in \code{Graphviz} dot format.
22 | }
23 | \description{
24 | (advanced usage) outputs a \code{dtrackr} history graph as a DOT string for rendering with \code{Graphviz}
25 | }
26 | \examples{
27 | library(dplyr)
28 | library(dtrackr)
29 |
30 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1") \%>\% filter(Species!="versicolor")
31 | dot = tmp \%>\% group_by(Species) \%>\% comment(.tag="step2") \%>\% p_get_as_dot()
32 | cat(dot)
33 | }
34 |
--------------------------------------------------------------------------------
/man/p_group_by.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_group_by}
4 | \alias{p_group_by}
5 | \title{Stratifying your analysis}
6 | \usage{
7 | p_group_by(
8 | .data,
9 | ...,
10 | .messages = "stratify by {.cols}",
11 | .headline = NULL,
12 | .tag = NULL,
13 | .maxgroups = .defaultMaxSupportedGroupings()
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
18 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
19 | more details.}
20 |
21 | \item{...}{In \code{group_by()}, variables or computations to group by.
22 | Computations are always done on the ungrouped data frame.
23 | To perform computations on the grouped data, you need to use
24 | a separate \code{mutate()} step before the \code{group_by()}.
25 | Computations are not allowed in \code{nest_by()}.
26 | In \code{ungroup()}, variables to remove from the grouping.
27 | Named arguments passed on to \code{\link[dplyr:group_by]{dplyr::group_by}}\describe{
28 | \item{\code{.add}}{When \code{FALSE}, the default, \code{group_by()} will
29 | override existing groups. To add to the existing groups, use
30 | \code{.add = TRUE}.
31 |
32 | This argument was previously called \code{add}, but that prevented
33 | creating a new grouping variable called \code{add}, and conflicts with
34 | our naming conventions.}
35 | \item{\code{.drop}}{Drop groups formed by factor levels that don't appear in the
36 | data? The default is \code{TRUE} except when \code{.data} has been previously
37 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.}
38 | \item{\code{x}}{A \code{\link[dplyr:tbl]{tbl()}}}
39 | }}
40 |
41 | \item{.messages}{a set of glue specs. The glue code can use any global
42 | variable, or \{.cols\} which is the columns that are being grouped by.}
43 |
44 | \item{.headline}{a headline glue spec. The glue code can use any global
45 | variable, or \{.cols\}.}
46 |
47 | \item{.tag}{if you want the summary data from this step in the future then
48 | give it a name with .tag.}
49 |
50 | \item{.maxgroups}{the maximum number of subgroups allowed before the tracking
51 | is paused.}
52 | }
53 | \value{
54 | the .data but grouped.
55 | }
56 | \description{
57 | Grouping a data set acts in the normal way. When tracking a dataframe
58 | sometimes a \code{group_by()} operation will create a lot of groups. This happens
59 | for example if you are doing a \code{group_by()}, \code{summarise()} step that is
60 | aggregating data on a fine scale, e.g. by day in a time-series. This is
61 | generally a terrible idea when tracking a dataframe as the resulting
62 | flowchart will have many many branches and be illegible. \code{dtrackr} will detect this issue and
63 | pause tracking the dataframe with a warning. It is up to the user to
64 | \code{resume()} tracking when the large number of groups has been resolved e.g.
65 | using a \code{dplyr::ungroup()}. This limit is configurable with
66 | \code{options("dtrackr.max_supported_groupings"=XX)}. The default is 16. See
67 | \code{\link[dplyr:group_by]{dplyr::group_by()}}.
68 | }
69 | \examples{
70 | library(dplyr)
71 | library(dtrackr)
72 |
73 | tmp = iris \%>\% track() \%>\% group_by(Species, .messages="stratify by {.cols}")
74 | tmp \%>\% comment("{.strata}") \%>\% history()
75 | }
76 | \seealso{
77 | dplyr::group_by()
78 | }
79 |
--------------------------------------------------------------------------------
/man/p_group_modify.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_group_modify}
4 | \alias{p_group_modify}
5 | \title{Group-wise modification of data and complex operations}
6 | \usage{
7 | p_group_modify(
8 | .data,
9 | ...,
10 | .messages = NULL,
11 | .headline = .defaultHeadline(),
12 | .type = "modify",
13 | .tag = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{A grouped tibble}
18 |
19 | \item{...}{Additional arguments passed on to \code{.f}
20 | Named arguments passed on to \code{\link[dplyr:group_map]{dplyr::group_modify}}\describe{
21 | \item{\code{.f}}{A function or formula to apply to each group.
22 |
23 | If a \strong{function}, it is used as is. It should have at least 2 formal arguments.
24 |
25 | If a \strong{formula}, e.g. \code{~ head(.x)}, it is converted to a function.
26 |
27 | In the formula, you can use
28 | \itemize{
29 | \item \code{.} or \code{.x} to refer to the subset of rows of \code{.tbl}
30 | for the given group
31 | \item \code{.y} to refer to the key, a one row tibble with one column per grouping variable
32 | that identifies the group
33 | }}
34 | \item{\code{.keep}}{are the grouping variables kept in \code{.x}}
35 | }}
36 |
37 | \item{.messages}{a set of glue specs. The glue code can use any global
38 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
39 |
40 | \item{.headline}{a headline glue spec. The glue code can use any global
41 | variable, or \{.strata\},\{.count.in\},and \{.count.out\}}
42 |
43 | \item{.type}{default "modify": used to define formatting}
44 |
45 | \item{.tag}{if you want the summary data from this step in the future then
46 | give it a name with .tag.}
47 | }
48 | \value{
49 | the transformed .data dataframe with the history graph updated.
50 | }
51 | \description{
52 | Group modifying a data set acts in the normal way. The internal mechanics of
53 | the modify function are opaque to the history. This means these can be used
54 | to wrap any unsupported operation without losing the history (e.g. \code{df \%>\% track() \%>\% group_modify(function(d,...) { d \%>\% unsupported_operation() })}
55 | ). Prior to the operation the size of the group is calculated \{.count.in\}
56 | and after the operation the output size \{.count.out\}. The group \{.strata\}
57 | is also available (if grouped) for reporting. See \code{\link[dplyr:group_map]{dplyr::group_modify()}}.
58 | }
59 | \examples{
60 | library(dplyr)
61 | library(dtrackr)
62 |
63 | tmp = iris \%>\% track() \%>\% group_by(Species)
64 | tmp \%>\% group_modify(
65 | function(d,g,...) { return(tibble::tibble(x=runif(10))) },
66 | .messages="{.count.in} in, {.count.out} out"
67 | ) \%>\% history()
68 | }
69 | \seealso{
70 | dplyr::group_modify()
71 | }
72 |
--------------------------------------------------------------------------------
/man/p_intersect.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_intersect}
4 | \alias{p_intersect}
5 | \title{Set operations}
6 | \usage{
7 | p_intersect(
8 | x,
9 | y,
10 | ...,
11 | .messages = "{.count.out} in intersection",
12 | .headline = "Intersection"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{Vectors to combine.}
17 |
18 | \item{...}{a collection of tracked data frames to combine}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count.out\}}
22 |
23 | \item{.headline}{a glue spec. The glue code can use any global variable, or
24 | \{.count.out\}}
25 | }
26 | \value{
27 | the dplyr output with the history graph updated.
28 | }
29 | \description{
30 | These perform set operations on tracked dataframes. It merges the history
31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of
32 | resulting rows as \{.count.out\}. In other respects it performs exactly the same
33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
36 | underlying function details.
37 | }
38 | \examples{
39 | library(dplyr)
40 | library(dtrackr)
41 |
42 | # Set operations
43 | people = starwars \%>\% select(-films, -vehicles, -starships)
44 | chrs = people \%>\% track("start")
45 |
46 | lhs = chrs \%>\% include_any(
47 | species == "Human" ~ "{.included} humans",
48 | species == "Droid" ~ "{.included} droids"
49 | )
50 |
51 | # these are different subsets of the same data
52 | rhs = chrs \%>\% include_any(
53 | species == "Human" ~ "{.included} humans",
54 | species == "Gungan" ~ "{.included} gungans"
55 | ) \%>\% comment("{.count} gungans & humans")
56 |
57 |
58 | # Unions
59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
60 | # display the history of the result:
61 | set \%>\% history()
62 | nrow(set)
63 | # not run - display the flowchart:
64 | # set \%>\% flowchart()
65 |
66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
67 | # display the history of the result:
68 | set \%>\% history()
69 | nrow(set)
70 | # not run - display the flowchart:
71 | # set \%>\% flowchart()
72 |
73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
74 | # display the history of the result:
75 | set \%>\% history()
76 | nrow(set)
77 | # not run - display the flowchart:
78 | # set \%>\% flowchart()
79 |
80 | # Intersections and differences
81 |
82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
83 | # display the history of the result:
84 | set \%>\% history()
85 | nrow(set)
86 | # not run - display the flowchart:
87 | # set \%>\% flowchart()
88 |
89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
90 | # display the history of the result:
91 | set \%>\% history()
92 | nrow(set)
93 | # not run - display the flowchart:
94 | # set \%>\% flowchart()
95 | }
96 | \seealso{
97 | generics::intersect()
98 | }
99 |
--------------------------------------------------------------------------------
/man/p_pause.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_pause}
4 | \alias{p_pause}
5 | \title{Pause tracking the data frame.}
6 | \usage{
7 | p_pause(.data, auto = FALSE)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 |
12 | \item{auto}{if \code{TRUE} the tracking will resume automatically when the
13 | number of groups has fallen to a sensible level (default is \code{FALSE}).}
14 | }
15 | \value{
16 | the .data dataframe with history graph tracking paused
17 | }
18 | \description{
19 | Pausing tracking of a data frame may be required if an operation is about to
20 | be performed that creates a lot of groupings or that you otherwise don't
21 | want to pollute the history graph (e.g. maybe selecting something using
22 | an anti-join). Once paused the history is not updated until a \code{resume()} is
23 | called, or when the data frame is ungrouped (if \code{auto} is enabled).
24 | }
25 | \examples{
26 | iris \%>\% track() \%>\% pause() \%>\% history()
27 | }
28 |
--------------------------------------------------------------------------------
/man/p_reframe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_reframe}
4 | \alias{p_reframe}
5 | \title{Summarise a data set}
6 | \usage{
7 | p_reframe(.data, ..., .messages = "", .headline = "", .tag = NULL)
8 | }
9 | \arguments{
10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
12 | more details.}
13 |
14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of
15 | summary functions. The name will be the name of the variable in the result.
16 |
17 | The value can be:
18 | \itemize{
19 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
20 | \item A data frame, to add multiple columns from a single expression.
21 | }
22 |
23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was
24 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.
25 | Named arguments passed on to \code{\link[dplyr:reframe]{dplyr::reframe}}\describe{
26 | \item{\code{.by}}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
27 |
28 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
29 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
30 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
31 | }}
32 |
33 | \item{.messages}{a set of glue specs. The glue code can use any summary
34 | variable defined in the ... parameter, or any global variable, or
35 | \{.strata\}}
36 |
37 | \item{.headline}{a headline glue spec. The glue code can use any summary
38 | variable defined in the ... parameter, or any global variable, or
39 | \{.strata\}}
40 |
41 | \item{.tag}{if you want the summary data from this step in the future then
42 | give it a name with .tag.}
43 | }
44 | \value{
45 | the .data dataframe summarised with the history graph updated showing
46 | the summarise operation as a new stage
47 | }
48 | \description{
49 | Summarising a data set acts in the normal \code{dplyr} manner to collapse groups
50 | to individual rows. Any columns resulting from the summary can be added to
51 | the history graph. In the history this also joins any stratified branches and
52 | allows you to generate some summary statistics about the un-grouped data. See
53 | \code{\link[dplyr:summarise]{dplyr::summarise()}}.
54 | }
55 | \examples{
56 | library(dplyr)
57 | library(dtrackr)
58 |
59 | tmp = iris \%>\% group_by(Species) \%>\% track()
60 | tmp \%>\% reframe(tibble(
61 | param = c("mean","min","max"),
62 | value = c(mean(Petal.Length), min(Petal.Length), max(Petal.Length))
63 | ), .messages="length {param}: {value}") \%>\% history()
64 | }
65 | \seealso{
66 | dplyr::reframe()
67 | }
68 |
--------------------------------------------------------------------------------
/man/p_resume.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_resume}
4 | \alias{p_resume}
5 | \title{Resume tracking the data frame.}
6 | \usage{
7 | p_resume(.data, ...)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 |
12 | \item{...}{
13 | Named arguments passed on to \code{\link[=p_group_by]{p_group_by}}\describe{
14 | \item{\code{.messages}}{a set of glue specs. The glue code can use any global
15 | variable, or \{.cols\} which is the columns that are being grouped by.}
16 | \item{\code{.headline}}{a headline glue spec. The glue code can use any global
17 | variable, or \{.cols\}.}
18 | \item{\code{.tag}}{if you want the summary data from this step in the future then
19 | give it a name with .tag.}
20 | \item{\code{.maxgroups}}{the maximum number of subgroups allowed before the tracking
21 | is paused.}
22 | \item{\code{...}}{In \code{group_by()}, variables or computations to group by.
23 | Computations are always done on the ungrouped data frame.
24 | To perform computations on the grouped data, you need to use
25 | a separate \code{mutate()} step before the \code{group_by()}.
26 | Computations are not allowed in \code{nest_by()}.
27 | In \code{ungroup()}, variables to remove from the grouping.}
28 | }}
29 | }
30 | \value{
31 | the .data data frame with history graph tracking resumed
32 | }
33 | \description{
34 | This may reset the grouping of the tracked data if the grouping structure
35 | has changed since the data frame was paused. If you try and resume tracking a
36 | data frame with too many groups (as defined by \code{options("dtrackr.max_supported_groupings"=XX)})
37 | then the resume will fail and the data frame will still be paused. This can
38 | be overridden by specifying a value for the \code{.maxgroups} parameter.
39 | }
40 | \examples{
41 | library(dplyr)
42 | library(dtrackr)
43 | iris \%>\% track() \%>\% pause() \%>\% resume() \%>\% history()
44 | }
45 |
--------------------------------------------------------------------------------
/man/p_select.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_select}
4 | \alias{p_select}
5 | \title{dplyr modifying operations}
6 | \usage{
7 | p_select(.data, ..., .messages = "", .headline = "", .tag = NULL)
8 | }
9 | \arguments{
10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
12 | more details.}
13 |
14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs.
15 | The name gives the name of the column in the output.
16 |
17 | The value can be:
18 | \itemize{
19 | \item A vector of length 1, which will be recycled to the correct length.
20 | \item A vector the same length as the current group (or the whole data frame
21 | if ungrouped).
22 | \item \code{NULL}, to remove the column.
23 | \item A data frame or tibble, to create multiple columns in the output.
24 | }}
25 |
26 | \item{.messages}{a set of glue specs. The glue code can use any global
27 | variable, grouping variable, \{.new_cols\} or \{.dropped_cols\} for changes to
28 | columns, \{.cols\} for the output column names, or \{.strata\}. Defaults to nothing.}
29 |
30 | \item{.headline}{a headline glue spec. The glue code can use any global
31 | variable, grouping variable, \{.new_cols\}, \{.dropped_cols\}, \{.cols\} or \{.strata\}.
32 | Defaults to nothing.}
33 |
34 | \item{.tag}{if you want the summary data from this step in the future then
35 | give it a name with .tag.}
36 | }
37 | \value{
38 | the \code{.data} dataframe after being modified by the \code{dplyr} equivalent
39 | function, but with the history graph updated with a new stage if the
40 | \code{.messages} or \code{.headline} parameter is not empty.
41 | }
42 | \description{
43 | See \code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[dplyr:count]{dplyr::add_count()}}, \code{\link[dplyr:count]{dplyr::add_tally()}},
44 | \code{\link[dplyr:transmute]{dplyr::transmute()}}, \code{\link[dplyr:select]{dplyr::select()}}, \code{\link[dplyr:relocate]{dplyr::relocate()}},
45 | \code{\link[dplyr:rename]{dplyr::rename()}}, \code{\link[dplyr:rename]{dplyr::rename_with()}}, \code{\link[dplyr:arrange]{dplyr::arrange()}} for more details
46 | on underlying functions. \code{dtrackr} provides equivalent functions for
47 | mutating, selecting and renaming a data set which act in the same way as
48 | \code{dplyr}. \code{mutate} / \code{select} / \code{rename} generally don't add anything in terms
49 | of provenance of data so the default behaviour is to miss these out of the
50 | \code{dtrackr} history. This can be overridden with the \code{.messages}, or
51 | \code{.headline} values in which case they behave just like a \code{comment()}.
52 | }
53 | \examples{
54 | library(dplyr)
55 | library(dtrackr)
56 |
57 | # mutate and other functions are unitary operations that generally change
58 | # the structure but not size of a dataframe. In dtrackr these are ignored
59 | # by default but we can change that so that their behaviour is obvious.
60 |
61 | # select
62 | # The output of the select verb (here using tidyselect syntax) can be captured
63 | # and here all column names are being reported with the .cols variable.
64 | iris \%>\%
65 | track() \%>\%
66 | group_by(Species) \%>\%
67 | select(
68 | tidyselect::starts_with("Sepal"),
69 | .messages="{.cols}",
70 | .headline="Output columns from select:") \%>\%
71 | history()
72 | }
73 | \seealso{
74 | dplyr::select()
75 | }
76 |
--------------------------------------------------------------------------------
/man/p_set.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_set}
4 | \alias{p_set}
5 | \title{Set the dtrackr history graph}
6 | \usage{
7 | p_set(.data, .graph)
8 | }
9 | \arguments{
10 | \item{.data}{a dataframe which may be grouped}
11 |
12 | \item{.graph}{a history graph list (consisting of nodes, edges, and head) see examples}
13 | }
14 | \value{
15 | the .data dataframe with the history graph metadata set to the provided value
16 | }
17 | \description{
18 | This is unlikely to be useful to an end user and is called automatically by many of the other
19 | functions here. On the off chance you need to copy history metadata from one dataframe to another, this function does that.
20 | }
21 | \examples{
22 | library(dplyr)
23 | library(dtrackr)
24 | mtcars \%>\% p_set(iris \%>\% comment("A comment") \%>\% p_get()) \%>\% history()
25 | }
26 |
--------------------------------------------------------------------------------
/man/p_setdiff.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_setdiff}
4 | \alias{p_setdiff}
5 | \title{Set operations}
6 | \usage{
7 | p_setdiff(
8 | x,
9 | y,
10 | ...,
11 | .messages = "{.count.out} items in difference",
12 | .headline = "Difference"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{Vectors to combine.}
17 |
18 | \item{...}{a collection of tracked data frames to combine}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count.out\}}
22 |
23 | \item{.headline}{a glue spec. The glue code can use any global variable, or
24 | \{.count.out\}}
25 | }
26 | \value{
27 | the dplyr output with the history graph updated.
28 | }
29 | \description{
30 | These perform set operations on tracked dataframes. It merges the history
31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of
32 | resulting rows as \{.count.out\}; in all other respects it performs exactly the same
33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
36 | underlying function details.
37 | }
38 | \examples{
39 | library(dplyr)
40 | library(dtrackr)
41 |
42 | # Set operations
43 | people = starwars \%>\% select(-films, -vehicles, -starships)
44 | chrs = people \%>\% track("start")
45 |
46 | lhs = chrs \%>\% include_any(
47 | species == "Human" ~ "{.included} humans",
48 | species == "Droid" ~ "{.included} droids"
49 | )
50 |
51 | # these are different subsets of the same data
52 | rhs = chrs \%>\% include_any(
53 | species == "Human" ~ "{.included} humans",
54 | species == "Gungan" ~ "{.included} gungans"
55 | ) \%>\% comment("{.count} gungans & humans")
56 |
57 |
58 | # Unions
59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
60 | # display the history of the result:
61 | set \%>\% history()
62 | nrow(set)
63 | # not run - display the flowchart:
64 | # set \%>\% flowchart()
65 |
66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
67 | # display the history of the result:
68 | set \%>\% history()
69 | nrow(set)
70 | # not run - display the flowchart:
71 | # set \%>\% flowchart()
72 |
73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
74 | # display the history of the result:
75 | set \%>\% history()
76 | nrow(set)
77 | # not run - display the flowchart:
78 | # set \%>\% flowchart()
79 |
80 | # Intersections and differences
81 |
82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
83 | # display the history of the result:
84 | set \%>\% history()
85 | nrow(set)
86 | # not run - display the flowchart:
87 | # set \%>\% flowchart()
88 |
89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
90 | # display the history of the result:
91 | set \%>\% history()
92 | nrow(set)
93 | # not run - display the flowchart:
94 | # set \%>\% flowchart()
95 | }
96 | \seealso{
97 | dplyr::setdiff()
98 | }
99 |
--------------------------------------------------------------------------------
/man/p_status.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_status}
4 | \alias{p_status}
5 | \title{Add a summary to the dtrackr history graph}
6 | \usage{
7 | p_status(
8 | .data,
9 | ...,
10 | .messages = .defaultMessage(),
11 | .headline = .defaultHeadline(),
12 | .type = "info",
13 | .asOffshoot = FALSE,
14 | .tag = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{.data}{a dataframe which may be grouped}
19 |
20 | \item{...}{any normal dplyr::summarise specification, e.g. \code{count=n()} or
21 | \code{av=mean(x)}, etcetera.}
22 |
23 | \item{.messages}{a character vector of glue specifications. A glue
24 | specification can refer to the summary outputs, any grouping variables of
25 | .data, the \{.strata\}, or any variables defined in the calling environment}
26 |
27 | \item{.headline}{a glue specification which can refer to grouping variables
28 | of .data, or any variables defined in the calling environment}
29 |
30 | \item{.type}{one of "info","exclusion": used to define formatting}
31 |
32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main
33 | flow (default = FALSE).}
34 |
35 | \item{.tag}{if you want the summary data from this step in the future then
36 | give it a name with .tag.}
37 | }
38 | \value{
39 | the same .data dataframe with the history metadata updated with the
40 | status inserted as a new stage
41 | }
42 | \description{
43 | In the middle of a pipeline you may wish to document something about the data
44 | that is more complex than the simple counts. \code{status} is essentially a
45 | \code{dplyr} summarisation step which is connected to a \code{glue} specification
46 | output, that is recorded in the data frame history. This means you can do an
47 | arbitrary interim summarisation and put the result into the flowchart without
48 | disrupting the pipeline flow.
49 | }
50 | \details{
51 | Because summary specifications are passed through \code{...}, they MUST BE NAMED.
52 | }
53 | \examples{
54 | library(dplyr)
55 | library(dtrackr)
56 | tmp = iris \%>\% track() \%>\% group_by(Species)
57 | tmp \%>\% status(
58 | long = p_count_if(Petal.Length>5),
59 | short = p_count_if(Petal.Length<2),
60 | .messages="{Species}: {long} long ones & {short} short ones"
61 | ) \%>\% history()
62 | }
63 |
--------------------------------------------------------------------------------
/man/p_tagged.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_tagged}
4 | \alias{p_tagged}
5 | \title{Retrieve tagged data in the history graph}
6 | \usage{
7 | p_tagged(.data, .tag = NULL, .strata = NULL, .glue = NULL, ...)
8 | }
9 | \arguments{
10 | \item{.data}{the tracked dataframe.}
11 |
12 | \item{.tag}{(optional) the tag to retrieve.}
13 |
14 | \item{.strata}{(optional) filter the tagged data by the strata. set to "" to filter just the top level ungrouped data.}
15 |
16 | \item{.glue}{(optional) a glue specification which will be applied to the tagged content to generate a \code{.label} for the tagged content.}
17 |
18 | \item{...}{(optional) any other named parameters will be passed to \code{glue::glue} and can be used to generate a label.}
19 | }
20 | \value{
21 | various things depending on what is requested.
22 |
23 | By default a tibble with a \code{.tag} column and all associated summary values in a nested \code{.content} column.
24 |
25 | If a \code{.strata} column is specified the results are filtered to just those that match a given \code{.strata} grouping (i.e. this will be the grouping label on the flowchart). Ungrouped content will have an empty "" as \code{.strata}.
26 |
27 | If \code{.tag} is specified the result will be for a single tag and \code{.content} will be automatically un-nested to give a single un-nested dataframe of the content captured at the \code{.tag} tagged step.
28 | This could be single or multiple rows depending on whether the original data was grouped at the point of tagging.
29 |
30 | If both the \code{.tag} and \code{.glue} are specified a \code{.label} column will be computed from \code{.glue} and the tagged content. If the result of this is a single row then just the string value of \code{.label} is returned.
31 |
32 | If just the \code{.glue} is specified, the result is an un-nested dataframe with \code{.tag}, \code{.strata} and \code{.label} columns, with a label for each tag in each strata.
33 |
34 | If this seems complex then the best thing is to experiment until you get the output you want, leaving any \code{.glue} options until you think you know what you are doing. It made sense at the time.
35 | }
36 | \description{
37 | Any counts at the individual stages that were stored with a \code{.tag} option in a pipeline step can be recovered here. The idea here is to provide a quick way to access a single value
38 | for the counts or other details tagged in a pipeline, in a format that can be reported in the text of a document (e.g. for a results section). The consort statement vignette
39 | has further examples of use.
40 | }
41 | \examples{
42 | library(dplyr)
43 | library(dtrackr)
44 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1")
45 | tmp = tmp \%>\% filter(Species!="versicolor") \%>\% group_by(Species)
46 | tmp \%>\% comment(.tag="step2") \%>\% tagged(.glue = "{.count}/{.total}")
47 | }
48 |
--------------------------------------------------------------------------------
/man/p_track.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_track}
4 | \alias{p_track}
5 | \title{Start tracking the dtrackr history graph}
6 | \usage{
7 | p_track(
8 | .data,
9 | .messages = .defaultMessage(),
10 | .headline = .defaultHeadline(),
11 | .tag = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{.data}{a dataframe which may be grouped}
16 |
17 | \item{.messages}{a character vector of glue specifications. A glue
18 | specification can refer to any grouping variables of .data, or any
19 | variables defined in the calling environment, the \{.total\} variable which
20 | is the count of all rows, the \{.count\} variable which is the count of
21 | rows in the current group and the \{.strata\} which describes the current
22 | group. Defaults to the value of \code{getOption("dtrackr.default_message")}.}
23 |
24 | \item{.headline}{a glue specification which can refer to grouping variables
25 | of .data, or any variables defined in the calling environment, or the
26 | \{.total\} variable which is \code{nrow(.data)}, or \{.strata\} a summary of the
27 | current group. Defaults to the value of \code{getOption("dtrackr.default_headline")}.}
28 |
29 | \item{.tag}{if you want the summary data from this step in the future then
30 | give it a name with .tag.}
31 | }
32 | \value{
33 | the .data dataframe with additional history graph metadata, to allow
34 | tracking.
35 | }
36 | \description{
37 | Start tracking the dtrackr history graph
38 | }
39 | \examples{
40 | library(dplyr)
41 | library(dtrackr)
42 | iris \%>\% track() \%>\% history()
43 | }
44 |
--------------------------------------------------------------------------------
/man/p_ungroup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_ungroup}
4 | \alias{p_ungroup}
5 | \title{Remove a stratification from a data set}
6 | \usage{
7 | p_ungroup(
8 | x,
9 | ...,
10 | .messages = .defaultMessage(),
11 | .headline = .defaultHeadline(),
12 | .tag = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{x}{A \code{\link[dplyr:tbl]{tbl()}}}
17 |
18 | \item{...}{variables to remove from the grouping.}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count\}. the default is "total \{.count\} items"}
22 |
23 | \item{.headline}{a headline glue spec. The glue code can use \{.count\} and
24 | \{.strata\}.}
25 |
26 | \item{.tag}{if you want the summary data from this step in the future then
27 | give it a name with .tag.}
28 | }
29 | \value{
30 | the .data dataframe but ungrouped with the history graph
31 | updated showing the ungroup operation as a new stage.
32 | }
33 | \description{
34 | Un-grouping a data set logically combines the different arms. In the history
35 | this joins any stratified branches and acts as a specific type of \code{\link[=status]{status()}},
36 | allowing you to generate some summary statistics about the un-grouped data.
37 | See \code{\link[dplyr:group_by]{dplyr::ungroup()}}.
38 | }
39 | \examples{
40 | library(dplyr)
41 | library(dtrackr)
42 |
43 | tmp = iris \%>\% group_by(Species) \%>\% comment("A test")
44 | tmp \%>\% ungroup(.messages="{.count} items in combined") \%>\% history()
45 | }
46 | \seealso{
47 | dplyr::ungroup()
48 | }
49 |
--------------------------------------------------------------------------------
/man/p_union.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_union}
4 | \alias{p_union}
5 | \title{Set operations}
6 | \usage{
7 | p_union(
8 | x,
9 | y,
10 | ...,
11 | .messages = "{.count.out} unique items in union",
12 | .headline = "Distinct union"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{Vectors to combine.}
17 |
18 | \item{...}{a collection of tracked data frames to combine}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count.out\}}
22 |
23 | \item{.headline}{a glue spec. The glue code can use any global variable, or
24 | \{.count.out\}}
25 | }
26 | \value{
27 | the dplyr output with the history graph updated.
28 | }
29 | \description{
30 | These perform set operations on tracked dataframes. It merges the history
31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of
32 | resulting rows as \{.count.out\}; in all other respects it performs exactly the same
33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
36 | underlying function details.
37 | }
38 | \examples{
39 | library(dplyr)
40 | library(dtrackr)
41 |
42 | # Set operations
43 | people = starwars \%>\% select(-films, -vehicles, -starships)
44 | chrs = people \%>\% track("start")
45 |
46 | lhs = chrs \%>\% include_any(
47 | species == "Human" ~ "{.included} humans",
48 | species == "Droid" ~ "{.included} droids"
49 | )
50 |
51 | # these are different subsets of the same data
52 | rhs = chrs \%>\% include_any(
53 | species == "Human" ~ "{.included} humans",
54 | species == "Gungan" ~ "{.included} gungans"
55 | ) \%>\% comment("{.count} gungans & humans")
56 |
57 |
58 | # Unions
59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
60 | # display the history of the result:
61 | set \%>\% history()
62 | nrow(set)
63 | # not run - display the flowchart:
64 | # set \%>\% flowchart()
65 |
66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
67 | # display the history of the result:
68 | set \%>\% history()
69 | nrow(set)
70 | # not run - display the flowchart:
71 | # set \%>\% flowchart()
72 |
73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
74 | # display the history of the result:
75 | set \%>\% history()
76 | nrow(set)
77 | # not run - display the flowchart:
78 | # set \%>\% flowchart()
79 |
80 | # Intersections and differences
81 |
82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
83 | # display the history of the result:
84 | set \%>\% history()
85 | nrow(set)
86 | # not run - display the flowchart:
87 | # set \%>\% flowchart()
88 |
89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
90 | # display the history of the result:
91 | set \%>\% history()
92 | nrow(set)
93 | # not run - display the flowchart:
94 | # set \%>\% flowchart()
95 | }
96 | \seealso{
97 | generics::union()
98 | }
99 |
--------------------------------------------------------------------------------
/man/p_untrack.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{p_untrack}
4 | \alias{p_untrack}
5 | \title{Remove tracking from the dataframe}
6 | \usage{
7 | p_untrack(.data)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 | }
12 | \value{
13 | the .data dataframe with history graph metadata removed.
14 | }
15 | \description{
16 | Remove tracking from the dataframe
17 | }
18 | \examples{
19 | library(dplyr)
20 | library(dtrackr)
21 | iris \%>\% track() \%>\% untrack() \%>\% class()
22 | }
23 |
--------------------------------------------------------------------------------
/man/pause.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{pause}
4 | \alias{pause}
5 | \title{Pause tracking the data frame.}
6 | \usage{
7 | pause(.data, auto = FALSE)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 |
12 | \item{auto}{if \code{TRUE} the tracking will resume automatically when the
13 | number of groups has fallen to a sensible level (default is \code{FALSE}).}
14 | }
15 | \value{
16 | the .data dataframe with history graph tracking paused
17 | }
18 | \description{
19 | Pausing tracking of a data frame may be required if an operation is about to
20 | be performed that creates a lot of groupings or that you otherwise don't
21 | want to pollute the history graph (e.g. maybe selecting something using
22 | an anti-join). Once paused the history is not updated until a \code{resume()} is
23 | called, or when the data frame is ungrouped (if \code{auto} is enabled).
24 | }
25 | \examples{
26 | iris \%>\% track() \%>\% pause() \%>\% history()
27 | }
28 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-pipe.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \arguments{
10 | \item{lhs}{A value or the magrittr placeholder.}
11 |
12 | \item{rhs}{A function call using the magrittr semantics.}
13 | }
14 | \value{
15 | The result of calling \code{rhs(lhs)}.
16 | }
17 | \description{
18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
19 | }
20 | \keyword{internal}
21 |
--------------------------------------------------------------------------------
/man/plot.trackr_graph.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{plot.trackr_graph}
4 | \alias{plot.trackr_graph}
5 | \title{Plots a history graph as html}
6 | \usage{
7 | \method{plot}{trackr_graph}(x, fill = "lightgrey", fontsize = "8", colour = "black", ...)
8 | }
9 | \arguments{
10 | \item{x}{a dtrackr history graph (e.g. output from \code{\link[=history]{history()}})}
11 |
12 | \item{fill}{the default node fill colour}
13 |
14 | \item{fontsize}{the default font size}
15 |
16 | \item{colour}{the default font colour}
17 |
18 | \item{...}{not used}
19 | }
20 | \value{
21 | HTML displayed
22 | }
23 | \description{
24 | Plots a history graph as html
25 | }
26 | \examples{
27 | library(dplyr)
28 | library(dtrackr)
29 | iris \%>\% comment("hello {.total} rows") \%>\% history() \%>\% plot()
30 | }
31 |
--------------------------------------------------------------------------------
/man/print.trackr_graph.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{print.trackr_graph}
4 | \alias{print.trackr_graph}
5 | \title{Print a history graph to the console}
6 | \usage{
7 | \method{print}{trackr_graph}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{a dtrackr history graph (e.g. output from \code{\link[=p_get]{p_get()}})}
11 |
12 | \item{...}{not used}
13 | }
14 | \value{
15 | nothing
16 | }
17 | \description{
18 | Print a history graph to the console
19 | }
20 | \examples{
21 | library(dplyr)
22 | library(dtrackr)
23 | iris \%>\% comment("hello {.total} rows") \%>\% history() \%>\% print()
24 | }
25 |
--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \docType{import}
4 | \name{reexports}
5 | \alias{reexports}
6 | \alias{filter}
7 | \title{Objects exported from other packages}
8 | \keyword{internal}
9 | \description{
10 | These objects are imported from other packages. Follow the links
11 | below to see their documentation.
12 |
13 | \describe{
14 | \item{dplyr}{\code{\link[dplyr]{filter}}}
15 | }}
16 |
17 |
--------------------------------------------------------------------------------
/man/reframe.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{reframe.trackr_df}
4 | \alias{reframe.trackr_df}
5 | \title{Summarise a data set}
6 | \usage{
7 | \method{reframe}{trackr_df}(.data, ..., .messages = "", .headline = "", .tag = NULL)
8 | }
9 | \arguments{
10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
12 | more details.}
13 |
14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of
15 | summary functions. The name will be the name of the variable in the result.
16 |
17 | The value can be:
18 | \itemize{
19 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
20 | \item A data frame, to add multiple columns from a single expression.
21 | }
22 |
23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was
24 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.}
25 |
26 | \item{.messages}{a set of glue specs. The glue code can use any summary
27 | variable defined in the ... parameter, or any global variable, or
28 | \{.strata\}}
29 |
30 | \item{.headline}{a headline glue spec. The glue code can use any summary
31 | variable defined in the ... parameter, or any global variable, or
32 | \{.strata\}}
33 |
34 | \item{.tag}{if you want the summary data from this step in the future then
35 | give it a name with .tag.}
36 | }
37 | \value{
38 | the .data dataframe summarised with the history graph updated showing
39 | the summarise operation as a new stage
40 | }
41 | \description{
42 | Summarising a data set acts in the normal \code{dplyr} manner to collapse groups
43 | to individual rows. Any columns resulting from the summary can be added to
44 | the history graph. In the history this also joins any stratified branches and
45 | allows you to generate some summary statistics about the un-grouped data. See
46 | \code{\link[dplyr:summarise]{dplyr::summarise()}}.
47 | }
48 | \examples{
49 | library(dplyr)
50 | library(dtrackr)
51 |
52 | tmp = iris \%>\% group_by(Species) \%>\% track()
53 | tmp \%>\% reframe(tibble(
54 | param = c("mean","min","max"),
55 | value = c(mean(Petal.Length), min(Petal.Length), max(Petal.Length))
56 | ), .messages="length {param}: {value}") \%>\% history()
57 | }
58 | \seealso{
59 | dplyr::reframe()
60 | }
61 |
--------------------------------------------------------------------------------
/man/resume.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{resume}
4 | \alias{resume}
5 | \title{Resume tracking the data frame.}
6 | \usage{
7 | resume(.data, ...)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 |
12 | \item{...}{
13 | Named arguments passed on to \code{\link[=p_group_by]{p_group_by}}\describe{
14 | \item{\code{.messages}}{a set of glue specs. The glue code can use any global
15 | variable, or \{.cols\} which is the columns that are being grouped by.}
16 | \item{\code{.headline}}{a headline glue spec. The glue code can use any global
17 | variable, or \{.cols\}.}
18 | \item{\code{.tag}}{if you want the summary data from this step in the future then
19 | give it a name with .tag.}
20 | \item{\code{.maxgroups}}{the maximum number of subgroups allowed before the tracking
21 | is paused.}
22 | \item{\code{...}}{In \code{group_by()}, variables or computations to group by.
23 | Computations are always done on the ungrouped data frame.
24 | To perform computations on the grouped data, you need to use
25 | a separate \code{mutate()} step before the \code{group_by()}.
26 | Computations are not allowed in \code{nest_by()}.
27 | In \code{ungroup()}, variables to remove from the grouping.
28 | Named arguments passed on to \code{\link[dplyr:group_by]{dplyr::group_by}}\describe{
29 | \item{\code{.add}}{When \code{FALSE}, the default, \code{group_by()} will
30 | override existing groups. To add to the existing groups, use
31 | \code{.add = TRUE}.
32 |
33 | This argument was previously called \code{add}, but that prevented
34 | creating a new grouping variable called \code{add}, and conflicts with
35 | our naming conventions.}
36 | \item{\code{.drop}}{Drop groups formed by factor levels that don't appear in the
37 | data? The default is \code{TRUE} except when \code{.data} has been previously
38 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.}
39 | \item{\code{x}}{A \code{\link[dplyr:tbl]{tbl()}}}
40 | }}
41 | }}
42 | }
43 | \value{
44 | the .data data frame with history graph tracking resumed
45 | }
46 | \description{
47 | This may reset the grouping of the tracked data if the grouping structure
48 | has changed since the data frame was paused. If you try and resume tracking a
49 | data frame with too many groups (as defined by \code{options("dtrackr.max_supported_groupings"=XX)})
50 | then the resume will fail and the data frame will still be paused. This can
51 | be overridden by specifying a value for the \code{.maxgroups} parameter.
52 | }
53 | \examples{
54 | library(dplyr)
55 | library(dtrackr)
56 | iris \%>\% track() \%>\% pause() \%>\% resume() \%>\% history()
57 | }
58 |
--------------------------------------------------------------------------------
/man/save_dot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dot.R
3 | \name{save_dot}
4 | \alias{save_dot}
5 | \title{Save DOT content to a file}
6 | \usage{
7 | save_dot(
8 | dot,
9 | filename,
10 | size = std_size$half,
11 | maxWidth = size$width,
12 | maxHeight = size$height,
13 | formats = c("dot", "png", "pdf", "svg"),
14 | landscape = size$rot != 0,
15 | ...
16 | )
17 | }
18 | \arguments{
19 | \item{dot}{a \code{graphviz} dot string}
20 |
21 | \item{filename}{the full path of the file name (minus extension for multiple
22 | formats)}
23 |
24 | \item{size}{a named list with 3 elements, \code{width} and \code{height} in inches and
25 | \code{rot} (rotation). A predefined set of standard sizes is available in the
26 | \link{std_size} object.}
27 |
28 | \item{maxWidth}{a width (on the paper) in inches if \code{size} is not defined}
29 |
30 | \item{maxHeight}{a height (on the paper) in inches if \code{size} is not defined}
31 |
32 | \item{formats}{some of \code{pdf},\code{dot},\code{svg},\code{png},\code{ps}}
33 |
34 | \item{landscape}{rotate the output by 270 degrees into a landscape format.
35 | \code{maxWidth} and \code{maxHeight} still apply and refer to the paper width to fit
36 | the flowchart into after rotation. (you might need to flip width and height)}
37 |
38 | \item{...}{ignored}
39 | }
40 | \value{
41 | a list with items \code{paths} with the absolute paths of the saved files
42 | as a named list, and \code{svg} as the SVG string of the rendered dot file.
43 | }
44 | \description{
45 | Convert a digraph in dot format to SVG and save it to a range of output file types
46 | }
47 | \examples{
48 | save_dot("digraph {A->B}",tempfile())
49 | }
50 |
--------------------------------------------------------------------------------
/man/setdiff.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{setdiff.trackr_df}
4 | \alias{setdiff.trackr_df}
5 | \title{Set operations}
6 | \usage{
7 | \method{setdiff}{trackr_df}(
8 | x,
9 | y,
10 | ...,
11 | .messages = "{.count.out} items in difference",
12 | .headline = "Difference"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{Vectors to combine.}
17 |
18 | \item{...}{a collection of tracked data frames to combine}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count.out\}}
22 |
23 | \item{.headline}{a glue spec. The glue code can use any global variable, or
24 | \{.count.out\}}
25 | }
26 | \value{
27 | the dplyr output with the history graph updated.
28 | }
29 | \description{
30 | These perform set operations on tracked dataframes. It merges the history
31 | of 2 (or more) dataframes and combines the rows (or columns). It calculates the total number of
32 | resulting rows as \{.count.out\}; in all other respects it performs exactly the same
33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
36 | underlying function details.
37 | }
38 | \examples{
39 | library(dplyr)
40 | library(dtrackr)
41 |
42 | # Set operations
43 | people = starwars \%>\% select(-films, -vehicles, -starships)
44 | chrs = people \%>\% track("start")
45 |
46 | lhs = chrs \%>\% include_any(
47 | species == "Human" ~ "{.included} humans",
48 | species == "Droid" ~ "{.included} droids"
49 | )
50 |
51 | # these are different subsets of the same data
52 | rhs = chrs \%>\% include_any(
53 | species == "Human" ~ "{.included} humans",
54 | species == "Gungan" ~ "{.included} gungans"
55 | ) \%>\% comment("{.count} gungans & humans")
56 |
57 |
58 | # Unions
59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
60 | # display the history of the result:
61 | set \%>\% history()
62 | nrow(set)
63 | # not run - display the flowchart:
64 | # set \%>\% flowchart()
65 |
66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
67 | # display the history of the result:
68 | set \%>\% history()
69 | nrow(set)
70 | # not run - display the flowchart:
71 | # set \%>\% flowchart()
72 |
73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
74 | # display the history of the result:
75 | set \%>\% history()
76 | nrow(set)
77 | # not run - display the flowchart:
78 | # set \%>\% flowchart()
79 |
80 | # Intersections and differences
81 |
82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
83 | # display the history of the result:
84 | set \%>\% history()
85 | nrow(set)
86 | # not run - display the flowchart:
87 | # set \%>\% flowchart()
88 |
89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
90 | # display the history of the result:
91 | set \%>\% history()
92 | nrow(set)
93 | # not run - display the flowchart:
94 | # set \%>\% flowchart()
95 | }
96 | \seealso{
97 | dplyr::setdiff()
98 | }
99 |
--------------------------------------------------------------------------------
/man/status.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{status}
4 | \alias{status}
5 | \title{Add a summary to the dtrackr history graph}
6 | \usage{
7 | status(
8 | .data,
9 | ...,
10 | .messages = .defaultMessage(),
11 | .headline = .defaultHeadline(),
12 | .type = "info",
13 | .asOffshoot = FALSE,
14 | .tag = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{.data}{a dataframe which may be grouped}
19 |
20 | \item{...}{any normal dplyr::summarise specification, e.g. \code{count=n()} or
21 | \code{av=mean(x)}, etcetera.}
22 |
23 | \item{.messages}{a character vector of glue specifications. A glue
24 | specification can refer to the summary outputs, any grouping variables of
25 | .data, the \{.strata\}, or any variables defined in the calling environment}
26 |
27 | \item{.headline}{a glue specification which can refer to grouping variables
28 | of .data, or any variables defined in the calling environment}
29 |
30 | \item{.type}{one of "info","exclusion": used to define formatting}
31 |
32 | \item{.asOffshoot}{do you want this comment to be an offshoot of the main
33 | flow (default = FALSE).}
34 |
35 | \item{.tag}{if you want the summary data from this step in the future then
36 | give it a name with .tag.}
37 | }
38 | \value{
39 | the same .data dataframe with the history metadata updated with the
40 | status inserted as a new stage
41 | }
42 | \description{
43 | In the middle of a pipeline you may wish to document something about the data
44 | that is more complex than the simple counts. \code{status} is essentially a
45 | \code{dplyr} summarisation step which is connected to a \code{glue} specification
46 | output, that is recorded in the data frame history. This means you can do an
47 | arbitrary interim summarisation and put the result into the flowchart without
48 | disrupting the pipeline flow.
49 | }
50 | \details{
51 | Because of the ... summary specification parameters MUST BE NAMED.
52 | }
53 | \examples{
54 | library(dplyr)
55 | library(dtrackr)
56 | tmp = iris \%>\% track() \%>\% group_by(Species)
57 | tmp \%>\% status(
58 | long = p_count_if(Petal.Length>5),
59 | short = p_count_if(Petal.Length<2),
60 | .messages="{Species}: {long} long ones & {short} short ones"
61 | ) \%>\% history()
62 | }
63 |
--------------------------------------------------------------------------------
/man/std_size.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dot.R
3 | \docType{data}
4 | \name{std_size}
5 | \alias{std_size}
6 | \title{Standard paper sizes}
7 | \format{
8 | An object of class \code{list} of length 12.
9 | }
10 | \usage{
11 | std_size
12 | }
13 | \description{
14 | A list of standard paper sizes for outputting flowcharts or other dot
15 | graphs. These include width and height dimensions in inches and can be
16 | used as one way to specify the output size of a dot graph, including
17 | flowcharts (see the \code{size} parameter of \code{\link[=flowchart]{flowchart()}}).
18 | }
19 | \details{
20 | The sizes available are \code{A4}, \code{A5}, \code{full} (fits a portrait A4 with margins), \code{half} (half an
21 | A4 with margins), \code{third}, \code{two_third}, \code{quarter}, \code{sixth} (all with reference to
22 | an A4 page with margins). There are 2 landscape sizes \code{A4_landscape} and \code{full_landscape} which
23 | fit an A4 page with or without margins. There are also 2 slide dimensions,
24 | to fit with standard presentation software dimensions.
25 |
26 | This is just a convenience. Similar effects can be achieved by providing \code{width} and \code{height}
27 | parameters to \code{\link[=flowchart]{flowchart()}} directly.
28 | }
29 | \keyword{datasets}
30 |
--------------------------------------------------------------------------------
/man/summarise.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{summarise.trackr_df}
4 | \alias{summarise.trackr_df}
5 | \title{Summarise a data set}
6 | \usage{
7 | \method{summarise}{trackr_df}(.data, ..., .messages = "", .headline = "", .tag = NULL)
8 | }
9 | \arguments{
10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
12 | more details.}
13 |
14 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of
15 | summary functions. The name will be the name of the variable in the result.
16 |
17 | The value can be:
18 | \itemize{
19 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
20 | \item A data frame, to add multiple columns from a single expression.
21 | }
22 |
23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was
24 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.}
25 |
26 | \item{.messages}{a set of glue specs. The glue code can use any summary
27 | variable defined in the ... parameter, or any global variable, or
28 | \{.strata\}}
29 |
30 | \item{.headline}{a headline glue spec. The glue code can use any summary
31 | variable defined in the ... parameter, or any global variable, or
32 | \{.strata\}}
33 |
34 | \item{.tag}{if you want the summary data from this step in the future then
35 | give it a name with .tag.}
36 | }
37 | \value{
38 | the .data dataframe summarised with the history graph updated showing
39 | the summarise operation as a new stage
40 | }
41 | \description{
42 | Summarising a data set acts in the normal \code{dplyr} manner to collapse groups
43 | to individual rows. Any columns resulting from the summary can be added to
44 | the history graph. In the history this also joins any stratified branches and
45 | allows you to generate some summary statistics about the un-grouped data. See
46 | \code{\link[dplyr:summarise]{dplyr::summarise()}}.
47 | }
48 | \examples{
49 | library(dplyr)
50 | library(dtrackr)
51 |
52 | tmp = iris \%>\% group_by(Species) \%>\% track()
53 | tmp \%>\% summarise(avg = mean(Petal.Length), .messages="{avg} length") \%>\% history()
54 | }
55 | \seealso{
56 | dplyr::summarise()
57 | }
58 |
--------------------------------------------------------------------------------
/man/tagged.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{tagged}
4 | \alias{tagged}
5 | \title{Retrieve tagged data in the history graph}
6 | \usage{
7 | tagged(.data, .tag = NULL, .strata = NULL, .glue = NULL, ...)
8 | }
9 | \arguments{
10 | \item{.data}{the tracked dataframe.}
11 |
12 | \item{.tag}{(optional) the tag to retrieve.}
13 |
14 | \item{.strata}{(optional) filter the tagged data by the strata. set to "" to filter just the top level ungrouped data.}
15 |
16 | \item{.glue}{(optional) a glue specification which will be applied to the tagged content to generate a \code{.label} for the tagged content.}
17 |
18 | \item{...}{(optional) any other named parameters will be passed to \code{glue::glue} and can be used to generate a label.}
19 | }
20 | \value{
21 | various things depending on what is requested.
22 |
23 | By default a tibble with a \code{.tag} column and all associated summary values in a nested \code{.content} column.
24 |
25 | If a \code{.strata} column is specified the results are filtered to just those that match a given \code{.strata} grouping (i.e. this will be the grouping label on the flowchart). Ungrouped content will have an empty "" as \code{.strata}
26 |
27 | If \code{.tag} is specified the result will be for a single tag and \code{.content} will be automatically un-nested to give a single un-nested dataframe of the content captured at the \code{.tag} tagged step.
28 | This could be single or multiple rows depending on whether the original data was grouped at the point of tagging.
29 |
30 | If both the \code{.tag} and \code{.glue} are specified a \code{.label} column will be computed from \code{.glue} and the tagged content. If the result of this is a single row then just the string value of \code{.label} is returned.
31 |
32 | If just the \code{.glue} is specified, an un-nested dataframe with \code{.tag},\code{.strata} and \code{.label} columns with a label for each tag in each strata.
33 |
34 | If this seems complex then the best thing is to experiment until you get the output you want, leaving any \code{.glue} options until you think you know what you are doing. It made sense at the time.
35 | }
36 | \description{
37 | Any counts at the individual stages that were stored with a \code{.tag} option in a pipeline step can be recovered here. The idea here is to provide a quick way to access a single value
38 | for the counts or other details tagged in a pipeline in a format that can be reported in the text of a document (e.g. for a results section). The consort statement vignette
39 | has further examples of use.
40 | }
41 | \examples{
42 | library(dplyr)
43 | library(dtrackr)
44 | tmp = iris \%>\% track() \%>\% comment(.tag = "step1")
45 | tmp = tmp \%>\% filter(Species!="versicolor") \%>\% group_by(Species)
46 | tmp \%>\% comment(.tag="step2") \%>\% tagged(.glue = "{.count}/{.total}")
47 | }
48 |
--------------------------------------------------------------------------------
/man/track.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{track}
4 | \alias{track}
5 | \title{Start tracking the dtrackr history graph}
6 | \usage{
7 | track(
8 | .data,
9 | .messages = .defaultMessage(),
10 | .headline = .defaultHeadline(),
11 | .tag = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{.data}{a dataframe which may be grouped}
16 |
17 | \item{.messages}{a character vector of glue specifications. A glue
18 | specification can refer to any grouping variables of .data, or any
19 | variables defined in the calling environment, the \{.total\} variable which
20 | is the count of all rows, the \{.count\} variable which is the count of
21 | rows in the current group and the \{.strata\} which describes the current
22 | group. Defaults to the value of \code{getOption("dtrackr.default_message")}.}
23 |
24 | \item{.headline}{a glue specification which can refer to grouping variables
25 | of .data, or any variables defined in the calling environment, or the
26 | \{.total\} variable which is \code{nrow(.data)}, or \{.strata\} a summary of the
27 | current group. Defaults to the value of \code{getOption("dtrackr.default_headline")}.}
28 |
29 | \item{.tag}{if you want the summary data from this step in the future then
30 | give it a name with .tag.}
31 | }
32 | \value{
33 | the .data dataframe with additional history graph metadata, to allow
34 | tracking.
35 | }
36 | \description{
37 | Start tracking the dtrackr history graph
38 | }
39 | \examples{
40 | library(dplyr)
41 | library(dtrackr)
42 | iris \%>\% track() \%>\% history()
43 | }
44 |
--------------------------------------------------------------------------------
/man/ungroup.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{ungroup.trackr_df}
4 | \alias{ungroup.trackr_df}
5 | \title{Remove a stratification from a data set}
6 | \usage{
7 | \method{ungroup}{trackr_df}(
8 | x,
9 | ...,
10 | .messages = .defaultMessage(),
11 | .headline = .defaultHeadline(),
12 | .tag = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{x}{A \code{\link[dplyr:tbl]{tbl()}}}
17 |
18 | \item{...}{variables to remove from the grouping.}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count\}. The default is "total \{.count\} items"}
22 |
23 | \item{.headline}{a headline glue spec. The glue code can use \{.count\} and
24 | \{.strata\}.}
25 |
26 | \item{.tag}{if you want the summary data from this step in the future then
27 | give it a name with .tag.}
28 | }
29 | \value{
30 | the .data dataframe but ungrouped with the history graph
31 | updated showing the ungroup operation as a new stage.
32 | }
33 | \description{
34 | Un-grouping a data set logically combines the different arms. In the history
35 | this joins any stratified branches and acts as a specific type of \code{\link[=status]{status()}},
36 | allowing you to generate some summary statistics about the un-grouped data.
37 | See \code{\link[dplyr:group_by]{dplyr::ungroup()}}.
38 | }
39 | \examples{
40 | library(dplyr)
41 | library(dtrackr)
42 |
43 | tmp = iris \%>\% group_by(Species) \%>\% comment("A test")
44 | tmp \%>\% ungroup(.messages="{.count} items in combined") \%>\% history()
45 | }
46 | \seealso{
47 | dplyr::ungroup()
48 | }
49 |
--------------------------------------------------------------------------------
/man/union.trackr_df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{union.trackr_df}
4 | \alias{union.trackr_df}
5 | \title{Set operations}
6 | \usage{
7 | \method{union}{trackr_df}(
8 | x,
9 | y,
10 | ...,
11 | .messages = "{.count.out} unique items in union",
12 | .headline = "Distinct union"
13 | )
14 | }
15 | \arguments{
16 | \item{x, y}{Vectors to combine.}
17 |
18 | \item{...}{a collection of tracked data frames to combine}
19 |
20 | \item{.messages}{a set of glue specs. The glue code can use any global
21 | variable, or \{.count.out\}}
22 |
23 | \item{.headline}{a glue spec. The glue code can use any global variable, or
24 | \{.count.out\}}
25 | }
26 | \value{
27 | the dplyr output with the history graph updated.
28 | }
29 | \description{
30 | These perform set operations on tracked dataframes. They merge the history
31 | of 2 (or more) dataframes and combine the rows (or columns). The total number of
32 | resulting rows is calculated as \{.count.out\}; in all other respects they perform exactly the same
33 | operation as the equivalent \code{dplyr} operation. See \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}},
34 | \code{\link[dplyr:bind_cols]{dplyr::bind_cols()}}, \code{\link[dplyr:setops]{dplyr::intersect()}}, \code{\link[dplyr:setops]{dplyr::union()}},
35 | \code{\link[dplyr:setops]{dplyr::setdiff()}}, or \code{\link[dplyr:setops]{dplyr::union_all()}} for the
36 | underlying function details.
37 | }
38 | \examples{
39 | library(dplyr)
40 | library(dtrackr)
41 |
42 | # Set operations
43 | people = starwars \%>\% select(-films, -vehicles, -starships)
44 | chrs = people \%>\% track("start")
45 |
46 | lhs = chrs \%>\% include_any(
47 | species == "Human" ~ "{.included} humans",
48 | species == "Droid" ~ "{.included} droids"
49 | )
50 |
51 | # these are different subsets of the same data
52 | rhs = chrs \%>\% include_any(
53 | species == "Human" ~ "{.included} humans",
54 | species == "Gungan" ~ "{.included} gungans"
55 | ) \%>\% comment("{.count} gungans & humans")
56 |
57 |
58 | # Unions
59 | set = bind_rows(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
60 | # display the history of the result:
61 | set \%>\% history()
62 | nrow(set)
63 | # not run - display the flowchart:
64 | # set \%>\% flowchart()
65 |
66 | set = union(lhs,rhs) \%>\% comment("{.count} human,droids and gungans")
67 | # display the history of the result:
68 | set \%>\% history()
69 | nrow(set)
70 | # not run - display the flowchart:
71 | # set \%>\% flowchart()
72 |
73 | set = union_all(lhs,rhs) \%>\% comment("{.count} 2*human,droids and gungans")
74 | # display the history of the result:
75 | set \%>\% history()
76 | nrow(set)
77 | # not run - display the flowchart:
78 | # set \%>\% flowchart()
79 |
80 | # Intersections and differences
81 |
82 | set = setdiff(lhs,rhs) \%>\% comment("{.count} droids and gungans")
83 | # display the history of the result:
84 | set \%>\% history()
85 | nrow(set)
86 | # not run - display the flowchart:
87 | # set \%>\% flowchart()
88 |
89 | set = intersect(lhs,rhs) \%>\% comment("{.count} humans")
90 | # display the history of the result:
91 | set \%>\% history()
92 | nrow(set)
93 | # not run - display the flowchart:
94 | # set \%>\% flowchart()
95 | }
96 | \seealso{
97 | generics::union()
98 | }
99 |
--------------------------------------------------------------------------------
/man/untrack.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dtrackr.R
3 | \name{untrack}
4 | \alias{untrack}
5 | \title{Remove tracking from the dataframe}
6 | \usage{
7 | untrack(.data)
8 | }
9 | \arguments{
10 | \item{.data}{a tracked dataframe}
11 | }
12 | \value{
13 | the .data dataframe with history graph metadata removed.
14 | }
15 | \description{
16 | Remove tracking from the dataframe
17 | }
18 | \examples{
19 | library(dplyr)
20 | library(dtrackr)
21 | iris \%>\% track() \%>\% untrack() \%>\% class()
22 | }
23 |
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/apple-touch-icon.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon-16x16.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon-32x32.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon-48x48.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/favicon.ico
--------------------------------------------------------------------------------
/pkgdown/favicon/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "short_name": "",
4 | "icons": [
5 | {
6 | "src": "/web-app-manifest-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png",
9 | "purpose": "maskable"
10 | },
11 | {
12 | "src": "/web-app-manifest-512x512.png",
13 | "sizes": "512x512",
14 | "type": "image/png",
15 | "purpose": "maskable"
16 | }
17 | ],
18 | "theme_color": "#ffffff",
19 | "background_color": "#ffffff",
20 | "display": "standalone"
21 | }
--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/web-app-manifest-192x192.png
--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/pkgdown/favicon/web-app-manifest-512x512.png
--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | if(requireNamespace('spelling', quietly = TRUE))
2 | spelling::spell_check_test(vignettes = TRUE, error = FALSE,
3 | skip_on_cran = TRUE)
4 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(dplyr)
3 | library(dtrackr)
4 |
5 | test_check("dtrackr")
6 |
7 |
--------------------------------------------------------------------------------
/tests/testthat/test-examples.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 |
4 | # test_that("examples run to completion", {
5 | # devtools::run_examples()
6 | #
7 | # })
8 |
9 | if (identical(Sys.getenv("NOT_CRAN"), "true")) {
10 | test_examples(path = "../..")
11 | }
12 |
--------------------------------------------------------------------------------
/tests/testthat/test-github-issues.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # TRUE if a node of history graph `g` in stratum `.strata` has a label containing `.message` (after the start or a ">", and before a "<").
4 | graphContains = function(g, .strata, .message) {
5 | return(g$nodes %>% filter(.data$.strata == .env$.strata, .label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # .env$ picks the argument; a bare `.strata==.strata` compared the column with itself
6 | } # NOTE(review): paste0() drops the fixed() marker, so .message is still matched as a regex
7 | # TRUE if exactly `.count` nodes of history graph `g` in stratum `.strata` have a label containing `.message` (after the start or a ">", and before a "<").
8 | graphContainsExactly = function(g, .strata, .message, .count) {
9 | return(g$nodes %>% filter(.data$.strata == .env$.strata, .label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() == .count) # .env$ picks the argument; a bare `.strata==.strata` compared the column with itself
10 | } # NOTE(review): paste0() drops the fixed() marker, so .message is still matched as a regex
11 |
12 |
13 | test_that("Issue #25 fixed", {
14 | tibble(id = 1:20, x = rnorm(20)) %>% track() %>% inner_join(
15 | tibble(id = 1:20, y = runif(20)) %>% track()
16 | ) %>%
17 | p_get() %>%
18 | graphContains("", "Inner join by id") %>%
19 | testthat::expect_true()
20 | # pre fixing this would output "Inner join by " without `id`
21 | })
22 |
23 |
24 | test_that("Issue #26 fixed", {
25 | expected = iris %>% track() %>% group_by(Species) %>% filter(Species == "setosa") %>% untrack()
26 | # pre fixing this throws error.
27 | actual = try(iris %>% track() %>% group_by(Species) %>% include_any(Species == "setosa" ~ "{.included}") %>% untrack())
28 | actual2 = try(iris %>% track() %>% group_by(Species) %>% exclude_all(Species != "setosa" ~ "{.excluded}") %>% untrack())
29 | testthat::expect_equal(actual2,expected)
30 | testthat::expect_equal(actual,expected)
31 | })
32 |
33 |
34 | test_that("Issue #33 fixed", {
35 | # distinct was using the wrong function signature and not passing it on properly
36 | correct = mtcars %>% dplyr::distinct(carb) %>% dim()
37 | was_wrong = mtcars %>% dtrackr::track() %>% dtrackr::p_distinct(carb) %>% dim()
38 | testthat::expect_equal(correct, was_wrong)
39 | })
40 |
--------------------------------------------------------------------------------
/tests/testthat/test-p_exclude.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # TRUE if a node of history graph `g` in stratum `.strata` has a label containing `.message` (after the start or a ">", and before a "<").
4 | graphContains = function(g, .strata, .message) {
5 | return(g$nodes %>% filter(.data$.strata == .env$.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # .env$ picks the argument; a bare `.strata==.strata` compared the column with itself
6 | } # NOTE(review): paste0() drops the fixed() marker, so .message is still matched as a regex
7 |
8 |
9 | test_that("exclusions works", {
10 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
11 |
12 | h = df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c%%2==0 ~ "removed {.excluded} even values") %>% p_comment("test2")
13 | g = h %>% p_get()
14 | testthat::expect_true(g %>% graphContains("a:1","removed 1 even values"))
15 | testthat::expect_true(g %>% graphContains("a:2","removed 2 even values"))
16 | testthat::expect_true(nrow(h) == 3)
17 | })
18 |
19 | test_that("exclusions works groupwise", {
20 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
21 |
22 | h = df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c==max(c) ~ "removed {.excluded} max values") %>% p_comment("test2")
23 | g = h %>% p_get()
24 | testthat::expect_true(
25 | g %>% graphContains("a:1","removed 1 max values") &
26 | g %>% graphContains("a:2","removed 1 max values") &
27 | nrow(h) == 4
28 | )
29 |
30 | })
31 |
32 |
33 | test_that("exclusions works groupwise when nothing is excluded", {
34 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
35 |
36 | h = df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c>=5 ~ "removed {.excluded} c values lt 5") %>% p_comment("test2")
37 | g = h %>% p_get()
38 | testthat::expect_true(
39 | g %>% graphContains("a:1","removed 0 c values lt 5") &
40 | g %>% graphContains("a:2","removed 2 c values lt 5") &
41 | nrow(h) == 4
42 | )
43 | })
44 |
45 | test_that("exclusions can reference variable in function", {
46 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
47 |
48 | fn = function(.df) {
49 | someVar = "xyz"
50 | h = .df %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c==max(c) ~ "removed {.excluded} {someVar} values") %>% p_comment("test2")
51 | return(h)
52 | }
53 | k = fn(df)
54 | g = k %>% p_get()
55 | testthat::expect_true(
56 | g %>% graphContains("a:1","removed 1 xyz values") &
57 | g %>% graphContains("a:2","removed 1 xyz values") &
58 | nrow(k) == 4
59 | )
60 |
61 | })
62 |
63 | test_that("missing values exclusions works", {
64 | dfNa = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,NA,4,5,6)) %>% group_by(a)
65 |
66 | h = dfNa %>% p_clear() %>% p_comment("test") %>% p_exclude_all(c%%2==0 ~ "removed {.matched} even values and {.missing} missing", na.rm = TRUE) %>% p_comment("test2")
67 | g = h %>% p_get()
68 | testthat::expect_true(
69 | g %>% graphContains("a:1","removed 1 even values and 1 missing") &
70 | g %>% graphContains("a:2","removed 2 even values and 0 missing") &
71 | nrow(h) == 2
72 | )
73 | })
74 |
--------------------------------------------------------------------------------
/tests/testthat/test-p_group_modify.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # TRUE if a node of history graph `g` in stratum `.strata` has a label containing `.message` (after the start or a ">", and before a "<").
4 | graphContains = function(g, .strata, .message) {
5 | return(g$nodes %>% filter(.data$.strata == .env$.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0) # .env$ picks the argument; a bare `.strata==.strata` compared the column with itself
6 | } # NOTE(review): paste0() drops the fixed() marker, so .message is still matched as a regex
7 |
8 |
9 | test_that("default mutate is not recorded", {
10 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
11 |
12 | h = df %>% p_clear() %>% p_comment("test") %>% p_group_modify(
13 | function(d,g,...) { tibble(e=c(4,8)*g$a,f=c(4,8)+g$a) },
14 | .messages="was {.count.in}, now {.count.out}"
15 | ) %>% p_comment("test2")
16 | g = h %>% p_get()
17 | testthat::expect_true(
18 | g %>% graphContains("a:1","test") &
19 | g %>% graphContains("a:1","was 3, now 2")
20 | )
21 |
22 | })
23 |
24 | # df %>% p_clear() %>% p_modify(function(d) { d %>% filter(c==2) }, .message="was {.count.in}, now {.count.out}") %>% p_get()
25 | # TODO: FAILS: df %>% p_clear() %>% p_modify(function(d) { d %>% filter(c==2) }, .headline="was {nrow(df)}") %>% p_get()
26 | # df %>% p_clear() %>% p_modify(function(d) { d %>% filter(c==2) }) %>% p_get() # NULL
27 |
28 | test_that("use of global expressions in headline", {
29 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
30 |
31 | h = df %>% p_clear() %>% p_comment("test") %>% p_group_modify(
32 | function(d,g,...) { d %>% filter(c==2) },
33 | .messages="was {.count.in}, now {.count.out}",
34 | .headline="was {nrow(df)}"
35 | ) %>% p_comment("test2")
36 | g = h %>% p_get()
37 | testthat::expect_true(
38 | g %>% graphContains("a:1","test") &
39 | g %>% graphContains("a:1","was 6")
40 | )
41 |
42 | })
43 |
44 |
45 |
46 | test_that("distinct works", {
47 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
48 | df = bind_rows(df,df)
49 | # every row now appears twice, so each group of 6 rows should lose 3 duplicates
50 | h = df %>% p_clear() %>% p_comment("test") %>% p_distinct() %>% p_comment("test2")
51 | g = h %>% p_get()
52 | testthat::expect_true(g %>% graphContains("a:1","removing 3 duplicates"))
53 | testthat::expect_equal(nrow(g$head), 2)
54 | testthat::expect_equal(groups(h) %>% sapply(as_label) %>% as.character(), groups(df) %>% sapply(as_label) %>% as.character())
55 | # expect_equal compares the complete grouping vectors, so a length mismatch cannot be hidden by recycling in `==`
56 |
57 | })
58 |
--------------------------------------------------------------------------------
/tests/testthat/test-p_include.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Test helper: TRUE when at least one row of g$nodes has a .label matching .message preceded by the label start or ">" and followed by "<".
4 | graphContains = function(g, .strata, .message) { # NOTE(review): if g$nodes has a .strata column, `.strata==.strata` below compares that column with itself, so the .strata argument may never restrict the match - confirm
5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0)
6 | } # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker presumably does not survive and .message is interpreted as a regex - verify
7 |
8 | test_that("inclusions works", {
9 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
10 |
11 | h = df %>% p_clear() %>% p_comment("test") %>% p_include_any(c%%2==0 ~ "including {.included} even values") %>% p_comment("test2")
12 | g = h %>% p_get()
13 | testthat::expect_true(
14 | g %>% graphContains("a:1","including 1 even values") &
15 | g %>% graphContains("a:2","including 2 even values") &
16 | nrow(h) == 3
17 | )
18 |
19 | })
20 |
21 | test_that("inclusions works groupwise", {
22 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
23 |
24 | h = df %>% p_clear() %>% p_comment("test") %>% p_include_any(c==max(c) ~ "kept {.included} max values") %>% p_comment("test2")
25 | g = h %>% p_get()
26 | testthat::expect_true(
27 | g %>% graphContains("a:1","kept 1 max values") &
28 | g %>% graphContains("a:2","kept 1 max values") &
29 | nrow(h) == 2
30 | )
31 |
32 | })
33 |
34 |
35 | test_that("inclusions works groupwise when nothing is included", {
36 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
37 |
38 | h = df %>% p_clear() %>% p_comment("test") %>% p_include_any(c>=5 ~ "kept {.included} c values gte 5") %>% p_comment("test2")
39 | g = h %>% p_get()
40 | testthat::expect_true(
41 | g %>% graphContains("a:1","kept 0 c values gte 5") &
42 | g %>% graphContains("a:2","kept 2 c values gte 5") &
43 | nrow(h) == 2
44 | )
45 | })
46 |
47 | test_that("inclusions can reference variable in function", {
48 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
49 |
50 | fn = function(.df) {
51 | someVar = "xyz"
52 | h = .df %>% p_clear() %>% p_comment("test") %>% p_include_any(c==max(c) ~ "kept {.included} {someVar} values") %>% p_comment("test2")
53 | return(h)
54 | }
55 | k = fn(df)
56 | g = k %>% p_get()
57 | testthat::expect_true(
58 | g %>% graphContains("a:1","kept 1 xyz values") &
59 | g %>% graphContains("a:2","kept 1 xyz values") &
60 | nrow(k) == 2
61 | )
62 |
63 | })
64 |
65 | test_that("missing values inclusions works", {
66 | dfNa = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,NA,4,5,6)) %>% group_by(a)
67 |
68 | h = dfNa %>% p_clear() %>% p_comment("test") %>% p_include_any(c%%2==0 ~ "kept {.matched} even values and {.missing} missing", na.rm = FALSE) %>% p_comment("test2")
69 | g = h %>% p_get()
70 | testthat::expect_true(
71 | g %>% graphContains("a:1","kept 1 even values and 1 missing") &
72 | g %>% graphContains("a:2","kept 2 even values and 0 missing") &
73 | nrow(h) == 4
74 | )
75 | })
76 |
--------------------------------------------------------------------------------
/tests/testthat/test-p_others.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Test helper: TRUE when at least one row of g$nodes has a .label matching .message preceded by the label start or ">" and followed by "<".
4 | graphContains = function(g, .strata, .message) { # NOTE(review): if g$nodes has a .strata column, `.strata==.strata` below compares that column with itself, so the .strata argument may never restrict the match - confirm
5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0)
6 | } # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker presumably does not survive and .message is interpreted as a regex - verify
7 |
8 | test_that("ungrouping works", {
9 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
10 |
11 | h = df %>% p_clear() %>% p_comment("test") %>% p_ungroup(.messages="{.count} items") %>% p_comment("test2")
12 | g = h %>% p_get()
13 | testthat::expect_true(
14 | g %>% graphContains("","6 items") &
15 | g %>% graphContains("","test2") &
16 | nrow(h) == 6
17 | )
18 |
19 | })
20 |
21 |
22 | test_that("summarise works", {
23 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
24 |
25 | h = df %>% p_clear() %>% p_comment("test") %>% p_summarise(mean_c=mean(c), count=n(), .messages=c("{mean_c} average c","{count} items")) %>% p_comment("test2")
26 | g = h %>% p_get()
27 | testthat::expect_true(
28 | g %>% graphContains("a:1","3 items") &
29 | g %>% graphContains("","test2") &
30 | nrow(h) == 2
31 | )
32 |
33 | })
34 |
35 | test_that("default mutate is not recorded", {
36 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
37 |
38 | h = df %>% p_clear() %>% p_comment("test") %>% p_mutate(x="hello") %>% p_comment("test2")
39 | g = h %>% p_get()
40 | testthat::expect_true(
41 | g %>% graphContains("a:1","test") &
42 | g %>% graphContains("a:1","test2") &
43 | nrow(g$nodes) == 4
44 | )
45 |
46 | })
47 |
48 |
49 | test_that("filter works", {
50 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
51 |
52 | h = df %>% p_clear() %>% p_comment("test") %>% p_filter(b!=3) %>% p_comment("test2")
53 | g = h %>% p_get()
54 | testthat::expect_true(
55 | g %>% graphContains("a:1","excluded 1 items") &
56 | g %>% graphContains("","test2") &
57 | nrow(h) == 4
58 | )
59 |
60 | })
61 |
62 |
63 | # df %>% p_clear() %>% p_status(c%%2==0 ~ "consisting of {count} even items",c%%2!=0 ~ "and {count} odd items") %>% p_ungroup() %>% p_get()
64 |
--------------------------------------------------------------------------------
/tests/testthat/test-p_status.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(dtrackr)
3 | # Test helper: TRUE when at least one row of g$nodes has a .label matching .message preceded by the label start or ">" and followed by "<".
4 | graphContains = function(g, .strata, .message) { # NOTE(review): if g$nodes has a .strata column, `.strata==.strata` below compares that column with itself, so the .strata argument may never restrict the match - confirm
5 | return(g$nodes %>% filter(.strata==.strata,.label %>% stringr::str_detect(paste0("(^|>)",stringr::fixed(.message),"<"))) %>% nrow() > 0)
6 | } # NOTE(review): paste0() returns a plain character vector, so the stringr::fixed() marker presumably does not survive and .message is interpreted as a regex - verify
7 |
8 | test_that("basic status works", {
9 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
10 |
11 | g = df %>% p_clear() %>% p_comment("test2") %>% p_status(count=n(),.messages = "{count} items") %>% p_comment("test2") %>% p_get()
12 | testthat::expect_true(
13 | g %>% graphContains("a:1","3 items")
14 | )
15 |
16 | })
17 |
18 | test_that("status on empty df does not crash", {
19 | dfempty = tibble(a=integer(), b=integer(), c=integer()) %>% group_by(a)
20 |
21 | g = dfempty %>% p_clear() %>% p_comment("test") %>% p_status() %>% p_get()
22 | testthat::expect_true(
23 | nrow(g$nodes)==0
24 | )
25 | })
26 |
27 | test_that("no args status works", {
28 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
29 |
30 | g = df %>% p_clear() %>% p_comment("test2") %>% p_status() %>% p_comment("test2") %>% p_get()
31 | testthat::expect_true(
32 | g %>% graphContains("a:1","3 items")
33 | )
34 |
35 | })
36 |
37 |
38 | test_that("more complex status works", {
39 | df = tibble(a=c(1,1,1,2,2,2), b=c(1,2,3,1,2,3), c=c(1,2,3,4,5,6)) %>% group_by(a)
40 |
41 | g = df %>% p_clear() %>% p_comment("test2") %>% p_status(count=n(),m=mean(b),z=max(c),.messages = c("{count} items","{m} mean b","{z} max c")) %>% p_comment("test2") %>% p_get()
42 | testthat::expect_true(
43 | g %>% graphContains("a:1","3 items") &
44 | g %>% graphContains("a:1","3 max c") &
45 | g %>% graphContains("a:1","2 mean b")
46 | )
47 |
48 | })
49 |
50 | test_that("subgroup counts work", { # tracks survival::cgd grouped by treat and checks the per-sex subgroup counts recorded by p_count_subgroup
51 | g = survival::cgd %>%
52 | p_track() %>%
53 | p_comment() %>%
54 | p_group_by(treat) %>%
55 | p_comment() %>%
56 | p_count_subgroup(
57 | .subgroup = sex,
58 | .messages="{.name}: {.count}/{.subtotal}",
59 | .headline="{treat}: {.subtotal}/{.total}"
60 | ) %>%
61 | p_comment() %>%
62 | p_get()
63 | testthat::expect_true( # NOTE(review): the rendered headline text is passed in the .strata position of graphContains(g, .strata, .message), so only the second string is matched against .label - confirm this is intended
64 | g %>% graphContains("placebo: 120/203","female: 20/120") &
65 | g %>% graphContains("rIFN-g: 83/203","male: 68/83")
66 | )
67 | })
68 |
--------------------------------------------------------------------------------
/trackr.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/joining-pipelines.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "dtrackr - Joining data pipelines"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{dtrackr - Joining data pipelines}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 | ```{r setup, include=FALSE}
11 | knitr::opts_chunk$set(echo = TRUE)
12 | library(dplyr)
13 | library(tidyr)
14 | library(dtrackr)
15 | ```
16 |
17 | # Joins across data sets
18 |
19 | Joining `dtrackr` tracked data is supported and allows us to combine linked data
20 | sets. In this toy example the data sets are characters from a popular film from
21 | my youth.
22 |
23 | ```{r}
24 |
25 | # here we create a set of linked data from the starwars data
26 | # in a real example these data sets would have come from different places
27 | people = starwars %>% select(-films, -vehicles, -starships)
28 | vehicles = starwars %>% select(name,vehicles) %>% unnest(cols = c(vehicles))
29 | starships = starwars %>% select(name,starships) %>% unnest(cols = c(starships))
30 | films = starwars %>% select(name,films) %>% unnest(cols = c(films))
31 | # these 4 data frames are linked together by the name attribute
32 |
33 | # we track both input data sets:
34 | tmp1 = people %>% track() %>% comment("People df {.total}")
35 | tmp2 = films %>% track() %>% comment("Films df {.total}") %>% comment("a test comment")
36 |
37 | # and here we (re)join the two data sets:
38 | tmp1 %>%
39 | inner_join(tmp2, by="name") %>%
40 | comment("joined {.total}") %>%
41 | flowchart()
42 |
43 | # The join message used by inner_join here is configurable but defaults to
44 | # {.count.lhs} on LHS
45 | # {.count.rhs} on RHS
46 | # {.count.out} in linked set
47 |
48 | ```
49 |
50 | All `dplyr` join types are supported by `dtrackr`, which allows us to report the
51 | numbers on either side of the join and the resulting total. This can help
52 | detect whether any data items are lost during the join. However, we do not yet capture
53 | data that becomes excluded during joins, as the interpretation depends on the
54 | type of join employed.
55 |
56 | # Unions
57 |
58 | Another type of binary operation is a union. This is a simpler problem and works
59 | as expected. In this example the early part of the pipeline is detected to be
60 | the same on both branches of the data flow. This therefore results in a flow
61 | that splits and subsequently joins again during the union (`bind_rows`)
62 | operation.
63 |
64 | ```{r}
65 |
66 | tmp = people %>% comment("start")
67 |
68 | tmp1 = tmp %>% include_any(
69 | species == "Human" ~ "{.included} humans",
70 | species == "Droid" ~ "{.included} droids"
71 | )
72 |
73 | tmp2 = tmp %>% include_any(
74 | species == "Gungan" ~ "{.included} gungans"
75 | ) %>% comment("{.count} gungans")
76 |
77 | tmp3 = bind_rows(tmp1,tmp2) %>% comment("{.count} human,droids and gungans")
78 | tmp3 %>% flowchart()
79 |
80 | ```
81 |
82 | Other `dplyr` set operations, such as `setdiff()`, `union()`, `union_all()`
83 | and `intersect()`, are also supported; see the function documentation for
84 | details.
85 |
--------------------------------------------------------------------------------
/vignettes/joss/figure1-consort.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terminological/dtrackr/44ee14cc70634e90703260fad08868d6427ef557/vignettes/joss/figure1-consort.pdf
--------------------------------------------------------------------------------