├── .Rbuildignore ├── .github ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ └── issue_template.md └── workflows │ ├── R-CMD-check.yaml │ ├── check-bioc.yml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── batch_correction.R ├── clustering.R ├── data.R ├── developmental_classifier_helpers.R ├── differential_discovery.R ├── dimensionality_reduction.R ├── downsampling.R ├── feature_extraction.R ├── flowsom_metacluster_helpers.R ├── imports.R ├── io.R ├── metaclustering.R ├── modeling_helpers.R ├── patient-level_modeling.R ├── phenograph_helpers.R ├── preprocessing.R ├── quality_control.R ├── sysdata.rda ├── tidytof-package.R ├── tof_tbl.R ├── upsample.R ├── utils.R └── visualization.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── data ├── ddpr_data.rda ├── ddpr_metadata.rda ├── metal_masterlist.rda └── phenograph_data.rda ├── inst ├── CITATION ├── WORDLIST └── extdata │ ├── aml │ ├── bm5721_healthy_basal.fcs │ └── parbiu_dx_basal.fcs │ ├── ddpr │ ├── Healthy1_Basal.fcs │ └── UPN1_Basal.fcs │ ├── ddpr_metadata.csv │ ├── mix │ ├── H1_PhenoGraph_cluster1.fcs │ └── bm5721_healthy_basal.fcs │ ├── mix2 │ ├── H1_PhenoGraph_cluster1.csv │ ├── H1_PhenoGraph_cluster2.csv │ └── bm5721_healthy_basal.fcs │ ├── phenograph │ ├── H1_PhenoGraph_cluster1.fcs │ ├── H1_PhenoGraph_cluster2.fcs │ └── H1_PhenoGraph_cluster3.fcs │ ├── phenograph_csv │ ├── H1_PhenoGraph_cluster1.csv │ └── H1_PhenoGraph_cluster2.csv │ ├── scaffold │ ├── Blood_Balb.fcs │ ├── BoneMarrow_129.fcs │ └── Colon_129.fcs │ ├── statistical_scaffold │ ├── TIN_TU1_B6Antibodies_Day8.fcs │ └── TIN_TU1_Untreated_Day8.fcs │ └── surgery │ ├── 121002hipsurg_102_1h.fcs │ ├── 121002hipsurg_102_1mo.fcs │ └── 121002hipsurg_102_24h.fcs ├── man ├── as_SingleCellExperiment.Rd ├── as_flowFrame.Rd ├── as_flowSet.Rd ├── as_seurat.Rd ├── as_tof_tbl.Rd ├── as_tof_tbl.flowSet.Rd ├── 
cosine_similarity.Rd ├── ddpr_data.Rd ├── ddpr_metadata.Rd ├── dot.Rd ├── figures │ ├── README-unnamed-chunk-16-1.png │ ├── README-unnamed-chunk-17-1.png │ ├── README-unnamed-chunk-18-1.png │ ├── README-unnamed-chunk-24-1.png │ ├── README-unnamed-chunk-24-2.png │ ├── README-unnamed-chunk-25-1.png │ ├── README-unnamed-chunk-25-2.png │ ├── README-unnamed-chunk-30-1.png │ ├── README-unnamed-chunk-31-1.png │ ├── README-unnamed-chunk-32-1.png │ ├── README-unnamed-chunk-33-1.png │ ├── README-unnamed-chunk-52-1.png │ ├── README-unnamed-chunk-53-1.png │ ├── README-unnamed-chunk-54-1.png │ ├── README-unnamed-chunk-55-1.png │ ├── README-unnamed-chunk-56-1.png │ ├── README-unnamed-chunk-57-1.png │ └── tidytof_logo.png ├── get_extension.Rd ├── l2_normalize.Rd ├── magnitude.Rd ├── make_flowcore_annotated_data_frame.Rd ├── metal_masterlist.Rd ├── new_tof_model.Rd ├── new_tof_tibble.Rd ├── phenograph_data.Rd ├── reexports.Rd ├── rev_asinh.Rd ├── tidytof_example_data.Rd ├── tof_analyze_abundance.Rd ├── tof_analyze_abundance_diffcyt.Rd ├── tof_analyze_abundance_glmm.Rd ├── tof_analyze_abundance_ttest.Rd ├── tof_analyze_expression.Rd ├── tof_analyze_expression_diffcyt.Rd ├── tof_analyze_expression_lmm.Rd ├── tof_analyze_expression_ttest.Rd ├── tof_annotate_clusters.Rd ├── tof_apply_classifier.Rd ├── tof_assess_channels.Rd ├── tof_assess_clusters_distance.Rd ├── tof_assess_clusters_entropy.Rd ├── tof_assess_clusters_knn.Rd ├── tof_assess_flow_rate.Rd ├── tof_assess_flow_rate_tibble.Rd ├── tof_assess_model.Rd ├── tof_assess_model_new_data.Rd ├── tof_assess_model_tuning.Rd ├── tof_batch_correct.Rd ├── tof_batch_correct_quantile.Rd ├── tof_batch_correct_quantile_tibble.Rd ├── tof_batch_correct_rescale.Rd ├── tof_build_classifier.Rd ├── tof_calculate_flow_rate.Rd ├── tof_check_model_args.Rd ├── tof_classify_cells.Rd ├── tof_clean_metric_names.Rd ├── tof_cluster.Rd ├── tof_cluster_ddpr.Rd ├── tof_cluster_flowsom.Rd ├── tof_cluster_grouped.Rd ├── tof_cluster_kmeans.Rd ├── 
tof_cluster_phenograph.Rd ├── tof_cluster_tibble.Rd ├── tof_compute_km_curve.Rd ├── tof_cosine_dist.Rd ├── tof_create_grid.Rd ├── tof_create_recipe.Rd ├── tof_downsample.Rd ├── tof_downsample_constant.Rd ├── tof_downsample_density.Rd ├── tof_downsample_prop.Rd ├── tof_estimate_density.Rd ├── tof_extract_central_tendency.Rd ├── tof_extract_emd.Rd ├── tof_extract_features.Rd ├── tof_extract_jsd.Rd ├── tof_extract_proportion.Rd ├── tof_extract_threshold.Rd ├── tof_find_best.Rd ├── tof_find_cv_predictions.Rd ├── tof_find_emd.Rd ├── tof_find_jsd.Rd ├── tof_find_knn.Rd ├── tof_find_log_rank_threshold.Rd ├── tof_find_panel_info.Rd ├── tof_fit_split.Rd ├── tof_generate_palette.Rd ├── tof_get_model_mixture.Rd ├── tof_get_model_outcomes.Rd ├── tof_get_model_penalty.Rd ├── tof_get_model_training_data.Rd ├── tof_get_model_type.Rd ├── tof_get_model_x.Rd ├── tof_get_model_y.Rd ├── tof_get_panel.Rd ├── tof_is_numeric.Rd ├── tof_knn_density.Rd ├── tof_log_rank_test.Rd ├── tof_make_knn_graph.Rd ├── tof_make_roc_curve.Rd ├── tof_metacluster.Rd ├── tof_metacluster_consensus.Rd ├── tof_metacluster_flowsom.Rd ├── tof_metacluster_hierarchical.Rd ├── tof_metacluster_kmeans.Rd ├── tof_metacluster_phenograph.Rd ├── tof_plot_cells_density.Rd ├── tof_plot_cells_embedding.Rd ├── tof_plot_cells_layout.Rd ├── tof_plot_cells_scatter.Rd ├── tof_plot_clusters_heatmap.Rd ├── tof_plot_clusters_mst.Rd ├── tof_plot_clusters_volcano.Rd ├── tof_plot_heatmap.Rd ├── tof_plot_model.Rd ├── tof_plot_model_linear.Rd ├── tof_plot_model_logistic.Rd ├── tof_plot_model_multinomial.Rd ├── tof_plot_model_survival.Rd ├── tof_plot_sample_features.Rd ├── tof_plot_sample_heatmap.Rd ├── tof_postprocess.Rd ├── tof_predict.Rd ├── tof_prep_recipe.Rd ├── tof_preprocess.Rd ├── tof_read_csv.Rd ├── tof_read_data.Rd ├── tof_read_fcs.Rd ├── tof_read_file.Rd ├── tof_reduce_dimensions.Rd ├── tof_reduce_pca.Rd ├── tof_reduce_tsne.Rd ├── tof_reduce_umap.Rd ├── tof_set_panel.Rd ├── tof_spade_density.Rd ├── tof_split_data.Rd ├── 
tof_split_tidytof_reduced_dimensions.Rd ├── tof_train_model.Rd ├── tof_transform.Rd ├── tof_tune_glmnet.Rd ├── tof_upsample.Rd ├── tof_upsample_distance.Rd ├── tof_upsample_neighbor.Rd ├── tof_write_csv.Rd ├── tof_write_data.Rd ├── tof_write_fcs.Rd └── where.Rd ├── src ├── .gitignore ├── RcppExports.cpp └── code.cpp ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── test-batch_correction.R │ ├── test-clustering.R │ ├── test-developmental_classifier.R │ ├── test-differential_discovery.R │ ├── test-dimensionality_reduction.R │ ├── test-downsampling.R │ ├── test-feature_extraction.R │ ├── test-io.R │ ├── test-metaclustering.R │ ├── test-patient-level_modeling.R │ ├── test-preprocessing.R │ ├── test-quality_control.R │ ├── test-tof_tbl.R │ ├── test-upsample.R │ ├── test-utils.R │ └── test-visualization.R └── vignettes ├── .gitignore ├── clustering.Rmd ├── contributing-to-tidytof.Rmd ├── differential-discovery-analysis.Rmd ├── dimensionality-reduction.Rmd ├── downsampling.Rmd ├── feature-extraction.Rmd ├── modeling.Rmd ├── preprocessing.Rmd ├── quality-control.Rmd ├── reading-and-writing-data.Rmd └── tidytof.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^data-raw$ 6 | ^betas$ 7 | ^notes$ 8 | ^manuscript$ 9 | ^\.github$ 10 | ^codecov\.yml$ 11 | ^_pkgdown\.yml$ 12 | ^docs$ 13 | ^pkgdown$ 14 | ^man/figures/README-* 15 | ^dev$ 16 | ^\.git/ 17 | \.Rproj\.user/ 18 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | The Bioconductor community values 2 | 3 | * an open approach to science that promotes the 
sharing of ideas, code, and expertise 4 | * collaboration 5 | * diversity and inclusivity 6 | * a kind and welcoming environment 7 | * community contributions 8 | 9 | In line with these values, Bioconductor is dedicated to providing a welcoming, supportive, collegial experience free of harassment, intimidation, and bullying regardless of: 10 | 11 | * identity: gender, gender identity and expression, sexual orientation, disability, physical appearance, ethnicity, body size, race, age, religion, etc. 12 | * intellectual position: approaches to data analysis, software preferences, coding style, scientific perspective, etc. 13 | * stage of career 14 | 15 | In order to uphold these values, members of the Bioconductor community are required to follow the Code of Conduct. The latest version of the Bioconductor project Code of Conduct is available at http://bioconductor.org/about/code-of-conduct/. Please read the Code of Conduct before contributing to this project. 16 | 17 | Thank you! 18 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to tidytof 2 | 3 | This outlines how to propose a change to tidytof. 4 | For more detailed info about contributing to this, and other tidyverse packages, please see the 5 | [**development contributing guide**](https://rstd.io/tidy-contrib). 6 | 7 | ## Fixing typos 8 | 9 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 10 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 11 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line. 
12 | 13 | ## Bigger changes 14 | 15 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 16 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 17 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). 18 | 19 | ### Pull request process 20 | 21 | * Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("keyes-timothy/tidytof", fork = TRUE)`. 22 | 23 | * Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 24 | If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 25 | * Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. 26 | 27 | * Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. 28 | The title of your PR should briefly describe the change. 29 | The body of your PR should contain `Fixes #issue-number`. 30 | 31 | * For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in <https://style.tidyverse.org/news.html>. 32 | 33 | ### Code style 34 | 35 | * New code should follow the tidyverse [style guide](https://style.tidyverse.org). 36 | You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 37 | 38 | * We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 39 | 40 | * We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 
41 | Contributions with test cases included are easier to accept. 42 | 43 | ## Code of Conduct 44 | 45 | Please note that the tidytof project is released with a 46 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this 47 | project you agree to abide by its terms. 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report or feature request 3 | about: Describe a bug you've seen or make a case for a new feature 4 | --- 5 | 6 | Please briefly describe your problem and what output you expect. If you have a question, please don't use this form. Instead, ask on <https://stackoverflow.com/> or <https://community.rstudio.com/>. 7 | 8 | Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading <https://www.tidyverse.org/help/#reprex>. 9 | 10 | Brief description of the problem 11 | 12 | ```r 13 | # insert reprex here 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | R_KEEP_PKG_SOURCE: yes 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::rcmdcheck 27 | needs: check 28 | 29 | - uses: r-lib/actions/check-r-package@v2 30 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - uses: r-lib/actions/setup-pandoc@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | extra-packages: any::pkgdown, local::. 
36 | needs: website 37 | 38 | - name: Build site 39 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 40 | shell: Rscript {0} 41 | 42 | - name: Deploy to GitHub pages 🚀 43 | if: github.event_name != 'pull_request' 44 | uses: JamesIves/github-pages-deploy-action@v4.4.1 45 | with: 46 | clean: false 47 | branch: gh-pages 48 | folder: docs 49 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | docs 6 | inst/doc 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: tidytof 3 | Title: Analyze High-dimensional Cytometry Data Using Tidy Data Principles 4 | Version: 0.99.8 5 | Authors@R: 6 | c(person(given = "Timothy", 7 | family = "Keyes", 8 | role = c("cre"), 9 | email = "tkeyes@stanford.edu", 10 | comment = c(ORCID = "0000-0003-0423-9679")), 11 | person(given = "Kara", 12 | family = "Davis", 13 | role = c("rth", "own"), 14 | email = "kldavis@stanford.edu"), 15 | person(given = "Garry", 16 | family = "Nolan", 17 | role = c("rth", "own"), 18 | email = "gnolan@stanford.edu")) 19 | Description: This package implements an interactive, scientific analysis 20 | pipeline for high-dimensional cytometry data built using tidy data principles. 21 | It is specifically designed to play well with both the tidyverse and 22 | Bioconductor software ecosystems, with functionality for reading/writing 23 | data files, data cleaning, preprocessing, clustering, 24 | visualization, modeling, and other quality-of-life functions. tidytof 25 | implements a "grammar" of high-dimensional cytometry data analysis. 
26 | License: MIT + file LICENSE 27 | Depends: 28 | R (>= 4.3) 29 | Imports: 30 | doParallel, 31 | dplyr, 32 | flowCore, 33 | foreach, 34 | ggplot2, 35 | ggraph, 36 | glmnet, 37 | methods, 38 | parallel, 39 | purrr, 40 | readr, 41 | recipes, 42 | rlang, 43 | stringr, 44 | survival, 45 | tidygraph, 46 | tidyr, 47 | tidyselect, 48 | yardstick, 49 | Rcpp, 50 | tibble, 51 | stats, 52 | utils, 53 | RcppHNSW 54 | Suggests: 55 | ConsensusClusterPlus, 56 | Biobase, 57 | broom, 58 | covr, 59 | diffcyt, 60 | emdist, 61 | FlowSOM, 62 | forcats, 63 | ggrepel, 64 | HDCytoData, 65 | knitr, 66 | markdown, 67 | philentropy, 68 | rmarkdown, 69 | Rtsne, 70 | statmod, 71 | SummarizedExperiment, 72 | testthat (>= 3.0.0), 73 | lmerTest, 74 | lme4, 75 | ggridges, 76 | spelling, 77 | scattermore, 78 | preprocessCore, 79 | SingleCellExperiment, 80 | Seurat, 81 | SeuratObject, 82 | embed, 83 | rsample, 84 | BiocGenerics 85 | Config/testthat/edition: 3 86 | Encoding: UTF-8 87 | LazyData: false 88 | RoxygenNote: 7.3.1 89 | LinkingTo: 90 | Rcpp 91 | URL: https://keyes-timothy.github.io/tidytof, https://keyes-timothy.github.io/tidytof/ 92 | BugReports: https://github.com/keyes-timothy/tidytof/issues 93 | VignetteBuilder: knitr 94 | Language: en-US 95 | biocViews: SingleCell, FlowCytometry 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: Timothy Keyes 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Timothy Keyes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # tidytof 0.99.8 2 | 3 | NEW FEATURES 4 | 5 | * Vignette bug fixes. 6 | 7 | SIGNIFICANT USER-VISIBLE CHANGES 8 | 9 | * None 10 | 11 | # tidytof 0.99.7 12 | 13 | NEW FEATURES 14 | 15 | * Expand introductory vignette to include more details about how to explore the tidytof package as a beginner. 16 | 17 | SIGNIFICANT USER-VISIBLE CHANGES 18 | 19 | * None 20 | 21 | 22 | # tidytof 0.99.6 23 | 24 | NEW FEATURES 25 | 26 | * Update tests. 27 | 28 | SIGNIFICANT USER-VISIBLE CHANGES 29 | 30 | * None 31 | 32 | 33 | # tidytof 0.99.5 34 | 35 | NEW FEATURES 36 | 37 | * Update dependencies. 38 | 39 | SIGNIFICANT USER-VISIBLE CHANGES 40 | 41 | * None 42 | 43 | # tidytof 0.99.4 44 | 45 | NEW FEATURES 46 | 47 | * Update tests. 48 | 49 | SIGNIFICANT USER-VISIBLE CHANGES 50 | 51 | * None 52 | 53 | # tidytof 0.99.3 54 | 55 | NEW FEATURES 56 | 57 | * Update docs. 
58 | 59 | SIGNIFICANT USER-VISIBLE CHANGES 60 | 61 | * None 62 | 63 | # tidytof 0.99.2 64 | 65 | NEW FEATURES 66 | 67 | * Update docs. 68 | 69 | SIGNIFICANT USER-VISIBLE CHANGES 70 | 71 | * None 72 | 73 | BUG FIXES 74 | 75 | * None 76 | 77 | # tidytof 0.99.1 78 | 79 | NEW FEATURES 80 | 81 | * Removed unnecessary files that caused build to fail. 82 | 83 | SIGNIFICANT USER-VISIBLE CHANGES 84 | 85 | * None 86 | 87 | BUG FIXES 88 | 89 | * None 90 | 91 | 92 | 93 | 94 | # tidytof 0.99.0 95 | 96 | NEW FEATURES 97 | 98 | * Added a `NEWS.md` file to track changes to the package. 99 | 100 | SIGNIFICANT USER-VISIBLE CHANGES 101 | 102 | * Submitted to Bioconductor 103 | 104 | BUG FIXES 105 | 106 | * None 107 | 108 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | find_jaccard_coefficients <- function(knn_ids) { 5 | .Call(`_tidytof_find_jaccard_coefficients`, knn_ids) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /R/imports.R: -------------------------------------------------------------------------------- 1 | #' @import Rcpp 2 | #' 3 | #' @importFrom methods as 4 | #' 5 | #' @importFrom stats cov 6 | #' @importFrom stats median 7 | #' @importFrom stats quantile 8 | #' @importFrom stats setNames 9 | #' 10 | #' @importFrom utils data 11 | #' @importFrom utils file_test 12 | #' 13 | #' @importFrom rlang are_na 14 | #' 15 | #' @importFrom foreach `%dopar%` 16 | #' @importFrom foreach `%do%` 17 | #' 18 | NULL 19 | 20 | # dplyr reexports -------------------------------------------------------------- 21 | 22 | #' @importFrom dplyr %>% 23 | #' @export 24 | dplyr::`%>%` 25 | 26 | 27 | # rlang reexports -------------------------------------------------------------- 28 | 
29 | #' @importFrom rlang `:=` 30 | #' @export 31 | rlang::`:=` 32 | 33 | #' @importFrom rlang `.data` 34 | #' @export 35 | rlang::`.data` 36 | 37 | # tidyselect reexports --------------------------------------------------------- 38 | 39 | #' Select variables with a function 40 | #' 41 | #' This is a copy of \code{\link[tidyselect]{where}}, a selection helper that 42 | #' selects the variables for which a predicate function returns TRUE. See 43 | #' \code{\link[tidyselect]{language}} for more details about tidyselection. 44 | #' 45 | #' This help file was replicated verbatim from \code{\link[tidyselect]{tidyselect-package}}. 46 | #' 47 | #' @param fn A function that returns TRUE or FALSE (technically, a predicate function). 48 | #' Can also be a purrr-like formula. 49 | #' 50 | #' @importFrom tidyselect vars_select_helpers 51 | #' 52 | #' @export 53 | #' 54 | #' @return A predicate that can be used to select columns from a data.frame. 55 | #' 56 | #' @references Lionel Henry and Hadley Wickham (2021). tidyselect: 57 | #' Select from a Set of Strings. R package version 1.1.1. 
58 | #' https://CRAN.R-project.org/package=tidyselect 59 | #' 60 | #' @examples 61 | #' NULL 62 | #' 63 | where <- tidyselect::vars_select_helpers$where 64 | 65 | # Alias required for help links in downstream packages 66 | 67 | #' @aliases select_helpers 68 | #' @importFrom tidyselect contains 69 | #' @export 70 | tidyselect::contains 71 | 72 | #' @importFrom tidyselect ends_with 73 | #' @export 74 | tidyselect::ends_with 75 | 76 | #' @importFrom tidyselect everything 77 | #' @export 78 | tidyselect::everything 79 | 80 | #' @importFrom tidyselect matches 81 | #' @export 82 | tidyselect::matches 83 | 84 | #' @importFrom tidyselect num_range 85 | #' @export 86 | tidyselect::num_range 87 | 88 | #' @importFrom tidyselect starts_with 89 | #' @export 90 | tidyselect::starts_with 91 | 92 | #' @importFrom tidyselect last_col 93 | #' @export 94 | tidyselect::last_col 95 | 96 | #' @importFrom tidyselect any_of 97 | #' @export 98 | tidyselect::any_of 99 | 100 | #' @importFrom tidyselect all_of 101 | #' @export 102 | tidyselect::all_of 103 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/R/sysdata.rda -------------------------------------------------------------------------------- /R/tidytof-package.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @useDynLib tidytof, .registration = TRUE 3 | ## usethis namespace: end 4 | NULL 5 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 
| threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /data/ddpr_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/data/ddpr_data.rda -------------------------------------------------------------------------------- /data/ddpr_metadata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/data/ddpr_metadata.rda -------------------------------------------------------------------------------- /data/metal_masterlist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/data/metal_masterlist.rda -------------------------------------------------------------------------------- /data/phenograph_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/data/phenograph_data.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | pkgVer <- function(pkg) { # Builds the citation `note`: repository URL for `pkg` followed by its version string. 2 | if (!exists("meta") || is.null(meta)) meta <- packageDescription(pkg) # reuse `meta` when it is already defined, otherwise read the package DESCRIPTION 3 | ver <- meta$Version 4 | paste0('https://github.com/keyes-timothy/', pkg, ' - R package version ', ver) # owner URL + pkg, so the path is .../keyes-timothy/<pkg> 5 | } 6 | 7 | c( 8 | 9 | bibentry(bibtype="manual", 10 | title = "tidytof: a user-friendly framework for scalable and reproducible high-dimensional cytometry data analysis", 11 | author = c(person(given = "Timothy", 12 | family = "Keyes", 13 | role = c("cre"), 14 | email = "tkeyes@stanford.edu", 15 | comment = c(ORCID = 
"0000-0003-0423-9679")), 16 | person(given = "Kara", 17 | family = "Davis", 18 | role = c("rth", "own"), 19 | email = "kldavis@stanford.edu"), 20 | person(given = "Garry", 21 | family = "Nolan", 22 | role = c("rth", "own"), 23 | email = "gnolan@stanford.edu")), 24 | year = format(Sys.Date(), "%Y"), 25 | url = "http://www.bioconductor.org/packages/tidytof", 26 | note = pkgVer('tidytof'), 27 | doi = '10.18129/B9.bioc.tidytof' 28 | ), 29 | bibentry(bibtype = "article", 30 | title = "tidytof: a user-friendly framework for scalable and reproducible high-dimensional cytometry data analysis", 31 | author = c( # c() on person objects; replaces the old-style personList() 32 | as.person("Keyes, Timothy J."), 33 | as.person("Koladiya, Abhishek"), 34 | as.person("Lo, Yu-Chen"), 35 | as.person("Nolan, Garry P."), 36 | as.person("Davis, Kara L.") 37 | ), 38 | year = "2023", 39 | journal = "Bioinformatics Advances", 40 | volume = "3", 41 | number = "1", 42 | doi = "10.1093/bioadv/vbad071", 43 | url = "https://academic.oup.com/bioinformaticsadvances/article/3/1/vbad071/7192984" 44 | ) 45 | 46 | ) 47 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | AML 2 | Arcsinh 3 | BCP 4 | BCR 5 | CCR 6 | CMD 7 | Codecov 8 | CyTOF 9 | Cytometry 10 | DAA 11 | DDPR 12 | Downsample 13 | Downsampling 14 | EF 15 | EMD 16 | Epub 17 | FCS 18 | FlowSOM 19 | FlowSOM's 20 | Fluidigm 21 | Fruchterman 22 | GLMMs 23 | Hadley 24 | Harrel 25 | Harrel's 26 | IDE's 27 | IDE’s 28 | JH 29 | JSD 30 | KNN 31 | LMMs 32 | Lymphoblastic 33 | MRD 34 | Metacluster 35 | Metaclustering 36 | NCI 37 | OLREs 38 | PMC 39 | PMCID 40 | PMID 41 | PhenoGraph 42 | PhenoGraph's 43 | Phenograph 44 | Postprocessing 45 | Pre 46 | Preprocess 47 | Qiu 48 | RANN 49 | RStudio 50 | Reingold 51 | Sarno 52 | Simonds 53 | UMAP 54 | Upsample 55 | WBC 56 | Wickham 57 | al 58 | aml 59 | analyte 60 | approzimation 61 | arcsine 62 | arcsinh 63 | asinh 64 | auc 65 | 
bioinformaticians 66 | biostatistics 67 | canberra 68 | cells’ 69 | centroid 70 | centroids 71 | chebyshev 72 | cofactor 73 | composable 74 | csv 75 | cv 76 | cytof 77 | cytometer 78 | cytometry 79 | daa 80 | data’s 81 | ddpr 82 | dea 83 | denoising 84 | desc 85 | df 86 | diffcyt 87 | doParallel 88 | doi 89 | downsample 90 | downsampled 91 | downsamples 92 | downsampling 93 | dplyr 94 | edgeR 95 | effectb 96 | effectc 97 | embeddings 98 | emd 99 | et 100 | extdata 101 | fc 102 | fcs 103 | flowCore 104 | flowFrame 105 | flowFrames 106 | flowSOM 107 | flowSet 108 | flowSets 109 | flowsom 110 | foreach 111 | fpr 112 | ggplot 113 | ggraph 114 | ggrepel 115 | ggridges 116 | glmm 117 | glmnet 118 | glmnet's 119 | https 120 | hyperparamters 121 | intracellular 122 | ith 123 | jsd 124 | kk 125 | kmeans 126 | knn 127 | limma 128 | lmm 129 | lymphoblastic 130 | mae 131 | mahalanobis 132 | mcquitty 133 | metacluster 134 | metaclustering 135 | metaclusters 136 | minkowski 137 | misclassified 138 | mse 139 | multiclass 140 | multistep 141 | multivariable 142 | myeloid 143 | ncol 144 | nm 145 | nn 146 | nrow 147 | num 148 | obeservations 149 | overfit 150 | pS 151 | pam 152 | parallelize 153 | pca 154 | pearson 155 | phenograph 156 | phenotypically 157 | postprocess 158 | pre 159 | pred 160 | predcitions 161 | preprocess 162 | preprocessed 163 | purrr 164 | reproducibility 165 | rescale 166 | roc 167 | rsample 168 | rset 169 | rsplit 170 | simplicial 171 | som 172 | spearman 173 | subpopulation 174 | subsampled 175 | tSNE 176 | tbl 177 | tbl's 178 | th 179 | tibble 180 | tibbles 181 | tidyselect 182 | tidyselection 183 | tidytof's 184 | tidyverse 185 | timepoint 186 | tof 187 | tsne 188 | ttest 189 | umap 190 | unadjusted 191 | ungrouped 192 | unstimulated 193 | upsample 194 | upsampled 195 | upsampling 196 | userbase 197 | vec 198 | vectorized 199 | voom 200 | xdim 201 | xshift 202 | ydim 203 | ’s 204 | interpretable 205 | tpr 206 | unsplit 207 | Kaplan 208 | supercluster 209 | 
bioinformatics 210 | FlowSet 211 | -------------------------------------------------------------------------------- /inst/extdata/aml/bm5721_healthy_basal.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/aml/bm5721_healthy_basal.fcs -------------------------------------------------------------------------------- /inst/extdata/aml/parbiu_dx_basal.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/aml/parbiu_dx_basal.fcs -------------------------------------------------------------------------------- /inst/extdata/ddpr/Healthy1_Basal.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/ddpr/Healthy1_Basal.fcs -------------------------------------------------------------------------------- /inst/extdata/ddpr/UPN1_Basal.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/ddpr/UPN1_Basal.fcs -------------------------------------------------------------------------------- /inst/extdata/mix/H1_PhenoGraph_cluster1.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/mix/H1_PhenoGraph_cluster1.fcs -------------------------------------------------------------------------------- /inst/extdata/mix/bm5721_healthy_basal.fcs: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/mix/bm5721_healthy_basal.fcs -------------------------------------------------------------------------------- /inst/extdata/mix2/bm5721_healthy_basal.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/mix2/bm5721_healthy_basal.fcs -------------------------------------------------------------------------------- /inst/extdata/phenograph/H1_PhenoGraph_cluster1.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/phenograph/H1_PhenoGraph_cluster1.fcs -------------------------------------------------------------------------------- /inst/extdata/phenograph/H1_PhenoGraph_cluster2.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/phenograph/H1_PhenoGraph_cluster2.fcs -------------------------------------------------------------------------------- /inst/extdata/phenograph/H1_PhenoGraph_cluster3.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/phenograph/H1_PhenoGraph_cluster3.fcs -------------------------------------------------------------------------------- /inst/extdata/scaffold/Blood_Balb.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/scaffold/Blood_Balb.fcs -------------------------------------------------------------------------------- 
/inst/extdata/scaffold/BoneMarrow_129.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/scaffold/BoneMarrow_129.fcs -------------------------------------------------------------------------------- /inst/extdata/scaffold/Colon_129.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/scaffold/Colon_129.fcs -------------------------------------------------------------------------------- /inst/extdata/statistical_scaffold/TIN_TU1_B6Antibodies_Day8.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/statistical_scaffold/TIN_TU1_B6Antibodies_Day8.fcs -------------------------------------------------------------------------------- /inst/extdata/statistical_scaffold/TIN_TU1_Untreated_Day8.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/statistical_scaffold/TIN_TU1_Untreated_Day8.fcs -------------------------------------------------------------------------------- /inst/extdata/surgery/121002hipsurg_102_1h.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/surgery/121002hipsurg_102_1h.fcs -------------------------------------------------------------------------------- /inst/extdata/surgery/121002hipsurg_102_1mo.fcs: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/surgery/121002hipsurg_102_1mo.fcs -------------------------------------------------------------------------------- /inst/extdata/surgery/121002hipsurg_102_24h.fcs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/inst/extdata/surgery/121002hipsurg_102_24h.fcs -------------------------------------------------------------------------------- /man/as_SingleCellExperiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{as_SingleCellExperiment} 4 | \alias{as_SingleCellExperiment} 5 | \alias{as_SingleCellExperiment.tof_tbl} 6 | \title{Coerce an object into a \code{\link[SingleCellExperiment]{SingleCellExperiment}}} 7 | \usage{ 8 | as_SingleCellExperiment(x, ...) 9 | 10 | \method{as_SingleCellExperiment}{tof_tbl}( 11 | x, 12 | channel_cols = where(tof_is_numeric), 13 | reduced_dimensions_cols, 14 | metadata_cols = where(function(.x) !tof_is_numeric(.x)), 15 | split_reduced_dimensions = FALSE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{A tof_tbl} 21 | 22 | \item{...}{Unused.} 23 | 24 | \item{channel_cols}{Unquoted column names representing columns that contain 25 | single-cell protein measurements. Supports tidyselect helpers. 26 | If nothing is specified, the default is all numeric columns.} 27 | 28 | \item{reduced_dimensions_cols}{Unquoted column names representing columns that contain 29 | dimensionality reduction embeddings, such as tSNE or UMAP embeddings. 30 | Supports tidyselect helpers.} 31 | 32 | \item{metadata_cols}{Unquoted column names representing columns that contain 33 | metadata about the samples from which each cell was collected. 
If nothing 34 | is specified, the default is all non-numeric columns.} 35 | 36 | \item{split_reduced_dimensions}{A boolean value indicating whether the 37 | dimensionality reduction results in x should be split into separate slots in the resulting 38 | \code{\link[SingleCellExperiment]{SingleCellExperiment}}. If FALSE (the default), 39 | the split will not be performed and the 40 | \code{\link[SingleCellExperiment]{reducedDims}} slot in the result will have 41 | a single entry ("tidytof_reduced_dimensions"). If TRUE, the split will be 42 | performed and the \code{\link[SingleCellExperiment]{reducedDims}} slot in 43 | the result will have 1-4 entries depending on which dimensionality reduction 44 | results are present in x ("tidytof_pca", "tidytof_tsne", "tidytof_umap", 45 | and "tidytof_reduced_dimensions"). Note that "tidytof_reduced_dimensions" will 46 | include all dimensionality reduction results that are not named according to 47 | tidytof's pca, umap, and tsne conventions.} 48 | } 49 | \value{ 50 | A \code{\link[SingleCellExperiment]{SingleCellExperiment}} 51 | 52 | A \code{\link[SingleCellExperiment]{SingleCellExperiment}}. 53 | } 54 | \description{ 55 | Coerce an object into a \code{\link[SingleCellExperiment]{SingleCellExperiment}} 56 | 57 | Coerce a tof_tbl into a \code{\link[SingleCellExperiment]{SingleCellExperiment}} 58 | } 59 | \examples{ 60 | NULL 61 | 62 | NULL 63 | 64 | } 65 | -------------------------------------------------------------------------------- /man/as_flowFrame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{as_flowFrame} 4 | \alias{as_flowFrame} 5 | \alias{as_flowFrame.tof_tbl} 6 | \title{Coerce an object into a \code{\link[flowCore]{flowFrame}}} 7 | \usage{ 8 | as_flowFrame(x, ...) 9 | 10 | \method{as_flowFrame}{tof_tbl}(x, ...) 
11 | } 12 | \arguments{ 13 | \item{x}{A tof_tbl.} 14 | 15 | \item{...}{Unused.} 16 | } 17 | \value{ 18 | A \code{\link[flowCore]{flowFrame}} 19 | 20 | A \code{\link[flowCore]{flowFrame}}. Note that all non-numeric 21 | columns in `x` will be removed. 22 | } 23 | \description{ 24 | Coerce an object into a \code{\link[flowCore]{flowFrame}} 25 | 26 | Coerce a tof_tbl into a \code{\link[flowCore]{flowFrame}} 27 | } 28 | \examples{ 29 | NULL 30 | 31 | NULL 32 | } 33 | -------------------------------------------------------------------------------- /man/as_flowSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{as_flowSet} 4 | \alias{as_flowSet} 5 | \alias{as_flowSet.tof_tbl} 6 | \title{Coerce an object into a \code{\link[flowCore]{flowSet}}} 7 | \usage{ 8 | as_flowSet(x, ...) 9 | 10 | \method{as_flowSet}{tof_tbl}(x, group_cols, ...) 11 | } 12 | \arguments{ 13 | \item{x}{A tof_tbl.} 14 | 15 | \item{...}{Unused.} 16 | 17 | \item{group_cols}{Unquoted names of the columns in `x` that should 18 | be used to group cells into separate \code{\link[flowCore]{flowFrame}}s. 19 | Supports tidyselect helpers. Defaults to 20 | NULL (all cells are written into a single \code{\link[flowCore]{flowFrame}}).} 21 | } 22 | \value{ 23 | A \code{\link[flowCore]{flowSet}} 24 | 25 | A \code{\link[flowCore]{flowSet}}. Note that all non-numeric 26 | columns in `x` will be removed. 
27 | } 28 | \description{ 29 | Coerce an object into a \code{\link[flowCore]{flowSet}} 30 | 31 | Coerce a tof_tbl into a \code{\link[flowCore]{flowSet}} 32 | } 33 | \examples{ 34 | NULL 35 | 36 | NULL 37 | } 38 | -------------------------------------------------------------------------------- /man/as_seurat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{as_seurat} 4 | \alias{as_seurat} 5 | \alias{as_seurat.tof_tbl} 6 | \title{Coerce an object into a \code{\link[SeuratObject]{SeuratObject}}} 7 | \usage{ 8 | as_seurat(x, ...) 9 | 10 | \method{as_seurat}{tof_tbl}( 11 | x, 12 | channel_cols = where(tof_is_numeric), 13 | reduced_dimensions_cols, 14 | metadata_cols = where(function(.x) !tof_is_numeric(.x)), 15 | split_reduced_dimensions = FALSE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{A tof_tbl} 21 | 22 | \item{...}{Unused.} 23 | 24 | \item{channel_cols}{Unquoted column names representing columns that contain 25 | single-cell protein measurements. Supports tidyselect helpers. 26 | If nothing is specified, the default is all numeric columns.} 27 | 28 | \item{reduced_dimensions_cols}{Unquoted column names representing columns that contain 29 | dimensionality reduction embeddings, such as tSNE or UMAP embeddings. 30 | Supports tidyselect helpers.} 31 | 32 | \item{metadata_cols}{Unquoted column names representing columns that contain 33 | metadata about the samples from which each cell was collected. If nothing 34 | is specified, the default is all non-numeric columns.} 35 | 36 | \item{split_reduced_dimensions}{A boolean value indicating whether the 37 | dimensionality reduction results in x should be split into separate slots in the resulting 38 | \code{\link[SingleCellExperiment]{SingleCellExperiment}}. 
If FALSE (the default), 39 | the split will not be performed and the 40 | \code{\link[SingleCellExperiment]{reducedDims}} slot in the result will have 41 | a single entry ("tidytof_reduced_dimensions"). If TRUE, the split will be 42 | performed and the \code{\link[SingleCellExperiment]{reducedDims}} slot in 43 | the result will have 1-4 entries depending on which dimensionality reduction 44 | results are present in x ("tidytof_pca", "tidytof_tsne", "tidytof_umap", 45 | and "tidytof_reduced_dimensions"). Note that "tidytof_reduced_dimensions" will 46 | include all dimensionality reduction results that are not named according to 47 | tidytof's pca, umap, and tsne conventions.} 48 | } 49 | \value{ 50 | A \code{\link[SeuratObject]{SeuratObject}} 51 | 52 | A \code{\link[SeuratObject]{SeuratObject}}. 53 | } 54 | \description{ 55 | Coerce an object into a \code{\link[SeuratObject]{SeuratObject}} 56 | 57 | Coerce a tof_tbl into a \code{\link[SeuratObject]{SeuratObject}} 58 | } 59 | \examples{ 60 | NULL 61 | 62 | NULL 63 | 64 | } 65 | -------------------------------------------------------------------------------- /man/as_tof_tbl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{as_tof_tbl} 4 | \alias{as_tof_tbl} 5 | \title{Coerce flowFrames or flowSets into tof_tbl's.} 6 | \usage{ 7 | as_tof_tbl(flow_data, sep = "|") 8 | } 9 | \arguments{ 10 | \item{flow_data}{A flowFrame or flowSet} 11 | 12 | \item{sep}{A string indicating which symbol should be used to separate 13 | antigen names and metal names in the columns of the output tof_tbl.} 14 | } 15 | \value{ 16 | A tof_tbl. 17 | } 18 | \description{ 19 | Coerce flowFrames or flowSets into tof_tbl's. 
20 | } 21 | \examples{ 22 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 23 | 24 | input_flowframe <- flowCore::read.FCS(input_file) 25 | 26 | tof_tibble <- as_tof_tbl(input_flowframe) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/as_tof_tbl.flowSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{as_tof_tbl.flowSet} 4 | \alias{as_tof_tbl.flowSet} 5 | \title{Convert an object into a tof_tbl} 6 | \usage{ 7 | \method{as_tof_tbl}{flowSet}(flow_data, sep = "|") 8 | } 9 | \arguments{ 10 | \item{flow_data}{A FlowSet} 11 | 12 | \item{sep}{A string to use to separate the antigen name and its associated 13 | metal in the column names of the output tibble. Defaults to "|".} 14 | } 15 | \value{ 16 | a `tof_tbl` 17 | } 18 | \description{ 19 | Convert an object into a tof_tbl 20 | } 21 | -------------------------------------------------------------------------------- /man/cosine_similarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{cosine_similarity} 4 | \alias{cosine_similarity} 5 | \title{Find the cosine similarity between two vectors} 6 | \usage{ 7 | cosine_similarity(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector} 11 | 12 | \item{y}{a numeric vector} 13 | } 14 | \value{ 15 | a scalar value representing the cosine similarity between x and y 16 | } 17 | \description{ 18 | Find the cosine similarity between two vectors 19 | } 20 | -------------------------------------------------------------------------------- /man/ddpr_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 
| \docType{data} 4 | \name{ddpr_data} 5 | \alias{ddpr_data} 6 | \title{CyTOF data from two samples: 5,000 B-cell lineage cells from a healthy 7 | patient and 5,000 B-cell lineage cells from a B-cell precursor Acute 8 | Lymphoblastic Leukemia (BCP-ALL) patient.} 9 | \format{ 10 | A data frame with 10000 rows and 24 variables: 11 | \describe{ 12 | \item{sample_name}{name of the sample from which the data was read} 13 | \item{cd45}{A CyTOF measurement in raw ion counts} 14 | \item{cd19}{A CyTOF measurement in raw ion counts} 15 | \item{cd22}{A CyTOF measurement in raw ion counts} 16 | \item{cd79b}{A CyTOF measurement in raw ion counts} 17 | \item{cd20}{A CyTOF measurement in raw ion counts} 18 | \item{cd34}{A CyTOF measurement in raw ion counts} 19 | \item{cd123}{A CyTOF measurement in raw ion counts} 20 | \item{cd10}{A CyTOF measurement in raw ion counts} 21 | \item{cd24}{A CyTOF measurement in raw ion counts} 22 | \item{cd127}{A CyTOF measurement in raw ion counts} 23 | \item{cd43}{A CyTOF measurement in raw ion counts} 24 | \item{cd38}{A CyTOF measurement in raw ion counts} 25 | \item{cd58}{A CyTOF measurement in raw ion counts} 26 | \item{psyk}{A CyTOF measurement in raw ion counts} 27 | \item{p4ebp1}{A CyTOF measurement in raw ion counts} 28 | \item{pstat5}{A CyTOF measurement in raw ion counts} 29 | \item{pakt}{A CyTOF measurement in raw ion counts} 30 | \item{ps6}{A CyTOF measurement in raw ion counts} 31 | \item{perk}{A CyTOF measurement in raw ion counts} 32 | \item{pcreb}{A CyTOF measurement in raw ion counts} 33 | } 34 | } 35 | \source{ 36 | \url{https://github.com/kara-davis-lab/DDPR} 37 | } 38 | \usage{ 39 | data(ddpr_data) 40 | } 41 | \value{ 42 | A data.frame 43 | } 44 | \description{ 45 | A dataset containing CyTOF measurements from immune cells originally studied 46 | in the following paper: \cr \cr 47 | Good Z, Sarno J, et al. 
48 | Single-cell developmental classification of B cell precursor acute 49 | lymphoblastic leukemia at diagnosis reveals predictors of relapse. 50 | Nat Med. 2018 May;24(4):474-483. doi: 10.1038/nm.4505. Epub 2018 Mar 5. 51 | PMID: 29505032; PMCID: PMC5953207. 52 | } 53 | \keyword{datasets} 54 | -------------------------------------------------------------------------------- /man/ddpr_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{ddpr_metadata} 5 | \alias{ddpr_metadata} 6 | \title{Clinical metadata for each patient sample in Good & Sarno et al. (2018).} 7 | \format{ 8 | A data frame with 10000 rows and 12 variables: 9 | \describe{ 10 | \item{patient_id}{Name of the sample from which the data was read} 11 | \item{gender}{Gender of the patient from which each sample was collected} 12 | \item{age_at_diagnosis}{Age (in years) of the patient from which each sample was collected} 13 | \item{wbc_count}{The diagnostic White Blood Cell (WBC) count of the patient from which each sample was collected} 14 | \item{mrd_risk}{Risk stratification category for each patient using minimal residual disease (MRD) criteria} 15 | \item{nci_rome_risk}{Risk stratification category for each patient using National Cancer Institute (NCI) criteria} 16 | \item{relapse_status}{A string representing whether or not a patient relapsed} 17 | \item{time_to_relapse}{The time (in days) it took each patient to relapse. Patients who did not relapse will have the value of NA} 18 | \item{type_of_relapse}{ 19 | A string representing the timing of relapse for each patient. 20 | "Very early" relapses occurred less than 18 months after diagnosis; 21 | "Early" relapses occurred between 18 months and 32 months after diagnosis; 22 | "Late" relapses occurred later than 32 months after diagnosis. 
23 | } 24 | \item{ccr}{The number of documented days of continuous complete remission (CCR) for patients who did not relapse. All patients who relapsed will have a value of NA.} 25 | \item{cohort}{A string representing if each sample was used in the "Training" or "Validation" cohort in the original study} 26 | \item{ddpr_risk}{The risk category ("Low" or "High") assigned to each sample using the original paper's risk-stratification algorithm} 27 | } 28 | } 29 | \source{ 30 | Good Z, Sarno J, et al. 31 | Single-cell developmental classification of B cell precursor acute 32 | lymphoblastic leukemia at diagnosis reveals predictors of relapse. 33 | Nat Med. 2018 May;24(4):474-483. doi: 10.1038/nm.4505. Epub 2018 Mar 5. 34 | PMID: 29505032; PMCID: PMC5953207. Supplementary Table 1. 35 | } 36 | \usage{ 37 | data(ddpr_metadata) 38 | } 39 | \value{ 40 | A data.frame 41 | } 42 | \description{ 43 | A dataset containing patient-level clinical metadata for samples originally studied 44 | in the following paper: \cr \cr 45 | Good Z, Sarno J, et al. 46 | Single-cell developmental classification of B cell precursor acute 47 | lymphoblastic leukemia at diagnosis reveals predictors of relapse. 48 | Nat Med. 2018 May;24(4):474-483. doi: 10.1038/nm.4505. Epub 2018 Mar 5. 49 | PMID: 29505032; PMCID: PMC5953207. 50 | } 51 | \keyword{datasets} 52 | -------------------------------------------------------------------------------- /man/dot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{dot} 4 | \alias{dot} 5 | \title{Find the dot product between two vectors.} 6 | \usage{ 7 | dot(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric vector.} 11 | 12 | \item{y}{A numeric vector.} 13 | } 14 | \value{ 15 | The dot product between x and y. 16 | } 17 | \description{ 18 | Find the dot product between two vectors. 
19 | } 20 | -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-24-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-24-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-25-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-25-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-30-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-31-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-31-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-32-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-32-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-33-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-33-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-52-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-52-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-53-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-53-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-54-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-54-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-55-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-55-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-56-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-56-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-57-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/README-unnamed-chunk-57-1.png 
-------------------------------------------------------------------------------- /man/figures/tidytof_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keyes-timothy/tidytof/ace89c75b5a2a64971cc206b0c3e8401881434a0/man/figures/tidytof_logo.png -------------------------------------------------------------------------------- /man/get_extension.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get_extension} 4 | \alias{get_extension} 5 | \title{Find the extension for a file} 6 | \usage{ 7 | get_extension(filename) 8 | } 9 | \arguments{ 10 | \item{filename}{A string representing the name of a file in its local directory} 11 | } 12 | \value{ 13 | The file extension of `filename` 14 | } 15 | \description{ 16 | Find the extension for a file 17 | } 18 | -------------------------------------------------------------------------------- /man/l2_normalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{l2_normalize} 4 | \alias{l2_normalize} 5 | \title{L2 normalize an input vector x to a length of 1} 6 | \usage{ 7 | l2_normalize(x) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector} 11 | } 12 | \value{ 13 | a vector of length length(x) with a magnitude of 1 14 | } 15 | \description{ 16 | L2 normalize an input vector x to a length of 1 17 | } 18 | -------------------------------------------------------------------------------- /man/magnitude.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{magnitude} 4 | \alias{magnitude} 5 | \title{Find the magnitude of a vector.} 6 | \usage{ 7 | 
magnitude(x) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric vector.} 11 | } 12 | \value{ 13 | A scalar value (the magnitude of x). 14 | } 15 | \description{ 16 | Find the magnitude of a vector. 17 | } 18 | -------------------------------------------------------------------------------- /man/make_flowcore_annotated_data_frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{make_flowcore_annotated_data_frame} 4 | \alias{make_flowcore_annotated_data_frame} 5 | \title{Make the AnnotatedDataFrame needed for the flowFrame class} 6 | \usage{ 7 | make_flowcore_annotated_data_frame(maxes_and_mins) 8 | } 9 | \arguments{ 10 | \item{maxes_and_mins}{a data.frame containing information about the max 11 | and min values of each channel to be saved in the flowFrame.} 12 | } 13 | \value{ 14 | An AnnotatedDataFrame. 15 | } 16 | \description{ 17 | Make the AnnotatedDataFrame needed for the flowFrame class 18 | } 19 | \examples{ 20 | NULL 21 | } 22 | -------------------------------------------------------------------------------- /man/metal_masterlist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{metal_masterlist} 5 | \alias{metal_masterlist} 6 | \title{A character vector of metal name patterns supported by tidytof.} 7 | \format{ 8 | A character vector in which each entry is a pattern that tidytof searches 9 | for in every CyTOF channel in input .fcs files. These patterns are an amalgamation 10 | of example .fcs files sampled from the studies linked below. 
11 | } 12 | \source{ 13 | \url{https://github.com/kara-davis-lab/DDPR} 14 | \url{https://cytobank.org/nolanlab/reports/Levine2015.html} 15 | \url{https://cytobank.org/nolanlab/reports/Spitzer2015.html} 16 | \url{https://cytobank.org/nolanlab/reports/Spitzer2017.html} 17 | \url{https://community.cytobank.org/cytobank/projects/609} 18 | } 19 | \usage{ 20 | data(metal_masterlist) 21 | } 22 | \value{ 23 | A named character vector. 24 | } 25 | \description{ 26 | A character vector used by `tof_read_fcs` and `tof_read_data` to detect and 27 | parse which CyTOF metals correspond to each channel in an input .fcs file. 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/new_tof_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{new_tof_model} 4 | \alias{new_tof_model} 5 | \title{Constructor for a tof_model.} 6 | \usage{ 7 | new_tof_model( 8 | model, 9 | recipe, 10 | penalty, 11 | mixture, 12 | model_type = c("linear", "two-class", "multiclass", "survival"), 13 | outcome_colnames, 14 | training_data 15 | ) 16 | } 17 | \arguments{ 18 | \item{model}{A glmnet model.} 19 | 20 | \item{recipe}{A prepped recipe object.} 21 | 22 | \item{penalty}{A double indicating which lambda value should be used within the 23 | glmnet path.} 24 | 25 | \item{mixture}{A double indicating which alpha value was used to fit the glmnet model.} 26 | 27 | \item{model_type}{A string indicating which type of glmnet model is being fit.} 28 | 29 | \item{outcome_colnames}{TO DO} 30 | 31 | \item{training_data}{TO DO} 32 | } 33 | \value{ 34 | A `tof_model`, an S3 class that includes a trained glmnet model and 35 | the recipe used to perform its associated preprocessing. 36 | } 37 | \description{ 38 | Constructor for a tof_model. 
39 | } 40 | -------------------------------------------------------------------------------- /man/new_tof_tibble.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{new_tof_tibble} 4 | \alias{new_tof_tibble} 5 | \title{Constructor for a tof_tibble.} 6 | \usage{ 7 | new_tof_tibble(x = dplyr::tibble(), panel = dplyr::tibble()) 8 | } 9 | \arguments{ 10 | \item{x}{A data.frame or tibble containing single-cell mass cytometry data 11 | such that rows are cells and columns are CyTOF measurements.} 12 | 13 | \item{panel}{A data.frame or tibble containing information about the panel 14 | for the mass cytometry data in x.} 15 | } 16 | \value{ 17 | A `tof_tbl`, a tibble extension that tracks a few other attributes 18 | that are useful for CyTOF data analysis. 19 | } 20 | \description{ 21 | Constructor for a tof_tibble. 22 | } 23 | \seealso{ 24 | Other tof_tbl utilities: 25 | \code{\link{tof_get_panel}()}, 26 | \code{\link{tof_set_panel}()} 27 | } 28 | \concept{tof_tbl utilities} 29 | -------------------------------------------------------------------------------- /man/phenograph_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{phenograph_data} 5 | \alias{phenograph_data} 6 | \title{CyTOF data from 6,000 healthy immune cells from a single patient.} 7 | \format{ 8 | A data frame with 6000 rows and 26 variables: 9 | \describe{ 10 | \item{sample_name}{Name of the sample from which the data was read} 11 | \item{phenograph_cluster}{Numeric ID of the cluster assignment of each row} 12 | \item{cd19}{A CyTOF measurement in raw ion counts} 13 | \item{cd11b}{A CyTOF measurement in raw ion counts} 14 | \item{cd34}{A CyTOF measurement in raw ion counts} 15 | \item{cd45}{A CyTOF measurement
in raw ion counts} 16 | \item{cd123}{A CyTOF measurement in raw ion counts} 17 | \item{cd33}{A CyTOF measurement in raw ion counts} 18 | \item{cd47}{A CyTOF measurement in raw ion counts} 19 | \item{cd7}{A CyTOF measurement in raw ion counts} 20 | \item{cd44}{A CyTOF measurement in raw ion counts} 21 | \item{cd38}{A CyTOF measurement in raw ion counts} 22 | \item{cd3}{A CyTOF measurement in raw ion counts} 23 | \item{cd117}{A CyTOF measurement in raw ion counts} 24 | \item{cd64}{A CyTOF measurement in raw ion counts} 25 | \item{cd41}{A CyTOF measurement in raw ion counts} 26 | \item{pstat3}{A CyTOF measurement in raw ion counts} 27 | \item{pstat5}{A CyTOF measurement in raw ion counts} 28 | \item{pampk}{A CyTOF measurement in raw ion counts} 29 | \item{p4ebp1}{A CyTOF measurement in raw ion counts} 30 | \item{ps6}{A CyTOF measurement in raw ion counts} 31 | \item{pcreb}{A CyTOF measurement in raw ion counts} 32 | \item{pzap70-syk}{A CyTOF measurement in raw ion counts} 33 | \item{prb}{A CyTOF measurement in raw ion counts} 34 | \item{perk1-2}{A CyTOF measurement in raw ion counts} 35 | } 36 | } 37 | \source{ 38 | \url{https://cytobank.org/nolanlab/reports/Levine2015.html} 39 | } 40 | \usage{ 41 | data(phenograph_data) 42 | } 43 | \value{ 44 | A data.frame 45 | } 46 | \description{ 47 | A dataset containing CyTOF measurements from healthy control cells originally studied 48 | in the following paper: \cr \cr 49 | Levine JH, Simonds EF, et al. 50 | Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that 51 | Correlate with Prognosis. Cell. 2015 Jul 2;162(1):184-97. 52 | doi: 10.1016/j.cell.2015.05.047. Epub 2015 Jun 18. PMID: 26095251; 53 | PMCID: PMC4508757. 54 | } 55 | \details{ 56 | 2000 cells from 3 clusters identified in the original paper have been 57 | sampled. 
58 | } 59 | \keyword{datasets} 60 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/imports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{\%>\%} 7 | \alias{:=} 8 | \alias{.data} 9 | \alias{contains} 10 | \alias{select_helpers} 11 | \alias{ends_with} 12 | \alias{everything} 13 | \alias{matches} 14 | \alias{num_range} 15 | \alias{starts_with} 16 | \alias{last_col} 17 | \alias{any_of} 18 | \alias{all_of} 19 | \title{Objects exported from other packages} 20 | \keyword{internal} 21 | \value{ 22 | See documentation in each object's original package. 23 | } 24 | \examples{ 25 | # See examples in each object's original package 26 | NULL 27 | } 28 | \description{ 29 | These objects are imported from other packages. Follow the links 30 | below to see their documentation. 
31 | 32 | \describe{ 33 | \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} 34 | 35 | \item{rlang}{\code{\link[rlang:dyn-dots]{:=}}, \code{\link[rlang:dot-data]{.data}}} 36 | 37 | \item{tidyselect}{\code{\link[tidyselect]{all_of}}, \code{\link[tidyselect:all_of]{any_of}}, \code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{starts_with}}} 38 | }} 39 | 40 | -------------------------------------------------------------------------------- /man/rev_asinh.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{rev_asinh} 4 | \alias{rev_asinh} 5 | \title{Reverses arcsinh transformation with cofactor `scale_factor` and a 6 | shift of `shift_factor`.} 7 | \usage{ 8 | rev_asinh(x, shift_factor, scale_factor) 9 | } 10 | \arguments{ 11 | \item{x}{A numeric vector.} 12 | 13 | \item{shift_factor}{The scalar value `a` in the following equation used to 14 | transform high-dimensional cytometry raw data ion counts using the hyperbolic arcsinh function: 15 | `new_x <- asinh(a + b * x)`.} 16 | 17 | \item{scale_factor}{The scalar value `b` in the following equation used to 18 | transform high-dimensional cytometry raw data ion counts using the hyperbolic arcsinh function: 19 | `new_x <- asinh(a + b * x)`.} 20 | } 21 | \value{ 22 | A numeric vector after undergoing reverse 23 | arcsinh transformation 24 | } 25 | \description{ 26 | Reverses arcsinh transformation with cofactor `scale_factor` and a 27 | shift of `shift_factor`. 
28 | } 29 | \examples{ 30 | shift_factor <- 0 31 | scale_factor <- 1 / 5 32 | 33 | input_value <- 20 34 | asinh_value <- asinh(shift_factor + input_value * scale_factor) 35 | 36 | restored_value <- rev_asinh(asinh_value, shift_factor, scale_factor) 37 | 38 | } 39 | -------------------------------------------------------------------------------- /man/tidytof_example_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tidytof_example_data} 4 | \alias{tidytof_example_data} 5 | \title{Get paths to tidytof example data} 6 | \usage{ 7 | tidytof_example_data(dataset_name = NULL) 8 | } 9 | \arguments{ 10 | \item{dataset_name}{Name of the dataset you want to access. If NULL, 11 | the names of the datasets (each of which is from a different study) 12 | will be listed.} 13 | } 14 | \value{ 15 | A character vector of file paths where the requested .fcs 16 | files are located. If `dataset_name` is NULL, a character vector of 17 | dataset names (that can be used as values for `dataset_name`) is 18 | returned instead. 19 | } 20 | \description{ 21 | tidytof comes bundled with a number of sample .fcs files in its 22 | inst/extdata directory. This function makes them easy to access. 
23 | } 24 | \examples{ 25 | tidytof_example_data() 26 | tidytof_example_data(dataset_name = "phenograph") 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/tof_analyze_abundance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differential_discovery.R 3 | \name{tof_analyze_abundance} 4 | \alias{tof_analyze_abundance} 5 | \title{Perform Differential Abundance Analysis (DAA) on high-dimensional cytometry data} 6 | \usage{ 7 | tof_analyze_abundance(tof_tibble, method = c("diffcyt", "glmm", "ttest"), ...) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{method}{A string indicating which statistical method should be used. Valid 13 | values include "diffcyt", "glmm", and "ttest".} 14 | 15 | \item{...}{Additional arguments to pass onto the `tof_analyze_abundance_*` 16 | function family member corresponding to the chosen method.} 17 | } 18 | \value{ 19 | A tibble or nested tibble containing the differential abundance results 20 | from the chosen method. See \code{\link{tof_analyze_abundance_diffcyt}}, 21 | \code{\link{tof_analyze_abundance_glmm}}, and \code{\link{tof_analyze_abundance_ttest}} for details. 22 | } 23 | \description{ 24 | This function performs differential abundance analysis on the cell clusters 25 | contained within a `tof_tbl` using one of three methods 26 | ("diffcyt", "glmm", and "ttest"). It wraps the members of the `tof_analyze_abundance_*` 27 | function family: \code{\link{tof_analyze_abundance_diffcyt}}, 28 | \code{\link{tof_analyze_abundance_glmm}}, and \code{\link{tof_analyze_abundance_ttest}}. 
29 | } 30 | \examples{ 31 | # For differential discovery examples, please see the package vignettes 32 | NULL 33 | 34 | } 35 | \seealso{ 36 | Other differential abundance analysis functions: 37 | \code{\link{tof_analyze_abundance_diffcyt}()}, 38 | \code{\link{tof_analyze_abundance_glmm}()}, 39 | \code{\link{tof_analyze_abundance_ttest}()} 40 | } 41 | \concept{differential abundance analysis functions} 42 | -------------------------------------------------------------------------------- /man/tof_analyze_expression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/differential_discovery.R 3 | \name{tof_analyze_expression} 4 | \alias{tof_analyze_expression} 5 | \title{Perform Differential Expression Analysis (DEA) on high-dimensional cytometry data} 6 | \usage{ 7 | tof_analyze_expression(tof_tibble, method = c("diffcyt", "glmm", "ttest"), ...) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{method}{A string indicating which statistical method should be used. Valid 13 | values include "diffcyt", "glmm", and "ttest".} 14 | 15 | \item{...}{Additional arguments to pass onto the `tof_analyze_expression_*` 16 | function family member corresponding to the chosen method.} 17 | } 18 | \value{ 19 | A tibble or nested tibble containing the differential expression results 20 | from the chosen method. See \code{\link{tof_analyze_expression_diffcyt}}, 21 | \code{\link{tof_analyze_expression_lmm}}, and \code{\link{tof_analyze_expression_ttest}} for details. 22 | } 23 | \description{ 24 | This function performs differential expression analysis on the cell clusters 25 | contained within a `tof_tbl` using one of three methods 26 | ("diffcyt", "glmm", and "ttest").
It wraps the members of the `tof_analyze_expression_*` 27 | function family: \code{\link{tof_analyze_expression_diffcyt}}, 28 | \code{\link{tof_analyze_expression_lmm}}, and \code{\link{tof_analyze_expression_ttest}}. 29 | } 30 | \examples{ 31 | # For differential discovery examples, please see the package vignettes 32 | NULL 33 | 34 | } 35 | \seealso{ 36 | Other differential expression analysis functions: 37 | \code{\link{tof_analyze_expression_diffcyt}()}, 38 | \code{\link{tof_analyze_expression_lmm}()}, 39 | \code{\link{tof_analyze_expression_ttest}()} 40 | } 41 | \concept{differential expression analysis functions} 42 | -------------------------------------------------------------------------------- /man/tof_annotate_clusters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustering.R 3 | \name{tof_annotate_clusters} 4 | \alias{tof_annotate_clusters} 5 | \title{Manually annotate tidytof-computed clusters using user-specified labels} 6 | \usage{ 7 | tof_annotate_clusters(tof_tibble, cluster_col, annotations) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{`tof_tbl` or `tibble`.} 11 | 12 | \item{cluster_col}{An unquoted column name indicating which column in `tof_tibble` 13 | contains the ids of the unsupervised cluster to which each cell belongs. 14 | Cluster labels can be produced via any method the user chooses - including manual gating, 15 | any of the functions in the `tof_cluster_*` function family, or any other method.} 16 | 17 | \item{annotations}{A data structure indicating how to annotate each cluster id 18 | in `cluster_col`. 
`annotations` can be provided as a data.frame with two columns 19 | (the first should have the same name as `cluster_col` and contain each unique 20 | cluster id; the second can have any name and should contain a character vector 21 | indicating which manual annotation should be matched with each cluster 22 | id in the first column). `annotations` can also be provided as a named character vector; 23 | in this case, each entry in `annotations` should be a unique cluster id, and the 24 | names for each entry should be the corresponding manual cluster annotation. See 25 | below for examples.} 26 | } 27 | \value{ 28 | A `tof_tbl` with the same number of rows as `tof_tibble` and one 29 | additional column containing the manual cluster annotations for each cell 30 | (as a character vector). If `annotations` was provided as a data.frame, the 31 | new column will have the same name as the column containing the cluster annotations 32 | in `annotations`. If `annotations` was provided as a named character vector, 33 | the new column will be named `\{cluster_col\}_annotation`. 34 | } 35 | \description{ 36 | This function adds an additional column to a `tibble` or `tof_tbl` to allow 37 | users to incorporate manual cell type labels for clusters identified using 38 | unsupervised algorithms. 
39 | } 40 | \examples{ 41 | 42 | sim_data <- 43 | dplyr::tibble( 44 | cd45 = rnorm(n = 1000), 45 | cd38 = c(rnorm(n = 500), rnorm(n = 500, mean = 2)), 46 | cd34 = c(rnorm(n = 500), rnorm(n = 500, mean = 4)), 47 | cd19 = rnorm(n = 1000), 48 | cluster_id = c(rep("a", 500), rep("b", 500)) 49 | ) 50 | 51 | # using named character vector 52 | sim_data |> 53 | tof_annotate_clusters( 54 | cluster_col = cluster_id, 55 | annotations = c("macrophage" = "a", "dendritic cell" = "b") 56 | ) 57 | 58 | # using two-column data.frame 59 | annotation_data_frame <- 60 | data.frame( 61 | cluster_id = c("a", "b"), 62 | cluster_annotation = c("macrophage", "dendritic cell") 63 | ) 64 | 65 | sim_data |> 66 | tof_annotate_clusters( 67 | cluster_col = cluster_id, 68 | annotations = annotation_data_frame 69 | ) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /man/tof_apply_classifier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/developmental_classifier_helpers.R 3 | \name{tof_apply_classifier} 4 | \alias{tof_apply_classifier} 5 | \title{Perform developmental clustering on CyTOF data using a pre-fit classifier} 6 | \usage{ 7 | tof_apply_classifier( 8 | cancer_tibble = NULL, 9 | classifier_fit = NULL, 10 | distance_function = c("mahalanobis", "cosine", "pearson"), 11 | num_cores = 1, 12 | parallel_vars 13 | ) 14 | } 15 | \arguments{ 16 | \item{cancer_tibble}{A `tibble` or `tof_tibble` containing cells to be classified 17 | into their nearest healthy subpopulation (generally cancer cells).} 18 | 19 | \item{classifier_fit}{A nested `tibble` produced by `tof_build_classifier` in which 20 | each row represents a healthy cell subpopulation into which the cells in `cancer_tibble` 21 | should be classified using minimum distance.} 22 | 23 | \item{distance_function}{A string indicating which distance function should 24 | be 
used to perform the classification. Options are "mahalanobis" (the default), 25 | "cosine", and "pearson".} 26 | 27 | \item{num_cores}{An integer indicating the number of CPU cores used to parallelize 28 | the classification. Defaults to 1 (a single core).} 29 | 30 | \item{parallel_vars}{Unquoted column names indicating which columns in `cancer_tibble` to 31 | use for breaking up the data in order to parallelize the classification. 32 | Defaults to NULL. Supports tidyselect helpers.} 33 | } 34 | \value{ 35 | A tibble with `nrow(cancer_tibble)` rows and `nrow(classifier_fit) + 1` 36 | columns. Each row represents a cell from `cancer_tibble`, and `nrow(classifier_fit)` 37 | of the columns represent the distance between the cell and each of the healthy 38 | subpopulations' cluster centroids. The final column represents the cluster id of 39 | the healthy subpopulation with the minimum distance to the cell represented 40 | by that row. 41 | } 42 | \description{ 43 | Perform developmental clustering on CyTOF data using a pre-fit classifier 44 | } 45 | \examples{ 46 | NULL 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/tof_assess_channels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quality_control.R 3 | \name{tof_assess_channels} 4 | \alias{tof_assess_channels} 5 | \title{Detect low-expression (i.e. potentially failed) channels in high-dimensional cytometry data} 6 | \usage{ 7 | tof_assess_channels( 8 | tof_tibble, 9 | channel_cols = where(tof_is_numeric), 10 | negative_threshold = asinh(10/5), 11 | negative_proportion_flag = 0.95 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 16 | 17 | \item{channel_cols}{A vector of unquoted column names representing columns that contain 18 | single-cell protein measurements. Supports tidyselect helpers. 
19 | If nothing is specified, the default is to analyze all numeric columns.} 20 | 21 | \item{negative_threshold}{A scalar indicating the threshold below 22 | which a measurement should be considered negative. Defaults to the hyperbolic 23 | arcsine transformation of 10 counts with a cofactor of 5 (i.e. `asinh(10/5)`).} 24 | 25 | \item{negative_proportion_flag}{A scalar between 0 and 1 indicating the proportion of 26 | cells in tof_tibble that need to be below `negative_threshold` for a given marker 27 | in order for that marker to be flagged. Defaults to 0.95.} 28 | } 29 | \value{ 30 | A tibble with 3 columns and a number of rows equal to the number of 31 | columns in `tof_tibble` chosen by `channel_cols`. The three columns are "channel", 32 | a character vector of channel names, "negative_proportion", a numeric vector with values 33 | between 0 and 1 indicating the proportion of cells in `tof_tibble` that are below `negative_threshold` for 34 | each channel, and `flagged_channel`, a boolean vector indicating whether or not a channel 35 | has been flagged as potentially failed (TRUE means that the channel had a large number of 36 | cells below `negative_threshold`). 37 | } 38 | \description{ 39 | Detect low-expression (i.e.
potentially failed) channels in high-dimensional cytometry data 40 | } 41 | \examples{ 42 | # simulate some data 43 | sim_data <- 44 | data.frame( 45 | cd4 = rnorm(n = 100, mean = 5, sd = 0.5), 46 | cd8 = rnorm(n = 100, mean = 0, sd = 0.1), 47 | cd33 = rnorm(n = 100, mean = 10, sd = 0.1) 48 | ) 49 | 50 | tof_assess_channels(tof_tibble = sim_data) 51 | 52 | tof_assess_channels(tof_tibble = sim_data, channel_cols = c(cd4, cd8)) 53 | 54 | tof_assess_channels(tof_tibble = sim_data, negative_threshold = 2) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/tof_assess_model_new_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_assess_model_new_data} 4 | \alias{tof_assess_model_new_data} 5 | \title{Compute a trained elastic net model's performance metrics using new_data.} 6 | \usage{ 7 | tof_assess_model_new_data(tof_model, new_data) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 11 | 12 | \item{new_data}{A tibble of new observations that should be used to evaluate 13 | the `tof_model`'s performance.} 14 | } 15 | \value{ 16 | A list of performance metrics whose components depend on the model type. 17 | } 18 | \description{ 19 | Compute a trained elastic net model's performance metrics using new_data. 
20 | } 21 | -------------------------------------------------------------------------------- /man/tof_assess_model_tuning.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_assess_model_tuning} 4 | \alias{tof_assess_model_tuning} 5 | \title{Access a trained elastic net model's performance metrics using its tuning data.} 6 | \usage{ 7 | tof_assess_model_tuning(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 11 | } 12 | \value{ 13 | A list of performance metrics whose components depend on the model type. 14 | } 15 | \description{ 16 | Access a trained elastic net model's performance metrics using its tuning data. 17 | } 18 | -------------------------------------------------------------------------------- /man/tof_batch_correct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_correction.R 3 | \name{tof_batch_correct} 4 | \alias{tof_batch_correct} 5 | \title{Perform groupwise linear rescaling of high-dimensional cytometry measurements} 6 | \usage{ 7 | tof_batch_correct( 8 | tof_tibble, 9 | channel_cols, 10 | group_cols, 11 | augment = TRUE, 12 | method = c("rescale", "quantile") 13 | ) 14 | } 15 | \arguments{ 16 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 17 | 18 | \item{channel_cols}{Unquoted column names representing columns that contain 19 | single-cell protein measurements. Supports tidyselect helpers.} 20 | 21 | \item{group_cols}{Optional. Unquoted column names indicating which columns 22 | should be used to group cells before batch correction. Batch correction is then 23 | performed independently within each group. 
Supports tidyselect helpers.} 24 | 25 | \item{augment}{A boolean value indicating if the output should replace the 26 | `channel_cols` in `tof_tibble` with the new, batch corrected columns (TRUE, the default) 27 | or if it should only return the batch-corrected columns (FALSE) with all other columns 28 | omitted.} 29 | 30 | \item{method}{A string indicating which batch correction method should be used. 31 | Valid options are "rescale" for linear scaling (the default) and "quantile" 32 | for quantile normalization using \code{\link[preprocessCore]{normalize.quantiles}}.} 33 | } 34 | \value{ 35 | If augment = TRUE, a tibble with the same number of rows and columns as 36 | tof_tibble, with the columns specified by `channel_cols` batch-corrected. If 37 | augment = FALSE, a tibble containing only the batch-corrected `channel_cols`. 38 | } 39 | \description{ 40 | This function performs batch correction on high-dimensional cytometry 41 | data in tidy format using either linear rescaling or quantile normalization. 42 | Each channel specified by `channel_cols` is batch corrected, and `group_cols` 43 | can be used to break cells 44 | into groups for which the batch correction should be performed separately.
45 | } 46 | \examples{ 47 | NULL 48 | 49 | } 50 | -------------------------------------------------------------------------------- /man/tof_batch_correct_quantile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_correction.R 3 | \name{tof_batch_correct_quantile} 4 | \alias{tof_batch_correct_quantile} 5 | \title{Batch-correct a tibble of high-dimensional cytometry data using quantile 6 | normalization.} 7 | \usage{ 8 | tof_batch_correct_quantile( 9 | tof_tibble, 10 | channel_cols, 11 | group_cols, 12 | augment = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 17 | 18 | \item{channel_cols}{Unquoted column names representing columns that contain 19 | single-cell protein measurements. Supports tidyselect helpers.} 20 | 21 | \item{group_cols}{Optional. Unquoted column names indicating which columns 22 | should be used to group cells before batch correction. Batch correction is then 23 | performed independently within each group. Supports tidyselect helpers.} 24 | 25 | \item{augment}{A boolean value indicating if the output should replace the 26 | `channel_cols` in `tof_tibble` with the new, batch corrected columns (TRUE, the default) 27 | or if it should only return the batch-corrected columns (FALSE) with all other columns 28 | omitted.} 29 | } 30 | \value{ 31 | If augment = TRUE, a tibble with the same number of rows and columns as 32 | tof_tibble, with the columns specified by `channel_cols` batch-corrected. If 33 | augment = FALSE, a tibble containing only the batch-corrected `channel_cols`. 34 | } 35 | \description{ 36 | This function performs quantile normalization on high-dimensional cytometry 37 | data in tidy format using \code{\link[preprocessCore]{normalize.quantiles}}. 38 | Optionally, groups can be specified and normalized separately. 
39 | } 40 | \examples{ 41 | NULL 42 | 43 | } 44 | -------------------------------------------------------------------------------- /man/tof_batch_correct_quantile_tibble.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_correction.R 3 | \name{tof_batch_correct_quantile_tibble} 4 | \alias{tof_batch_correct_quantile_tibble} 5 | \title{Batch-correct a tibble of high-dimensional cytometry data using quantile 6 | normalization.} 7 | \usage{ 8 | tof_batch_correct_quantile_tibble(tof_tibble, channel_cols, augment = TRUE) 9 | } 10 | \arguments{ 11 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 12 | 13 | \item{channel_cols}{Unquoted column names representing columns that contain 14 | single-cell protein measurements. Supports tidyselect helpers.} 15 | 16 | \item{augment}{A boolean value indicating if the output should replace the 17 | `channel_cols` in `tof_tibble` with the new, batch corrected columns (TRUE, the default) 18 | or if it should only return the batch-corrected columns (FALSE) with all other columns 19 | omitted.} 20 | } 21 | \value{ 22 | If augment = TRUE, a tibble with the same number of rows and columns as 23 | tof_tibble, with the columns specified by `channel_cols` batch-corrected. If 24 | augment = FALSE, a tibble containing only the batch-corrected `channel_cols`. 25 | } 26 | \description{ 27 | This function performs quantile normalization on high-dimensional cytometry 28 | data in tidy format using \code{\link[preprocessCore]{normalize.quantiles}}. 
29 | } 30 | \examples{ 31 | NULL 32 | 33 | } 34 | -------------------------------------------------------------------------------- /man/tof_batch_correct_rescale.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_correction.R 3 | \name{tof_batch_correct_rescale} 4 | \alias{tof_batch_correct_rescale} 5 | \title{Perform groupwise linear rescaling of high-dimensional cytometry measurements} 6 | \usage{ 7 | tof_batch_correct_rescale(tof_tibble, channel_cols, group_cols, augment = TRUE) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{channel_cols}{Unquoted column names representing columns that contain 13 | single-cell protein measurements. Supports tidyselect helpers.} 14 | 15 | \item{group_cols}{Optional. Unquoted column names indicating which columns 16 | should be used to group cells before batch correction. Batch correction is then 17 | performed independently within each group. Supports tidyselect helpers.} 18 | 19 | \item{augment}{A boolean value indicating if the output should replace the 20 | `channel_cols` in `tof_tibble` with the new, batch corrected columns (TRUE, the default) 21 | or if it should only return the batch-corrected columns (FALSE) with all other columns 22 | omitted.} 23 | } 24 | \value{ 25 | If augment = TRUE, a tibble with the same number of rows and columns as 26 | tof_tibble, with the columns specified by `channel_cols` batch-corrected. If 27 | augment = FALSE, a tibble containing only the batch-corrected `channel_cols`. 28 | } 29 | \description{ 30 | This function performs batch correction on high-dimensional cytometry 31 | data in tidy format using linear rescaling. Each channel specified by 32 | `channel_cols` is rescaled such that the maximum value is 1 and the minimum 33 | value is 0.
`group_cols` specifies the columns that should be used to break cells 34 | into groups in which the rescaling should be performed separately. 35 | } 36 | \examples{ 37 | NULL 38 | 39 | } 40 | -------------------------------------------------------------------------------- /man/tof_build_classifier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/developmental_classifier_helpers.R 3 | \name{tof_build_classifier} 4 | \alias{tof_build_classifier} 5 | \title{Calculate centroids and covariance matrices for each cell subpopulation in 6 | healthy CyTOF data.} 7 | \usage{ 8 | tof_build_classifier( 9 | healthy_tibble = NULL, 10 | healthy_cell_labels = NULL, 11 | classifier_markers = where(tof_is_numeric), 12 | verbose = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{healthy_tibble}{A `tibble` or `tof_tibble` containing cells from only 17 | healthy control samples (i.e. not disease samples).} 18 | 19 | \item{healthy_cell_labels}{A character or integer vector of length `nrow(healthy_tibble)`. 20 | Each entry in this vector should represent the cell subpopulation label (or cluster id) for 21 | the corresponding row in `healthy_tibble`.} 22 | 23 | \item{classifier_markers}{Unquoted column names indicating which columns in `healthy_tibble` to 24 | use in the developmental classification. Defaults to all numeric columns 25 | in `healthy_tibble`. Supports tidyselect helpers.} 26 | 27 | \item{verbose}{A boolean value indicating if updates should be printed to the 28 | console during classification. 
Defaults to FALSE.} 29 | } 30 | \value{ 31 | A tibble with three columns: 32 | \strong{population} (id of the healthy cell population), 33 | \strong{centroid} (the centroid vector for that cell population), and 34 | \strong{covariance_matrix} (the covariance matrix for that cell population) 35 | } 36 | \description{ 37 | This function takes a `tibble` or `tof_tibble` storing healthy cell measurements 38 | in each of its rows and a vector (`healthy_cell_labels`) representing the 39 | cell subpopulation to which each cell belongs. It uses these inputs to calculate 40 | several quantities required to perform "developmental classification" as described in 41 | \href{https://pubmed.ncbi.nlm.nih.gov/29505032/}{this paper}. 42 | } 43 | -------------------------------------------------------------------------------- /man/tof_calculate_flow_rate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quality_control.R 3 | \name{tof_calculate_flow_rate} 4 | \alias{tof_calculate_flow_rate} 5 | \title{Calculate the relative flow rates of different timepoints throughout a flow 6 | or mass cytometry run.} 7 | \usage{ 8 | tof_calculate_flow_rate( 9 | tof_tibble, 10 | time_col, 11 | num_timesteps = nrow(tof_tibble)/1000 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 16 | 17 | \item{time_col}{An unquoted column name indicating which column in `tof_tibble` 18 | contains the time at which each cell was collected.} 19 | 20 | \item{num_timesteps}{The number of bins into which `time_col` should be split 21 | to define "timesteps" of the data collection process. The number of cells analyzed 22 | by the cytometer will be counted in each bin separately and will represent 23 | the relative average flow rate for that timestep in data collection.} 24 | } 25 | \value{ 26 | A tibble with 3 columns and num_timesteps rows.
Each row will represent a single 27 | timestep (and an error will be thrown if `num_timesteps` is larger than the number of rows in 28 | `tof_tibble`). The three columns are as follows: "timestep", a numeric vector indicating which 29 | timestep is represented by a given row; "time_window", a factor showing the interval in `time_col` 30 | over which "timestep" is defined; and "num_cells", the number of cells that were collected during 31 | each timestep. 32 | } 33 | \description{ 34 | Calculate the relative flow rates of different timepoints throughout a flow 35 | or mass cytometry run. 36 | } 37 | \examples{ 38 | 39 | # simulate some data 40 | sim_data <- 41 | data.frame( 42 | cd4 = rnorm(n = 100, mean = 5, sd = 0.5), 43 | cd8 = rnorm(n = 100, mean = 0, sd = 0.1), 44 | cd33 = rnorm(n = 100, mean = 10, sd = 0.1), 45 | time = sample(1:300, size = 100) 46 | ) 47 | 48 | tof_calculate_flow_rate(tof_tibble = sim_data, time_col = time, num_timesteps = 20L) 49 | 50 | } 51 | -------------------------------------------------------------------------------- /man/tof_check_model_args.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_check_model_args} 4 | \alias{tof_check_model_args} 5 | \title{Check argument specifications for a glmnet model.} 6 | \usage{ 7 | tof_check_model_args( 8 | split_data, 9 | model_type = c("linear", "two-class", "multiclass", "survival"), 10 | best_model_type = c("best", "best with sparsity"), 11 | response_col, 12 | time_col, 13 | event_col 14 | ) 15 | } 16 | \arguments{ 17 | \item{split_data}{An `rsplit` or `rset` object from the \code{\link[rsample]{rsample}} 18 | package containing the sample-level data to use for modeling. 
Alternatively, 19 | an unsplit tbl_df can be provided, though this is not recommended.} 20 | 21 | \item{model_type}{A string indicating which kind of elastic net model to build. 22 | If a continuous response is being predicted, use "linear" for linear regression; 23 | if a categorical response with only 2 classes is being predicted, use 24 | "two-class" for logistic regression; if a categorical response with more than 2 25 | levels is being predicted, use "multiclass" for multinomial regression; and if 26 | a time-to-event outcome is being predicted, use "survival" for Cox regression.} 27 | 28 | \item{best_model_type}{Currently unused.} 29 | 30 | \item{response_col}{Unquoted column name indicating which column in the data 31 | contained in `split_data` should be used as the outcome in a "two-class", "multiclass", 32 | or "linear" elastic net model. Must be a factor for "two-class" and "multiclass" 33 | models and must be numeric for "linear" models. Ignored if `model_type` is "survival".} 34 | 35 | \item{time_col}{Unquoted column name indicating which column in the data 36 | contained in `split_data` represents the time-to-event outcome in a "survival" 37 | elastic net model. Must be numeric. Ignored if `model_type` is "two-class", "multiclass", 38 | or "linear".} 39 | 40 | \item{event_col}{Unquoted column name indicating which column in the data 41 | contained in `split_data` represents the event indicator for the time-to-event outcome in a "survival" 42 | elastic net model. Must be a binary column - all values should be either 0 or 1 43 | (with 1 indicating the adverse event) or FALSE and TRUE (with TRUE indicating the 44 | adverse event). Ignored if `model_type` is "two-class", "multiclass", 45 | or "linear".} 46 | } 47 | \value{ 48 | A tibble. If arguments are specified correctly, this tibble can be 49 | used to create a recipe for preprocessing. 50 | } 51 | \description{ 52 | Check argument specifications for a glmnet model.
53 | } 54 | -------------------------------------------------------------------------------- /man/tof_classify_cells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/developmental_classifier_helpers.R 3 | \name{tof_classify_cells} 4 | \alias{tof_classify_cells} 5 | \title{Classify each cell (i.e. each row) in a matrix of cancer cells into its most 6 | similar healthy developmental subpopulation.} 7 | \usage{ 8 | tof_classify_cells( 9 | classifier_fit, 10 | cancer_data, 11 | distance_function = c("mahalanobis", "cosine", "pearson") 12 | ) 13 | } 14 | \arguments{ 15 | \item{classifier_fit}{A tibble produced by \code{\link{tof_build_classifier}}.} 16 | 17 | \item{cancer_data}{A matrix in which each row corresponds to a cell and each 18 | column corresponds to a measured CyTOF antigen.} 19 | 20 | \item{distance_function}{A string indicating which of three distance functions should 21 | be used to calculate the distances between each row of `cancer_data` and the 22 | healthy developmental subpopulations corresponding to each row of `classifier_fit`.} 23 | } 24 | \value{ 25 | A data.frame in which each column represents the distance between 26 | a cell in the input data and each healthy subpopulation cells are being 27 | classified into. 28 | } 29 | \description{ 30 | This function uses a specified distance metric to classify each cell in a data.frame 31 | or matrix (`cancer_data`) into one of `nrow(classifier_fit)` subpopulations 32 | based on minimum distance, as described in \href{https://pubmed.ncbi.nlm.nih.gov/29505032/}{this paper}. 
33 | } 34 | -------------------------------------------------------------------------------- /man/tof_clean_metric_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_clean_metric_names} 4 | \alias{tof_clean_metric_names} 5 | \title{Rename glmnet's default model evaluation metrics to make them more interpretable} 6 | \usage{ 7 | tof_clean_metric_names(metric_tibble, model_type) 8 | } 9 | \arguments{ 10 | \item{metric_tibble}{A tibble in which each column represents a glmnet 11 | model evaluation metric with its default name.} 12 | 13 | \item{model_type}{A string indicating which type of glmnet model was trained.} 14 | } 15 | \value{ 16 | A tibble in which each column represents a glmnet 17 | model evaluation metric with its "cleaned" name. 18 | } 19 | \description{ 20 | Rename glmnet's default model evaluation metrics to make them more interpretable 21 | } 22 | -------------------------------------------------------------------------------- /man/tof_cluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustering.R 3 | \name{tof_cluster} 4 | \alias{tof_cluster} 5 | \title{Cluster high-dimensional cytometry data.} 6 | \usage{ 7 | tof_cluster( 8 | tof_tibble, 9 | cluster_cols = where(tof_is_numeric), 10 | group_cols = NULL, 11 | ..., 12 | augment = TRUE, 13 | method 14 | ) 15 | } 16 | \arguments{ 17 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 18 | 19 | \item{cluster_cols}{Unquoted column names indicating which columns in `tof_tibble` to 20 | use in computing the clusters. Defaults to all numeric columns 21 | in `tof_tibble`. Supports tidyselect helpers.} 22 | 23 | \item{group_cols}{Optional. 
Unquoted column names indicating which columns 24 | should be used to group cells before clustering. Clustering is then performed 25 | on each group independently. Supports tidyselect helpers.} 26 | 27 | \item{...}{Additional arguments to pass to the `tof_cluster_*` 28 | function family member corresponding to the chosen method.} 29 | 30 | \item{augment}{A boolean value indicating if the output should column-bind the 31 | cluster ids of each cell as a new column in `tof_tibble` (TRUE, the default) or if 32 | a single-column tibble including only the cluster ids should be returned (FALSE).} 33 | 34 | \item{method}{A string indicating which clustering method should be used. Valid 35 | values include "flowsom", "phenograph", "kmeans", "ddpr", and "xshift".} 36 | } 37 | \value{ 38 | A `tof_tbl` or `tibble`. If augment = FALSE, it will have a single column encoding 39 | the cluster ids for each cell in `tof_tibble`. If augment = TRUE, it will have 40 | ncol(tof_tibble) + 1 columns: each of the (unaltered) columns in `tof_tibble` 41 | plus an additional column encoding the cluster ids. 42 | } 43 | \description{ 44 | This function is a wrapper around tidytof's tof_cluster_* function family. 45 | It performs clustering on high-dimensional cytometry data using a user-specified method (of 5 choices) 46 | and each method's corresponding input parameters.
47 | } 48 | \examples{ 49 | sim_data <- 50 | dplyr::tibble( 51 | cd45 = rnorm(n = 500), 52 | cd38 = rnorm(n = 500), 53 | cd34 = rnorm(n = 500), 54 | cd19 = rnorm(n = 500) 55 | ) 56 | 57 | tof_cluster(tof_tibble = sim_data, method = "kmeans") 58 | tof_cluster(tof_tibble = sim_data, method = "phenograph") 59 | 60 | } 61 | \seealso{ 62 | Other clustering functions: 63 | \code{\link{tof_cluster_ddpr}()}, 64 | \code{\link{tof_cluster_flowsom}()}, 65 | \code{\link{tof_cluster_kmeans}()}, 66 | \code{\link{tof_cluster_phenograph}()} 67 | } 68 | \concept{clustering functions} 69 | -------------------------------------------------------------------------------- /man/tof_cluster_grouped.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustering.R 3 | \name{tof_cluster_grouped} 4 | \alias{tof_cluster_grouped} 5 | \title{Cluster (grouped) high-dimensional cytometry data.} 6 | \usage{ 7 | tof_cluster_grouped(tof_tibble, group_cols, ..., augment = TRUE, method) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 11 | 12 | \item{group_cols}{An unquoted column name indicating which columns 13 | should be used to group cells before clustering. Clustering is then performed 14 | on each group independently.} 15 | 16 | \item{...}{Additional arguments to pass to the `tof_cluster_*` 17 | function family member corresponding to the chosen method.} 18 | 19 | \item{augment}{A boolean value indicating if the output should column-bind the 20 | cluster ids of each cell as a new column in `tof_tibble` (TRUE, the default) or if 21 | a single-column tibble including only the cluster ids should be returned (FALSE).} 22 | 23 | \item{method}{A string indicating which clustering methods should be used. 
Valid 24 | values include "flowsom", "phenograph", "kmeans", "ddpr", and "xshift".} 25 | } 26 | \value{ 27 | A `tof_tbl` or `tibble`. If augment = FALSE, it will have a single column encoding 28 | the cluster ids for each cell in `tof_tibble`. If augment = TRUE, it will have 29 | ncol(tof_tibble) + 1 columns: each of the (unaltered) columns in `tof_tibble` 30 | plus an additional column encoding the cluster ids. 31 | } 32 | \description{ 33 | This function is a wrapper around tidytof's tof_cluster_* function family and 34 | provides a low-level API for clustering grouped data frames. It is a subroutine 35 | of tof_cluster and shouldn't be called directly by users. 36 | } 37 | -------------------------------------------------------------------------------- /man/tof_cluster_kmeans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustering.R 3 | \name{tof_cluster_kmeans} 4 | \alias{tof_cluster_kmeans} 5 | \title{Perform k-means clustering on high-dimensional cytometry data.} 6 | \usage{ 7 | tof_cluster_kmeans( 8 | tof_tibble, 9 | cluster_cols = where(tof_is_numeric), 10 | num_clusters = 20, 11 | ... 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 16 | 17 | \item{cluster_cols}{Unquoted column names indicating which columns in `tof_tibble` to 18 | use in computing the k-means clusters. Defaults to all numeric columns 19 | in `tof_tibble`. Supports tidyselect helpers.} 20 | 21 | \item{num_clusters}{An integer indicating the maximum number of clusters 22 | that should be returned. Defaults to 20.} 23 | 24 | \item{...}{Optional additional arguments that can be passed to 25 | \code{\link[stats]{kmeans}}.} 26 | } 27 | \value{ 28 | A tibble with one column named `.kmeans_cluster`. This column will contain an 29 | integer vector of length `nrow(tof_tibble)` indicating the id of 30 | the k-means cluster to which each cell (i.e.
each row) in `tof_tibble` was assigned. 31 | } 32 | \description{ 33 | This function performs k-means clustering on high-dimensional cytometry data using a user-specified 34 | selection of input variables/high-dimensional cytometry measurements. It is mostly a convenient 35 | wrapper around \code{\link[stats]{kmeans}}. 36 | } 37 | \examples{ 38 | sim_data <- 39 | dplyr::tibble( 40 | cd45 = rnorm(n = 1000), 41 | cd38 = rnorm(n = 1000), 42 | cd34 = rnorm(n = 1000), 43 | cd19 = rnorm(n = 1000) 44 | ) 45 | tof_cluster_kmeans(tof_tibble = sim_data) 46 | tof_cluster_kmeans(tof_tibble = sim_data, cluster_cols = c(cd45, cd19)) 47 | 48 | } 49 | \seealso{ 50 | Other clustering functions: 51 | \code{\link{tof_cluster}()}, 52 | \code{\link{tof_cluster_ddpr}()}, 53 | \code{\link{tof_cluster_flowsom}()}, 54 | \code{\link{tof_cluster_phenograph}()} 55 | } 56 | \concept{clustering functions} 57 | -------------------------------------------------------------------------------- /man/tof_cluster_phenograph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustering.R 3 | \name{tof_cluster_phenograph} 4 | \alias{tof_cluster_phenograph} 5 | \title{Perform PhenoGraph clustering on high-dimensional cytometry data.} 6 | \usage{ 7 | tof_cluster_phenograph( 8 | tof_tibble, 9 | cluster_cols = where(tof_is_numeric), 10 | num_neighbors = 30, 11 | distance_function = c("euclidean", "cosine"), 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 17 | 18 | \item{cluster_cols}{Unquoted column names indicating which columns in `tof_tibble` to 19 | use in computing the PhenoGraph clusters. Defaults to all numeric columns 20 | in `tof_tibble`. Supports tidyselect helpers.} 21 | 22 | \item{num_neighbors}{An integer indicating the number of neighbors to use when 23 | constructing PhenoGraph's k-nearest-neighbor graph. 
Smaller values emphasize 24 | local graph structure; larger values emphasize global graph structure (and 25 | will add time to the computation). Defaults to 30.} 26 | 27 | \item{distance_function}{A string indicating which distance function to use 28 | for the nearest-neighbor calculation. Options include "euclidean" 29 | (the default) and "cosine" distances.} 30 | 31 | \item{...}{Optional additional parameters that can be passed to 32 | \code{\link{tof_find_knn}}.} 33 | } 34 | \value{ 35 | A tibble with one column named `.phenograph_cluster`. This column will contain an 36 | integer vector of length `nrow(tof_tibble)` indicating the id of 37 | the PhenoGraph cluster to which each cell (i.e. each row) in `tof_tibble` was assigned. 38 | } 39 | \description{ 40 | This function performs PhenoGraph clustering on high-dimensional cytometry data using a user-specified 41 | selection of input variables/high-dimensional cytometry measurements. 42 | } 43 | \details{ 44 | For additional details about the Phenograph algorithm, 45 | see \href{https://pubmed.ncbi.nlm.nih.gov/26095251/}{this paper}. 
46 | } 47 | \examples{ 48 | sim_data <- 49 | dplyr::tibble( 50 | cd45 = rnorm(n = 1000), 51 | cd38 = rnorm(n = 1000), 52 | cd34 = rnorm(n = 1000), 53 | cd19 = rnorm(n = 1000) 54 | ) 55 | tof_cluster_phenograph(tof_tibble = sim_data) 56 | tof_cluster_phenograph(tof_tibble = sim_data, cluster_cols = c(cd45, cd19)) 57 | 58 | } 59 | \seealso{ 60 | Other clustering functions: 61 | \code{\link{tof_cluster}()}, 62 | \code{\link{tof_cluster_ddpr}()}, 63 | \code{\link{tof_cluster_flowsom}()}, 64 | \code{\link{tof_cluster_kmeans}()} 65 | } 66 | \concept{clustering functions} 67 | -------------------------------------------------------------------------------- /man/tof_cluster_tibble.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustering.R 3 | \name{tof_cluster_tibble} 4 | \alias{tof_cluster_tibble} 5 | \title{Cluster (ungrouped) high-dimensional cytometry data.} 6 | \usage{ 7 | tof_cluster_tibble(tof_tibble, ..., augment = TRUE, method) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 11 | 12 | \item{...}{Additional arguments to pass to the `tof_cluster_*` 13 | function family member corresponding to the chosen method.} 14 | 15 | \item{augment}{A boolean value indicating if the output should column-bind the 16 | cluster ids of each cell as a new column in `tof_tibble` (TRUE, the default) or if 17 | a single-column tibble including only the cluster ids should be returned (FALSE).} 18 | 19 | \item{method}{A string indicating which clustering method should be used. Valid 20 | values include "flowsom", "phenograph", "kmeans", "ddpr", and "xshift".} 21 | } 22 | \value{ 23 | A `tof_tbl` or `tibble`. If augment = FALSE, it will have a single column encoding 24 | the cluster ids for each cell in `tof_tibble`.
If augment = TRUE, it will have 25 | ncol(tof_tibble) + 1 columns: each of the (unaltered) columns in `tof_tibble` 26 | plus an additional column encoding the cluster ids. 27 | } 28 | \description{ 29 | This function is a wrapper around tidytof's tof_cluster_* function family and 30 | provides a low-level API for clustering ungrouped data frames. It is a subroutine 31 | of tof_cluster and shouldn't be called directly by users. 32 | } 33 | -------------------------------------------------------------------------------- /man/tof_compute_km_curve.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_compute_km_curve} 4 | \alias{tof_compute_km_curve} 5 | \title{Compute a Kaplan-Meier curve from sample-level survival data} 6 | \usage{ 7 | tof_compute_km_curve(survival_curves) 8 | } 9 | \arguments{ 10 | \item{survival_curves}{A tibble from which the Kaplan-Meier curve will be 11 | computed. Each row must represent an observation and must have two 12 | columns named "time_to_event" and "event".} 13 | } 14 | \value{ 15 | A tibble with 3 columns: time_to_event, survival_probability, and 16 | is_censored (whether or not an event was censored at that timepoint). 
17 | } 18 | \description{ 19 | Compute a Kaplan-Meier curve from sample-level survival data 20 | } 21 | -------------------------------------------------------------------------------- /man/tof_cosine_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/developmental_classifier_helpers.R 3 | \name{tof_cosine_dist} 4 | \alias{tof_cosine_dist} 5 | \title{A function for finding the cosine distance between each of the rows of a numeric 6 | matrix and a numeric vector.} 7 | \usage{ 8 | tof_cosine_dist(matrix, vector) 9 | } 10 | \arguments{ 11 | \item{matrix}{A numeric matrix.} 12 | 13 | \item{vector}{A numeric vector.} 14 | } 15 | \value{ 16 | A numeric vector of distances of length `nrow(matrix)` in which the 17 | ith entry represents the cosine distance between the ith row of `matrix` and 18 | `vector`. 19 | } 20 | \description{ 21 | A function for finding the cosine distance between each of the rows of a numeric 22 | matrix and a numeric vector. 23 | } 24 | \examples{ 25 | NULL 26 | } 27 | -------------------------------------------------------------------------------- /man/tof_create_grid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/patient-level_modeling.R 3 | \name{tof_create_grid} 4 | \alias{tof_create_grid} 5 | \title{Create an elastic net hyperparameter search grid of a specified size} 6 | \usage{ 7 | tof_create_grid( 8 | penalty_values, 9 | mixture_values, 10 | num_penalty_values = 5, 11 | num_mixture_values = 5 12 | ) 13 | } 14 | \arguments{ 15 | \item{penalty_values}{A numeric vector of the unique elastic net penalty values ("lambda") 16 | to include in the 17 | hyperparameter grid. 
If unspecified, a regular grid with `num_penalty_values` values between 18 | 10^(-10) and 10^(0) will be used.} 19 | 20 | \item{mixture_values}{A numeric vector of all elastic net mixture values ("alpha") to include in the 21 | hyperparameter grid. If unspecified, a regular grid with `num_mixture_values` values between 22 | 0 and 1 will be used.} 23 | 24 | \item{num_penalty_values}{Optional. If `penalty_values` is not supplied, `num_penalty_values` 25 | (an integer) can be given to specify how many equally-spaced penalty values between 26 | 10^(-10) and 1 should be included in the hyperparameter grid. If this method is used, 27 | the regular grid will always be returned. Defaults to 5.} 28 | 29 | \item{num_mixture_values}{Optional. If `mixture_values` is not supplied, `num_mixture_values` 30 | (an integer) can be given to specify how many equally-spaced mixture values between 31 | 0 (ridge regression) and 1 (lasso) should be included in the hyperparameter grid. If this method is used, 32 | the regular grid will always be returned. Defaults to 5.} 33 | } 34 | \value{ 35 | A tibble with two numeric columns: `penalty` and `mixture`. 36 | } 37 | \description{ 38 | This function creates a regular hyperparameter search grid (in the form of a 39 | \code{\link[dplyr]{tibble}}) specifying the search space for the two 40 | hyperparameters of a generalized linear model using the glmnet package: 41 | the regularization penalty term 42 | and the lasso/ridge regression mixture term.
43 | } 44 | \examples{ 45 | tof_create_grid() 46 | 47 | tof_create_grid(num_penalty_values = 10, num_mixture_values = 5) 48 | 49 | tof_create_grid(penalty_values = c(0.01, 0.1, 0.5)) 50 | 51 | } 52 | \seealso{ 53 | Other modeling functions: 54 | \code{\link{tof_assess_model}()}, 55 | \code{\link{tof_predict}()}, 56 | \code{\link{tof_split_data}()}, 57 | \code{\link{tof_train_model}()} 58 | } 59 | \concept{modeling functions} 60 | -------------------------------------------------------------------------------- /man/tof_create_recipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_create_recipe} 4 | \alias{tof_create_recipe} 5 | \title{Create a recipe for preprocessing sample-level cytometry data for an elastic net model} 6 | \usage{ 7 | tof_create_recipe( 8 | feature_tibble, 9 | predictor_cols, 10 | outcome_cols, 11 | standardize_predictors = TRUE, 12 | remove_zv_predictors = FALSE, 13 | impute_missing_predictors = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{feature_tibble}{A tibble in which each row represents a sample- or patient- 18 | level observation, such as those produced by \code{tof_extract_features}.} 19 | 20 | \item{predictor_cols}{Unquoted column names indicating which columns in the 21 | data contained in `feature_tibble` should be used as predictors in the elastic net model. 22 | Supports tidyselect helpers.} 23 | 24 | \item{outcome_cols}{Unquoted column names indicating which columns in 25 | `feature_tibble` should be used as outcome variables in the elastic net model. 26 | Supports tidyselect helpers.} 27 | 28 | \item{standardize_predictors}{A logical value indicating if numeric predictor columns 29 | should be standardized (centered and scaled) before model fitting. 
Defaults to TRUE.} 30 | 31 | \item{remove_zv_predictors}{A logical value indicating if predictor columns 32 | with near-zero variance should be removed before model fitting using 33 | \code{\link[recipes]{step_nzv}}. Defaults to FALSE.} 34 | 35 | \item{impute_missing_predictors}{A logical value indicating if predictor columns 36 | should have missing values imputed using k-nearest neighbors before model fitting (see 37 | \code{\link[recipes]{step_impute_knn}}). Imputation is performed using an observation's 38 | 5 nearest-neighbors. Defaults to FALSE.} 39 | } 40 | \value{ 41 | A \code{\link[recipes]{recipe}} object. 42 | } 43 | \description{ 44 | Create a recipe for preprocessing sample-level cytometry data for an elastic net model 45 | } 46 | -------------------------------------------------------------------------------- /man/tof_downsample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downsampling.R 3 | \name{tof_downsample} 4 | \alias{tof_downsample} 5 | \title{Downsample high-dimensional cytometry data.} 6 | \usage{ 7 | tof_downsample( 8 | tof_tibble, 9 | group_cols = NULL, 10 | ..., 11 | method = c("constant", "prop", "density") 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 16 | 17 | \item{group_cols}{Unquoted names of the columns in `tof_tibble` that should 18 | be used to define groups within which the downsampling will be performed. 19 | Supports tidyselect helpers. Defaults to `NULL` (no grouping).} 20 | 21 | \item{...}{Additional arguments to pass to the `tof_downsample_*` function 22 | family member corresponding to the chosen method.} 23 | 24 | \item{method}{A string indicating which downsampling method to use: "constant" 25 | (the default), "prop", or "density".} 26 | } 27 | \value{ 28 | A downsampled `tof_tbl` with the same number of columns as the input 29 | `tof_tibble`, but fewer rows. 
The number of rows in the result will depend 30 | on the chosen downsampling method. 31 | } 32 | \description{ 33 | This function downsamples the number of cells in a `tof_tbl` using 34 | one of three methods (randomly sampling a constant number of cells, 35 | randomly sampling a proportion of cells, or performing density-dependent 36 | downsampling per the algorithm in 37 | \href{https://pubmed.ncbi.nlm.nih.gov/21964415/}{Qiu et al. (2011)}). 38 | } 39 | \examples{ 40 | sim_data <- 41 | dplyr::tibble( 42 | cd45 = rnorm(n = 1000), 43 | cd38 = rnorm(n = 1000), 44 | cd34 = rnorm(n = 1000), 45 | cd19 = rnorm(n = 1000), 46 | cluster_id = sample(letters, size = 1000, replace = TRUE) 47 | ) 48 | 49 | # sample 200 cells from the input data 50 | tof_downsample( 51 | tof_tibble = sim_data, 52 | num_cells = 200L, 53 | method = "constant" 54 | ) 55 | 56 | # sample 10\% of all cells from the input data 57 | tof_downsample( 58 | tof_tibble = sim_data, 59 | prop_cells = 0.1, 60 | method = "prop" 61 | ) 62 | 63 | # sample ~10\% of cells from the input data using density dependence 64 | tof_downsample( 65 | tof_tibble = sim_data, 66 | target_prop_cells = 0.1, 67 | method = "density" 68 | ) 69 | 70 | } 71 | \seealso{ 72 | Other downsampling functions: 73 | \code{\link{tof_downsample_constant}()}, 74 | \code{\link{tof_downsample_density}()}, 75 | \code{\link{tof_downsample_prop}()} 76 | } 77 | \concept{downsampling functions} 78 | -------------------------------------------------------------------------------- /man/tof_downsample_constant.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/downsampling.R 3 | \name{tof_downsample_constant} 4 | \alias{tof_downsample_constant} 5 | \title{Downsample high-dimensional cytometry data by randomly selecting a constant number of cells per group.} 6 | \usage{ 7 | tof_downsample_constant(tof_tibble, group_cols = NULL,
num_cells) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{group_cols}{Unquoted names of the columns in `tof_tibble` that should 13 | be used to define groups from which `num_cells` will be downsampled. 14 | Supports tidyselect helpers. Defaults to `NULL` (no grouping).} 15 | 16 | \item{num_cells}{An integer number of cells that should be sampled from each 17 | group defined by `group_cols`.} 18 | } 19 | \value{ 20 | A `tof_tbl` with the same number of columns as the input `tof_tibble`, 21 | but fewer rows. Specifically, the number of rows will be `num_cells` multiplied 22 | by the number of unique combinations of the values in `group_cols`. If any group 23 | has fewer than `num_cells` number of cells, all cells from that group will be 24 | kept. 25 | } 26 | \description{ 27 | This function downsamples the number of cells in a `tof_tbl` by randomly selecting 28 | `num_cells` cells from each unique combination of values in `group_cols`. 29 | } 30 | \examples{ 31 | sim_data <- 32 | dplyr::tibble( 33 | cd45 = rnorm(n = 1000), 34 | cd38 = rnorm(n = 1000), 35 | cd34 = rnorm(n = 1000), 36 | cd19 = rnorm(n = 1000), 37 | cluster_id = sample(letters, size = 1000, replace = TRUE) 38 | ) 39 | 40 | # sample 500 cells from the input data 41 | tof_downsample_constant( 42 | tof_tibble = sim_data, 43 | num_cells = 500L 44 | ) 45 | 46 | # sample 20 cells per cluster from the input data 47 | tof_downsample_constant( 48 | tof_tibble = sim_data, 49 | group_cols = cluster_id, 50 | num_cells = 20L 51 | ) 52 | 53 | } 54 | \seealso{ 55 | Other downsampling functions: 56 | \code{\link{tof_downsample}()}, 57 | \code{\link{tof_downsample_density}()}, 58 | \code{\link{tof_downsample_prop}()} 59 | } 60 | \concept{downsampling functions} 61 | -------------------------------------------------------------------------------- /man/tof_downsample_prop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by 
roxygen2: do not edit by hand 2 | % Please edit documentation in R/downsampling.R 3 | \name{tof_downsample_prop} 4 | \alias{tof_downsample_prop} 5 | \title{Downsample high-dimensional cytometry data by randomly selecting a proportion of the cells in each group.} 6 | \usage{ 7 | tof_downsample_prop(tof_tibble, group_cols = NULL, prop_cells) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{group_cols}{Unquoted names of the columns in `tof_tibble` that should 13 | be used to define groups from which `prop_cells` will be downsampled. 14 | Supports tidyselect helpers. Defaults to `NULL` (no grouping).} 15 | 16 | \item{prop_cells}{A proportion of cells (between 0 and 1) that should be sampled 17 | from each group defined by `group_cols`.} 18 | } 19 | \value{ 20 | A `tof_tbl` with the same number of columns as the input `tof_tibble`, 21 | but fewer rows. Specifically, the number of rows should be `prop_cells` times the 22 | number of rows in the input `tof_tibble`. 23 | } 24 | \description{ 25 | This function downsamples the number of cells in a `tof_tbl` by randomly selecting 26 | a `prop_cells` proportion of the total number of cells with each unique combination 27 | of values in `group_cols`. 
28 | } 29 | \examples{ 30 | sim_data <- 31 | dplyr::tibble( 32 | cd45 = rnorm(n = 1000), 33 | cd38 = rnorm(n = 1000), 34 | cd34 = rnorm(n = 1000), 35 | cd19 = rnorm(n = 1000), 36 | cluster_id = sample(letters, size = 1000, replace = TRUE) 37 | ) 38 | 39 | # sample 10\% of all cells from the input data 40 | tof_downsample_prop( 41 | tof_tibble = sim_data, 42 | prop_cells = 0.1 43 | ) 44 | 45 | # sample 10\% of all cells from each cluster in the input data 46 | tof_downsample_prop( 47 | tof_tibble = sim_data, 48 | group_cols = cluster_id, 49 | prop_cells = 0.1 50 | ) 51 | 52 | } 53 | \seealso{ 54 | Other downsampling functions: 55 | \code{\link{tof_downsample}()}, 56 | \code{\link{tof_downsample_constant}()}, 57 | \code{\link{tof_downsample_density}()} 58 | } 59 | \concept{downsampling functions} 60 | -------------------------------------------------------------------------------- /man/tof_estimate_density.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_estimate_density} 4 | \alias{tof_estimate_density} 5 | \title{Estimate the local densities for all cells in a high-dimensional cytometry dataset.} 6 | \usage{ 7 | tof_estimate_density( 8 | tof_tibble, 9 | distance_cols = where(tof_is_numeric), 10 | distance_function = c("euclidean", "cosine", "l2", "ip"), 11 | normalize = TRUE, 12 | ..., 13 | augment = TRUE, 14 | method = c("mean_distance", "sum_distance", "spade") 15 | ) 16 | } 17 | \arguments{ 18 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 19 | 20 | \item{distance_cols}{Unquoted names of the columns in `tof_tibble` to use in 21 | calculating cell-to-cell distances during the local density estimation for 22 | each cell. 
Defaults to all numeric columns in `tof_tibble`.} 23 | 24 | \item{distance_function}{A string indicating which distance function to use 25 | for calculating cell-to-cell distances during local density estimation. Options 26 | include "euclidean" (the default) and "cosine".} 27 | 28 | \item{normalize}{A boolean value indicating if the vector of local density 29 | estimates should be normalized to values between 0 and 1. Defaults to TRUE.} 30 | 31 | \item{...}{Additional arguments to pass to the `tof_*_density()` function family 32 | member corresponding to the chosen `method`.} 33 | 34 | \item{augment}{A boolean value indicating if the output should column-bind the 35 | local density estimates of each cell as a new column in `tof_tibble` (TRUE; the default) or if 36 | a single-column tibble including only the local density estimates should be returned (FALSE).} 37 | 38 | \item{method}{A string indicating which local density estimation method should be used. 39 | Valid values include "mean_distance", "sum_distance", and "spade".} 40 | } 41 | \value{ 42 | A `tof_tbl` or `tibble`. If augment = FALSE, it will have a single column encoding 43 | the local density estimates for each cell in `tof_tibble`. If augment = TRUE, it will have 44 | ncol(tof_tibble) + 1 columns: each of the (unaltered) columns in `tof_tibble` 45 | plus an additional column encoding the local density estimates. 46 | } 47 | \description{ 48 | This function is a wrapper around tidytof's tof_*_density() function family. 49 | It performs local density estimation on high-dimensional cytometry data using a user-specified 50 | method (of 3 choices) and each method's corresponding input parameters. 
51 | } 52 | \examples{ 53 | sim_data <- 54 | dplyr::tibble( 55 | cd45 = rnorm(n = 1000), 56 | cd38 = rnorm(n = 1000), 57 | cd34 = rnorm(n = 1000), 58 | cd19 = rnorm(n = 1000) 59 | ) 60 | 61 | # perform the density estimation 62 | tof_estimate_density(tof_tibble = sim_data, method = "spade") 63 | 64 | # perform the density estimation with a smaller search radius around 65 | # each cell 66 | tof_estimate_density( 67 | tof_tibble = sim_data, 68 | alpha_multiplier = 2, 69 | method = "spade" 70 | ) 71 | 72 | } 73 | \seealso{ 74 | Other local density estimation functions: 75 | \code{\link{tof_knn_density}()}, 76 | \code{\link{tof_spade_density}()} 77 | } 78 | \concept{local density estimation functions} 79 | -------------------------------------------------------------------------------- /man/tof_find_best.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_find_best} 4 | \alias{tof_find_best} 5 | \title{Find the optimal hyperparameters for an elastic net model from candidate performance metrics} 6 | \usage{ 7 | tof_find_best(performance_metrics, model_type, optimization_metric) 8 | } 9 | \arguments{ 10 | \item{performance_metrics}{A tibble of performance metrics for an elastic 11 | net model (in wide format)} 12 | 13 | \item{model_type}{A string indicating which type of glmnet model was trained.} 14 | 15 | \item{optimization_metric}{A string indicating which performance metric should 16 | be used to select the optimal model.} 17 | } 18 | \value{ 19 | A tibble with 3 columns: "mixture", "penalty", and a column containing 20 | the chosen optimization metric. If the returned tibble has more than 1 row, 21 | it means that more than 1 mixture/penalty combination yielded the optimal 22 | result (i.e. the tuning procedure resulted in a tie). 
23 | } 24 | \description{ 25 | Find the optimal hyperparameters for an elastic net model from candidate performance metrics 26 | } 27 | -------------------------------------------------------------------------------- /man/tof_find_cv_predictions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_find_cv_predictions} 4 | \alias{tof_find_cv_predictions} 5 | \title{Calculate and store the predicted outcomes for each validation set observation during model tuning} 6 | \usage{ 7 | tof_find_cv_predictions( 8 | split_data, 9 | prepped_recipe, 10 | lambda, 11 | alpha, 12 | model_type, 13 | outcome_colnames 14 | ) 15 | } 16 | \arguments{ 17 | \item{split_data}{An `rsplit` object from the \code{\link[rsample]{rsample}} 18 | package. 19 | Alternatively, an unsplit tbl_df can be provided, though this is not recommended.} 20 | 21 | \item{prepped_recipe}{A trained \code{\link[recipes]{recipe}}} 22 | 23 | \item{lambda}{A single numeric value indicating which penalty (lambda) value 24 | should be used to make the predictions} 25 | 26 | \item{alpha}{A single numeric value indicating which mixture (alpha) value 27 | should be used to make the predictions} 28 | 29 | \item{model_type}{A string indicating which kind of elastic net model to build. 
30 | If a continuous response is being predicted, use "linear" for linear regression; 31 | if a categorical response with only 2 classes is being predicted, use 32 | "two-class" for logistic regression; if a categorical response with more than 2 33 | levels is being predicted, use "multiclass" for multinomial regression; and if 34 | a time-to-event outcome is being predicted, use "survival" for Cox regression.} 35 | 36 | \item{outcome_colnames}{Quoted column names indicating which columns in the data 37 | being fit represent the outcome variables (with all others assumed to be predictors).} 38 | } 39 | \value{ 40 | A tibble containing the predicted and true values for the outcome 41 | for each of the validation observations in `split_data`. 42 | } 43 | \description{ 44 | Calculate and store the predicted outcomes for each validation set observation during model tuning 45 | } 46 | -------------------------------------------------------------------------------- /man/tof_find_emd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_find_emd} 4 | \alias{tof_find_emd} 5 | \title{Find the earth-mover's distance between two numeric vectors} 6 | \usage{ 7 | tof_find_emd(vec_1, vec_2, num_bins = 100) 8 | } 9 | \arguments{ 10 | \item{vec_1}{A numeric vector.} 11 | 12 | \item{vec_2}{A numeric vector.} 13 | 14 | \item{num_bins}{An integer number of bins to use when performing kernel 15 | density estimation on the two vectors. Defaults to 100.} 16 | } 17 | \value{ 18 | A double (of length 1) representing the EMD between the two vectors. 
19 | } 20 | \description{ 21 | Find the earth-mover's distance between two numeric vectors 22 | } 23 | -------------------------------------------------------------------------------- /man/tof_find_jsd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_find_jsd} 4 | \alias{tof_find_jsd} 5 | \title{Find the Jensen-Shannon Divergence (JSD) between two numeric vectors} 6 | \usage{ 7 | tof_find_jsd(vec_1, vec_2, num_bins = 100) 8 | } 9 | \arguments{ 10 | \item{vec_1}{A numeric vector.} 11 | 12 | \item{vec_2}{A numeric vector.} 13 | 14 | \item{num_bins}{An integer number of bins to use when binning 15 | across the two vectors' combined range. Defaults to 100.} 16 | } 17 | \value{ 18 | A double (of length 1) representing the JSD between the two vectors. 19 | } 20 | \description{ 21 | Find the Jensen-Shannon Divergence (JSD) between two numeric vectors 22 | } 23 | -------------------------------------------------------------------------------- /man/tof_find_knn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_find_knn} 4 | \alias{tof_find_knn} 5 | \title{Find the k-nearest neighbors of each cell in a high-dimensional cytometry dataset.} 6 | \usage{ 7 | tof_find_knn( 8 | .data, 9 | k = min(10, nrow(.data)), 10 | distance_function = c("euclidean", "cosine", "l2", "ip"), 11 | .query, 12 | ... 
13 | ) 14 | } 15 | \arguments{ 16 | \item{.data}{A `tof_tibble` or `tibble` in which each row represents a cell 17 | and each column represents a high-dimensional cytometry measurement.} 18 | 19 | \item{k}{An integer indicating the number of nearest neighbors to return for 20 | each cell.} 21 | 22 | \item{distance_function}{A string indicating which distance function to use 23 | for the nearest-neighbor calculation. Options include "euclidean" 24 | (the default) and "cosine" distances.} 25 | 26 | \item{.query}{A set of cells to be queried against .data (i.e. a set of cells 27 | for which to find nearest neighbors within .data). Defaults to .data itself, 28 | i.e. finding nearest neighbors for all cells in .data.} 29 | 30 | \item{...}{Optional additional arguments to pass to \code{\link[RcppHNSW]{hnsw_knn}}} 31 | } 32 | \value{ 33 | A list with two elements: "neighbor_ids" and "neighbor_distances," 34 | both of which are n by k matrices (in which n is the number of cells in the 35 | input `.data`). The [i,j]-th entry of "neighbor_ids" represents the row index 36 | for the j-th nearest neighbor of the cell in the i-th row of `.data`. 37 | The [i,j]-th entry of "neighbor_distances" represents the distance between 38 | those two cells according to `distance_function`. 39 | } 40 | \description{ 41 | Find the k-nearest neighbors of each cell in a high-dimensional cytometry dataset. 
42 | } 43 | \examples{ 44 | sim_data <- 45 | dplyr::tibble( 46 | cd45 = rnorm(n = 1000), 47 | cd38 = rnorm(n = 1000), 48 | cd34 = rnorm(n = 1000), 49 | cd19 = rnorm(n = 1000) 50 | ) 51 | 52 | # Find the 10 nearest neighbors of each cell in the dataset 53 | tof_find_knn( 54 | .data = sim_data, 55 | k = 10, 56 | distance_function = "euclidean" 57 | ) 58 | 59 | # Find the 10 approximate nearest neighbors 60 | tof_find_knn( 61 | .data = sim_data, 62 | k = 10, 63 | distance_function = "euclidean", 64 | ) 65 | 66 | } 67 | -------------------------------------------------------------------------------- /man/tof_find_log_rank_threshold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_find_log_rank_threshold} 4 | \alias{tof_find_log_rank_threshold} 5 | \title{Compute the log-rank test p-value for the difference between the two survival 6 | curves obtained by splitting a dataset into a "low" and "high" risk group 7 | using all possible relative-risk thresholds.} 8 | \usage{ 9 | tof_find_log_rank_threshold(input_data, relative_risk_col, time_col, event_col) 10 | } 11 | \arguments{ 12 | \item{input_data}{A tbl_df or data.frame in which each observation is a row.} 13 | 14 | \item{relative_risk_col}{An unquoted column name indicating which column contains 15 | the relative-risk estimates for each observation.} 16 | 17 | \item{time_col}{An unquoted column name indicating which column contains the 18 | true time-to-event information for each observation.} 19 | 20 | \item{event_col}{An unquoted column name indicating which column contains the 21 | outcome (event or censorship). 
Must be a binary column - all values should be 22 | either 0 or 1 (with 1 indicating the adverse event and 0 indicating 23 | censorship) or FALSE and TRUE (with TRUE indicating the 24 | adverse event and FALSE indicating censorship).} 25 | } 26 | \value{ 27 | A tibble with 3 columns: "candidate_thresholds" (the relative-risk threshold 28 | used for the log-rank test), "log_rank_p_val" (the p-values of the log-rank 29 | tests) and "is_best" (a logical value indicating which candidate threshold gave 30 | the optimal, i.e. smallest, p-value). 31 | } 32 | \description{ 33 | Compute the log-rank test p-value for the difference between the two survival 34 | curves obtained by splitting a dataset into a "low" and "high" risk group 35 | using all possible relative-risk thresholds. 36 | } 37 | -------------------------------------------------------------------------------- /man/tof_find_panel_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_find_panel_info} 4 | \alias{tof_find_panel_info} 5 | \title{Use tidytof's opinionated heuristic for extracting a high-dimensional cytometry panel's metal-antigen pairs 6 | from a flowFrame (read from a .fcs file.)} 7 | \usage{ 8 | tof_find_panel_info(input_flowFrame) 9 | } 10 | \arguments{ 11 | \item{input_flowFrame}{a raw flowFrame (just read from an .fcs file) from which 12 | a high-dimensional cytometry panel should be extracted} 13 | } 14 | \value{ 15 | A tibble with 2 columns (`metals` and `antigens`) that correspond to the 16 | metals and antigens of the high-dimensional cytometry panel used during data acquisition. 17 | } 18 | \description{ 19 | Using the character vectors obtained from the `name` and `desc` columns of 20 | the parameters of the data of a flowFrame, figure out the high-dimensional cytometry panel used 21 | to collect the data and return it as a tidy tibble. 
22 | } 23 | -------------------------------------------------------------------------------- /man/tof_fit_split.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_fit_split} 4 | \alias{tof_fit_split} 5 | \title{Fit a glmnet model and calculate performance metrics using a single rsplit object} 6 | \usage{ 7 | tof_fit_split( 8 | split_data, 9 | prepped_recipe, 10 | hyperparameter_grid, 11 | model_type, 12 | outcome_colnames 13 | ) 14 | } 15 | \arguments{ 16 | \item{split_data}{An `rsplit` object from the \code{\link[rsample]{rsample}} 17 | package. 18 | Alternatively, an unsplit tbl_df can be provided, though this is not recommended.} 19 | 20 | \item{prepped_recipe}{A trained \code{\link[recipes]{recipe}}} 21 | 22 | \item{hyperparameter_grid}{A tibble containing the hyperparameter values to tune. 23 | Can be created using \code{\link{tof_create_grid}}} 24 | 25 | \item{model_type}{A string representing the type of glmnet model being fit.} 26 | 27 | \item{outcome_colnames}{Quoted column names indicating which columns in the data 28 | being fit represent the outcome variables (with all others assumed to be predictors).} 29 | } 30 | \value{ 31 | A tibble with the same number of rows as the input hyperparameter grid. 32 | Each row represents a combination of mixture and penalty, and each column contains 33 | a performance metric for the fitted glmnet model on `split_data`'s holdout set. 
34 | The specific performance metrics depend on the type of model being fit: 35 | \describe{ 36 | \item{"linear"}{mean-squared error (`mse`) and mean absolute error (`mae`)} 37 | \item{"two-class"}{binomial deviance (`binomial_deviance`); misclassification error rate 38 | (`misclassification_error`); the area under the receiver-operating curve (`roc_auc`); 39 | and `mse` and `mae` as above} 40 | \item{"multiclass"}{multinomial deviance (`multinomial_deviance`); misclassification error rate 41 | (`misclassification_error`); the area under the receiver-operating curve (`roc_auc`) 42 | computed using the Hand-Till method in \code{\link[yardstick]{roc_auc}}; 43 | and `mse` and `mae` as above} 44 | \item{"survival"}{the negative log2-transformed partial likelihood (`neg_log_partial_likelihood`) 45 | and Harrell's concordance index (often simply called "C"; `concordance_index`)} 46 | } 47 | } 48 | \description{ 49 | This function trains a glmnet model on the training set of an rsplit object, then 50 | calculates performance metrics of that model on the validation/holdout set 51 | at all combinations of the mixture and 52 | penalty hyperparameters provided in a hyperparameter grid. 53 | } 54 | \references{ 55 | Harrell Jr, F. E. and Lee, K. L. and Mark, D. B. (1996) Tutorial in biostatistics: multivariable prognostic models: issues in developing models, evaluating assumptions and adequacy, and measuring and reducing error, Statistics in Medicine, 15, pages 361–387. 
56 | } 57 | -------------------------------------------------------------------------------- /man/tof_generate_palette.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_generate_palette} 4 | \alias{tof_generate_palette} 5 | \title{Generate a color palette using tidytof.} 6 | \usage{ 7 | tof_generate_palette(num_colors) 8 | } 9 | \arguments{ 10 | \item{num_colors}{An integer specifying the number of colors you'd like to generate.} 11 | } 12 | \value{ 13 | A character vector of hex codes specifying the colors in the palette. 14 | } 15 | \description{ 16 | This function generates a color palette based on the color palette of the 17 | author's favorite pokemon. 18 | } 19 | \examples{ 20 | tof_generate_palette(num_colors = 5L) 21 | 22 | } 23 | -------------------------------------------------------------------------------- /man/tof_get_model_mixture.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_get_model_mixture} 4 | \alias{tof_get_model_mixture} 5 | \title{Get a `tof_model`'s optimal mixture (alpha) value} 6 | \usage{ 7 | tof_get_model_mixture(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | A numeric value 14 | } 15 | \description{ 16 | Get a `tof_model`'s optimal mixture (alpha) value 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 
33 | ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = outcome, 46 | model_type = "linear" 47 | ) 48 | 49 | tof_get_model_mixture(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_model_outcomes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_get_model_outcomes} 4 | \alias{tof_get_model_outcomes} 5 | \title{Get a `tof_model`'s outcome variable name(s)} 6 | \usage{ 7 | tof_get_model_outcomes(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | A character vector 14 | } 15 | \description{ 16 | Get a `tof_model`'s outcome variable name(s) 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 33 | ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = outcome, 46 | 
model_type = "linear" 47 | ) 48 | 49 | tof_get_model_outcomes(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_model_penalty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_get_model_penalty} 4 | \alias{tof_get_model_penalty} 5 | \title{Get a `tof_model`'s optimal penalty (lambda) value} 6 | \usage{ 7 | tof_get_model_penalty(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | A numeric value 14 | } 15 | \description{ 16 | Get a `tof_model`'s optimal penalty (lambda) value 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 33 | ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = outcome, 46 | model_type = "linear" 47 | ) 48 | 49 | tof_get_model_penalty(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_model_training_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 
| \name{tof_get_model_training_data} 4 | \alias{tof_get_model_training_data} 5 | \title{Get a `tof_model`'s training data} 6 | \usage{ 7 | tof_get_model_training_data(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | A tibble of (non-preprocessed) training data used to fit the model 14 | } 15 | \description{ 16 | Get a `tof_model`'s training data 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 33 | ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = outcome, 46 | model_type = "linear" 47 | ) 48 | 49 | tof_get_model_training_data(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_model_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_get_model_type} 4 | \alias{tof_get_model_type} 5 | \title{Get a `tof_model`'s model type} 6 | \usage{ 7 | tof_get_model_type(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | A string 14 | } 15 | \description{ 16 | Get a `tof_model`'s model type 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | 
dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 33 | ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = outcome, 46 | model_type = "linear" 47 | ) 48 | 49 | tof_get_model_type(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_model_x.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_get_model_x} 4 | \alias{tof_get_model_x} 5 | \title{Get a `tof_model`'s processed predictor matrix (for glmnet)} 6 | \usage{ 7 | tof_get_model_x(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | An x value formatted for glmnet 14 | } 15 | \description{ 16 | Get a `tof_model`'s processed predictor matrix (for glmnet) 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 33 
| ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = outcome, 46 | model_type = "linear" 47 | ) 48 | 49 | tof_get_model_x(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_model_y.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_get_model_y} 4 | \alias{tof_get_model_y} 5 | \title{Get a `tof_model`'s processed outcome variable matrix (for glmnet)} 6 | \usage{ 7 | tof_get_model_y(tof_model) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A tof_model} 11 | } 12 | \value{ 13 | A y value formatted for glmnet 14 | } 15 | \description{ 16 | Get a `tof_model`'s processed outcome variable matrix (for glmnet) 17 | } 18 | \examples{ 19 | feature_tibble <- 20 | dplyr::tibble( 21 | sample = as.character(1:100), 22 | cd45 = runif(n = 100), 23 | pstat5 = runif(n = 100), 24 | cd34 = runif(n = 100), 25 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 26 | class = 27 | as.factor( 28 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 29 | ), 30 | multiclass = 31 | as.factor( 32 | c(rep("class1", 30), rep("class2", 30), rep("class3", 40)) 33 | ), 34 | event = c(rep(0, times = 30), rep(1, times = 70)), 35 | time_to_event = rnorm(n = 100, mean = 10, sd = 2) 36 | ) 37 | 38 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 39 | 40 | # train a regression model 41 | regression_model <- 42 | tof_train_model( 43 | split_data = split_data, 44 | predictor_cols = c(cd45, pstat5, cd34), 45 | response_col = 
outcome, 46 | model_type = "linear" 47 | ) 48 | 49 | tof_get_model_y(regression_model) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/tof_get_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{tof_get_panel} 4 | \alias{tof_get_panel} 5 | \title{Get panel information from a tof_tibble} 6 | \usage{ 7 | tof_get_panel(tof_tibble) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl`.} 11 | } 12 | \value{ 13 | A tibble containing information about the CyTOF panel 14 | that was used during data acquisition for the data contained 15 | in `tof_tibble`. 16 | } 17 | \description{ 18 | Get panel information from a tof_tibble 19 | } 20 | \examples{ 21 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 22 | tof_tibble <- tof_read_data(input_file) 23 | tof_get_panel(tof_tibble) 24 | 25 | } 26 | \seealso{ 27 | Other tof_tbl utilities: 28 | \code{\link{new_tof_tibble}()}, 29 | \code{\link{tof_set_panel}()} 30 | } 31 | \concept{tof_tbl utilities} 32 | -------------------------------------------------------------------------------- /man/tof_is_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_is_numeric} 4 | \alias{tof_is_numeric} 5 | \title{Find if a vector is numeric} 6 | \usage{ 7 | tof_is_numeric(.vec) 8 | } 9 | \arguments{ 10 | \item{.vec}{A vector.} 11 | } 12 | \value{ 13 | A boolean value indicating if .vec is of type integer or double. 14 | } 15 | \description{ 16 | This function takes an input vector `.vec` and checks if it is either an 17 | integer or a double (i.e. is the type of vector that might encode high-dimensional cytometry 18 | measurements). 
19 | } 20 | -------------------------------------------------------------------------------- /man/tof_knn_density.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_knn_density} 4 | \alias{tof_knn_density} 5 | \title{Estimate cells' local densities using K-nearest-neighbor density estimation} 6 | \usage{ 7 | tof_knn_density( 8 | tof_tibble, 9 | distance_cols = where(tof_is_numeric), 10 | num_neighbors = min(15L, nrow(tof_tibble)), 11 | distance_function = c("euclidean", "cosine", "l2", "ip"), 12 | estimation_method = c("mean_distance", "sum_distance"), 13 | normalize = TRUE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 19 | 20 | \item{distance_cols}{Unquoted names of the columns in `tof_tibble` to use in 21 | calculating cell-to-cell distances during the local density estimation for 22 | each cell. Defaults to all numeric columns in `tof_tibble`.} 23 | 24 | \item{num_neighbors}{An integer indicating the number of nearest neighbors 25 | to use in estimating the local density of each cell. Defaults to the minimum 26 | of 15 and the number of rows in `tof_tibble`.} 27 | 28 | \item{distance_function}{A string indicating which distance function to use 29 | for calculating cell-to-cell distances during local density estimation. Options 30 | include "euclidean" (the default), "cosine", "l2", and "ip".} 31 | 32 | \item{estimation_method}{A string indicating how the relative density for each cell should be 33 | calculated from the distances between it and each of its k nearest neighbors. 
Options are 34 | "mean_distance" (the default; estimates the relative density for a cell's neighborhood by 35 | taking the negative average of the distances to its nearest neighbors) and "sum_distance" 36 | (estimates the relative density for a cell's neighborhood by taking the negative sum of the 37 | distances to its nearest neighbors).} 38 | 39 | \item{normalize}{A boolean value indicating if the vector of local density 40 | estimates should be normalized to values between 0 and 1. Defaults to TRUE.} 41 | 42 | \item{...}{Additional optional arguments to pass to 43 | \code{\link{tof_find_knn}}.} 44 | } 45 | \value{ 46 | A tibble with a single column named ".knn_density" containing the 47 | local density estimates for each input cell in `tof_tibble`. 48 | } 49 | \description{ 50 | This function uses the distances between a cell and each of its K nearest 51 | neighbors to estimate local density of each cell in a 52 | `tof_tbl` or `tibble` containing high-dimensional cytometry data. 53 | } 54 | \seealso{ 55 | Other local density estimation functions: 56 | \code{\link{tof_estimate_density}()}, 57 | \code{\link{tof_spade_density}()} 58 | } 59 | \concept{local density estimation functions} 60 | -------------------------------------------------------------------------------- /man/tof_log_rank_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_log_rank_test} 4 | \alias{tof_log_rank_test} 5 | \title{Compute the log-rank test p-value for the difference between the two survival 6 | curves obtained by splitting a dataset into a "low" and "high" risk group 7 | using a given relative-risk threshold.} 8 | \usage{ 9 | tof_log_rank_test( 10 | input_data, 11 | relative_risk_col, 12 | time_col, 13 | event_col, 14 | threshold 15 | ) 16 | } 17 | \arguments{ 18 | \item{input_data}{A tbl_df or data.frame in which each observation 
is a row.} 19 | 20 | \item{relative_risk_col}{An unquoted column name indicating which column contains 21 | the relative-risk estimates for each observation.} 22 | 23 | \item{time_col}{An unquoted column name indicating which column contains the 24 | true time-to-event information for each observation.} 25 | 26 | \item{event_col}{An unquoted column name indicating which column contains the 27 | outcome (event or censorship). Must be a binary column - all values should be 28 | either 0 or 1 (with 1 indicating the adverse event and 0 indicating 29 | censorship) or FALSE and TRUE (with TRUE indicating the 30 | adverse event and FALSE indicating censorship).} 31 | 32 | \item{threshold}{A numeric value indicating the relative-risk threshold that 33 | should be used to split observations into low- and high-risk groups.} 34 | } 35 | \value{ 36 | A numeric value between 0 and 1, the p-value of the log-rank test. 37 | } 38 | \description{ 39 | Compute the log-rank test p-value for the difference between the two survival 40 | curves obtained by splitting a dataset into a "low" and "high" risk group 41 | using a given relative-risk threshold. 42 | } 43 | \examples{ 44 | NULL 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/tof_make_knn_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_make_knn_graph} 4 | \alias{tof_make_knn_graph} 5 | \title{Construct a k-nearest-neighbor graph from the cells in a tibble or tof_tbl} 6 | \usage{ 7 | tof_make_knn_graph( 8 | tof_tibble, 9 | knn_cols, 10 | num_neighbors, 11 | distance_function = c("euclidean", "cosine"), 12 | graph_type = c("weighted", "unweighted"), 13 | ... 
14 | ) 15 | } 16 | \arguments{ 17 | \item{tof_tibble}{A tibble or tof_tbl.} 18 | 19 | \item{knn_cols}{Unquoted column names indicating which columns in tof_tibble 20 | should be used for the KNN calculation.} 21 | 22 | \item{num_neighbors}{An integer number of neighbors to find for each cell 23 | (not including itself).} 24 | 25 | \item{distance_function}{A string indicating which distance function to use 26 | for the nearest-neighbor calculation. Options include "euclidean" 27 | (the default) and "cosine" distances.} 28 | 29 | \item{graph_type}{A string indicating if the graph's edges should have weights 30 | ("weighted"; the default) or not ("unweighted").} 31 | 32 | \item{...}{Optional additional arguments to pass to \code{\link[tidytof]{tof_find_knn}}} 33 | } 34 | \value{ 35 | A \code{\link[tidygraph]{tbl_graph}}. 36 | } 37 | \description{ 38 | Construct a k-nearest-neighbor graph from the cells in a tibble or tof_tbl, using the columns specified by `knn_cols` for the nearest-neighbor calculation. 39 | } 40 | \examples{ 41 | NULL 42 | 43 | } 44 | -------------------------------------------------------------------------------- /man/tof_make_roc_curve.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_make_roc_curve} 4 | \alias{tof_make_roc_curve} 5 | \title{Compute a receiver-operating curve (ROC) for a two-class or multiclass dataset} 6 | \usage{ 7 | tof_make_roc_curve(input_data, truth_col, prob_cols) 8 | } 9 | \arguments{ 10 | \item{input_data}{A tof_tbl, tbl_df, or data.frame in which each row is an 11 | observation.} 12 | 13 | \item{truth_col}{An unquoted column name indicating which column in `input_data` 14 | contains the true class labels for each observation. Must be a factor.} 15 | 16 | \item{prob_cols}{Unquoted column names indicating which columns in `input_data` 17 | contain the probability estimates for each class in `truth_col`. 
These columns 18 | must be specified in the same order as the factor levels in `truth_col`.} 19 | } 20 | \value{ 21 | A tibble that can be used to plot the ROC for a classification task. 22 | For each candidate probability threshold, the following are reported: 23 | specificity, sensitivity, true-positive rate (tpr), and false-positive rate 24 | (fpr). 25 | } 26 | \description{ 27 | Compute a receiver-operating curve (ROC) for a two-class or multiclass dataset 28 | } 29 | \examples{ 30 | feature_tibble <- 31 | dplyr::tibble( 32 | sample = as.character(1:100), 33 | cd45 = runif(n = 100), 34 | pstat5 = runif(n = 100), 35 | cd34 = runif(n = 100), 36 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 37 | class = 38 | as.factor( 39 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 40 | ) 41 | ) 42 | 43 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 44 | 45 | # train a logistic regression classifier 46 | log_model <- 47 | tof_train_model( 48 | split_data = split_data, 49 | predictor_cols = c(cd45, pstat5, cd34), 50 | response_col = class, 51 | model_type = "two-class" 52 | ) 53 | 54 | # make predictions 55 | predictions <- 56 | tof_predict( 57 | log_model, 58 | new_data = feature_tibble, 59 | prediction_type = "response" 60 | ) 61 | prediction_tibble <- 62 | dplyr::tibble( 63 | truth = feature_tibble$class, 64 | prediction = predictions$.pred 65 | ) 66 | 67 | # make ROC curve 68 | tof_make_roc_curve( 69 | input_data = prediction_tibble, 70 | truth_col = truth, 71 | prob_cols = prediction 72 | ) 73 | 74 | } 75 | -------------------------------------------------------------------------------- /man/tof_metacluster_kmeans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metaclustering.R 3 | \name{tof_metacluster_kmeans} 4 | \alias{tof_metacluster_kmeans} 5 | \title{Metacluster clustered CyTOF data using k-means 
clustering} 6 | \usage{ 7 | tof_metacluster_kmeans( 8 | tof_tibble, 9 | cluster_col, 10 | metacluster_cols = where(tof_is_numeric), 11 | central_tendency_function = stats::median, 12 | num_metaclusters = 10L, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 18 | 19 | \item{cluster_col}{An unquoted column name indicating which column in `tof_tibble` 20 | stores the cluster ids for the cluster to which each cell belongs. 21 | Cluster labels can be produced via any method the user chooses - including manual gating, 22 | any of the functions in the `tof_cluster_*` function family, or any other method.} 23 | 24 | \item{metacluster_cols}{Unquoted column names indicating which columns in 25 | `tof_tibble` to use in computing the metaclusters. 26 | Defaults to all numeric columns in `tof_tibble`. Supports tidyselect helpers.} 27 | 28 | \item{central_tendency_function}{The function that should be used to 29 | calculate the measurement of central tendency for each cluster before 30 | metaclustering. This function will be used to compute a summary statistic for 31 | each input cluster in `cluster_col` across all columns specified by 32 | `metacluster_cols`, and the resulting vector (one for each cluster) will be 33 | used as the input for metaclustering. 34 | Defaults to \code{\link[stats]{median}}.} 35 | 36 | \item{num_metaclusters}{An integer indicating the number of clusters 37 | that should be returned. Defaults to 10.} 38 | 39 | \item{...}{Optional additional method specifications to pass to 40 | \code{\link{tof_cluster_kmeans}}.} 41 | } 42 | \value{ 43 | A tibble with a single column (`.kmeans_metacluster`) and 44 | the same number of rows as the input `tof_tibble`. Each entry in the column 45 | indicates the metacluster label assigned to the same row in `tof_tibble`. 
46 | } 47 | \description{ 48 | This function performs k-means metaclustering on a `tof_tbl` containing CyTOF data 49 | using a user-specified selection of input variables/CyTOF measurements and 50 | the number of desired metaclusters. See \code{\link{tof_cluster_kmeans}}. 51 | } 52 | \examples{ 53 | sim_data <- 54 | dplyr::tibble( 55 | cd45 = rnorm(n = 1000), 56 | cd38 = rnorm(n = 1000), 57 | cd34 = rnorm(n = 1000), 58 | cd19 = rnorm(n = 1000), 59 | cluster_id = sample(letters, size = 1000, replace = TRUE) 60 | ) 61 | 62 | tof_metacluster_kmeans(tof_tibble = sim_data, cluster_col = cluster_id) 63 | 64 | } 65 | \seealso{ 66 | Other metaclustering functions: 67 | \code{\link{tof_metacluster}()}, 68 | \code{\link{tof_metacluster_consensus}()}, 69 | \code{\link{tof_metacluster_flowsom}()}, 70 | \code{\link{tof_metacluster_hierarchical}()}, 71 | \code{\link{tof_metacluster_phenograph}()} 72 | } 73 | \concept{metaclustering functions} 74 | -------------------------------------------------------------------------------- /man/tof_metacluster_phenograph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metaclustering.R 3 | \name{tof_metacluster_phenograph} 4 | \alias{tof_metacluster_phenograph} 5 | \title{Metacluster clustered CyTOF data using PhenoGraph clustering} 6 | \usage{ 7 | tof_metacluster_phenograph( 8 | tof_tibble, 9 | cluster_col, 10 | metacluster_cols = where(tof_is_numeric), 11 | central_tendency_function = stats::median, 12 | num_neighbors = 5L, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 18 | 19 | \item{cluster_col}{An unquoted column name indicating which column in `tof_tibble` 20 | stores the cluster ids for the cluster to which each cell belongs. 
21 | Cluster labels can be produced via any method the user chooses - including manual gating, 22 | any of the functions in the `tof_cluster_*` function family, or any other method.} 23 | 24 | \item{metacluster_cols}{Unquoted column names indicating which columns in 25 | `tof_tibble` to use in computing the metaclusters. 26 | Defaults to all numeric columns in `tof_tibble`. Supports tidyselect helpers.} 27 | 28 | \item{central_tendency_function}{The function that should be used to 29 | calculate the measurement of central tendency for each cluster before 30 | metaclustering. This function will be used to compute a summary statistic for 31 | each input cluster in `cluster_col` across all columns specified by 32 | `metacluster_cols`, and the resulting vector (one for each cluster) will be 33 | used as the input for metaclustering. 34 | Defaults to \code{\link[stats]{median}}.} 35 | 36 | \item{num_neighbors}{An integer indicating the number of neighbors to use when 37 | constructing PhenoGraph's k-nearest-neighbor graph. Smaller values emphasize 38 | local graph structure; larger values emphasize global graph structure (and 39 | will add time to the computation). Defaults to 5.} 40 | 41 | \item{...}{Optional additional method specifications to pass to 42 | \code{\link{tof_cluster_phenograph}}.} 43 | } 44 | \value{ 45 | A tibble with a single column (`.phenograph_metacluster`) and 46 | the same number of rows as the input `tof_tibble`. Each entry in the column 47 | indicates the metacluster label assigned to the same row in `tof_tibble`. 48 | } 49 | \description{ 50 | This function performs PhenoGraph metaclustering on a `tof_tbl` containing CyTOF data 51 | using a user-specified selection of input variables/CyTOF measurements. The number 52 | of metaclusters is automatically detected by the PhenoGraph algorithm. 53 | See \code{\link{tof_cluster_phenograph}}. 
54 | } 55 | \examples{ 56 | sim_data <- 57 | dplyr::tibble( 58 | cd45 = rnorm(n = 1000), 59 | cd38 = rnorm(n = 1000), 60 | cd34 = rnorm(n = 1000), 61 | cd19 = rnorm(n = 1000), 62 | cluster_id = sample(letters, size = 1000, replace = TRUE) 63 | ) 64 | 65 | tof_metacluster_phenograph(tof_tibble = sim_data, cluster_col = cluster_id) 66 | 67 | } 68 | \seealso{ 69 | Other metaclustering functions: 70 | \code{\link{tof_metacluster}()}, 71 | \code{\link{tof_metacluster_consensus}()}, 72 | \code{\link{tof_metacluster_flowsom}()}, 73 | \code{\link{tof_metacluster_hierarchical}()}, 74 | \code{\link{tof_metacluster_kmeans}()} 75 | } 76 | \concept{metaclustering functions} 77 | -------------------------------------------------------------------------------- /man/tof_plot_cells_density.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_cells_density} 4 | \alias{tof_plot_cells_density} 5 | \title{Plot marker expression density plots} 6 | \usage{ 7 | tof_plot_cells_density( 8 | tof_tibble, 9 | marker_col, 10 | group_col, 11 | num_points = 512, 12 | theme = ggplot2::theme_bw(), 13 | use_ggridges = FALSE, 14 | scale = 1, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 20 | 21 | \item{marker_col}{An unquoted column name representing which column in `tof_tibble` 22 | (i.e. which CyTOF protein measurement) should be plotted in the density 23 | plot.} 24 | 25 | \item{group_col}{An unquoted column name representing which column in `tof_tibble` 26 | should be used to break the rows of `tof_tibble` into subgroups to be plotted 27 | as separate histograms. 
Defaults to plotting without subgroups.} 28 | 29 | \item{num_points}{The number of points along the full range of `marker_col` at 30 | which the density should be calculated. Defaults to 512.} 31 | 32 | \item{theme}{The ggplot2 theme for the plot. Defaults to 33 | \code{\link[ggplot2]{theme_bw}}} 34 | 35 | \item{use_ggridges}{A boolean value indicating if 36 | \code{\link[ggridges]{geom_ridgeline}} should be used to plot overlain 37 | histograms. Defaults to FALSE. If TRUE, the ggridges package must be installed.} 38 | 39 | \item{scale}{Use to set the `scale` argument in \code{\link[ggridges]{geom_ridgeline}}, 40 | which controls how far apart (vertically) density plots are arranged along the 41 | y-axis. Defaults to 1.} 42 | 43 | \item{...}{Additional optional arguments to send to \code{\link[ggridges]{geom_ridgeline}}.} 44 | } 45 | \value{ 46 | A ggplot object. 47 | } 48 | \description{ 49 | This function plots marker expression density plots for a user-specified 50 | column in a tof_tbl. Optionally, cells can be grouped to plot multiple 51 | vertically-arranged density plots. 52 | } 53 | \examples{ 54 | sim_data <- 55 | dplyr::tibble( 56 | cd45 = rnorm(n = 1000), 57 | cd38 = rnorm(n = 1000), 58 | cd34 = rnorm(n = 1000), 59 | cd19 = rnorm(n = 1000), 60 | cluster_id = sample(c("a", "b"), size = 1000, replace = TRUE) 61 | ) 62 | 63 | density_plot <- 64 | tof_plot_cells_density( 65 | tof_tibble = sim_data, 66 | marker_col = cd45, 67 | group_col = cluster_id 68 | ) 69 | 70 | } 71 | -------------------------------------------------------------------------------- /man/tof_plot_cells_scatter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_cells_scatter} 4 | \alias{tof_plot_cells_scatter} 5 | \title{Plot scatterplots of single-cell data.} 6 | \usage{ 7 | tof_plot_cells_scatter( 8 | tof_tibble, 9 | x_col, 10 | y_col, 11 | color_col, 12 | 
facet_cols, 13 | theme = ggplot2::theme_bw(), 14 | ..., 15 | method = c("ggplot2", "scattermore") 16 | ) 17 | } 18 | \arguments{ 19 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 20 | 21 | \item{x_col}{An unquoted column name specifying which column in 22 | `tof_tibble` should be used as the x-axis.} 23 | 24 | \item{y_col}{An unquoted column name specifying which column in 25 | `tof_tibble` should be used as the y-axis.} 26 | 27 | \item{color_col}{An unquoted column name specifying which column in 28 | `tof_tibble` should be used to color each point in the scatterplot.} 29 | 30 | \item{facet_cols}{An unquoted column name specifying which column in 31 | `tof_tibble` should be used to break the scatterplot into facets using 32 | \code{\link[ggplot2]{facet_wrap}}.} 33 | 34 | \item{theme}{A ggplot2 theme to apply to the scatterplot. Defaults to 35 | \code{\link[ggplot2]{theme_bw}}.} 36 | 37 | \item{...}{Optional additional arguments to pass to \code{\link[ggplot2]{geom_point}} 38 | if \code{method = "ggplot2"} or \code{\link[scattermore]{geom_scattermore}} if 39 | \code{method = "scattermore"}.} 40 | 41 | \item{method}{A string indicating which plotting engine should be used. Valid 42 | values include "ggplot2" (the default) and "scattermore" (recommended if more than 43 | 100K cells are being plotted). Note that \code{method = "scattermore"} requires the 44 | scattermore package to be installed.} 45 | } 46 | \value{ 47 | A ggplot object. 48 | } 49 | \description{ 50 | This function makes scatterplots of single-cell data using user-specified 51 | x- and y-axes. Additionally, each point in the scatterplot can be colored 52 | using a user-specified variable. 
53 | } 54 | \examples{ 55 | sim_data <- 56 | dplyr::tibble( 57 | cd45 = rnorm(n = 1000), 58 | cd38 = c(rnorm(n = 500), rnorm(n = 500, mean = 2)), 59 | cd34 = c(rnorm(n = 500), rnorm(n = 500, mean = 4)), 60 | cd19 = rnorm(n = 1000), 61 | cluster_id = c(rep("a", 500), rep("b", 500)) 62 | ) 63 | 64 | } 65 | \seealso{ 66 | Other visualization functions: 67 | \code{\link{tof_plot_cells_embedding}()}, 68 | \code{\link{tof_plot_cells_layout}()} 69 | } 70 | \concept{visualization functions} 71 | -------------------------------------------------------------------------------- /man/tof_plot_clusters_heatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_clusters_heatmap} 4 | \alias{tof_plot_clusters_heatmap} 5 | \title{Make a heatmap summarizing cluster marker expression patterns in CyTOF data} 6 | \usage{ 7 | tof_plot_clusters_heatmap( 8 | tof_tibble, 9 | cluster_col, 10 | marker_cols = where(tof_is_numeric), 11 | central_tendency_function = stats::median, 12 | scale_markerwise = FALSE, 13 | scale_clusterwise = FALSE, 14 | cluster_markers = TRUE, 15 | cluster_clusters = TRUE, 16 | line_width = 0.25, 17 | theme = ggplot2::theme_minimal() 18 | ) 19 | } 20 | \arguments{ 21 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 22 | 23 | \item{cluster_col}{An unquoted column name indicating which column in `tof_tibble` 24 | stores the cluster ids for the cluster to which each cell belongs. 25 | Cluster labels can be produced via any method the user chooses - including manual gating, 26 | any of the functions in the `tof_cluster_*` function family, or any other method.} 27 | 28 | \item{marker_cols}{Unquoted column names indicating which column in `tof_tibble` 29 | should be interpreted as markers to be plotted along the x-axis of the heatmap. 
30 | Supports tidyselect helpers.} 31 | 32 | \item{central_tendency_function}{A function to use for computing the 33 | measure of central tendency that will be aggregated from each cluster in 34 | cluster_col. Defaults to the median.} 35 | 36 | \item{scale_markerwise}{A boolean value indicating if the heatmap should 37 | rescale the columns of the heatmap such that the maximum value for each 38 | marker is 1 and the minimum value is 0. Defaults to FALSE.} 39 | 40 | \item{scale_clusterwise}{A boolean value indicating if the heatmap should 41 | rescale the rows of the heatmap such that the maximum value for each 42 | cluster is 1 and the minimum value is 0. Defaults to FALSE.} 43 | 44 | \item{cluster_markers}{A boolean value indicating if the heatmap should 45 | order its columns (i.e. markers) using hierarchical clustering. Defaults to 46 | TRUE.} 47 | 48 | \item{cluster_clusters}{A boolean value indicating if the heatmap should 49 | order its rows (i.e. clusters) using hierarchical clustering. Defaults to 50 | TRUE.} 51 | 52 | \item{line_width}{A numeric value indicating how thick the lines separating 53 | the tiles of the heatmap should be. Defaults to 0.25.} 54 | 55 | \item{theme}{A ggplot2 theme to apply to the heatmap. 56 | Defaults to \code{\link[ggplot2]{theme_minimal}}} 57 | } 58 | \value{ 59 | A ggplot object. 60 | } 61 | \description{ 62 | This function makes a heatmap of cluster-to-cluster marker expression patterns 63 | in single-cell data. Markers are plotted along the horizontal (x-) axis of 64 | the heatmap and cluster IDs are plotted along the vertical (y-) axis of the 65 | heatmap. 
66 | } 67 | \examples{ 68 | sim_data <- 69 | dplyr::tibble( 70 | cd45 = rnorm(n = 1000), 71 | cd38 = rnorm(n = 1000), 72 | cd34 = rnorm(n = 1000), 73 | cd19 = rnorm(n = 1000), 74 | cluster_id = sample(letters, size = 1000, replace = TRUE) 75 | ) 76 | 77 | heatmap <- 78 | tof_plot_clusters_heatmap( 79 | tof_tibble = sim_data, 80 | cluster_col = cluster_id 81 | ) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /man/tof_plot_heatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{tof_plot_heatmap} 4 | \alias{tof_plot_heatmap} 5 | \title{Make a heatmap summarizing group marker expression patterns in high-dimensional cytometry data} 6 | \usage{ 7 | tof_plot_heatmap( 8 | tof_tibble, 9 | y_col, 10 | marker_cols = where(tof_is_numeric), 11 | central_tendency_function = stats::median, 12 | scale_markerwise = FALSE, 13 | scale_ywise = FALSE, 14 | cluster_markers = TRUE, 15 | cluster_groups = TRUE, 16 | line_width = 0.25, 17 | theme = ggplot2::theme_minimal() 18 | ) 19 | } 20 | \arguments{ 21 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 22 | 23 | \item{y_col}{An unquoted column name indicating which column in `tof_tibble` 24 | stores the ids for the group to which each cell belongs.} 25 | 26 | \item{marker_cols}{Unquoted column names indicating which columns in `tof_tibble` 27 | should be interpreted as markers to be plotted along the x-axis of the heatmap. 28 | Supports tidyselect helpers.} 29 | 30 | \item{central_tendency_function}{A function to use for computing the 31 | measure of central tendency that will be aggregated from each group in 32 | `y_col`. 
Defaults to the median.} 33 | 34 | \item{scale_markerwise}{A boolean value indicating if the heatmap should 35 | rescale the columns of the heatmap such that the maximum value for each 36 | marker is 1 and the minimum value is 0. Defaults to FALSE.} 37 | 38 | \item{scale_ywise}{A boolean value indicating if the heatmap should 39 | rescale the rows of the heatmap such that the maximum value for each 40 | group is 1 and the minimum value is 0. Defaults to FALSE.} 41 | 42 | \item{cluster_markers}{A boolean value indicating if the heatmap should 43 | order its columns (i.e. markers) using hierarchical clustering. Defaults to 44 | TRUE.} 45 | 46 | \item{cluster_groups}{A boolean value indicating if the heatmap should 47 | order its rows (i.e. groups) using hierarchical clustering. Defaults to 48 | TRUE.} 49 | 50 | \item{line_width}{A numeric value indicating how thick the lines separating 51 | the tiles of the heatmap should be. Defaults to 0.25.} 52 | 53 | \item{theme}{A ggplot2 theme to apply to the heatmap. 54 | Defaults to \code{\link[ggplot2]{theme_minimal}}} 55 | } 56 | \value{ 57 | A ggplot object. 58 | } 59 | \description{ 60 | This function makes a heatmap of group-to-group marker expression patterns 61 | in single-cell data. Markers are plotted along the horizontal (x-) axis of 62 | the heatmap and groups are plotted along the vertical (y-) axis of the 63 | heatmap. 
64 | } 65 | -------------------------------------------------------------------------------- /man/tof_plot_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_model} 4 | \alias{tof_plot_model} 5 | \title{Plot the results of a glmnet model fit on sample-level data.} 6 | \usage{ 7 | tof_plot_model(tof_model, new_data, theme = ggplot2::theme_bw()) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 11 | 12 | \item{new_data}{A tibble of new observations for which a plot should be made. 13 | If new_data isn't provided, the plot will be made using the training data used to 14 | fit the model. Alternatively, the string "tuning_data" can be provided, and the 15 | plot will be generated using the predictions generated during model tuning.} 16 | 17 | \item{theme}{A ggplot2 theme to apply to the plot. 18 | Defaults to \code{\link[ggplot2]{theme_bw}}} 19 | } 20 | \value{ 21 | A ggplot object. If the `tof_model` is a linear model, a scatterplot 22 | of the predicted outcome vs. the true outcome will be returned. If the `tof_model` 23 | is a two-class model, an ROC curve will be returned. If the `tof_model` is a 24 | multiclass model, a one-versus-all ROC curve will be returned for each class. 25 | If `tof_model` is a survival model, a Kaplan-Meier curve will be returned. 26 | } 27 | \description{ 28 | Plot the results of a glmnet model fit on sample-level data. 
29 | } 30 | \examples{ 31 | feature_tibble <- 32 | dplyr::tibble( 33 | sample = as.character(1:100), 34 | cd45 = runif(n = 100), 35 | pstat5 = runif(n = 100), 36 | cd34 = runif(n = 100), 37 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(100), 38 | class = 39 | as.factor( 40 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 41 | ) 42 | ) 43 | 44 | new_tibble <- 45 | dplyr::tibble( 46 | sample = as.character(1:20), 47 | cd45 = runif(n = 20), 48 | pstat5 = runif(n = 20), 49 | cd34 = runif(n = 20), 50 | outcome = (3 * cd45) + (4 * pstat5) + rnorm(20), 51 | class = 52 | as.factor( 53 | dplyr::if_else(outcome > median(outcome), "class1", "class2") 54 | ) 55 | ) 56 | 57 | split_data <- tof_split_data(feature_tibble, split_method = "simple") 58 | 59 | # train a regression model 60 | regression_model <- 61 | tof_train_model( 62 | split_data = split_data, 63 | predictor_cols = c(cd45, pstat5, cd34), 64 | response_col = outcome, 65 | model_type = "linear" 66 | ) 67 | 68 | # make the plot 69 | plot_1 <- tof_plot_model(tof_model = regression_model, new_data = new_tibble) 70 | 71 | # train a logistic regression classifier 72 | logistic_model <- 73 | tof_train_model( 74 | split_data = split_data, 75 | predictor_cols = c(cd45, pstat5, cd34), 76 | response_col = class, 77 | model_type = "two-class" 78 | ) 79 | 80 | # make the plot 81 | 82 | plot_2 <- tof_plot_model(tof_model = logistic_model, new_data = new_tibble) 83 | 84 | } 85 | -------------------------------------------------------------------------------- /man/tof_plot_model_linear.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_model_linear} 4 | \alias{tof_plot_model_linear} 5 | \title{Plot the results of a linear glmnet model fit on sample-level data.} 6 | \usage{ 7 | tof_plot_model_linear(tof_model, new_data, theme = ggplot2::theme_bw()) 8 | } 9 | 
\arguments{ 10 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 11 | 12 | \item{new_data}{A tibble of new observations for which a plot should be made. 13 | If new_data isn't provided, the plot will be made using the training data used to 14 | fit the model. Alternatively, the string "tuning_data" can be provided, and the 15 | plot will be generated using the predictions generated during model tuning.} 16 | 17 | \item{theme}{A ggplot2 theme to apply to the plot. 18 | Defaults to \code{\link[ggplot2]{theme_bw}}} 19 | } 20 | \value{ 21 | A ggplot object. Specifically, a scatterplot 22 | of the predicted outcome vs. the true outcome will be returned. 23 | } 24 | \description{ 25 | Plot the results of a linear glmnet model fit on sample-level data. 26 | } 27 | -------------------------------------------------------------------------------- /man/tof_plot_model_logistic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_model_logistic} 4 | \alias{tof_plot_model_logistic} 5 | \title{Plot the results of a two-class glmnet model fit on sample-level data.} 6 | \usage{ 7 | tof_plot_model_logistic(tof_model, new_data, theme = ggplot2::theme_bw()) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 11 | 12 | \item{new_data}{A tibble of new observations for which a plot should be made. 13 | If new_data isn't provided, the plot will be made using the training data used to 14 | fit the model. Alternatively, the string "tuning_data" can be provided, and the 15 | plot will be generated using the predictions generated during model tuning.} 16 | 17 | \item{theme}{A ggplot2 theme to apply to the plot. 18 | Defaults to \code{\link[ggplot2]{theme_bw}}} 19 | } 20 | \value{ 21 | A ggplot object. Specifically, an ROC curve. 
22 | } 23 | \description{ 24 | Plot the results of a two-class glmnet model fit on sample-level data. 25 | } 26 | -------------------------------------------------------------------------------- /man/tof_plot_model_multinomial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_model_multinomial} 4 | \alias{tof_plot_model_multinomial} 5 | \title{Plot the results of a multiclass glmnet model fit on sample-level data.} 6 | \usage{ 7 | tof_plot_model_multinomial(tof_model, new_data, theme = ggplot2::theme_bw()) 8 | } 9 | \arguments{ 10 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 11 | 12 | \item{new_data}{A tibble of new observations for which a plot should be made. 13 | If new_data isn't provided, the plot will be made using the training data used to 14 | fit the model. Alternatively, the string "tuning_data" can be provided, and the 15 | plot will be generated using the predictions generated during model tuning.} 16 | 17 | \item{theme}{A ggplot2 theme to apply to the plot. 18 | Defaults to \code{\link[ggplot2]{theme_bw}}.} 19 | } 20 | \value{ 21 | A ggplot object. Specifically, a one-versus-all ROC curve 22 | (one for each class). 23 | } 24 | \description{ 25 | Plot the results of a multiclass glmnet model fit on sample-level data. 
26 | } 27 | -------------------------------------------------------------------------------- /man/tof_plot_model_survival.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_model_survival} 4 | \alias{tof_plot_model_survival} 5 | \title{Plot the results of a survival glmnet model fit on sample-level data.} 6 | \usage{ 7 | tof_plot_model_survival( 8 | tof_model, 9 | new_data, 10 | censor_size = 2.5, 11 | theme = ggplot2::theme_bw() 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_model}{A `tof_model` trained using \code{\link{tof_train_model}}} 16 | 17 | \item{new_data}{A tibble of new observations for which a plot should be made. 18 | If new_data isn't provided, the plot will be made using the training data used to 19 | fit the model. Alternatively, the string "tuning_data" can be provided, and the 20 | plot will be generated using the predictions generated during model tuning.} 21 | 22 | \item{censor_size}{A numeric value indicating how large to plot the tick marks 23 | representing censored values in the Kaplan-Meier curve.} 24 | 25 | \item{theme}{A ggplot2 theme to apply to the plot. 26 | Defaults to \code{\link[ggplot2]{theme_bw}}} 27 | } 28 | \value{ 29 | A ggplot object. Specifically, a Kaplan-Meier curve. 30 | } 31 | \description{ 32 | Plot the results of a survival glmnet model fit on sample-level data. 
33 | } 34 | -------------------------------------------------------------------------------- /man/tof_plot_sample_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_sample_features} 4 | \alias{tof_plot_sample_features} 5 | \title{Make a heatmap summarizing sample marker expression patterns in CyTOF data} 6 | \usage{ 7 | tof_plot_sample_features( 8 | feature_tibble, 9 | sample_col, 10 | feature_cols = where(tof_is_numeric), 11 | scale_featurewise = FALSE, 12 | scale_samplewise = FALSE, 13 | line_width = 0.25, 14 | theme = ggplot2::theme_minimal() 15 | ) 16 | } 17 | \arguments{ 18 | \item{feature_tibble}{A tbl_df or data.frame of aggregated sample-level features, 19 | such as that generated by \code{\link{tof_extract_features}}.} 20 | 21 | \item{sample_col}{An unquoted column name indicating which column in `feature_tibble` 22 | stores the IDs for each sample. If no sample IDs are present, a numeric ID 23 | will be assigned to each row of `feature_tibble` based on its row index.} 24 | 25 | \item{feature_cols}{Unquoted column names indicating which columns in `feature_tibble` 26 | should be interpreted as features to be plotted along the x-axis of the heatmap. 27 | Supports tidyselect helpers.} 28 | 29 | \item{scale_featurewise}{A boolean value indicating if the heatmap should 30 | rescale the columns of the heatmap such that the maximum value for each 31 | marker is 1 and the minimum value is 0. Defaults to FALSE.} 32 | 33 | \item{scale_samplewise}{A boolean value indicating if the heatmap should 34 | rescale the rows of the heatmap such that the maximum value for each 35 | sample is 1 and the minimum value is 0. Defaults to FALSE.} 36 | 37 | \item{line_width}{A numeric value indicating how thick the lines separating 38 | the tiles of the heatmap should be.
Defaults to 0.25.} 39 | 40 | \item{theme}{A ggplot2 theme to apply to the heatmap. 41 | Defaults to \code{\link[ggplot2]{theme_minimal}}} 42 | } 43 | \value{ 44 | A ggplot object. 45 | } 46 | \description{ 47 | This function makes a heatmap of sample-to-sample marker expression patterns 48 | in single-cell data. Markers are plotted along the horizontal (x-) axis of 49 | the heatmap and sample IDs are plotted along the vertical (y-) axis of the 50 | heatmap. 51 | } 52 | \examples{ 53 | 54 | # simulate single-cell data 55 | sim_data <- 56 | dplyr::tibble( 57 | cd45 = rnorm(n = 1000), 58 | cd38 = rnorm(n = 1000), 59 | cd34 = rnorm(n = 1000), 60 | cd19 = rnorm(n = 1000), 61 | cluster_id = sample(letters, size = 1000, replace = TRUE), 62 | sample_id = sample(paste0("sample", 1:5), size = 1000, replace = TRUE) 63 | ) 64 | 65 | # extract cluster proportions in each simulated patient 66 | feature_data <- 67 | tof_extract_proportion( 68 | tof_tibble = sim_data, 69 | cluster_col = cluster_id, 70 | group_cols = sample_id 71 | ) 72 | 73 | # plot the heatmap 74 | heatmap <- tof_plot_sample_features(feature_tibble = feature_data) 75 | 76 | } 77 | -------------------------------------------------------------------------------- /man/tof_plot_sample_heatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualization.R 3 | \name{tof_plot_sample_heatmap} 4 | \alias{tof_plot_sample_heatmap} 5 | \title{Make a heatmap summarizing sample marker expression patterns in CyTOF data} 6 | \usage{ 7 | tof_plot_sample_heatmap( 8 | tof_tibble, 9 | sample_col, 10 | marker_cols = where(tof_is_numeric), 11 | central_tendency_function = stats::median, 12 | scale_markerwise = FALSE, 13 | scale_samplewise = FALSE, 14 | line_width = 0.25, 15 | theme = ggplot2::theme_minimal() 16 | ) 17 | } 18 | \arguments{ 19 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 20 | 21 | 
\item{sample_col}{An unquoted column name indicating which column in `tof_tibble` 22 | stores the ids for the sample to which each cell belongs.} 23 | 24 | \item{marker_cols}{Unquoted column names indicating which columns in `tof_tibble` 25 | should be interpreted as markers to be plotted along the x-axis of the heatmap. 26 | Supports tidyselect helpers.} 27 | 28 | \item{central_tendency_function}{A function to use for computing the 29 | measure of central tendency that will be aggregated from each sample in 30 | `sample_col`. Defaults to the median.} 31 | 32 | \item{scale_markerwise}{A boolean value indicating if the heatmap should 33 | rescale the columns of the heatmap such that the maximum value for each 34 | marker is 1 and the minimum value is 0. Defaults to FALSE.} 35 | 36 | \item{scale_samplewise}{A boolean value indicating if the heatmap should 37 | rescale the rows of the heatmap such that the maximum value for each 38 | sample is 1 and the minimum value is 0. Defaults to FALSE.} 39 | 40 | \item{line_width}{A numeric value indicating how thick the lines separating 41 | the tiles of the heatmap should be. Defaults to 0.25.} 42 | 43 | \item{theme}{A ggplot2 theme to apply to the heatmap. 44 | Defaults to \code{\link[ggplot2]{theme_minimal}}} 45 | } 46 | \value{ 47 | A ggplot object. 48 | } 49 | \description{ 50 | This function makes a heatmap of sample-to-sample marker expression patterns 51 | in single-cell data. Markers are plotted along the horizontal (x-) axis of 52 | the heatmap and sample IDs are plotted along the vertical (y-) axis of the 53 | heatmap.
54 | } 55 | \examples{ 56 | sim_data <- 57 | dplyr::tibble( 58 | cd45 = rnorm(n = 1000), 59 | cd38 = rnorm(n = 1000), 60 | cd34 = rnorm(n = 1000), 61 | cd19 = rnorm(n = 1000), 62 | sample_id = sample(paste0("sample", 1:5), size = 1000, replace = TRUE) 63 | ) 64 | 65 | heatmap <- 66 | tof_plot_sample_heatmap( 67 | tof_tibble = sim_data, 68 | sample_col = sample_id 69 | ) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /man/tof_postprocess.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/preprocessing.R 3 | \name{tof_postprocess} 4 | \alias{tof_postprocess} 5 | \title{Post-process transformed CyTOF data.} 6 | \usage{ 7 | tof_postprocess( 8 | tof_tibble = NULL, 9 | channel_cols = where(tof_is_numeric), 10 | redo_noise = FALSE, 11 | transform_fun = function(x) rev_asinh(x, shift_factor = 0, scale_factor = 0.2) 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tibble` or a `tibble`.} 16 | 17 | \item{channel_cols}{A vector of non-quoted column names indicating which columns 18 | in `tof_tibble` contain protein measurements. Supports tidyselect helpers. 19 | If nothing is specified, the default is to transform all numeric columns.} 20 | 21 | \item{redo_noise}{A boolean value indicating whether to add uniform noise 22 | to each CyTOF measurement for aesthetic and visualization purposes. See \href{https://pubmed.ncbi.nlm.nih.gov/30277658/}{this paper}. 23 | Defaults to FALSE.} 24 | 25 | \item{transform_fun}{A vectorized function to apply to each column specified by 26 | `channel_cols` for post-processing.
Defaults to \code{\link{rev_asinh}} transformation 27 | (with a cofactor of 5).} 28 | } 29 | \value{ 30 | A `tof_tbl` with identical dimensions to the input `tof_tibble`, with all 31 | columns specified in channel_cols transformed using `transform_fun` (with noise 32 | added or not added depending on `redo_noise`). 33 | } 34 | \description{ 35 | This function transforms a `tof_tibble` of transformed ion counts from a mass 36 | cytometer back into something that looks more like an .fcs file that Fluidigm 37 | software generates. 38 | } 39 | \examples{ 40 | 41 | # read in an example .fcs file from tidytof's internal datasets 42 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 43 | tof_tibble <- tof_read_data(input_file) 44 | 45 | # preprocess all numeric columns with default behavior 46 | # arcsinh transformation with a cofactor of 5 47 | preprocessed_tof_tibble <- tof_preprocess(tof_tibble) 48 | 49 | # postprocess all numeric columns to reverse the preprocessing 50 | tof_postprocess(tof_tibble) 51 | 52 | } 53 | \seealso{ 54 | [tof_preprocess()] 55 | } 56 | -------------------------------------------------------------------------------- /man/tof_prep_recipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modeling_helpers.R 3 | \name{tof_prep_recipe} 4 | \alias{tof_prep_recipe} 5 | \title{Train a recipe or list of recipes for preprocessing sample-level cytometry data} 6 | \usage{ 7 | tof_prep_recipe(split_data, unprepped_recipe) 8 | } 9 | \arguments{ 10 | \item{split_data}{An `rsplit` or `rset` object from the \code{\link[rsample]{rsample}} 11 | package containing the sample-level data to use for modeling. 12 | The easiest way to generate this is to use \code{\link{tof_split_data}}.
13 | Alternatively, an unsplit tbl_df, though this is not recommended.} 14 | 15 | \item{unprepped_recipe}{A \code{\link[recipes]{recipe}} object (if `split_data` 16 | is an `rsplit` object or a `tbl_df`) or list of recipes 17 | (if `split_data` is an `rset` object).} 18 | } 19 | \value{ 20 | If split_data is an "rsplit" or "tbl_df" object, will return a single 21 | prepped recipe. If split_data is an "rset" object, will return a list of prepped 22 | recipes specific for each fold of the resampling procedure. 23 | } 24 | \description{ 25 | Train a recipe or list of recipes for preprocessing sample-level cytometry data 26 | } 27 | -------------------------------------------------------------------------------- /man/tof_preprocess.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/preprocessing.R 3 | \name{tof_preprocess} 4 | \alias{tof_preprocess} 5 | \title{Preprocess raw high-dimensional cytometry data.} 6 | \usage{ 7 | tof_preprocess( 8 | tof_tibble = NULL, 9 | channel_cols = where(tof_is_numeric), 10 | undo_noise = FALSE, 11 | transform_fun = function(x) asinh(x/5) 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 16 | 17 | \item{channel_cols}{Unquoted column names representing columns that contain 18 | single-cell protein measurements. Supports tidyselect helpers. 19 | If nothing is specified, the default is to transform all numeric columns.} 20 | 21 | \item{undo_noise}{A boolean value indicating whether to remove the uniform noise that 22 | Fluidigm software adds to CyTOF measurements for aesthetic 23 | and visualization purposes. See \href{https://pubmed.ncbi.nlm.nih.gov/30277658/}{this paper}. 24 | Defaults to FALSE.} 25 | 26 | \item{transform_fun}{A vectorized function to apply to each protein value for 27 | variance stabilization. 
Defaults to \code{\link[base]{asinh}} transformation 28 | (with a co-factor of 5).} 29 | } 30 | \value{ 31 | A `tof_tbl` with identical dimensions to the input `tof_tibble`, with all 32 | columns specified in channel_cols transformed using `transform_fun` (with noise 33 | removed or not removed depending on `undo_noise`). 34 | } 35 | \description{ 36 | This function transforms a `tof_tbl` of raw ion counts, reads, or 37 | fluorescence intensity units directly measured on a cytometer using a 38 | user-provided function. It can be used to perform 39 | standard pre-processing steps (e.g. arcsinh transformation) before cytometry 40 | data analysis. 41 | } 42 | \examples{ 43 | 44 | # read in an example .fcs file from tidytof's internal datasets 45 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 46 | tof_tibble <- tof_read_data(input_file) 47 | 48 | # preprocess all numeric columns with default behavior 49 | # arcsinh transformation with a cofactor of 5 50 | tof_preprocess(tof_tibble) 51 | 52 | # preprocess all numeric columns using the log base 10 transformation 53 | tof_preprocess(tof_tibble, transform_fun = log10) 54 | 55 | } 56 | \seealso{ 57 | [tof_postprocess()] 58 | } 59 | -------------------------------------------------------------------------------- /man/tof_read_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_read_csv} 4 | \alias{tof_read_csv} 5 | \title{Read high-dimensional cytometry data from a .csv file into a tidy tibble.} 6 | \usage{ 7 | tof_read_csv(file_path = NULL, panel_info = dplyr::tibble()) 8 | } 9 | \arguments{ 10 | \item{file_path}{A file path to a single .csv file.} 11 | 12 | \item{panel_info}{Optional. A tibble or data.frame containing information about the 13 | panel used during high-dimensional cytometry data acquisition.
Two columns are required: 14 | "metals" and "antigens".} 15 | } 16 | \value{ 17 | A `tof_tbl` in which each row represents a single cell and each 18 | column represents a high-dimensional cytometry antigen channel. 19 | 20 | A `tof_tbl` is an S3 class that extends the "tibble" class by storing 21 | one additional attribute: "panel" (a tibble storing information about the 22 | panel used during data acquisition). Because panel information isn't 23 | obvious from data read as a .csv file, this information must be provided 24 | manually by the user (unlike in `tof_read_fcs`). 25 | } 26 | \description{ 27 | Read high-dimensional cytometry data from a .csv file into a tidy tibble. 28 | } 29 | -------------------------------------------------------------------------------- /man/tof_read_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_read_data} 4 | \alias{tof_read_data} 5 | \title{Read data from an .fcs/.csv file or a directory of .fcs/.csv files.} 6 | \usage{ 7 | tof_read_data(path = NULL, sep = "|", panel_info = dplyr::tibble()) 8 | } 9 | \arguments{ 10 | \item{path}{A file path to a single file or to a directory of files. 11 | The only valid file types are .fcs files or .csv files 12 | containing high-dimensional cytometry data.} 13 | 14 | \item{sep}{Optional. A string to use to separate the antigen name and its associated 15 | metal in the column names of the output tibble. Defaults to "|". Only used if 16 | the input file is an .fcs file.} 17 | 18 | \item{panel_info}{Optional. A tibble or data.frame containing information about the 19 | panel used during high-dimensional cytometry data acquisition. Two columns are required: 20 | "metals" and "antigens".
Only used if the input file is a .csv file.} 21 | } 22 | \value{ 23 | A [c by m+1] tibble in which each row represents a single cell (of c 24 | total in the dataset) and each column represents a high-dimensional cytometry measurement 25 | (of m total in the dataset). If more than one .fcs is read at once, 26 | the last column of the tibble (`file_name`) will represent the file name 27 | of the .fcs file from which each cell was read. 28 | } 29 | \description{ 30 | Read data from an .fcs/.csv file or a directory of .fcs/.csv files. 31 | } 32 | \examples{ 33 | 34 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 35 | tof_read_data(input_file) 36 | 37 | } 38 | \seealso{ 39 | Other input/output functions: 40 | \code{\link{tof_write_csv}()}, 41 | \code{\link{tof_write_data}()}, 42 | \code{\link{tof_write_fcs}()} 43 | } 44 | \concept{input/output functions} 45 | -------------------------------------------------------------------------------- /man/tof_read_fcs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_read_fcs} 4 | \alias{tof_read_fcs} 5 | \title{Read high-dimensional cytometry data from an .fcs file into a tidy tibble.} 6 | \usage{ 7 | tof_read_fcs(file_path = NULL, sep = "|") 8 | } 9 | \arguments{ 10 | \item{file_path}{A file path to a single .fcs file.} 11 | 12 | \item{sep}{A string to use to separate the antigen name and its associated 13 | metal in the column names of the output tibble. Defaults to "|".} 14 | } 15 | \value{ 16 | A `tof_tbl` in which each row represents a single cell and each 17 | column represents a high-dimensional cytometry antigen channel. 18 | 19 | A `tof_tbl` is an S3 class that extends the "tibble" class by storing 20 | one additional attribute: "panel" (a tibble storing information about the 21 | panel used during data acquisition).
22 | } 23 | \description{ 24 | This function reads high-dimensional cytometry data from a single .fcs file into a tidy data 25 | structure called a `tof_tbl` ("tof_tibble"). tof_tibbles are identical to normal 26 | tibbles except for an additional attribute ("panel") that stores information 27 | about the high-dimensional cytometry panel used during data acquisition. 28 | } 29 | -------------------------------------------------------------------------------- /man/tof_read_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_read_file} 4 | \alias{tof_read_file} 5 | \title{Read high-dimensional cytometry data from a single .fcs or .csv file into a tidy tibble.} 6 | \usage{ 7 | tof_read_file(file_path = NULL, sep = "|", panel_info = dplyr::tibble()) 8 | } 9 | \arguments{ 10 | \item{file_path}{A file path to a single .fcs or .csv file.} 11 | 12 | \item{sep}{A string to use to separate the antigen name and its associated 13 | metal in the column names of the output tibble. Defaults to "|". Only used 14 | if the input file is an .fcs file.} 15 | 16 | \item{panel_info}{Optional. A tibble or data.frame containing information about the 17 | panel used during high-dimensional cytometry data acquisition. Two columns are required: 18 | "metals" and "antigens". Only used if the input file is a .csv file.} 19 | } 20 | \value{ 21 | A `tof_tbl` in which each row represents a single cell and each 22 | column represents a high-dimensional cytometry antigen channel. 23 | 24 | A `tof_tbl` is an S3 class that extends the "tibble" class by storing 25 | one additional attribute: "panel" (a tibble storing information about the 26 | panel used during data acquisition). Because panel information isn't 27 | obvious from data read as a .csv file, this information must be provided 28 | manually by the user. 
29 | } 30 | \description{ 31 | Read high-dimensional cytometry data from a single .fcs or .csv file into a tidy tibble. 32 | } 33 | -------------------------------------------------------------------------------- /man/tof_reduce_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensionality_reduction.R 3 | \name{tof_reduce_dimensions} 4 | \alias{tof_reduce_dimensions} 5 | \title{Apply dimensionality reduction to a single-cell dataset.} 6 | \usage{ 7 | tof_reduce_dimensions( 8 | tof_tibble, 9 | ..., 10 | augment = TRUE, 11 | method = c("pca", "tsne", "umap") 12 | ) 13 | } 14 | \arguments{ 15 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 16 | 17 | \item{...}{Arguments to be passed to the tof_reduce_* function corresponding to 18 | the embedding method. See \code{\link{tof_reduce_pca}}, \code{\link{tof_reduce_tsne}}, and 19 | \code{\link{tof_reduce_umap}}.} 20 | 21 | \item{augment}{A boolean value indicating if the output should column-bind the 22 | dimensionality-reduced embedding vectors of each cell as a new column in `tof_tibble` 23 | (TRUE, the default) or if a tibble including only the low-dimensionality 24 | embeddings should be returned (FALSE).} 25 | 26 | \item{method}{A method of dimensionality reduction. Currently, PCA, tSNE, and 27 | UMAP embedding are supported.} 28 | } 29 | \value{ 30 | A tibble with the same number of rows as `tof_tibble`, each representing 31 | a single cell. Each of the `num_comp` columns represents each cell's embedding 32 | in the calculated embedding space. 33 | } 34 | \description{ 35 | This function is a wrapper around tidytof's tof_reduce_* function family. 
36 | It performs dimensionality reduction on single-cell data using a user-specified method 37 | (of 3 choices) and each method's corresponding input parameters 38 | } 39 | \examples{ 40 | # simulate single-cell data 41 | sim_data <- 42 | dplyr::tibble( 43 | cd45 = rnorm(n = 100), 44 | cd38 = rnorm(n = 100), 45 | cd34 = rnorm(n = 100), 46 | cd19 = rnorm(n = 100) 47 | ) 48 | 49 | # calculate pca 50 | tof_reduce_dimensions(tof_tibble = sim_data, method = "pca") 51 | 52 | # calculate tsne 53 | tof_reduce_dimensions(tof_tibble = sim_data, method = "tsne") 54 | 55 | # calculate umap 56 | tof_reduce_dimensions(tof_tibble = sim_data, method = "umap") 57 | 58 | } 59 | \seealso{ 60 | Other dimensionality reduction functions: 61 | \code{\link{tof_reduce_pca}()}, 62 | \code{\link{tof_reduce_tsne}()}, 63 | \code{\link{tof_reduce_umap}()} 64 | } 65 | \concept{dimensionality reduction functions} 66 | -------------------------------------------------------------------------------- /man/tof_reduce_pca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensionality_reduction.R 3 | \name{tof_reduce_pca} 4 | \alias{tof_reduce_pca} 5 | \title{Perform principal component analysis on single-cell data} 6 | \usage{ 7 | tof_reduce_pca( 8 | tof_tibble, 9 | pca_cols = where(tof_is_numeric), 10 | num_comp = 5, 11 | threshold = NA, 12 | center = TRUE, 13 | scale = TRUE, 14 | return_recipe = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 19 | 20 | \item{pca_cols}{Unquoted column names indicating which columns in `tof_tibble` to 21 | use for computing the principal components. Defaults to all numeric columns. 22 | Supports tidyselect helpers.} 23 | 24 | \item{num_comp}{The number of PCA components to calculate. Defaults 25 | to 5. 
See \code{\link[recipes]{step_pca}}.} 26 | 27 | \item{threshold}{A double between 0 and 1 representing the fraction of total 28 | variance that should be covered by the components returned in the output. See 29 | \code{\link[recipes]{step_pca}}.} 30 | 31 | \item{center}{A boolean value indicating if each column should be centered to 32 | mean 0 before PCA analysis. Defaults to TRUE.} 33 | 34 | \item{scale}{A boolean value indicating if each column should be scaled to 35 | standard deviation = 1 before PCA analysis. Defaults to TRUE.} 36 | 37 | \item{return_recipe}{A boolean value indicating if instead of the PCA result, a 38 | prepped \code{\link[recipes]{recipe}} object containing the 39 | PCA embedding should be 40 | returned. Set this option to TRUE if you want to create the PCA embedding using 41 | one dataset but also want to project new observations onto the same embedding 42 | space later.} 43 | } 44 | \value{ 45 | A tibble with the same number of rows as `tof_tibble`, each representing 46 | a single cell. Each of the `num_comp` columns represents each cell's embedding 47 | in the calculated principal component space. 48 | } 49 | \description{ 50 | This function calculates principal components using single-cell data from a `tof_tibble`.
51 | } 52 | \examples{ 53 | # simulate single-cell data 54 | sim_data <- 55 | dplyr::tibble( 56 | cd45 = rnorm(n = 200), 57 | cd38 = rnorm(n = 200), 58 | cd34 = rnorm(n = 200), 59 | cd19 = rnorm(n = 200) 60 | ) 61 | new_data <- 62 | dplyr::tibble( 63 | cd45 = rnorm(n = 50), 64 | cd38 = rnorm(n = 50), 65 | cd34 = rnorm(n = 50), 66 | cd19 = rnorm(n = 50) 67 | ) 68 | 69 | # calculate pca 70 | tof_reduce_pca(tof_tibble = sim_data, num_comp = 2) 71 | 72 | # return recipe instead of embeddings 73 | pca_recipe <- tof_reduce_pca(tof_tibble = sim_data, return_recipe = TRUE) 74 | 75 | # apply recipe to new data 76 | recipes::bake(pca_recipe, new_data = new_data) 77 | 78 | } 79 | \seealso{ 80 | Other dimensionality reduction functions: 81 | \code{\link{tof_reduce_dimensions}()}, 82 | \code{\link{tof_reduce_tsne}()}, 83 | \code{\link{tof_reduce_umap}()} 84 | } 85 | \concept{dimensionality reduction functions} 86 | -------------------------------------------------------------------------------- /man/tof_reduce_tsne.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dimensionality_reduction.R 3 | \name{tof_reduce_tsne} 4 | \alias{tof_reduce_tsne} 5 | \title{Perform t-distributed stochastic neighborhood embedding on single-cell data} 6 | \usage{ 7 | tof_reduce_tsne( 8 | tof_tibble, 9 | tsne_cols = where(tof_is_numeric), 10 | num_comp = 2, 11 | perplexity = 30, 12 | theta = 0.5, 13 | max_iterations = 1000, 14 | verbose = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{tof_tibble}{A `tof_tbl` or `tibble`.} 20 | 21 | \item{tsne_cols}{Unquoted column names indicating which columns in `tof_tibble` to 22 | use in computing the tSNE embedding. Defaults to all numeric columns 23 | in `tof_tibble`. Supports tidyselect helpers.} 24 | 25 | \item{num_comp}{The number of tSNE components to calculate for the embedding. 
26 | Defaults to 2.} 27 | 28 | \item{perplexity}{A positive numeric value that represents the rough 29 | balance between the input data’s local and global structure emphasized in 30 | the embedding. Smaller values emphasize local structure; larger values emphasize 31 | global structure. The recommended range is generally 5-50. Defaults to 30.} 32 | 33 | \item{theta}{A numeric value representing the speed/accuracy tradeoff for the 34 | embedding. Set to 0 for the exact tSNE; increase for a faster approximation. 35 | Defaults to 0.5.} 36 | 37 | \item{max_iterations}{An integer number of iterations to use during embedding 38 | calculation. Defaults to 1000.} 39 | 40 | \item{verbose}{A boolean value indicating whether progress updates should be 41 | printed during embedding calculation. Default is FALSE.} 42 | 43 | \item{...}{Additional arguments to pass to \code{\link[Rtsne]{Rtsne}}.} 44 | } 45 | \value{ 46 | A tibble with the same number of rows as `tof_tibble`, each representing 47 | a single cell. Each of the `num_comp` columns represents each cell's embedding 48 | in the calculated tSNE space. 49 | } 50 | \description{ 51 | This function calculates a tSNE embedding using single-cell data from a `tof_tibble`.
52 | } 53 | \examples{ 54 | # simulate single-cell data 55 | sim_data <- 56 | dplyr::tibble( 57 | cd45 = rnorm(n = 200), 58 | cd38 = rnorm(n = 200), 59 | cd34 = rnorm(n = 200), 60 | cd19 = rnorm(n = 200) 61 | ) 62 | 63 | # calculate tsne 64 | tof_reduce_tsne(tof_tibble = sim_data) 65 | 66 | # calculate tsne with only 2 columns 67 | tof_reduce_tsne(tof_tibble = sim_data, tsne_cols = c(cd34, cd38)) 68 | 69 | } 70 | \seealso{ 71 | Other dimensionality reduction functions: 72 | \code{\link{tof_reduce_dimensions}()}, 73 | \code{\link{tof_reduce_pca}()}, 74 | \code{\link{tof_reduce_umap}()} 75 | } 76 | \concept{dimensionality reduction functions} 77 | -------------------------------------------------------------------------------- /man/tof_set_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{tof_set_panel} 4 | \alias{tof_set_panel} 5 | \title{Set panel information from a tof_tibble} 6 | \usage{ 7 | tof_set_panel(tof_tibble, panel) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl`.} 11 | 12 | \item{panel}{A tibble containing two columns (`metals` and `antigens`) representing 13 | the information about a panel} 14 | } 15 | \value{ 16 | A `tof_tibble` containing information about the CyTOF panel 17 | that was used during data acquisition for the data contained 18 | in the input `tof_tibble`. Two columns are required: "metals" and "antigens". 
19 | } 20 | \description{ 21 | Set panel information from a tof_tibble 22 | } 23 | \examples{ 24 | # get current panel from an .fcs file 25 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 26 | tof_tibble <- tof_read_data(input_file) 27 | current_panel <- tof_get_panel(tof_tibble) 28 | 29 | # create a new panel (remove empty channels) 30 | new_panel <- dplyr::filter(current_panel, antigens != "empty") 31 | tof_set_panel(tof_tibble = tof_tibble, panel = new_panel) 32 | 33 | } 34 | \seealso{ 35 | Other tof_tbl utilities: 36 | \code{\link{new_tof_tibble}()}, 37 | \code{\link{tof_get_panel}()} 38 | } 39 | \concept{tof_tbl utilities} 40 | -------------------------------------------------------------------------------- /man/tof_split_tidytof_reduced_dimensions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tof_tbl.R 3 | \name{tof_split_tidytof_reduced_dimensions} 4 | \alias{tof_split_tidytof_reduced_dimensions} 5 | \title{Split the dimensionality reduction data that tidytof combines during \code{\link[SingleCellExperiment]{SingleCellExperiment}} conversion} 6 | \usage{ 7 | tof_split_tidytof_reduced_dimensions(sce) 8 | } 9 | \arguments{ 10 | \item{sce}{A \code{\link[SingleCellExperiment]{SingleCellExperiment}} with an 11 | entry named "tidytof_reduced_dimensions" in its \code{\link[SingleCellExperiment]{reducedDims}} slot.} 12 | } 13 | \value{ 14 | A \code{\link[SingleCellExperiment]{SingleCellExperiment}} with separate entries 15 | named "tidytof_pca", "tidytof_umap", and "tidytof_tsne" in its 16 | \code{\link[SingleCellExperiment]{reducedDims}} slots (one for each of the 17 | dimensionality reduction methods for which tidytof has native support). 
18 | } 19 | \description{ 20 | Split the dimensionality reduction data that tidytof combines during \code{\link[SingleCellExperiment]{SingleCellExperiment}} conversion 21 | } 22 | \examples{ 23 | NULL 24 | } 25 | -------------------------------------------------------------------------------- /man/tof_transform.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/preprocessing.R 3 | \name{tof_transform} 4 | \alias{tof_transform} 5 | \title{Transform raw high-dimensional cytometry data.} 6 | \usage{ 7 | tof_transform( 8 | tof_tibble = NULL, 9 | channel_cols = where(tof_is_numeric), 10 | transform_fun 11 | ) 12 | } 13 | \arguments{ 14 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 15 | 16 | \item{channel_cols}{Unquoted column names representing columns that contain 17 | single-cell protein measurements. Supports tidyselect helpers. 18 | If nothing is specified, the default is to transform all numeric columns.} 19 | 20 | \item{transform_fun}{A vectorized function to apply to each protein value for 21 | variance stabilization.} 22 | } 23 | \value{ 24 | A `tof_tbl` with identical dimensions to the input `tof_tibble`, with all 25 | columns specified in channel_cols transformed using `transform_fun`. 26 | } 27 | \description{ 28 | This function transforms a `tof_tbl` of raw ion counts, reads, or 29 | fluorescence intensity units directly measured on a cytometer using a 30 | user-provided function. 
31 | } 32 | \examples{ 33 | 34 | # read in an example .fcs file from tidytof's internal datasets 35 | input_file <- dir(tidytof_example_data("aml"), full.names = TRUE)[[1]] 36 | tof_tibble <- tof_read_data(input_file) 37 | 38 | # preprocess all numeric columns with default behavior 39 | # arcsinh transformation with a cofactor of 5 40 | tof_preprocess(tof_tibble) 41 | 42 | # preprocess all numeric columns using the log base 10 transformation 43 | tof_preprocess(tof_tibble, transform_fun = log10) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/tof_write_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_write_csv} 4 | \alias{tof_write_csv} 5 | \title{Write a series of .csv files from a tof_tbl} 6 | \usage{ 7 | tof_write_csv(tof_tibble, group_cols, out_path, sep = "_", file_name) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{group_cols}{Optional. Unquoted names of the columns in `tof_tibble` that should 13 | be used to group cells into separate files. Supports tidyselect helpers. Defaults to 14 | NULL (all cells are written into a single file).} 15 | 16 | \item{out_path}{A system path indicating the directory where the output .csv 17 | files should be saved. If the directory doesn't exist, it will be created.} 18 | 19 | \item{sep}{Delimiter that should be used between each of the values of `group_cols` 20 | to create the output .csv file names. Defaults to "_".} 21 | 22 | \item{file_name}{If `group_cols` isn't specified, the name (without an extension) 23 | that should be used for the saved .csv file.} 24 | } 25 | \value{ 26 | This function does not return anything. Instead, it has the side-effect 27 | of saving .csv files to `out_path`. 
28 | } 29 | \description{ 30 | This function takes a given `tof_tbl` and writes the single-cell data 31 | it contains into .csv files within the directory located at `out_path`. The 32 | `group_cols` argument specifies how the rows of the `tof_tbl` (each cell) 33 | should be broken into separate .csv files 34 | } 35 | \seealso{ 36 | Other input/output functions: 37 | \code{\link{tof_read_data}()}, 38 | \code{\link{tof_write_data}()}, 39 | \code{\link{tof_write_fcs}()} 40 | } 41 | \concept{input/output functions} 42 | -------------------------------------------------------------------------------- /man/tof_write_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_write_data} 4 | \alias{tof_write_data} 5 | \title{Write high-dimensional cytometry data to a file or to a directory of files} 6 | \usage{ 7 | tof_write_data( 8 | tof_tibble = NULL, 9 | group_cols, 10 | out_path = NULL, 11 | format = c("fcs", "csv"), 12 | sep = "_", 13 | file_name 14 | ) 15 | } 16 | \arguments{ 17 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 18 | 19 | \item{group_cols}{Optional. Unquoted names of the columns in `tof_tibble` that should 20 | be used to group cells into separate files. Supports tidyselect helpers. Defaults to 21 | no grouping (all cells are written into a single file).} 22 | 23 | \item{out_path}{Path to the directory where output files should be saved.} 24 | 25 | \item{format}{format for the files being written. Currently supports .csv and .fcs files} 26 | 27 | \item{sep}{Delimiter that should be used between each of the values of `group_cols` 28 | to create the output .csv/.fcs file names. Defaults to "_".} 29 | 30 | \item{file_name}{If `group_cols` isn't specified, the name (without an extension) 31 | that should be used for the saved file.} 32 | } 33 | \value{ 34 | This function does not explicitly return any values. 
Instead, 35 | it writes .csv and/or .fcs files to the specified `out_path`. 36 | } 37 | \description{ 38 | Write data (in the form of a `tof_tbl`) into either a .csv or an .fcs file for storage. 39 | } 40 | \examples{ 41 | NULL 42 | 43 | } 44 | \seealso{ 45 | Other input/output functions: 46 | \code{\link{tof_read_data}()}, 47 | \code{\link{tof_write_csv}()}, 48 | \code{\link{tof_write_fcs}()} 49 | } 50 | \concept{input/output functions} 51 | -------------------------------------------------------------------------------- /man/tof_write_fcs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/io.R 3 | \name{tof_write_fcs} 4 | \alias{tof_write_fcs} 5 | \title{Write a series of .fcs files from a tof_tbl} 6 | \usage{ 7 | tof_write_fcs(tof_tibble, group_cols, out_path, sep = "_", file_name) 8 | } 9 | \arguments{ 10 | \item{tof_tibble}{A `tof_tbl` or a `tibble`.} 11 | 12 | \item{group_cols}{Unquoted names of the columns in `tof_tibble` that should 13 | be used to group cells into separate files. Supports tidyselect helpers. Defaults to 14 | NULL (all cells are written into a single file).} 15 | 16 | \item{out_path}{A system path indicating the directory where the output .fcs 17 | files should be saved. If the directory doesn't exist, it will be created.} 18 | 19 | \item{sep}{Delimiter that should be used between each of the values of `group_cols` 20 | to create the output .fcs file names. Defaults to "_".} 21 | 22 | \item{file_name}{If `group_cols` isn't specified, the name (without an extension) 23 | that should be used for the saved .fcs file.} 24 | } 25 | \value{ 26 | This function does not return anything. Instead, it has the side-effect 27 | of saving .fcs files to `out_path`. 
28 | } 29 | \description{ 30 | This function takes a given `tof_tbl` and writes the single-cell data 31 | it contains into .fcs files within the directory located at `out_path`. The 32 | `group_cols` argument specifies how the rows of the `tof_tbl` (each cell) 33 | should be broken into separate .fcs files 34 | } 35 | \examples{ 36 | NULL 37 | 38 | } 39 | \seealso{ 40 | Other input/output functions: 41 | \code{\link{tof_read_data}()}, 42 | \code{\link{tof_write_csv}()}, 43 | \code{\link{tof_write_data}()} 44 | } 45 | \concept{input/output functions} 46 | -------------------------------------------------------------------------------- /man/where.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/imports.R 3 | \name{where} 4 | \alias{where} 5 | \title{Select variables with a function} 6 | \usage{ 7 | where(fn) 8 | } 9 | \arguments{ 10 | \item{fn}{A function that returns TRUE or FALSE (technically, a predicate function). 11 | Can also be a purrr-like formula.} 12 | } 13 | \value{ 14 | A predicate that can be used to select columns from a data.frame. 15 | } 16 | \description{ 17 | This is a copy of \code{\link[tidyselect]{where}}, a selection helper that 18 | selects the variables for which a predicate function returns TRUE. See 19 | \code{\link[tidyselect]{language}} for more details about tidyselection. 20 | } 21 | \details{ 22 | This help file was replicated verbatim from \code{\link[tidyselect]{tidyselect-package}}. 23 | } 24 | \examples{ 25 | NULL 26 | 27 | } 28 | \references{ 29 | Lionel Henry and Hadley Wickham (2021). tidyselect: 30 | Select from a Set of Strings. R package version 1.1.1. 
31 | https://CRAN.R-project.org/package=tidyselect 32 | } 33 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // find_jaccard_coefficients 14 | NumericMatrix find_jaccard_coefficients(NumericMatrix knn_ids); 15 | RcppExport SEXP _tidytof_find_jaccard_coefficients(SEXP knn_idsSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< NumericMatrix >::type knn_ids(knn_idsSEXP); 20 | rcpp_result_gen = Rcpp::wrap(find_jaccard_coefficients(knn_ids)); 21 | return rcpp_result_gen; 22 | END_RCPP 23 | } 24 | 25 | static const R_CallMethodDef CallEntries[] = { 26 | {"_tidytof_find_jaccard_coefficients", (DL_FUNC) &_tidytof_find_jaccard_coefficients, 1}, 27 | {NULL, NULL, 0} 28 | }; 29 | 30 | RcppExport void R_init_tidytof(DllInfo *dll) { 31 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 32 | R_useDynamicSymbols(dll, FALSE); 33 | } 34 | -------------------------------------------------------------------------------- /src/code.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // Find the Jaccard Similarity Coefficient between all cells (rows) of an input 5 | // matrix of k-nearest neighbor IDs for each cell 6 | // 7 
| // @param knn_ids, a num_cells by num_neighbors matrix of nearest-neighbor indices 8 | // @return jaccards, a matrix in which each row is a tuple (from, to, jac) of indices 9 | // and jaccard coefficients between each pair of cells in the dataset. 10 | // 11 | // Citation: This code is heavily inspired by that of Chen Hao in the GitHub 12 | // package "Rphenograph" at github.com/JinmiaoChenLab/Rphenograph/ as well as 13 | // the Python package for PhenoGraph, written by Jacob Levine and hosted at 14 | // github.com/dpeerlab/PhenoGraph/ 15 | 16 | 17 | // [[Rcpp::export]] 18 | NumericMatrix find_jaccard_coefficients(NumericMatrix knn_ids) { 19 | int num_cells = knn_ids.nrow(); 20 | int num_neighbors = knn_ids.ncol(); 21 | NumericMatrix jaccards(num_cells * num_neighbors, 3); 22 | int row_index = 0; 23 | for (int cell = 0; cell < num_cells; cell++) { 24 | for (int neighbor = 0; neighbor < num_neighbors; neighbor++) { 25 | int neighbor_index = knn_ids(cell, neighbor) - 1; 26 | NumericVector node_cell = knn_ids(cell, _); 27 | NumericVector node_neighbor = knn_ids(neighbor_index , _); 28 | // count number of mutual neighbors between node_cell and node_neighbor 29 | int node_intersection = intersect(node_cell, node_neighbor).size(); 30 | // count unique cells in the neighborhood of node_cell and node_neighbor 31 | int node_union = union_(node_cell, node_neighbor).size(); 32 | // find jaccard coefficient and report tuples 33 | jaccards(row_index, 0) = cell + 1; 34 | jaccards(row_index, 1) = neighbor_index + 1; 35 | jaccards(row_index, 2) = (1.0 * node_intersection) / node_union; 36 | row_index++; 37 | } 38 | } 39 | 40 | return jaccards; 41 | } 42 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("spelling", quietly = TRUE)) { 2 | spelling::spell_check_test( 3 | vignettes = TRUE, error = FALSE, 4 | skip_on_cran = 
# Tests for tof_preprocess() and tof_postprocess(), run against the packaged
# `ddpr_data` dataset.

library(dplyr)
library(purrr)
library(readr)
library(stringr)
library(tidytof)

data(ddpr_data)

# tof_preprocess ---------------------------------------------------------------

test_that("Shape of transformed data is the same as the input data", {
  result <- tof_preprocess(tof_tibble = ddpr_data)
  # expect_equal() reports both values on failure, unlike expect_true(a == b)
  expect_equal(nrow(result), nrow(ddpr_data))
  expect_equal(ncol(result), ncol(ddpr_data))
})

test_that("tof_preprocess should give different values when undo_noise is TRUE and FALSE", {
  result <- tof_preprocess(tof_tibble = ddpr_data, undo_noise = TRUE)
  result_2 <- tof_preprocess(tof_tibble = ddpr_data, undo_noise = FALSE)

  expect_false(isTRUE(all.equal(result, result_2)))
})

test_that("tof_preprocess should give different values when transform_fun is different", {
  result <- tof_preprocess(tof_tibble = ddpr_data)
  result_2 <- tof_preprocess(tof_tibble = ddpr_data, transform_fun = scale)

  expect_false(isTRUE(all.equal(result, result_2)))
})

test_that("tof_preprocess should give different values when channel_cols is different", {
  result <- tof_preprocess(tof_tibble = ddpr_data)
  result_2 <- tof_preprocess(tof_tibble = ddpr_data, channel_cols = c(cd45, cd20, cd34))

  expect_false(isTRUE(all.equal(result, result_2)))
})


# tof_postprocess --------------------------------------------------------------

test_that("Shape of transformed data is the same as the input data", {
  result <- tof_postprocess(tof_tibble = ddpr_data)
  expect_equal(nrow(result), nrow(ddpr_data))
  expect_equal(ncol(result), ncol(ddpr_data))
})

test_that("tof_postprocess should give different values when redo_noise is TRUE and FALSE", {
  result <- tof_postprocess(tof_tibble = ddpr_data, redo_noise = TRUE)
  result_2 <- tof_postprocess(tof_tibble = ddpr_data, redo_noise = FALSE)

  expect_false(isTRUE(all.equal(result, result_2)))
})

# This test previously appeared twice verbatim; one copy is kept.
test_that("tof_postprocess should give different values when transform_fun is different", {
  result <- tof_postprocess(tof_tibble = ddpr_data)
  result_2 <- tof_postprocess(tof_tibble = ddpr_data, transform_fun = scale)

  expect_false(isTRUE(all.equal(result, result_2)))
})

test_that("tof_postprocess should give different values when channel_cols is different", {
  result <- tof_postprocess(tof_tibble = ddpr_data)
  result_2 <- tof_postprocess(tof_tibble = ddpr_data, channel_cols = c(cd45, cd20, cd34))

  expect_false(isTRUE(all.equal(result, result_2)))
})
-------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | --------------------------------------------------------------------------------