├── .Rbuildignore
├── .github
    ├── .gitignore
    ├── CODE_OF_CONDUCT.md
    ├── CONTRIBUTING.md
    └── workflows
    │   └── check-bioc.yml
├── .gitignore
├── .vscode
    ├── extensions.json
    └── settings.json
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS
├── R
    ├── cellbrowsers.R
    ├── clustifyR-package.R
    ├── common_dplyr.R
    ├── compare_genelist.R
    ├── compute_similarity.R
    ├── data.R
    ├── globals.R
    ├── main.R
    ├── object_access.R
    ├── plot.R
    ├── run_fgsea.R
    ├── shiny.R
    └── utils.R
├── README.Rmd
├── README.md
├── air.toml
├── codecov.yml
├── data-raw
    ├── cbmc_m.R
    ├── cbmc_ref.R
    ├── downrefs.R
    ├── human_genes_10x.R
    ├── mouse_genes_10x.R
    ├── object_loc_lookup.R
    ├── pbmc_bulk_matrix.R
    ├── pbmc_markers.R
    ├── pbmc_markers_M3Drop.R
    ├── pbmc_matrix_small.R
    ├── pbmc_meta.R
    └── pbmc_vargenes.R
├── data
    ├── cbmc_m.rda
    ├── cbmc_ref.rda
    ├── downrefs.rda
    ├── human_genes_10x.rda
    ├── mouse_genes_10x.rda
    ├── object_loc_lookup.rda
    ├── pbmc_markers.rda
    ├── pbmc_markers_M3Drop.rda
    ├── pbmc_matrix_small.rda
    ├── pbmc_meta.rda
    └── pbmc_vargenes.rda
├── inst
    ├── CITATION
    ├── extdata
    │   ├── c2.cp.reactome.v6.2.symbols.gmt.gz
    │   └── hsPBMC_markers.txt
    ├── scripts
    │   └── dl_reactome_gene_sets.R
    └── shinyapp
    │   ├── README.md
    │   ├── data
    │       ├── dependencies.csv
    │       └── example-input
    │       │   ├── get-data.R
    │       │   ├── matrix.csv.gz
    │       │   └── meta-data.csv.gz
    │   ├── global.R
    │   ├── logo.png
    │   ├── server.R
    │   ├── sheet
    │       └── 6c91f3dd95c2217959d38f926b96d7bb_bot4rf@gmail.com
    │   ├── ui.R
    │   └── www
    │       └── logo.png
├── man
    ├── append_genes.Rd
    ├── assess_rank_bias.Rd
    ├── assign_ident.Rd
    ├── average_clusters.Rd
    ├── binarize_expr.Rd
    ├── build_atlas.Rd
    ├── calc_distance.Rd
    ├── calc_similarity.Rd
    ├── calculate_pathway_gsea.Rd
    ├── call_consensus.Rd
    ├── call_to_metadata.Rd
    ├── cbmc_m.Rd
    ├── cbmc_ref.Rd
    ├── check_raw_counts.Rd
    ├── clustify.Rd
    ├── clustify_lists.Rd
    ├── clustify_nudge.Rd
    ├── clustifyr-package.Rd
    ├── clustifyr_methods.Rd
    ├── collapse_to_cluster.Rd
    ├── compare_lists.Rd
    ├── cor_to_call.Rd
    ├── cor_to_call_rank.Rd
    ├── cor_to_call_topn.Rd
    ├── cosine.Rd
    ├── downrefs.Rd
    ├── downsample_matrix.Rd
    ├── feature_select_PCA.Rd
    ├── figures
    │   ├── README-example-1.png
    │   ├── README-example-2.png
    │   ├── example-1.png
    │   ├── example-2.png
    │   ├── readme_example-1.png
    │   ├── readme_example-2.png
    │   └── test.png
    ├── file_marker_parse.Rd
    ├── find_rank_bias.Rd
    ├── gene_pct.Rd
    ├── gene_pct_markerm.Rd
    ├── get_best_match_matrix.Rd
    ├── get_best_str.Rd
    ├── get_common_elements.Rd
    ├── get_similarity.Rd
    ├── get_ucsc_reference.Rd
    ├── get_unique_column.Rd
    ├── get_vargenes.Rd
    ├── gmt_to_list.Rd
    ├── human_genes_10x.Rd
    ├── insert_meta_object.Rd
    ├── kl_divergence.Rd
    ├── make_comb_ref.Rd
    ├── marker_select.Rd
    ├── matrixize_markers.Rd
    ├── mouse_genes_10x.Rd
    ├── not_pretty_palette.Rd
    ├── object_data.Rd
    ├── object_loc_lookup.Rd
    ├── object_ref.Rd
    ├── overcluster.Rd
    ├── overcluster_test.Rd
    ├── parse_loc_object.Rd
    ├── pbmc_markers.Rd
    ├── pbmc_markers_M3Drop.Rd
    ├── pbmc_matrix_small.Rd
    ├── pbmc_meta.Rd
    ├── pbmc_vargenes.Rd
    ├── percent_clusters.Rd
    ├── permute_similarity.Rd
    ├── plot_best_call.Rd
    ├── plot_call.Rd
    ├── plot_cor.Rd
    ├── plot_cor_heatmap.Rd
    ├── plot_dims.Rd
    ├── plot_gene.Rd
    ├── plot_pathway_gsea.Rd
    ├── plot_rank_bias.Rd
    ├── pos_neg_marker.Rd
    ├── pos_neg_select.Rd
    ├── pretty_palette.Rd
    ├── pretty_palette2.Rd
    ├── pretty_palette_ramp_d.Rd
    ├── query_rank_bias.Rd
    ├── ref_feature_select.Rd
    ├── ref_marker_select.Rd
    ├── reverse_marker_matrix.Rd
    ├── run_clustifyr_app.Rd
    ├── run_gsea.Rd
    ├── sce_pbmc.Rd
    ├── seurat_meta.Rd
    ├── seurat_ref.Rd
    ├── so_pbmc.Rd
    ├── vector_similarity.Rd
    └── write_meta.Rd
├── pkgdown
    ├── _pkgdown.yml
    └── favicon
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   ├── apple-touch-icon-180x180.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   └── favicon.ico
├── tests
    ├── testthat.R
    └── testthat
    │   ├── .gitignore
    │   ├── test_cor.R
    │   ├── test_gsea.R
    │   ├── test_list.R
    │   ├── test_plots.R
    │   └── test_utils.R
└── vignettes
    ├── .gitignore
    ├── clustifyr.Rmd
    └── geo-annotations.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^codecov\.yml$
 2 | ^appveyor\.yml$
 3 | ^clustifyR\.Rproj$
 4 | ^\.Rproj\.user$
 5 | ^_pkgdown\.yml$
 6 | ^pkgdown$
 7 | ^docs$
 8 | ^\.travis\.yml$
 9 | ^travis_setup\.sh$
10 | ^README\.Rmd$
11 | ^data-raw$
12 | ^inst/manuscript$
13 | ^\.github$
14 | ^CODE_OF_CONDUCT\.md$
15 | ^air\.toml$
16 | ^\.vscode$
17 | ^[\.]?air\.toml$
18 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity and
 10 | orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 | and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the overall
 26 | community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 | advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 | address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 | professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards
 42 | of acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies
 54 | when an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail
 56 | address, posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at [INSERT CONTACT
 63 | METHOD]. All complaints will be reviewed and investigated promptly and fairly.
 64 | 
 65 | All community leaders are obligated to respect the privacy and security of the
 66 | reporter of any incident.
 67 | 
 68 | ## Enforcement Guidelines
 69 | 
 70 | Community leaders will follow these Community Impact Guidelines in determining
 71 | the consequences for any action they deem in violation of this Code of Conduct:
 72 | 
 73 | ### 1. Correction
 74 | 
 75 | **Community Impact**: Use of inappropriate language or other behavior deemed
 76 | unprofessional or unwelcome in the community.
 77 | 
 78 | **Consequence**: A private, written warning from community leaders, providing
 79 | clarity around the nature of the violation and an explanation of why the
 80 | behavior was inappropriate. A public apology may be requested.
 81 | 
 82 | ### 2. Warning
 83 | 
 84 | **Community Impact**: A violation through a single incident or series of
 85 | actions.
 86 | 
 87 | **Consequence**: A warning with consequences for continued behavior. No
 88 | interaction with the people involved, including unsolicited interaction with
 89 | those enforcing the Code of Conduct, for a specified period of time. This
 90 | includes avoiding interactions in community spaces as well as external channels
 91 | like social media. Violating these terms may lead to a temporary or permanent
 92 | ban.
 93 | 
 94 | ### 3. Temporary Ban
 95 | 
 96 | **Community Impact**: A serious violation of community standards, including
 97 | sustained inappropriate behavior.
 98 | 
 99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 | 
105 | ### 4. Permanent Ban
106 | 
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 | 
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 | 
114 | ## Attribution
115 | 
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0,
118 | available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | [homepage]: https://www.contributor-covenant.org
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to clustifyr
 2 | 
 3 | Thank you for using and helping to improve [`clustifyr`](https://github.com/rnabioco/clustifyr/issues) and its data hub [`clustifyrdatahub`](https://github.com/rnabioco/clustifyrdatahub/issues). Any discussion of bugs, fixes, and feature enhancements is greatly appreciated.
 4 | 
 5 | ## Fixing typos
 6 | 
 7 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 
 8 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 
 9 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line.
10 | 
11 | ## Bigger changes
12 | 
13 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 
14 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 
15 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed).
16 | 
17 | ### Pull request process
18 | 
19 | *   Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("rnabioco/clustifyr", fork = TRUE)`.
20 | 
21 | *   Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 
22 |     If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 
23 | *   Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`.
24 | 
25 | *   Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser.
26 |     The title of your PR should briefly describe the change.
27 |     The body of your PR should contain `Fixes #issue-number`.
28 | 
29 | *  For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in <https://style.tidyverse.org/news.html>.
30 | 
31 | *  Increase the version numbers in the README.md and DESCRIPTION to the new version that this
32 |    Pull Request would represent. Please follow [Bioconductor](https://bioconductor.org/developers/how-to/version-numbering/) versioning schemes.
33 |    
34 | *  Please request code review for Pull Requests. Once you have the sign-off of two other developers, you may then merge the updated code.
35 | 
36 | 
37 | ### Code style
38 | 
39 | *   New code should follow the Bioconductor [style guide](https://bioconductor.org/developers/how-to/coding-style/). 
40 |     You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR.  
41 | 
42 | *  We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation.  
43 | 
44 | *  We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 
45 |    Contributions with test cases included are easier to accept.  
46 | 
47 | ## Code of Conduct
48 | 
49 | Please note that the clustifyr project is released with a
50 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this
51 | project you agree to abide by its terms.
52 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | .DS_Store
 6 | .Rproj
 7 | .renviron
 8 | .rprofile
 9 | .rproj
10 | .rproj.user
11 | .rhistory
12 | .rapp.history
13 | .o
14 | .sl
15 | .so
16 | .dylib
17 | .a
18 | .dll
19 | .def
20 | .ds_store
21 | unsrturl.bst
22 | .log
23 | .aux
24 | .backups
25 | .cproject
26 | .directory
27 | .dropbox
28 | .exrc
29 | .gdb.history
30 | .gitattributes
31 | .gitmodules
32 | .hgtags
33 | .project
34 | .seed
35 | .settings
36 | .tm_properties
37 | clustifyr.Rproj
38 | inst/doc
39 | docs
40 | 


--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 |     "recommendations": [
3 |         "Posit.air-vscode"
4 |     ]
5 | }
6 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "[r]": {
3 |         "editor.formatOnSave": true,
4 |         "editor.defaultFormatter": "Posit.air-vscode"
5 |     }
6 | }
7 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: clustifyr
 2 | Title: Classifier for Single-cell RNA-seq Using Cell Clusters
 3 | Version: 1.21.0
 4 | Description: Package designed to aid in classifying cells from single-cell RNA sequencing data
 5 |     using external reference data (e.g., bulk RNA-seq, scRNA-seq, microarray, gene lists). A 
 6 |     variety of correlation based methods and gene list enrichment methods are provided to assist cell
 7 |     type assignment.
 8 | Authors@R: 
 9 |     c(person(given = "Rui",
10 |              family = "Fu",
11 |              role = c("cre", "aut"), 
12 |              email = "ray.sinensis@gmail.com"),
13 |       person(given = "Kent",
14 |              family = "Riemondy",
15 |              role =  "aut",
16 |              email = "kent.riemondy@gmail.com"),
17 |       person(given = "Austin",
18 |              family = "Gillen",
19 |              role = "ctb",
20 |              email = "austin.gillen@ucdenver.edu"),
21 |       person(given = "Chengzhe",
22 |              family = "Tian",
23 |              role = "ctb",
24 |              email = "Chengzhe.Tian@colorado.edu"),
25 |       person(given = "Jay",
26 |              family = "Hesselberth",
27 |              role = "ctb",
28 |              email = "jay.hesselberth@gmail.com"),
29 |       person(given = "Yue",
30 |              family = "Hao",
31 |              role = "ctb",
32 |              email = "haoyuethink@gmail.com"),
33 |       person(given = "Michelle",
34 |              family = "Daya",
35 |              role = "ctb",
36 |              email = "michelle.daya@ucdenver.edu"),
37 |       person(given = "Sidhant",
38 |              family = "Puntambekar",
39 |              role = "ctb",
40 |              email = "sidhantnp@yahoo.com"),
41 |       person("RNA Bioscience Initiative", role = c("fnd", "cph"),
42 |            comment = c(ROR = "https://ror.org/03wmf1y16"))
43 |       )
44 | License: MIT + file LICENSE
45 | Depends: R (>= 2.10)
46 | Imports: 
47 |     cowplot,
48 |     dplyr,
49 |     entropy,
50 |     fgsea,
51 |     ggplot2,
52 |     Matrix,
53 |     rlang,
54 |     scales,
55 |     stringr,
56 |     tibble,
57 |     tidyr,
58 |     stats,
59 |     methods,
60 |     SingleCellExperiment,
61 |     SummarizedExperiment,
62 |     SeuratObject,
63 |     matrixStats,
64 |     S4Vectors,
65 |     proxy,
66 |     httr,
67 |     utils
68 | Suggests:
69 |     ComplexHeatmap,
70 |     covr,
71 |     knitr,
72 |     rmarkdown,
73 |     testthat,
74 |     ggrepel,
75 |     BiocStyle,
76 |     BiocManager,
77 |     remotes,
78 |     shiny,
79 |     gprofiler2,
80 |     purrr,
81 |     data.table,
82 |     R.utils
83 | biocViews: SingleCell, Annotation, Sequencing, Microarray, GeneExpression
84 | BugReports: https://github.com/rnabioco/clustifyr/issues
85 | URL: https://github.com/rnabioco/clustifyr,
86 |     https://rnabioco.github.io/clustifyr/
87 | VignetteBuilder: 
88 |     knitr
89 | ByteCompile: true
90 | Encoding: UTF-8
91 | Roxygen: list(markdown = TRUE)
92 | RoxygenNote: 7.3.2
93 | LazyData: true
94 | Config/Needs/website:
95 |     pkgdown,
96 |     rnabioco/rbitemplate
97 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 NCBI-Hackathons
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(clustify,Seurat)
  4 | S3method(clustify,SingleCellExperiment)
  5 | S3method(clustify,default)
  6 | S3method(clustify_lists,Seurat)
  7 | S3method(clustify_lists,SingleCellExperiment)
  8 | S3method(clustify_lists,default)
  9 | S3method(clustify_nudge,Seurat)
 10 | S3method(clustify_nudge,default)
 11 | S3method(object_data,Seurat)
 12 | S3method(object_data,SingleCellExperiment)
 13 | S3method(object_ref,Seurat)
 14 | S3method(object_ref,SingleCellExperiment)
 15 | S3method(object_ref,default)
 16 | S3method(seurat_meta,Seurat)
 17 | S3method(seurat_ref,Seurat)
 18 | S3method(write_meta,Seurat)
 19 | S3method(write_meta,SingleCellExperiment)
 20 | export(append_genes)
 21 | export(assess_rank_bias)
 22 | export(average_clusters)
 23 | export(binarize_expr)
 24 | export(build_atlas)
 25 | export(calc_distance)
 26 | export(calculate_pathway_gsea)
 27 | export(call_consensus)
 28 | export(call_to_metadata)
 29 | export(check_raw_counts)
 30 | export(clustify)
 31 | export(clustify_lists)
 32 | export(clustify_nudge)
 33 | export(clustifyr_methods)
 34 | export(collapse_to_cluster)
 35 | export(compare_lists)
 36 | export(cor_to_call)
 37 | export(cor_to_call_rank)
 38 | export(cor_to_call_topn)
 39 | export(downsample_matrix)
 40 | export(feature_select_PCA)
 41 | export(file_marker_parse)
 42 | export(find_rank_bias)
 43 | export(gene_pct_markerm)
 44 | export(get_ucsc_reference)
 45 | export(get_vargenes)
 46 | export(gmt_to_list)
 47 | export(insert_meta_object)
 48 | export(make_comb_ref)
 49 | export(marker_select)
 50 | export(matrixize_markers)
 51 | export(object_data)
 52 | export(object_ref)
 53 | export(overcluster)
 54 | export(overcluster_test)
 55 | export(parse_loc_object)
 56 | export(plot_best_call)
 57 | export(plot_cor)
 58 | export(plot_cor_heatmap)
 59 | export(plot_dims)
 60 | export(plot_gene)
 61 | export(plot_pathway_gsea)
 62 | export(plot_rank_bias)
 63 | export(pos_neg_marker)
 64 | export(pos_neg_select)
 65 | export(query_rank_bias)
 66 | export(ref_feature_select)
 67 | export(ref_marker_select)
 68 | export(reverse_marker_matrix)
 69 | export(run_clustifyr_app)
 70 | export(run_gsea)
 71 | export(sce_pbmc)
 72 | export(seurat_meta)
 73 | export(seurat_ref)
 74 | export(so_pbmc)
 75 | export(write_meta)
 76 | import(Matrix)
 77 | import(dplyr)
 78 | import(entropy)
 79 | import(ggplot2)
 80 | import(scales)
 81 | import(tibble)
 82 | importFrom(S4Vectors,DataFrame)
 83 | importFrom(SeuratObject,"DefaultAssay<-")
 84 | importFrom(SeuratObject,"Key<-")
 85 | importFrom(SeuratObject,CreateDimReducObject)
 86 | importFrom(SeuratObject,CreateSeuratObject)
 87 | importFrom(SeuratObject,DefaultAssay)
 88 | importFrom(SeuratObject,Key)
 89 | importFrom(SeuratObject,VariableFeatures)
 90 | importFrom(SingleCellExperiment,colData)
 91 | importFrom(SingleCellExperiment,logcounts)
 92 | importFrom(SummarizedExperiment,"colData<-")
 93 | importFrom(cowplot,theme_cowplot)
 94 | importFrom(fgsea,fgsea)
 95 | importFrom(httr,build_url)
 96 | importFrom(httr,http_error)
 97 | importFrom(httr,parse_url)
 98 | importFrom(matrixStats,colRanks)
 99 | importFrom(matrixStats,rowMaxs)
100 | importFrom(matrixStats,rowMedians)
101 | importFrom(methods,is)
102 | importFrom(rlang,":=")
103 | importFrom(stats,cor)
104 | importFrom(stats,dhyper)
105 | importFrom(stats,median)
106 | importFrom(stats,p.adjust)
107 | importFrom(stats,prcomp)
108 | importFrom(stats,quantile)
109 | importFrom(tidyr,gather)
110 | importFrom(utils,packageVersion)
111 | importFrom(utils,read.csv)
112 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | Changes in version 0.0.1 (2019-02-01)
 2 | + Added direct support for `seurat` and other object classes
 3 | + Updated readme and tutorial pages
 4 | + Moved some larger reference data to `clustifyrdata`
 5 | 
 6 | Changes in version 0.0.2 (2019-05-29)
 7 | + Updated discussions, benchmark, and visualization example pages
 8 | + Reorganized `clustifyrdata` data package
 9 | 
10 | Changes in version 0.0.3 (2019-09-06)
11 | + Support of SingleCellExperiment objects and more
12 | + Bug fixes
13 | 
14 | Changes in version 0.99.0 (2019-11-06)
15 | + Update to tutorials
16 | + Refactor arguments
17 | + Bug fixes
18 | + Cleanup for Bioconductor submission
19 | 
20 | Changes in version 1.1.0 (2020-05-21)
21 | + Bioc release
22 | + Bug fixes
23 | 
24 | Changes in version 1.1.2 (2020-09-21)
25 | + UCSC cell browser reference building
26 | + Tutorial update
27 | + Bug fixes
28 | 
29 | Changes in version 1.3.1 (2020-12-26)
30 | + Q&A section
31 | + Now defaults to top 1000 variable genes in Seurat (including v4)
32 | + Bug fixes
33 | 
34 | Changes in version 1.3.2 (2021-02-25)
35 | + `build_atlas()` for combining references
36 | + More Q&A
37 | 
38 | Changes in version 1.3.3 (2021-02-28)
39 | + Launch shiny app with `run_clustifyr_app()`
40 | + Plot and GO for most divergent ranks in correlation of query vs reference
41 | 
42 | Changes in version 1.5.1 (2021-08-04)
43 | + `clustify_lists()` support for uneven number of markers
44 | + Deprecated SeuratV2 support
45 | 
46 | Changes in version 1.5.2 (2021-10-04)
47 | + `clustify_lists()` support for output of overlapping genes (`details_out = TRUE`)
48 | + Added truncated mean and trimean modes to `average_clusters()`
49 | 
50 | Changes in version 1.7.3 (2022-03-09)
51 | + `vec_out` option for directly getting classification results as a vector, to be inserted into other metadata/workflow
52 | + Maintainer change
53 | 
54 | Changes in version 1.15.1 (2023-10-31)
55 | + Replace `Seurat` dependency with `SeuratObject`
56 | 
57 | Changes in version 1.15.2 (2024-04-03)
58 | + Add support for `Seurat` version 5 objects
59 | 


--------------------------------------------------------------------------------
/R/cellbrowsers.R:
--------------------------------------------------------------------------------
 1 | #' Build reference atlases from external UCSC cellbrowsers
 2 | #'
 3 | #' @param cb_url URL of cellbrowser dataset (e.g. http://cells.ucsc.edu/?ds=cortex-dev).
 4 | #' Note that the URL must contain the ds=dataset-name suffix.
 5 | #' @param cluster_col annotation field for summarizing gene expression (e.g. clustering,
 6 | #' cell-type name, samples, etc.)
 7 | #' @param ... additional args passed to average_clusters
 8 | #'
 9 | #' @importFrom httr http_error parse_url build_url
10 | #' @return reference matrix
11 | #' @examples
12 | #' \dontrun{
13 | #'
14 | #' # many datasets hosted by UCSC have UMI counts in the expression matrix
15 | #' # set if_log = FALSE if the expression matrix has not been natural log transformed
16 | #'
17 | #' get_ucsc_reference(
18 | #'     cb_url = "https://cells.ucsc.edu/?ds=evocell+mus-musculus+marrow",
19 | #'     cluster_col = "Clusters", if_log = FALSE
20 | #' )
21 | #'
22 | #' get_ucsc_reference(
23 | #'     cb_url = "http://cells.ucsc.edu/?ds=muscle-cell-atlas",
24 | #'     cluster_col = "cell_annotation",
25 | #'     if_log = FALSE
26 | #' )
27 | #' }
28 | #' @export
29 | get_ucsc_reference <- function(cb_url, cluster_col, ...) {
30 |   # both packages are only suggested, so check for them at call time
31 |   for (pkg in c("R.utils", "data.table")) {
32 |     if (!requireNamespace(pkg, quietly = TRUE)) {
33 |       stop(
34 |         "This function requires the ", pkg, " package, please install\n",
35 |         "install.packages('", pkg, "')"
36 |       )
37 |     }
38 |   }
39 | 
40 |   url <- httr::parse_url(cb_url)
41 |   base_url <- url
42 |   ds <- url$query$ds
43 |   if (!is.character(ds) || length(ds) != 1 || !nzchar(ds)) {
44 |     stop("cb_url must contain a ds=dataset-name query: ", cb_url)
45 |   }
46 | 
47 |   # ds can include sub-datasets with syntax, "dataset+subdataset+and-so-on"
48 |   # files are hosted at urls: dataset/subdataset/andsoon/..."
49 |   ds_split <- strsplit(ds, "+", fixed = TRUE)[[1]]
50 |   ds <- paste0(ds_split, collapse = "/")
51 |   base_url$query <- ""
52 | 
53 |   mdata_url <- httr::modify_url(base_url, path = file.path(ds, "meta.tsv"))
54 |   if (!httr::http_error(mdata_url)) {
55 |     mdata <- data.table::fread(mdata_url, data.table = FALSE, sep = "\t")
56 |   } else {
57 |     stop("unable to find metadata at url: ", mdata_url)
58 |   }
59 | 
60 |   mat_url <- httr::modify_url(
61 |     base_url,
62 |     path = file.path(ds, "exprMatrix.tsv.gz")
63 |   )
64 |   if (!httr::http_error(mat_url)) {
65 |     mat <- data.table::fread(mat_url, data.table = FALSE, sep = "\t")
66 |   } else {
67 |     stop("unable to find matrix at url: ", mat_url)
68 |   }
69 | 
70 |   # the first column becomes the row names and is dropped from the data
71 |   rownames(mat) <- mat[, 1]
72 |   mat[, 1] <- NULL
73 |   mat <- as.matrix(mat)
74 | 
75 |   # na.rm keeps the threshold test below from failing on missing values
76 |   mm <- max(mat, na.rm = TRUE)
77 |   if (mm > 50) {
78 |     dots <- list(...)
79 |     if (!"if_log" %in% names(dots) || dots$if_log) {
80 |       warning(
81 |         "the data matrix has a maximum value of ",
82 |         mm,
83 |         "\n",
84 |         "the data are likely not log transformed,\n",
85 |         "please set the if_log argument for average clusters accordingly"
86 |       )
87 |     }
88 |   }
89 | 
90 |   average_clusters(mat, mdata, cluster_col = cluster_col, ...)
91 | }
92 | 


--------------------------------------------------------------------------------
/R/clustifyR-package.R:
--------------------------------------------------------------------------------
 1 | #' @docType package
 2 | #' @aliases clustifyr-package
 3 | #' @keywords internal
 4 | "_PACKAGE"
 5 | 
 6 | # roxygen namespace tags. Modify with care!
 7 | ## usethis namespace: start
 8 | #' @importFrom stats prcomp dhyper cor p.adjust quantile median
 9 | #' @import scales
10 | #' @import ggplot2
11 | #' @import tibble
12 | #' @import entropy
13 | #' @import Matrix
14 | #' @import dplyr
15 | #' @importFrom rlang :=
16 | #' @importFrom tidyr gather
17 | #' @importFrom cowplot theme_cowplot
18 | #' @importFrom fgsea fgsea
19 | #' @importFrom methods is
20 | #' @importFrom SeuratObject Key Key<- DefaultAssay DefaultAssay<-
21 | ## usethis namespace: end
22 | NULL
23 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' Matrix of single-cell RNA-seq PBMCs.
 2 | #'
 3 | #' Count matrix of 3k pbmcs from Seurat3 tutorial, with only var.features
 4 | #'
 5 | #' @format A sparseMatrix with genes as rows and cells as columns.
 6 | #'
 7 | #' @family data
 8 | #'
 9 | #' @source \url{https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html}
10 | "pbmc_matrix_small"
11 | 
12 | #' Meta-data for single-cell RNA-seq PBMCs.
13 | #'
14 | #' Metadata, including umap, of 3k pbmcs from Seurat3 tutorial
15 | #'
16 | #' @family data
17 | #' @source `[pbmc_matrix]` processed by Seurat
18 | "pbmc_meta"
19 | 
20 | #' Marker genes identified by Seurat from single-cell RNA-seq PBMCs.
21 | #'
22 | #' Dataframe of markers from Seurat FindAllMarkers function
23 | #'
24 | #' @family data
25 | #' @source `[pbmc_matrix]` processed by Seurat
26 | "pbmc_markers"
27 | 
28 | #' Marker genes identified by M3Drop from single-cell RNA-seq PBMCs.
29 | #'
30 | #' Selected features of 3k pbmcs from Seurat3 tutorial
31 | #'
32 | #' @format A data frame with 3 variables.
33 | #'
34 | #' @family data
35 | #' @source `[pbmc_matrix]` processed by `[M3Drop]`
36 | "pbmc_markers_M3Drop"
37 | 
38 | #' Variable genes identified by Seurat from single-cell RNA-seq PBMCs.
39 | #'
40 | #' Top 2000 variable genes from 3k pbmcs from Seurat3 tutorial
41 | #'
42 | #' @family data
43 | #' @source `[pbmc_matrix]` processed by Seurat
44 | "pbmc_vargenes"
45 | 
46 | #' reference matrix from seurat citeseq CBMC tutorial
47 | #'
48 | #' @family data
49 | #' @source \url{https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters}
50 | "cbmc_ref"
51 | 
52 | #' reference marker matrix from seurat citeseq CBMC tutorial
53 | #'
54 | #' @family data
55 | #' @source \url{https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters}
56 | "cbmc_m"
57 | 
58 | #' table of references stored in clustifyrdata
59 | #'
60 | #' @family data
61 | #' @source  various packages
62 | "downrefs"
63 | 
64 | #' Vector of human genes for 10x cellranger pipeline
65 | #'
66 | #' @family data
67 | #' @source \url{https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest}
68 | "human_genes_10x"
69 | 
70 | #' Vector of mouse genes for 10x cellranger pipeline
71 | #'
72 | #' @family data
73 | #' @source \url{https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest}
74 | "mouse_genes_10x"
75 | 


--------------------------------------------------------------------------------
/R/globals.R:
--------------------------------------------------------------------------------
 1 | globalVariables(c(
 2 |   ".",
 3 |   "important",
 4 |   "gene",
 5 |   "cluster",
 6 |   "classified",
 7 |   "bulk_cluster",
 8 |   "error",
 9 |   "padj",
10 |   "intersection_size"
11 | ))
12 | 


--------------------------------------------------------------------------------
/R/run_fgsea.R:
--------------------------------------------------------------------------------
 1 | #' Run GSEA to compare a gene list(s) to per cell or
 2 | #' per cluster expression data
 3 | #' @description Use fgsea algorithm to compute normalized enrichment
 4 | #' scores and pvalues for gene
 5 | #' set ovelap
 6 | #' @param expr_mat single-cell expression matrix or Seurat object
 7 | #' @param query_genes A vector or named list of vectors of genesets of interest
 8 | #' to compare via GSEA. If supplying a named list, then the gene set names
 9 | #' will appear in the output.
10 | #' @param cluster_ids vector of cell cluster assignments, supplied as a
11 | #' vector with order that
12 | #' matches columns in `expr_mat`. Not required if running per cell.
13 | #' @param n_perm Number of permutation for fgsea function. Defaults to 1000.
14 | #' @param per_cell if true run per cell, otherwise per cluster.
15 | #' @param scale convert expr_mat into zscores prior to running GSEA?,
16 | #' default = FALSE
17 | #' @param no_warnings suppress warnings from gsea ties
18 | #' @return dataframe of gsea scores (pval, NES), with clusters as rownames
19 | #' @export
20 | run_gsea <- function(
21 |   expr_mat,
22 |   query_genes,
23 |   cluster_ids = NULL,
24 |   n_perm = 1000,
25 |   per_cell = FALSE,
26 |   scale = FALSE,
27 |   no_warnings = TRUE
28 | ) {
29 |   if (!is.list(query_genes)) {
30 |     geneset_list <- list("query_genes" = query_genes)
31 |   } else {
32 |     geneset_list <- query_genes
33 |   }
34 | 
35 |   if (!per_cell & (ncol(expr_mat) != length(cluster_ids))) {
36 |     stop(
37 |       "cluster_ids do not match number of cells (columns) ",
38 |       "in expr_mat",
39 |       call. = FALSE
40 |     )
41 |   }
42 | 
43 |   if (n_perm > 1e4 & per_cell) {
44 |     warning(
45 |       "run_gsea() take a long time if running many ",
46 |       "permutations and running per cell"
47 |     )
48 |   }
49 | 
50 |   if (scale) {
51 |     expr_mat <- t(scale(t(as.matrix(expr_mat))))
52 |   }
53 | 
54 |   if (!per_cell) {
55 |     avg_mat <- average_clusters(expr_mat, metadata = cluster_ids)
56 |   } else {
57 |     avg_mat <- expr_mat
58 |   }
59 | 
60 |   res <- list()
61 |   for (i in seq_along(colnames(avg_mat))) {
62 |     if (!(no_warnings)) {
63 |       gsea_res <- fgsea::fgsea(
64 |         geneset_list,
65 |         avg_mat[, i],
66 |         minSize = 1,
67 |         maxSize = max(vapply(geneset_list, length, FUN.VALUE = numeric(1))),
68 |         nproc = 1,
69 |         nperm = n_perm
70 |       )
71 |     } else {
72 |       suppressWarnings(
73 |         gsea_res <- fgsea::fgsea(
74 |           geneset_list,
75 |           avg_mat[, i],
76 |           minSize = 1,
77 |           maxSize = max(vapply(geneset_list, length, FUN.VALUE = numeric(1))),
78 |           nproc = 1,
79 |           nperm = n_perm
80 |         )
81 |       )
82 |     }
83 |     res[[i]] <- gsea_res[, c("pathway", "pval", "NES")]
84 |   }
85 |   gsea_res <- dplyr::bind_rows(res)
86 |   gsea_res <-
87 |     as.data.frame(dplyr::mutate(gsea_res, cell = colnames(avg_mat)))
88 |   if (tibble::has_rownames(gsea_res)) {
89 |     gsea_res <- tibble::remove_rownames(gsea_res)
90 |   }
91 |   gsea_res <- tibble::column_to_rownames(gsea_res, "cell")
92 | 
93 |   gsea_res
94 | }
95 | 


--------------------------------------------------------------------------------
/R/shiny.R:
--------------------------------------------------------------------------------
 1 | #' Launch Shiny app version of clustifyr,
 2 | #' may need to run install_clustifyr_app() at first time to install packages
 3 | #' @return instance of shiny app
 4 | #' @examples
 5 | #' \dontrun{
 6 | #' run_clustifyr_app()
 7 | #' }
 8 | #' @export
 9 | run_clustifyr_app <- function() {
10 |   appDir <- system.file("shinyapp", package = "clustifyr")
11 |   shiny::runApp(appDir, display.mode = "normal")
12 | }
13 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | ```{r, echo = FALSE, message = FALSE}
  6 | knitr::opts_chunk$set(
  7 |     collapse = TRUE,
  8 |     comment = "#>",
  9 |     fig.path = "man/figures/",
 10 |     dpi = 300
 11 | )
 12 | ```
 13 | 
 14 | ```{r, echo=FALSE, message=FALSE}
 15 | st <- data.table::fread("https://bioconductor.org/packages/stats/bioc/clustifyr/clustifyr_stats.tab", data.table = FALSE, verbose = FALSE)
 16 | st_all <- dplyr::filter(st, Month == "all")
 17 | cl <- as.numeric(data.table::fread("https://raw.githubusercontent.com/raysinensis/clone_counts_public/main/clustifyr_total.txt", verbose = FALSE))
 18 | ```
 19 | 
 20 | # clustifyr 
 21 | 
 22 | <!-- badges: start -->
 23 | [![R-CMD-check-bioc](https://github.com/rnabioco/clustifyr/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/rnabioco/clustifyr/actions/workflows/check-bioc.yml)
 24 | [![Codecov test coverage](https://codecov.io/gh/rnabioco/clustifyr/branch/devel/graph/badge.svg)](https://app.codecov.io/gh/rnabioco/clustifyr?branch=devel)
 25 | [![platforms](https://bioconductor.org/shields/availability/release/clustifyr.svg)](https://bioconductor.org/packages/release/bioc/html/clustifyr.html)
 26 | [![bioc](https://bioconductor.org/shields/years-in-bioc/clustifyr.svg)](https://bioconductor.org/packages/release/bioc/html/clustifyr.html)
 27 | [![#downloads](`r paste0("https://img.shields.io/badge/%23%20downloads-", sum(st_all[[4]]) + cl, "-brightgreen")`)](https://bioconductor.org/packages/stats/bioc/clustifyr/clustifyr_stats.tab)
 28 | <!-- badges: end -->
 29 | 
 30 | clustifyr classifies cells and clusters in single-cell RNA sequencing experiments using reference bulk RNA-seq data sets, sorted microarray expression data, single-cell gene signatures, or lists of marker genes. 
 31 | 
 32 | ## Installation
 33 | 
 34 | Install the Bioconductor version with:
 35 | 
 36 | ``` r
 37 | if (!requireNamespace("BiocManager", quietly = TRUE))
 38 |     install.packages("BiocManager")
 39 | 
 40 | BiocManager::install("clustifyr")
 41 | ```
 42 | 
 43 | Install the development version with:
 44 | 
 45 | ``` r
 46 | BiocManager::install("rnabioco/clustifyr")
 47 | ```
 48 |  
 49 | ## Example usage
 50 | 
 51 | In this example we use the following built-in input data:
 52 | 
 53 | - an expression matrix of single cell RNA-seq data (`pbmc_matrix_small`)
 54 | - a metadata data.frame (`pbmc_meta`), with cluster information stored (`"classified"`)
 55 | - a vector of variable genes (`pbmc_vargenes`)
 56 | - a matrix of mean normalized scRNA-seq UMI counts by cell type (`cbmc_ref`)
 57 | 
 58 | We then calculate correlation coefficients and plot them on a pre-calculated projection (stored in `pbmc_meta`).
 59 | 
 60 | ```{r readme_example, warning=F, message=F}
 61 | library(clustifyr)
 62 | 
 63 | # calculate correlation
 64 | res <- clustify(
 65 |     input = pbmc_matrix_small,
 66 |     metadata = pbmc_meta$classified,
 67 |     ref_mat = cbmc_ref,
 68 |     query_genes = pbmc_vargenes
 69 | )
 70 | 
 71 | # print assignments
 72 | cor_to_call(res)
 73 | 
 74 | # plot assignments on a projection
 75 | plot_best_call(
 76 |     cor_mat = res,
 77 |     metadata = pbmc_meta,
 78 |     cluster_col = "classified"
 79 | )
 80 | ```
 81 | 
 82 | `clustify()` can take a clustered `SingleCellExperiment` or `seurat` object (from v2 up to v5) and assign identities.
 83 | 
 84 | ```{r example_seurat, warning=F, message=F}
 85 | # for SingleCellExperiment
 86 | sce_small <- sce_pbmc()
 87 | clustify(
 88 |     input = sce_small, # an SCE object
 89 |     ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type
 90 |     cluster_col = "cell_type", # name of column in meta.data containing cell clusters
 91 |     obj_out = TRUE # output SCE object with cell type inserted as "type" column
 92 | )
 93 | 
 94 | # for Seurat
 95 | library(Seurat)
 96 | s_small <- so_pbmc()
 97 | clustify(
 98 |     input = s_small,
 99 |     cluster_col = "RNA_snn_res.0.5",
100 |     ref_mat = cbmc_ref,
101 |     seurat_out = TRUE
102 | )
103 | 
104 | # New output option, directly as a vector (in the order of the metadata), which can then be inserted into metadata dataframes and other workflows
105 | clustify(
106 |     input = s_small,
107 |     cluster_col = "RNA_snn_res.0.5",
108 |     ref_mat = cbmc_ref,
109 |     vec_out = TRUE
110 | )[1:10]
111 | ```
112 | 
113 | New reference matrices can also be made directly from `SingleCellExperiment` and `Seurat` objects. Other scRNA-seq experiment object types are supported as well.
114 | 
115 | ```{r example_ref_matrix}
116 | # make reference from SingleCellExperiment objects
117 | sce_small <- sce_pbmc()
118 | sce_ref <- object_ref(
119 |     input = sce_small, # SCE object
120 |     cluster_col = "cell_type" # name of column in colData containing cell identities
121 | )
122 | 
123 | # make reference from seurat objects
124 | s_small <- so_pbmc()
125 | s_ref <- seurat_ref(
126 |     seurat_object = s_small,
127 |     cluster_col = "RNA_snn_res.0.5"
128 | )
129 | 
130 | head(s_ref)
131 | ```
132 | 
133 | `clustify_lists()` handles identity assignment of matrix or `SingleCellExperiment` and `seurat` objects based on marker gene lists.
134 |  
135 | ```{r example_seurat3, warning=F, message=F}
136 | clustify_lists(
137 |     input = pbmc_matrix_small,
138 |     metadata = pbmc_meta,
139 |     cluster_col = "classified",
140 |     marker = pbmc_markers,
141 |     marker_inmatrix = FALSE
142 | )
143 | 
144 | clustify_lists(
145 |     input = s_small,
146 |     marker = pbmc_markers,
147 |     marker_inmatrix = FALSE,
148 |     cluster_col = "RNA_snn_res.0.5",
149 |     seurat_out = TRUE
150 | )
151 | ```
152 | 
153 | ## Additional resources
154 | 
155 | * [Script](https://github.com/rnabioco/clustifyrdata/blob/master/inst/run_clustifyr.R) for benchmarking, compatible with [`scRNAseq_Benchmark`](https://github.com/tabdelaal/scRNAseq_Benchmark)
156 | 
157 | * Additional reference data (including tabula muris, immgen, etc) are available in a supplemental package [`clustifyrdatahub`](https://github.com/rnabioco/clustifyrdatahub). Also see [list](https://rnabioco.github.io/clustifyrdata/articles/download_refs.html) for individual downloads. 
158 | 
159 | * See the [FAQ](https://github.com/rnabioco/clustifyr/wiki/Frequently-asked-questions) for more details.
160 | 


--------------------------------------------------------------------------------
/air.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/air.toml


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/data-raw/cbmc_m.R:
--------------------------------------------------------------------------------
  1 | library(Seurat)
  2 | library(tidyverse)
  3 | library(clustifyr)
  4 | 
# Builds the `cbmc_m` package dataset: a marker matrix derived from the
# CBMC CITE-seq data (RNA + ADT UMI count files read from the working
# directory).
# following seurat tutorial from https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters

# read the RNA UMI counts (first csv column as row names) as a sparse matrix
cbmc.rna <- as.sparse(read.csv(
  file = "GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz",
  sep = ",",
  header = TRUE,
  row.names = 1
))
cbmc.rna <- CollapseSpeciesExpressionMatrix(cbmc.rna)
# read the ADT UMI counts the same way
cbmc.adt <- as.sparse(read.csv(
  file = "GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz",
  sep = ",",
  header = TRUE,
  row.names = 1
))
# drop the CCR5, CCR7 and CD10 rows from the ADT matrix
cbmc.adt <- cbmc.adt[
  setdiff(rownames(x = cbmc.adt), c("CCR5", "CCR7", "CD10")),
]
# RNA-based processing and clustering
cbmc <- CreateSeuratObject(counts = cbmc.rna)
cbmc <- NormalizeData(cbmc)
cbmc <- FindVariableFeatures(cbmc)
cbmc <- ScaleData(cbmc)
cbmc <- RunPCA(cbmc, verbose = FALSE)
cbmc <- FindNeighbors(cbmc, dims = 1:25)
cbmc <- FindClusters(cbmc, resolution = 0.8)
cbmc <- RunTSNE(cbmc, dims = 1:25, method = "FIt-SNE")
# labels for the RNA clusters, matched by position to levels(cbmc) below
new.cluster.ids <- c(
  "Memory CD4 T",
  "CD14+ Mono",
  "Naive CD4 T",
  "NK",
  "CD14+ Mono",
  "Mouse",
  "B",
  "CD8 T",
  "CD16+ Mono",
  "T/Mono doublets",
  "NK",
  "CD34+",
  "Multiplets",
  "Mouse",
  "Eryth",
  "Mk",
  "Mouse",
  "DC",
  "pDCs"
)
names(new.cluster.ids) <- levels(cbmc)
cbmc <- RenameIdents(cbmc, new.cluster.ids)
# add the ADT counts as a second assay and normalize/scale them
cbmc[["ADT"]] <- CreateAssayObject(counts = cbmc.adt)
cbmc <- NormalizeData(cbmc, assay = "ADT", normalization.method = "CLR")
cbmc <- ScaleData(cbmc, assay = "ADT")
# remove cells labelled "Multiplets" or "Mouse"
cbmc <- subset(cbmc, idents = c("Multiplets", "Mouse"), invert = TRUE)
# ADT-based dimensionality reduction and clustering
DefaultAssay(cbmc) <- "ADT"
cbmc <- RunPCA(
  cbmc,
  features = rownames(cbmc),
  reduction.name = "pca_adt",
  reduction.key = "pca_adt_",
  verbose = FALSE
)
adt.data <- GetAssayData(cbmc, slot = "data")
adt.dist <- dist(t(adt.data))
# keep the RNA-derived labels before the identities are overwritten below
cbmc[["rnaClusterID"]] <- Idents(cbmc)
cbmc[["tsne_adt"]] <- RunTSNE(
  adt.dist,
  assay = "ADT",
  reduction.key = "adtTSNE_"
)
cbmc[["adt_snn"]] <- FindNeighbors(adt.dist)$snn
cbmc <- FindClusters(cbmc, resolution = 0.2, graph.name = "adt_snn")
# labels for the ADT clusters, matched by position to levels(cbmc) below
new.cluster.ids <- c(
  "CD4 T",
  "CD14+ Mono",
  "NK",
  "B",
  "CD8 T",
  "NK",
  "CD34+",
  "T/Mono doublets",
  "CD16+ Mono",
  "pDCs",
  "B"
)
names(new.cluster.ids) <- levels(cbmc)
cbmc <- RenameIdents(cbmc, new.cluster.ids)
cbmc[["citeID"]] <- Idents(cbmc)

# combine both label sets into a single `ID` column:
# - use the ADT label (citeID) for cells it calls "CD8 T" or "CD4 T",
#   otherwise the RNA label (rnaClusterID)
# - then mark cells as "Unknown" when the RNA label is a T-cell label that
#   the ADT label does not agree with
# NOTE(review): the RNA labels assigned above contain "Memory CD4 T" and
# "Naive CD4 T" but no plain "CD4 T", so the `rnaClusterID == "CD4 T"`
# clause looks like it can never match -- confirm this is intended.
m <- cbmc@meta.data %>%
  rownames_to_column("rn") %>%
  mutate(
    ID = ifelse(
      citeID != "CD8 T" & citeID != "CD4 T",
      as.character(rnaClusterID),
      as.character(citeID)
    )
  ) %>%
  mutate(
    ID = ifelse(
      (rnaClusterID == "CD4 T" & citeID != "CD4 T") |
        (rnaClusterID == "CD8 T" & citeID != "CD8 T"),
      "Unknown",
      as.character(ID)
    )
  ) %>%
  column_to_rownames("rn")
cbmc@meta.data <- m

# find positive markers on the RNA assay for the combined identities
DefaultAssay(cbmc) <- "RNA"
Idents(cbmc) <- "ID"
m_cb <- FindAllMarkers(cbmc, only.pos = TRUE)

# keep markers with pct.1 - pct.2 > 0.15, excluding the doublet and Unknown
# groups, and convert them to a marker matrix with clustifyr
cbmc_m <- matrixize_markers(
  m_cb %>%
    filter(
      pct.1 - pct.2 > 0.15,
      cluster != "T/Mono doublets",
      cluster != "Unknown"
    ),
  unique = TRUE,
  remove_rp = TRUE,
  n = 3
)

# store as xz-compressed package data, overwriting any existing file
usethis::use_data(cbmc_m, compress = "xz", overwrite = TRUE)
129 | 


--------------------------------------------------------------------------------
/data-raw/cbmc_ref.R:
--------------------------------------------------------------------------------
  1 | library(usethis)
  2 | library(Seurat)
  3 | library(tidyverse)
  4 | library(clustifyr)
  5 | 
# Builds the `cbmc_ref` package dataset: a per-cell-type reference
# expression matrix derived from the CBMC CITE-seq data (RNA + ADT UMI count
# files read from the working directory).
# following seurat tutorial from https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters

# read the RNA UMI counts (first csv column as row names) as a sparse matrix
cbmc.rna <- as.sparse(read.csv(
  file = "GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz",
  sep = ",",
  header = TRUE,
  row.names = 1
))
cbmc.rna <- CollapseSpeciesExpressionMatrix(cbmc.rna)
# read the ADT UMI counts the same way
cbmc.adt <- as.sparse(read.csv(
  file = "GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz",
  sep = ",",
  header = TRUE,
  row.names = 1
))
# drop the CCR5, CCR7 and CD10 rows from the ADT matrix
cbmc.adt <- cbmc.adt[
  setdiff(rownames(x = cbmc.adt), c("CCR5", "CCR7", "CD10")),
]
# RNA-based processing and clustering
cbmc <- CreateSeuratObject(counts = cbmc.rna)
cbmc <- NormalizeData(cbmc)
cbmc <- FindVariableFeatures(cbmc)
cbmc <- ScaleData(cbmc)
cbmc <- RunPCA(cbmc, verbose = FALSE)
cbmc <- FindNeighbors(cbmc, dims = 1:25)
cbmc <- FindClusters(cbmc, resolution = 0.8)
cbmc <- RunTSNE(cbmc, dims = 1:25, method = "FIt-SNE")
# labels for the RNA clusters, matched by position to levels(cbmc) below
new.cluster.ids <- c(
  "Memory CD4 T",
  "CD14+ Mono",
  "Naive CD4 T",
  "NK",
  "CD14+ Mono",
  "Mouse",
  "B",
  "CD8 T",
  "CD16+ Mono",
  "T/Mono doublets",
  "NK",
  "CD34+",
  "Multiplets",
  "Mouse",
  "Eryth",
  "Mk",
  "Mouse",
  "DC",
  "pDCs"
)
names(new.cluster.ids) <- levels(cbmc)
cbmc <- RenameIdents(cbmc, new.cluster.ids)
# add the ADT counts as a second assay and normalize/scale them
cbmc[["ADT"]] <- CreateAssayObject(counts = cbmc.adt)
cbmc <- NormalizeData(cbmc, assay = "ADT", normalization.method = "CLR")
cbmc <- ScaleData(cbmc, assay = "ADT")
# remove cells labelled "Multiplets" or "Mouse"
cbmc <- subset(cbmc, idents = c("Multiplets", "Mouse"), invert = TRUE)
# ADT-based dimensionality reduction and clustering
DefaultAssay(cbmc) <- "ADT"
cbmc <- RunPCA(
  cbmc,
  features = rownames(cbmc),
  reduction.name = "pca_adt",
  reduction.key = "pca_adt_",
  verbose = FALSE
)
adt.data <- GetAssayData(cbmc, slot = "data")
adt.dist <- dist(t(adt.data))
# keep the RNA-derived labels before the identities are overwritten below
cbmc[["rnaClusterID"]] <- Idents(cbmc)
cbmc[["tsne_adt"]] <- RunTSNE(
  adt.dist,
  assay = "ADT",
  reduction.key = "adtTSNE_"
)
cbmc[["adt_snn"]] <- FindNeighbors(adt.dist)$snn
cbmc <- FindClusters(cbmc, resolution = 0.2, graph.name = "adt_snn")
# labels for the ADT clusters, matched by position to levels(cbmc) below
new.cluster.ids <- c(
  "CD4 T",
  "CD14+ Mono",
  "NK",
  "B",
  "CD8 T",
  "NK",
  "CD34+",
  "T/Mono doublets",
  "CD16+ Mono",
  "pDCs",
  "B"
)
names(new.cluster.ids) <- levels(cbmc)
cbmc <- RenameIdents(cbmc, new.cluster.ids)
cbmc[["citeID"]] <- Idents(cbmc)

# combine both label sets into a single `ID` column:
# - use the ADT label (citeID) for cells it calls "CD8 T" or "CD4 T",
#   otherwise the RNA label (rnaClusterID)
# - then mark cells as "Unknown" when the RNA label is a T-cell label that
#   the ADT label does not agree with
# NOTE(review): the RNA labels assigned above contain "Memory CD4 T" and
# "Naive CD4 T" but no plain "CD4 T", so the `rnaClusterID == "CD4 T"`
# clause looks like it can never match -- confirm this is intended.
m <- cbmc@meta.data %>%
  rownames_to_column("rn") %>%
  mutate(
    ID = ifelse(
      citeID != "CD8 T" & citeID != "CD4 T",
      as.character(rnaClusterID),
      as.character(citeID)
    )
  ) %>%
  mutate(
    ID = ifelse(
      (rnaClusterID == "CD4 T" & citeID != "CD4 T") |
        (rnaClusterID == "CD8 T" & citeID != "CD8 T"),
      "Unknown",
      as.character(ID)
    )
  ) %>%
  column_to_rownames("rn")
cbmc@meta.data <- m

# build the reference matrix from the object, grouped by the combined `ID`
# column and not restricted to variable genes
cbmc_refm <- use_seurat_comp(
  cbmc,
  cluster_col = "ID",
  var_genes_only = FALSE,
  assay_name = NULL
)

# drop the "Unknown" and "T/Mono doublets" columns, and genes whose names
# start with "MOUSE" or "ERCC_"
cbmc_ref <- as.data.frame(cbmc_refm) %>%
  as_tibble(rownames = "gene") %>%
  select(-Unknown, -`T/Mono doublets`) %>%
  filter(str_sub(gene, 1, 5) != "MOUSE" & str_sub(gene, 1, 5) != "ERCC_") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# remove genes whose row sum across all reference columns is zero
cbmc_ref <- cbmc_ref[Matrix::rowSums(cbmc_ref) != 0, ]

# restrict the reference to the genes returned by ref_feature_select (n = 2000)
var_genes <- ref_feature_select(cbmc_ref, n = 2000)
cbmc_ref <- cbmc_ref[var_genes, ]

# store as xz-compressed package data, overwriting any existing file
usethis::use_data(cbmc_ref, compress = "xz", overwrite = TRUE)
133 | 


--------------------------------------------------------------------------------
/data-raw/downrefs.R:
--------------------------------------------------------------------------------
  1 | library(tidyverse)
  2 | 
  3 | d1 <- c(
  4 |   paste0(
  5 |     "[",
  6 |     "ref_MCA",
  7 |     "](",
  8 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_MCA.rda",
  9 |     ")"
 10 |   ),
 11 |   "Mouse Cell Atlas",
 12 |   dim(clustifyrdata::ref_MCA)[2],
 13 |   dim(clustifyrdata::ref_MCA)[1],
 14 |   "mouse",
 15 |   paste0(
 16 |     "[from](",
 17 |     "https://www.cell.com/cell/fulltext/S0092-8674(18)30116-8",
 18 |     ")"
 19 |   )
 20 | )
 21 | 
 22 | d2 <- c(
 23 |   paste0(
 24 |     "[",
 25 |     "ref_tabula_muris_drop",
 26 |     "](",
 27 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_tabula_muris_drop.rda",
 28 |     ")"
 29 |   ),
 30 |   "Tabula Muris (10X)",
 31 |   dim(clustifyrdata::ref_tabula_muris_drop)[2],
 32 |   dim(clustifyrdata::ref_tabula_muris_drop)[1],
 33 |   "mouse",
 34 |   paste0("[from](", "https://www.nature.com/articles/s41586-018-0590-4", ")")
 35 | )
 36 | 
 37 | d3 <- c(
 38 |   paste0(
 39 |     "[",
 40 |     "ref_tabula_muris_facs",
 41 |     "](",
 42 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_tabula_muris_facs.rda",
 43 |     ")"
 44 |   ),
 45 |   "Tabula Muris (SmartSeq2)",
 46 |   dim(clustifyrdata::ref_tabula_muris_facs)[2],
 47 |   dim(clustifyrdata::ref_tabula_muris_facs)[1],
 48 |   "mouse",
 49 |   paste0("[from](", "https://www.nature.com/articles/s41586-018-0590-4", ")")
 50 | )
 51 | 
 52 | d4 <- c(
 53 |   paste0(
 54 |     "[",
 55 |     "ref_mouse.rnaseq",
 56 |     "](",
 57 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_mouse.rnaseq.rda",
 58 |     ")"
 59 |   ),
 60 |   "Mouse RNA-seq from 28 cell types",
 61 |   dim(clustifyrdata::ref_mouse.rnaseq)[2],
 62 |   dim(clustifyrdata::ref_mouse.rnaseq)[1],
 63 |   "mouse",
 64 |   paste0(
 65 |     "[from](",
 66 |     "https://genome.cshlp.org/content/early/2019/03/11/gr.240093.118",
 67 |     ")"
 68 |   )
 69 | )
 70 | 
 71 | d5 <- c(
 72 |   paste0(
 73 |     "[",
 74 |     "ref_moca_main",
 75 |     "](",
 76 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_moca_main.rda",
 77 |     ")"
 78 |   ),
 79 |   "Mouse Organogenesis Cell Atlas (main cell types)",
 80 |   dim(clustifyrdata::ref_moca_main)[2],
 81 |   dim(clustifyrdata::ref_moca_main)[1],
 82 |   "mouse",
 83 |   paste0("[from](", "https://www.nature.com/articles/s41586-019-0969-x", ")")
 84 | )
 85 | 
 86 | d6 <- c(
 87 |   paste0(
 88 |     "[",
 89 |     "ref_hema_microarray",
 90 |     "](",
 91 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_hema_microarray.rda",
 92 |     ")"
 93 |   ),
 94 |   "Human hematopoietic cell microarray",
 95 |   dim(clustifyrdata::ref_hema_microarray)[2],
 96 |   dim(clustifyrdata::ref_hema_microarray)[1],
 97 |   "human",
 98 |   paste0("[from](", "https://www.cell.com/fulltext/S0092-8674(11)00005-5", ")")
 99 | )
100 | 
101 | d7 <- c(
102 |   paste0(
103 |     "[",
104 |     "ref_cortex_dev",
105 |     "](",
106 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_cortex_dev.rda",
107 |     ")"
108 |   ),
109 |   "Human cortex development scRNA-seq",
110 |   dim(clustifyrdata::ref_cortex_dev)[2],
111 |   dim(clustifyrdata::ref_cortex_dev)[1],
112 |   "human",
113 |   paste0(
114 |     "[from](",
115 |     "https://science.sciencemag.org/content/358/6368/1318.long",
116 |     ")"
117 |   )
118 | )
119 | 
120 | d8 <- c(
121 |   paste0(
122 |     "[",
123 |     "ref_pan_indrop",
124 |     "](",
125 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_pan_indrop.rda",
126 |     ")"
127 |   ),
128 |   "Human pancreatic cell scRNA-seq (inDrop)",
129 |   dim(clustifyrdata::ref_pan_indrop)[2],
130 |   dim(clustifyrdata::ref_pan_indrop)[1],
131 |   "human",
132 |   paste0("[from](", "https://www.cell.com/fulltext/S2405-4712(16)30266-6", ")")
133 | )
134 | 
135 | d9 <- c(
136 |   paste0(
137 |     "[",
138 |     "ref_pan_smartseq2",
139 |     "](",
140 |     "https://github.com/rnabioco/clustifyrdata/raw/master/data/ref_pan_smartseq2.rda",
141 |     ")"
142 |   ),
143 |   "Human pancreatic cell scRNA-seq (SmartSeq2)",
144 |   dim(clustifyrdata::ref_pan_smartseq2)[2],
145 |   dim(clustifyrdata::ref_pan_smartseq2)[1],
146 |   "human",
147 |   paste0(
148 |     "[from](",
149 |     "https://www.sciencedirect.com/science/article/pii/S1550413116304363",
150 |     ")"
151 |   )
152 | )
153 | 
154 | d <- data.frame(d1, d2, d3, d4, d5, d6, d7, d8, d9) %>% t()
155 | colnames(d) <- c("name", "desc", "ntypes", "ngenes", "org", "from_pub")
156 | downrefs <- d %>% as.tibble()
157 | usethis::use_data(downrefs, compress = "xz", overwrite = TRUE)
158 | 


--------------------------------------------------------------------------------
/data-raw/human_genes_10x.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
# Load the header-less 10x `genes.tsv` table from the working directory and
# keep its `X2` column as the vector of genes.
human_genes_10x <- pull(read_tsv("genes.tsv", col_names = FALSE), X2)

# Store the vector as xz-compressed package data, replacing any existing file.
usethis::use_data(human_genes_10x, compress = "xz", overwrite = TRUE)
7 | 


--------------------------------------------------------------------------------
/data-raw/mouse_genes_10x.R:
--------------------------------------------------------------------------------
1 | library(tidyverse)
# Load the header-less 10x `genes.tsv` table from the working directory and
# keep its `X2` column as the vector of genes.
mouse_genes_10x <- pull(read_tsv("genes.tsv", col_names = FALSE), X2)

# Store the vector as xz-compressed package data, replacing any existing file.
usethis::use_data(mouse_genes_10x, compress = "xz", overwrite = TRUE)
7 | 


--------------------------------------------------------------------------------
/data-raw/object_loc_lookup.R:
--------------------------------------------------------------------------------
1 | library(usethis)
2 | 
3 | 
# Save `object_loc_lookup` as xz-compressed package data, overwriting any
# existing file.
# NOTE(review): `object_loc_lookup` is not created anywhere in this script,
# so it must already exist in the calling environment -- confirm where it is
# built and consider adding that code here.
usethis::use_data(object_loc_lookup, compress = "xz", overwrite = TRUE)
5 | 


--------------------------------------------------------------------------------
/data-raw/pbmc_bulk_matrix.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(purrr)
  3 | library(tidyr)
  4 | library(stringr)
  5 | library(recount)
  6 | 
  7 | dl_recount <- function(sra_id) {
  8 |   download_study(sra_id)
  9 |   load(file.path(sra_id, "rse_gene.Rdata"))
 10 |   # no longer need to downloaded data
 11 |   unlink(sra_id, recursive = TRUE)
 12 | 
 13 |   rse <- scale_counts(rse_gene)
 14 |   read_counts <- assay(rse, "counts")
 15 |   gene_ids <- rownames(read_counts)
 16 |   # get gene symbols, which are stored in rowData
 17 |   id2symbol <- data_frame(
 18 |     ids = rowData(rse_gene)$gene_id,
 19 |     symbols = rowData(rse_gene)$symbol@listData
 20 |   ) %>%
 21 |     mutate(symbols = map_chr(symbols, ~ .x[1]))
 22 | 
 23 |   # clean up metadata into a dataframe
 24 |   mdata <- colData(rse)
 25 |   mdata_cols <- lapply(
 26 |     mdata$characteristics,
 27 |     function(x) {
 28 |       str_match(x, "^([^:]+):")[, 2]
 29 |     }
 30 |   ) %>%
 31 |     unique() %>%
 32 |     unlist()
 33 | 
 34 |   mdata <- data_frame(
 35 |     run = mdata$run,
 36 |     all_data = as.list(mdata$characteristics)
 37 |   ) %>%
 38 |     mutate(
 39 |       out = purrr::map_chr(
 40 |         all_data,
 41 |         ~ str_c(.x, collapse = "::")
 42 |       )
 43 |     ) %>%
 44 |     tidyr::separate(out, sep = "::", into = mdata_cols) %>%
 45 |     select(-all_data) %>%
 46 |     mutate_at(
 47 |       .vars = vars(-matches("run")),
 48 |       .funs = function(x) str_match(x, ": (.+)")[, 2]
 49 |     )
 50 | 
 51 |   # convert ids to symbols
 52 |   row_ids_to_symbols <- left_join(
 53 |     data_frame(ids = gene_ids),
 54 |     id2symbol,
 55 |     by = "ids"
 56 |   )
 57 | 
 58 |   if (length(gene_ids) != nrow(row_ids_to_symbols)) {
 59 |     warning("gene id mapping to symbols produce more or less ids")
 60 |   }
 61 | 
 62 |   row_ids_to_symbols <- filter(row_ids_to_symbols, !is.na(symbols))
 63 | 
 64 |   out_df <- read_counts %>%
 65 |     as.data.frame() %>%
 66 |     tibble::rownames_to_column("gene_id") %>%
 67 |     left_join(., row_ids_to_symbols, by = c("gene_id" = "ids")) %>%
 68 |     dplyr::select(-gene_id) %>%
 69 |     dplyr::select(symbols, everything()) %>%
 70 |     filter(!is.na(symbols))
 71 | 
 72 |   out_matrix <- tidyr::gather(out_df, library, expr, -symbols) %>%
 73 |     group_by(symbols, library) %>%
 74 |     summarize(expr = sum(expr)) %>%
 75 |     tidyr::spread(library, expr) %>%
 76 |     as.data.frame() %>%
 77 |     tibble::column_to_rownames("symbols") %>%
 78 |     as.matrix()
 79 | 
 80 |   list(
 81 |     read_counts = out_matrix,
 82 |     meta_data = mdata
 83 |   )
 84 | }
 85 | 
 86 | # download
 87 | pbmc_data <- dl_recount("SRP051688")
 88 | # filter
 89 | good_libs <- filter(pbmc_data$meta_data, str_detect(time, "0"))
 90 | pbmc_data <- pbmc_data$read_counts[, good_libs$run]
 91 | # rename
 92 | new_ids <- left_join(
 93 |   data_frame(run = colnames(pbmc_data)),
 94 |   good_libs,
 95 |   by = "run"
 96 | ) %>%
 97 |   group_by(`cell type`) %>%
 98 |   mutate(cell_id = stringr::str_c(`cell type`, " rep ", row_number())) %>%
 99 |   pull(cell_id)
100 | 
101 | colnames(pbmc_data) <- new_ids
102 | pbmc_bulk_matrix <- pbmc_data
103 | usethis::use_data(pbmc_bulk_matrix, compress = "xz", overwrite = TRUE)
104 | 


--------------------------------------------------------------------------------
/data-raw/pbmc_markers.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(tidyverse)
 3 | library(usethis)
 4 | 
 5 | # follow seurat tutorial from https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html
 6 | pbmc.data <- Read10X(
 7 |   data.dir = "filtered_gene_bc_matrices/hg19" # relative to the working directory holding the 10x download
 8 | )
 9 | pbmc <- CreateSeuratObject(
10 |   counts = pbmc.data,
11 |   project = "pbmc3k",
12 |   min.cells = 3,
13 |   min.features = 200
14 | )
15 | pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # share of counts from genes named MT-*
16 | pbmc <- subset(
17 |   pbmc,
18 |   subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 # QC filter on gene count and percent.mt
19 | )
20 | pbmc <- NormalizeData(
21 |   pbmc,
22 |   normalization.method = "LogNormalize",
23 |   scale.factor = 10000
24 | )
25 | pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
26 | all.genes <- rownames(pbmc)
27 | pbmc <- ScaleData(pbmc, features = all.genes)
28 | pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # PCA on the variable features only
29 | pbmc <- FindNeighbors(pbmc, dims = 1:10)
30 | pbmc <- FindClusters(pbmc, resolution = 0.5)
31 | pbmc <- RunUMAP(pbmc, dims = 1:10)
32 | 
33 | pbmc_markers <- FindAllMarkers(
34 |   pbmc,
35 |   only.pos = TRUE,
36 |   min.pct = 0.25,
37 |   logfc.threshold = 0.25
38 | )
39 | usethis::use_data(pbmc_markers, compress = "xz", overwrite = TRUE) # writes data/pbmc_markers.rda
40 | 


--------------------------------------------------------------------------------
/data-raw/pbmc_markers_M3Drop.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(tidyverse)
 3 | library(M3Drop)
 4 | library(usethis)
 5 | 
 6 | # follow seurat tutorial from https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html
 7 | pbmc.data <- Read10X(
 8 |   data.dir = "filtered_gene_bc_matrices/hg19" # relative to the working directory holding the 10x download
 9 | )
10 | pbmc <- CreateSeuratObject(
11 |   counts = pbmc.data,
12 |   project = "pbmc3k",
13 |   min.cells = 3,
14 |   min.features = 200
15 | )
16 | pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # share of counts from genes named MT-*
17 | pbmc <- subset(
18 |   pbmc,
19 |   subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 # QC filter on gene count and percent.mt
20 | )
21 | pbmc <- NormalizeData(
22 |   pbmc,
23 |   normalization.method = "LogNormalize",
24 |   scale.factor = 10000
25 | )
26 | pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
27 | all.genes <- rownames(pbmc)
28 | pbmc <- ScaleData(pbmc, features = all.genes)
29 | pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # PCA on the variable features only
30 | pbmc <- FindNeighbors(pbmc, dims = 1:10)
31 | pbmc <- FindClusters(pbmc, resolution = 0.5)
32 | pbmc <- RunUMAP(pbmc, dims = 1:10)
33 | 
34 | tm <- expm1(as.matrix(pbmc@assays$RNA@data)) # exp(x) - 1 of the normalized data, as a dense matrix
35 | Normalized_data <- M3DropCleanData(
36 |   tm,
37 |   labels = rownames(pbmc@assays$RNA@data), # NOTE(review): row names are passed as labels; M3Drop may expect per-cell labels - confirm
38 |   is.counts = FALSE
39 | )
40 | fits <- M3DropDropoutModels(Normalized_data$data) # NOTE(review): `fits` is not referenced again in this script
41 | pbmc_markers_M3Drop <- M3DropDifferentialExpression(
42 |   Normalized_data$data,
43 |   mt_method = "fdr",
44 |   mt_threshold = 0.05
45 | )
46 | usethis::use_data(pbmc_markers_M3Drop, compress = "xz", overwrite = TRUE) # writes data/pbmc_markers_M3Drop.rda
47 | 


--------------------------------------------------------------------------------
/data-raw/pbmc_matrix_small.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(tidyverse)
 3 | library(clustifyr)
 4 | library(usethis)
 5 | 
 6 | # follow seurat tutorial from https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html
 7 | pbmc.data <- Read10X(data.dir = "filtered_gene_bc_matrices/hg19") # path relative to the working directory
 8 | pbmc <- CreateSeuratObject(
 9 |   counts = pbmc.data,
10 |   project = "pbmc3k",
11 |   min.cells = 3,
12 |   min.features = 200
13 | )
14 | pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # share of counts from genes named MT-*
15 | pbmc <- subset(
16 |   pbmc,
17 |   subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 # QC filter on gene count and percent.mt
18 | )
19 | pbmc <- NormalizeData(
20 |   pbmc,
21 |   normalization.method = "LogNormalize",
22 |   scale.factor = 10000
23 | )
24 | pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
25 | all.genes <- rownames(pbmc)
26 | pbmc <- ScaleData(pbmc, features = all.genes)
27 | pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # PCA on the variable features only
28 | pbmc <- FindNeighbors(pbmc, dims = 1:10)
29 | pbmc <- FindClusters(pbmc, resolution = 0.5)
30 | pbmc <- RunUMAP(pbmc, dims = 1:10)
31 | new.cluster.ids <- c(
32 |   "Naive CD4 T",
33 |   "Memory CD4 T",
34 |   "CD14+ Mono",
35 |   "B",
36 |   "CD8 T",
37 |   "FCGR3A+ Mono",
38 |   "NK",
39 |   "DC",
40 |   "Platelet"
41 | )
42 | names(new.cluster.ids) <- levels(pbmc) # assumes clustering yields exactly 9 levels in this order - TODO confirm
43 | pbmc <- RenameIdents(pbmc, new.cluster.ids)
44 | pbmc <- StashIdent(pbmc, "classified")
45 | 
46 | pbmc_matrix <- pbmc@assays$RNA@data
47 | pbmc_matrix_small <- pbmc_matrix[VariableFeatures(pbmc), ] # accessor (already used above) instead of the @var.features slot
48 | usethis::use_data(pbmc_matrix_small, compress = "xz", overwrite = TRUE) # writes data/pbmc_matrix_small.rda
49 | 


--------------------------------------------------------------------------------
/data-raw/pbmc_meta.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(tidyverse)
 3 | library(usethis)
 4 | library(clustifyr)
 5 | 
 6 | # follow seurat tutorial from https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html
 7 | pbmc.data <- Read10X(
 8 |   data.dir = "filtered_gene_bc_matrices/hg19" # relative to the working directory holding the 10x download
 9 | )
10 | pbmc <- CreateSeuratObject(
11 |   counts = pbmc.data,
12 |   project = "pbmc3k",
13 |   min.cells = 3,
14 |   min.features = 200
15 | )
16 | pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # share of counts from genes named MT-*
17 | pbmc <- subset(
18 |   pbmc,
19 |   subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 # QC filter on gene count and percent.mt
20 | )
21 | pbmc <- NormalizeData(
22 |   pbmc,
23 |   normalization.method = "LogNormalize",
24 |   scale.factor = 10000
25 | )
26 | pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
27 | all.genes <- rownames(pbmc)
28 | pbmc <- ScaleData(pbmc, features = all.genes)
29 | pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # PCA on the variable features only
30 | pbmc <- FindNeighbors(pbmc, dims = 1:10)
31 | pbmc <- FindClusters(pbmc, resolution = 0.5)
32 | pbmc <- RunUMAP(pbmc, dims = 1:10)
33 | new.cluster.ids <- c(
34 |   "Naive CD4 T",
35 |   "Memory CD4 T",
36 |   "CD14+ Mono",
37 |   "B",
38 |   "CD8 T",
39 |   "FCGR3A+ Mono",
40 |   "NK",
41 |   "DC",
42 |   "Platelet"
43 | )
44 | names(new.cluster.ids) <- levels(pbmc) # assumes clustering yields exactly 9 levels in this order - TODO confirm
45 | pbmc <- RenameIdents(pbmc, new.cluster.ids)
46 | pbmc <- StashIdent(pbmc, "classified")
47 | 
48 | pbmc_meta <- use_seurat_meta(pbmc, dr = "umap") # clustifyr helper, called with the UMAP reduction
49 | 
50 | usethis::use_data(pbmc_meta, compress = "xz", overwrite = TRUE) # writes data/pbmc_meta.rda
51 | 


--------------------------------------------------------------------------------
/data-raw/pbmc_vargenes.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(tidyverse)
 3 | library(usethis)
 4 | 
 5 | # follow seurat tutorial from https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html
 6 | pbmc.data <- Read10X(
 7 |   data.dir = "filtered_gene_bc_matrices/hg19" # relative to the working directory holding the 10x download
 8 | )
 9 | pbmc <- CreateSeuratObject(
10 |   counts = pbmc.data,
11 |   project = "pbmc3k",
12 |   min.cells = 3,
13 |   min.features = 200
14 | )
15 | pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # share of counts from genes named MT-*
16 | pbmc <- subset(
17 |   pbmc,
18 |   subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 # QC filter on gene count and percent.mt
19 | )
20 | pbmc <- NormalizeData(
21 |   pbmc,
22 |   normalization.method = "LogNormalize",
23 |   scale.factor = 10000
24 | )
25 | pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
26 | all.genes <- rownames(pbmc)
27 | pbmc <- ScaleData(pbmc, features = all.genes)
28 | pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # PCA on the variable features only
29 | pbmc <- FindNeighbors(pbmc, dims = 1:10)
30 | pbmc <- FindClusters(pbmc, resolution = 0.5)
31 | pbmc <- RunUMAP(pbmc, dims = 1:10)
32 | 
33 | pbmc_vargenes <- VariableFeatures(pbmc) # accessor (already used above) instead of the @var.features slot
34 | usethis::use_data(pbmc_vargenes, compress = "xz", overwrite = TRUE) # writes data/pbmc_vargenes.rda
35 | 


--------------------------------------------------------------------------------
/data/cbmc_m.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/cbmc_m.rda


--------------------------------------------------------------------------------
/data/cbmc_ref.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/cbmc_ref.rda


--------------------------------------------------------------------------------
/data/downrefs.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/downrefs.rda


--------------------------------------------------------------------------------
/data/human_genes_10x.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/human_genes_10x.rda


--------------------------------------------------------------------------------
/data/mouse_genes_10x.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/mouse_genes_10x.rda


--------------------------------------------------------------------------------
/data/object_loc_lookup.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/object_loc_lookup.rda


--------------------------------------------------------------------------------
/data/pbmc_markers.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/pbmc_markers.rda


--------------------------------------------------------------------------------
/data/pbmc_markers_M3Drop.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/pbmc_markers_M3Drop.rda


--------------------------------------------------------------------------------
/data/pbmc_matrix_small.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/pbmc_matrix_small.rda


--------------------------------------------------------------------------------
/data/pbmc_meta.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/pbmc_meta.rda


--------------------------------------------------------------------------------
/data/pbmc_vargenes.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/data/pbmc_vargenes.rda


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | bibentry(bibtype = "Article",
 2 |          title   = "clustifyr: An R package for automated single-cell RNA sequencing cluster classification",
 3 |          year    = 2020, # publication year of the article identified by the DOI below
 4 |          author  = c(
 5 |              person('Rui', 'Fu', role = c('aut', 'cre'), email = 'rui.fu@cuanschutz.edu'),
 6 |              person('Austin', 'Gillen', role = c('ctb')),
 7 |              person(c('Ryan', 'M.'), 'Sheridan', role = c('ctb')),
 8 |              person('Chengzhe', 'Tian', role = c('ctb')),
 9 |              person('Michelle', 'Daya', role = c('ctb')),
10 |              person('Yue', 'Hao', role = c('ctb')),
11 |              person(c('Jay', 'R.'), 'Hesselberth', role = c('aut', 'cre')),
12 |              person(c('Kent', 'A.'), 'Riemondy', role = c('aut'), email = 'kent.riemondy@gmail.com')
13 |          ),
14 |          journal = 'F1000Research',
15 |          doi = '10.12688/f1000research.22969.2'
16 | )
17 | 


--------------------------------------------------------------------------------
/inst/extdata/c2.cp.reactome.v6.2.symbols.gmt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/inst/extdata/c2.cp.reactome.v6.2.symbols.gmt.gz


--------------------------------------------------------------------------------
/inst/extdata/hsPBMC_markers.txt:
--------------------------------------------------------------------------------
 1 | >CD34+
 2 | expressed: CD34, THY1, ENG, KIT, PROM1 
 3 | references: https://www.stemcell.com/human-hematopoietic-stem-and-progenitor-cell-phenotyping-panels.html, https://www.rndsystems.com/research-area/hematopoietic-stem-cell-markers
 4 | 
 5 | >NK cells
 6 | expressed: NCAM1, FCGR3A
 7 | references: https://www.biolegend.com/essential_markers
 8 | 
 9 | >Monocytes
10 | expressed: CD14, FCGR1A, CD68, S100A12
11 | references: https://www.biolegend.com/essential_markers
12 | 
13 | >B cells
14 | expressed: CD19, MS4A1, CD79A
15 | 
16 | >T cells
17 | expressed: CD3D, CD3E, CD3G
18 | 
19 | >CD4 T cells
20 | expressed: CD4, FOXP3, IL2RA, IL7R
21 | subtype of: T cells
22 | 
23 | >CD8 T cells
24 | expressed: CD8A, CD8B
25 | subtype of: T cells
26 | 
27 | >Dendritic cells
28 | expressed: 	IL3RA, CD1C, BATF3, THBD, CD209 
29 | references: https://www.biolegend.com/essential_markers, https://www.cell.com/pb-assets/products/nucleus/nucleus-phagocytes/rnd-systems-dendritic-cells-br.pdf
30 | 


--------------------------------------------------------------------------------
/inst/scripts/dl_reactome_gene_sets.R:
--------------------------------------------------------------------------------
 1 | library(here)
 2 | 
 3 | # Reactome gene sets (MSigDB 6.2, gene symbols) from the Broad download endpoint
 4 | dl_url <- "http://software.broadinstitute.org/gsea/msigdb/download_file.jsp?filePath=/resources/msigdb/6.2/c2.cp.reactome.v6.2.symbols.gmt"
 5 | proj_dir <- here()
 6 | 
 7 | # save the GMT file under inst/extdata of the project
 8 | download.file(
 9 |   url = dl_url,
10 |   destfile = file.path(proj_dir, "inst/extdata/c2.cp.reactome.v6.2.symbols.gmt")
11 | )
12 | # compress the downloaded file with R.utils::gzip
13 | R.utils::gzip(
14 |   file.path(proj_dir, "inst/extdata/c2.cp.reactome.v6.2.symbols.gmt")
15 | )
16 | # hsPBMC_markers.txt taken from garnett website
17 | # it is stored in inst/extdata under its own file name
18 | dl_url <- "https://cole-trapnell-lab.github.io/garnett/marker_files/hsPBMC_markers.txt"
19 | 
20 | download.file(
21 |   dl_url,
22 |   file.path(proj_dir, "inst/extdata/hsPBMC_markers.txt")
23 | )
24 | 


--------------------------------------------------------------------------------
/inst/shinyapp/README.md:
--------------------------------------------------------------------------------
 1 | # Clustifyr Shiny App <img src="logo.png" align="right">
 2 | 
 3 | The purpose of this app is to enable quick classification of single-cell RNA sequencing data through an interactive web interface. Users can directly upload their matrix and metadata files or Seurat/SCE objects generated from single-cell RNA sequencing analyses and produce useful cell identity inference and visualization, using a built-in library of references (clustifyrdatahub) compiled from reference bulk RNA-seq experiments, microarray expression data, and single-cell gene signatures. An additional purpose of the app is to enable quick browsing, preview, and reference building directly from NCBI Gene Expression Omnibus (GEO) records. Data reuse for this application and many other reanalysis/extension purposes requires accurate metadata, which is frustratingly rare. We call on data repositories, journals, and investigators to work together towards ensuring proper cell-level annotation deposition. Please see [someta](https://github.com/rnabioco/someta) for further discussions.
 4 | 
 5 | ## Workflow
 6 | 
 7 | 1. Upload an expression matrix (raw counts or normalized) or a Seurat/SCE object. You can also retrieve GEO data by accession number.
 8 | 2. Upload cell-level metadata as a text file, or provide it through a Seurat/SCE object. You can also retrieve GEO data by accession number.
 9 | 3. Choose (in the dropdown menu, or by clicking on the preview) the metadata column that represents the clustering information.
10 | 4. Choose or upload reference dataset, a matrix containing average expression of each cell type. Mouse MCA is the default.
11 | 5. Go to clustify step and look at results: correlation matrix, called cell-types, and heatmap. Results can be downloaded as xlsx.
12 | 
13 | ## Clustifyr Background
14 | 
15 | Single cell transcriptomes are difficult to annotate without knowledge
16 | of the underlying biology. Even with this knowledge, accurate
17 | identification can be challenging due to the lack of detectable
18 | expression of common marker genes. [clustifyr](https://github.com/rnabioco/clustifyr) aims to alleviate this problem by
19 | automatically annotating single cells or clusters of cells using
20 | single-cell RNA-seq, bulk RNA-seq data, microarray, or marker gene
21 | lists. Additional functions enable exploratory analysis of similarities
22 | between single cell RNA-seq datasets and reference data.
23 | 
24 | ## Clustifyr Data Hub
25 | 
26 | Reference cell type gene signatures are located in the accompanying [Clustifyr Data Hub](https://github.com/rnabioco/clustifyrdatahub).
27 | Descriptions of each data set are present in the table below. 
28 | 
29 | ## Available references include
30 | 
31 | | Title                    | Species      | Description                                      | RDataPath                                     | BiocVersion | Genome | SourceType | SourceUrl                                                                                            |
32 | | :----------------------- | :----------- | :----------------------------------------------- | :-------------------------------------------- | ----------: | :----- | :--------- | :--------------------------------------------------------------------------------------------------- |
33 | | ref\_MCA                 | Mus musculus | Mouse Cell Atlas                                 | clustifyrdatahub/ref\_MCA.rda                 |        3.12 | mm10   | Zip        | <https://ndownloader.figshare.com/files/10756795>                                                    |
34 | | ref\_tabula\_muris\_drop | Mus musculus | Tabula Muris (10X)                               | clustifyrdatahub/ref\_tabula\_muris\_drop.rda |        3.12 | mm10   | Zip        | <https://ndownloader.figshare.com/articles/5821263>                                                  |
35 | | ref\_tabula\_muris\_facs | Mus musculus | Tabula Muris (SmartSeq2)                         | clustifyrdatahub/ref\_tabula\_muris\_facs.rda |        3.12 | mm10   | Zip        | <https://ndownloader.figshare.com/articles/5821263>                                                  |
36 | | ref\_mouse.rnaseq        | Mus musculus | Mouse RNA-seq from 28 cell types                 | clustifyrdatahub/ref\_mouse.rnaseq.rda        |        3.12 | mm10   | RDA        | <https://github.com/dviraran/SingleR/tree/master/data>                                               |
37 | | ref\_moca\_main          | Mus musculus | Mouse Organogenesis Cell Atlas (main cell types) | clustifyrdatahub/ref\_moca\_main.rda          |        3.12 | mm10   | RDA        | <https://oncoscape.v3.sttrcancer.org/atlas.gs.washington.edu.mouse.rna/downloads>                    |
38 | | ref\_immgen              | Mus musculus | Mouse sorted immune cells                        | clustifyrdatahub/ref\_immgen.rda              |        3.12 | mm10   | RDA        | <https://github.com/dviraran/SingleR/tree/master/data>                                               |
39 | | ref\_hema\_microarray    | Homo sapiens | Human hematopoietic cell microarray              | clustifyrdatahub/ref\_hema\_microarray.rda    |        3.12 | hg38   | TXT        | <https://ftp.ncbi.nlm.nih.gov/geo/series/GSE24nnn/GSE24759/matrix/GSE24759_series_matrix.txt.gz>     |
40 | | ref\_cortex\_dev         | Homo sapiens | Human cortex development scRNA-seq               | clustifyrdatahub/ref\_cortex\_dev.rda         |        3.12 | hg38   | TSV        | <https://cells.ucsc.edu/cortex-dev/exprMatrix.tsv.gz>                                                |
41 | | ref\_pan\_indrop         | Homo sapiens | Human pancreatic cell scRNA-seq (inDrop)         | clustifyrdatahub/ref\_pan\_indrop.rda         |        3.12 | hg38   | RDA        | <https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/baron-human.rds>                   |
42 | | ref\_pan\_smartseq2      | Homo sapiens | Human pancreatic cell scRNA-seq (SmartSeq2)      | clustifyrdatahub/ref\_pan\_smartseq2.rda      |        3.12 | hg38   | RDA        | <https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/segerstolpe.rds>                   |
43 | | ref\_mouse\_atlas        | Mus musculus | Mouse Atlas scRNA-seq from 321 cell types        | clustifyrdatahub/ref\_mouse\_atlas.rda        |        3.12 | mm10   | RDA        | <https://github.com/rnabioco/scRNA-seq-Cell-Ref-Matrix/blob/master/atlas/musMusculus/MouseAtlas.rda> |
44 | 


--------------------------------------------------------------------------------
/inst/shinyapp/data/example-input/get-data.R:
--------------------------------------------------------------------------------
 1 | library(Seurat)
 2 | library(tidyverse)
 3 | library(here)
 4 | 
 5 | # data from lung injury single cell RNA-seq dataset (GSE113049)
 6 | # cell metadata is read first: its `cell` column supplies the matrix column names
 7 | mdata <- read_tsv(
 8 |   "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE113nnn/GSE113049/suppl/GSE113049_cell_metadata.tsv.gz"
 9 | )
10 | mat <- read_tsv(
11 |   "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE113nnn/GSE113049/suppl/GSE113049_count_matrix.tsv.gz",
12 |   skip = 1, # skip the first line of the file; column names are given explicitly below
13 |   col_names = c("gene", mdata$cell)
14 | )
15 | 
16 | mat <- column_to_rownames(mat, "gene") %>%
17 |   as.matrix()
18 | 
19 | # downsample: keep 5% of the cells in each sample_type / cell_type group
20 | set.seed(42)
21 | cell_ids <- mdata %>%
22 |   group_by(sample_type, cell_type) %>%
23 |   sample_frac(0.05) %>%
24 |   pull(cell)
25 | 
26 | mat <- mat[, cell_ids]
27 | # subset the metadata to the sampled cells, in the same order as the matrix columns
28 | mdata <- mdata %>%
29 |   filter(cell %in% cell_ids) %>%
30 |   column_to_rownames("cell") %>%
31 |   .[cell_ids, ]
32 | # keep only the variable features reported by Seurat (default settings)
33 | var_genes <- CreateSeuratObject(mat, meta.data = mdata) %>%
34 |   NormalizeData() %>%
35 |   FindVariableFeatures() %>%
36 |   VariableFeatures()
37 | 
38 | mat <- mat[var_genes, ]
39 | # move row names back into explicit columns before writing CSV
40 | mat <- as.data.frame(mat) %>%
41 |   rownames_to_column("gene")
42 | mdata <- mdata %>% rownames_to_column("cell")
43 | # NOTE(review): output goes to data/lung-data/, while this script sits in data/example-input/ - confirm
44 | write_csv(mat, here("data", "lung-data", "matrix.csv"))
45 | write_csv(mdata, here("data", "lung-data", "meta-data.csv"))
46 | 


--------------------------------------------------------------------------------
/inst/shinyapp/data/example-input/matrix.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/inst/shinyapp/data/example-input/matrix.csv.gz


--------------------------------------------------------------------------------
/inst/shinyapp/data/example-input/meta-data.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/inst/shinyapp/data/example-input/meta-data.csv.gz


--------------------------------------------------------------------------------
/inst/shinyapp/global.R:
--------------------------------------------------------------------------------
  1 | library(shiny)
  2 | library(shinyjs)
  3 | library(shinyWidgets)
  4 | library(waiter)
  5 | library(dplyr)
  6 | library(readr)
  7 | library(tools)
  8 | library(clustifyr)
  9 | library(clustifyrdatahub)
 10 | library(rsconnect)
 11 | library(ExperimentHub)
 12 | library(Seurat)
 13 | library(shinydashboard)
 14 | library(bsplus)
 15 | library(dashboardthemes)
 16 | library(tidyverse)
 17 | library(data.table)
 18 | library(R.utils)
 19 | library(DT)
 20 | library(GEOquery)
 21 | library(pheatmap)
 22 | library(googlesheets4)
 23 | library(openxlsx)
 24 | library(httr)
 25 | 
 26 | options(shiny.maxRequestSize = 1500 * 1024^2) # upload size limit in bytes (1500 MB)
 27 | options(repos = BiocManager::repositories())
 28 | options(datatable.fread.datatable = FALSE) # data.table option: fread() returns a data.frame
 29 | options(shiny.reactlog = TRUE)
 30 | options(
 31 |   DT.options = list(
 32 |     dom = "tp",
 33 |     paging = TRUE,
 34 |     pageLength = 6,
 35 |     scrollX = TRUE,
 36 |     server = TRUE
 37 |   )
 38 | )
 39 | 
 40 | is_local <- Sys.getenv('SHINY_PORT') == "" # TRUE when SHINY_PORT is unset or empty
 41 | 
 42 | # google sheet: authenticate using the token cached in the "sheet" directory
 43 | gs4_auth(cache = "sheet", email = TRUE, use_oob = TRUE)
 44 | sheetid <- "https://docs.google.com/spreadsheets/d/107qXuwo568wmPikaNDIe1supkGOYakwnRhiaTaL7U8c"
 45 | 
 46 | # setup experimenthub: query the clustifyrdatahub records and map title -> hub id
 47 | eh <- ExperimentHub()
 48 | refs <- query(eh, "clustifyrdatahub")
 49 | refs_meta <- mcols(refs) %>% as.data.frame()
 50 | ref_dict <- refs$ah_id %>% setNames(refs$title)
 51 | 
 52 | # table click handler: sends the header text of the clicked cell's column to Shiny as `column_clicked`
 53 | js <- c(
 54 |   "table.on('click', 'td', function(){",
 55 |   "  var cell = table.cell(this);",
 56 |   "  var colindex = cell.index().column;",
 57 |   "  var colname = table.column(colindex).header().innerText;",
 58 |   "  Shiny.setInputValue('column_clicked', colname);",
 59 |   "});"
 60 | )
 61 | 
 62 | # page script: when a tab is shown, sends its data-value attribute to Shiny as `activeTab`
 63 | js2 <- '
 64 | $(document).ready(function(){
 65 |   $("a[data-toggle=tab]").on("show.bs.tab", function(e){
 66 |     Shiny.setInputValue("activeTab", $(this).attr("data-value"));
 67 |   });
 68 | });
 69 | '
 70 | 
 71 | # GEO functions: make_button() returns a function(i) producing the HTML of a "Preview" button for table `tbl`
 72 | make_button <- function(tbl) {
 73 |   # JS run on click: publish the clicked button's id as the Shiny input `button`
 74 |   on_click <- "Shiny.setInputValue('button', this.id);"
 75 |   function(i) {
 76 |     # the id embeds the table name, the index `i` and the current time
 77 |     stamp <- format(Sys.time(), "%H_%M_%S")
 78 |     template <- paste0(
 79 |       '<button id="button_%s_%d',
 80 |       '_',
 81 |       stamp,
 82 |       '" type="button" onclick="%s">Preview</button>'
 83 |     )
 84 |     sprintf(template, tbl, i, on_click)
 85 |   }
 86 | }
 87 | 
 88 | get_tar <- function(link) {
 89 |   # drop the first "/GSE<digits>_RAW.tar" match from `link`, then append "/filelist.txt"
 90 |   base_url <- str_remove(link, "/GSE[0-9]+_RAW.tar")
 91 |   listing_url <- paste0(base_url, "/filelist.txt")
 92 |   listing_url
 93 | }
 94 | 
 95 | get_file_size <- function(url) {
 96 |   # Human-readable size of `url` from its Content-Length header, or "error_get" when unavailable.
 97 |   size <- tryCatch(
 98 |     as.numeric(httr::headers(httr::HEAD(url))[["Content-Length"]]), # request sits inside tryCatch so failures are caught
 99 |     error = function(e) NULL # a handler must be a function, not a string
100 |   )
101 |   if (length(size) != 1 || is.na(size)) { # NULL, a missing header (numeric(0)) or a non-numeric value
102 |     return("error_get")
103 |   }
104 |   format(structure(size, class = "object_size"), units = "auto")
105 | }
106 | 
107 | list_geo <- function(id) {
108 |   # Supplementary files of GEO series `id` with download links; "error_get" if the listing fails or is empty.
109 |   message("fetching info for all files available...")
110 |   out <- tryCatch(
111 |     suppressMessages(GEOquery::getGEOSuppFiles(
112 |       id,
113 |       makeDirectory = FALSE,
114 |       fetch_files = FALSE
115 |     ))$fname,
116 |     error = function(e) {
117 |       "error_get"
118 |     }
119 |   )
120 |   # identical() is length-safe: `out == "error_get"` is a vector when several
121 |   # files are listed, and if() rejects a condition of length > 1 in R >= 4.2
122 |   if (is.null(out) || identical(out, "error_get")) {
123 |     return("error_get")
124 |   }
125 | 
126 |   # GEO series folders are the accession with its last three digits masked
127 |   # (GSE113049 -> GSE113nnn, GSE24759 -> GSE24nnn), so derive the folder from
128 |   # `id` rather than from the first three digits found in each file name
129 |   series_dir <- sub("[0-9]{1,3}$", "nnn", id)
130 |   data.frame(file = out) %>%
131 |     mutate(
132 |       link = str_c(
133 |         "https://ftp.ncbi.nlm.nih.gov/geo/series/",
134 |         series_dir,
135 |         "/",
136 |         id,
137 |         "/suppl/",
138 |         file
139 |       )
140 |     )
141 | }
142 | 
143 | prep_email <- function(id) {
144 |   # Build a mailto: link asking the GEO contact of series `id` for more metadata ("error_get" if the lookup fails).
145 |   record <- tryCatch(
146 |     suppressMessages(GEOquery::getGEO(
147 |       GEO = id,
148 |       filename = NULL,
149 |       GSElimits = NULL,
150 |       GSEMatrix = FALSE,
151 |       AnnotGPL = FALSE,
152 |       getGPL = FALSE,
153 |       parseCharacteristics = FALSE
154 |     )),
155 |     error = function(e) "error_get"
156 |   )
157 |   # anything that is not a GSE record is handed back unchanged
158 |   if (class(record) != "GSE") {
159 |     return(record)
160 |   }
161 | 
162 |   # greeting: contact name with everything up to and including its last comma removed
163 |   name <- paste0("Dr ", str_remove(record@header$contact_name, ".+,"))
164 |   # GSE113049 gets a placeholder address instead of the real contact email
165 |   email <- if (id == "GSE113049") {
166 |     "placeholder@forexample.com"
167 |   } else {
168 |     record@header$contact_email
169 |   }
170 |   paste0(
171 |     "mailto:",
172 |     email,
173 |     "?subject=additional info request for ",
174 |     id,
175 |     "&body=Dear ",
176 |     name,
177 |     ",%0D%0A%0D%0ACan you please provide additional metadata information for the single cell dataset deposited on GEO, ",
178 |     id,
179 |     ".",
180 |     "%0D%0A%0D%0AThank you so much"
181 |   )
182 | }
183 | 
184 | preview_link <- function(link, n_row = 5, n_col = 50, verbose = TRUE) {
185 |   # Read the first `n_row` lines of a remote text file and parse them as a table.
186 |   # Returns NA for non-http links, NULL when the lines contain non-ASCII bytes,
187 |   # the parsed table on success, or "parsing failed" when fread() errors.
188 |   # `n_col` and `verbose` are accepted but not used in the body.
189 |   message(link)
190 |   if (!str_starts(str_to_lower(link), "http")) {
191 |     return(NA)
192 |   }
193 | 
194 |   # stream in a few lines only
195 |   message("read")
196 |   url1 <- url(link)
197 |   on.exit(close(url1), add = TRUE) # release the connection even when reading fails
198 |   if (str_ends(link, "\\.gz")) {
199 |     temp <- readLines(gzcon(url1), n = n_row)
200 |   } else {
201 |     temp <- readLines(url1, n = n_row)
202 |   }
203 |   # every byte of every line must be <= 127, otherwise the file is not previewed
204 |   readable <- all(vapply(
205 |     temp,
206 |     function(x) all(charToRaw(x) <= as.raw(127)),
207 |     logical(1)
208 |   ))
209 |   if (!readable) {
210 |     return(NULL)
211 |   }
212 | 
213 |   # parsing, using fread auto; the handler must accept the condition object
214 |   tryCatch(
215 |     data.table::fread(text = temp),
216 |     error = function(e) "parsing failed"
217 |   )
218 | }
219 | 
220 | # load an R data file into a scratch environment and return the first object name load() reports
221 | load_rdata <- function(file) {
222 |   scratch <- new.env()
223 |   loaded <- load(file, envir = scratch)
224 |   scratch[[loaded[1]]]
225 | }
226 | # Plot correlation heatmap
227 | plot_hmap <- function(
228 |   cor_mat,
229 |   col = clustifyr:::not_pretty_palette,
230 |   legend_title = NULL, # accepted but not used in the body
231 |   ...
232 | ) {
233 |   shades <- colorRampPalette(col)(100) # 100 colors interpolated across `col`
234 |   pheatmap::pheatmap(cor_mat, color = shades, ...)
235 | }
236 | 
237 | # pull in someta: read the current GEO table from the someta repository and flatten it for display
238 | someta <- readRDS(url(
239 |   "https://github.com/rnabioco/someta/raw/master/inst/extdata/current_geo.rds"
240 | ))
241 | someta <- someta[,] %>%
242 |   select(
243 |     id,
244 |     organism = org,
245 |     usable,
246 |     files = suppfiles,
247 |     tar_files = tarfiles,
248 |     geo,
249 |     pubmed
250 |   ) %>%
251 |   mutate(files = map_chr(files, function(x) paste0(x, collapse = "; "))) %>% # one "; "-joined string per row
252 |   mutate(
253 |     tar_files = map_chr(tar_files, function(x) paste0(x, collapse = "; "))
254 |   ) %>%
255 |   mutate(files = ifelse(str_length(files) > 0, files, "none")) %>% # empty string -> "none"
256 |   mutate(
257 |     tar_files = ifelse(
258 |       str_length(tar_files) > 0 & tar_files != "error_parse", # empty or "error_parse" -> "none"
259 |       tar_files,
260 |       "none"
261 |     )
262 |   ) %>%
263 |   mutate(usable = factor(usable, levels = c("yes", "no"))) %>%
264 |   mutate(
265 |     summary = map_chr(geo, function(x) {
266 |       g <- tryCatch(x$summary, error = function(e) return(NULL)) # NULL when `summary` cannot be extracted
267 |       if (is.null(g)) {
268 |         g <- "none"
269 |       }
270 |       if (length(g) > 0) {
271 |         g <- str_c(g, collapse = " ") # join multi-element summaries into one string
272 |       }
273 |       g
274 |     })
275 |   ) %>%
276 |   mutate(
277 |     pubmed_id = map_chr(pubmed, function(x) {
278 |       g <- tryCatch(x$pmid[1], error = function(e) return(NULL)) # first pmid only
279 |       if (is.null(g)) {
280 |         g <- "none"
281 |       }
282 |       g
283 |     })
284 |   ) %>%
285 |   mutate(
286 |     pubmed_title = map_chr(pubmed, function(x) {
287 |       g <- tryCatch(x$title[1], error = function(e) return(NULL)) # first title only
288 |       if (is.null(g)) {
289 |         g <- "none"
290 |       }
291 |       g
292 |     })
293 |   )
294 | 


--------------------------------------------------------------------------------
/inst/shinyapp/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/inst/shinyapp/logo.png


--------------------------------------------------------------------------------
/inst/shinyapp/sheet/6c91f3dd95c2217959d38f926b96d7bb_bot4rf@gmail.com:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/inst/shinyapp/sheet/6c91f3dd95c2217959d38f926b96d7bb_bot4rf@gmail.com


--------------------------------------------------------------------------------
/inst/shinyapp/www/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/inst/shinyapp/www/logo.png


--------------------------------------------------------------------------------
/man/append_genes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{append_genes}
 4 | \alias{append_genes}
 5 | \title{Given a reference matrix and a list of genes, take the union of
 6 | all genes in vector and genes in reference matrix
 7 | and insert zero counts for all remaining genes.}
 8 | \usage{
 9 | append_genes(gene_vector, ref_matrix)
10 | }
11 | \arguments{
12 | \item{gene_vector}{char vector with gene names}
13 | 
14 | \item{ref_matrix}{Reference matrix containing cell types vs.
15 | gene expression values}
16 | }
17 | \value{
18 | Reference matrix with union of all genes
19 | }
20 | \description{
21 | Given a reference matrix and a list of genes, take the union of
22 | all genes in vector and genes in reference matrix
23 | and insert zero counts for all remaining genes.
24 | }
25 | \examples{
26 | mat <- append_genes(
27 |     gene_vector = human_genes_10x,
28 |     ref_matrix = cbmc_ref
29 | )
30 | }
31 | 


--------------------------------------------------------------------------------
/man/assess_rank_bias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{assess_rank_bias}
 4 | \alias{assess_rank_bias}
 5 | \title{Find rank bias}
 6 | \usage{
 7 | assess_rank_bias(
 8 |   avg_mat,
 9 |   ref_mat,
10 |   query_genes = NULL,
11 |   res,
12 |   organism,
13 |   plot_name = NULL,
14 |   rds_name = NULL,
15 |   expand_unassigned = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{avg_mat}{average expression matrix}
20 | 
21 | \item{ref_mat}{reference expression matrix}
22 | 
23 | \item{query_genes}{original vector of genes used to clustify}
24 | 
25 | \item{res}{dataframe of idents, such as output of cor_to_call}
26 | 
27 | \item{organism}{for GO term analysis, organism name: human - 'hsapiens', mouse - 'mmusculus'}
28 | 
29 | \item{plot_name}{name for saved pdf, if NULL then no file is written (default)}
30 | 
31 | \item{rds_name}{name for saved rds of rank_diff, if NULL then no file is written (default)}
32 | 
33 | \item{expand_unassigned}{test all ref clusters for unassigned results}
34 | }
35 | \value{
36 | pdf of ggplot object
37 | }
38 | \description{
39 | Find rank bias
40 | }
41 | \examples{
42 | \dontrun{
43 | avg <- average_clusters(
44 |     pbmc_matrix_small,
45 |     pbmc_meta$seurat_clusters
46 | )
47 | res <- clustify(
48 |     input = pbmc_matrix_small,
49 |     metadata = pbmc_meta,
50 |     ref_mat = cbmc_ref,
51 |     query_genes = pbmc_vargenes,
52 |     cluster_col = "seurat_clusters"
53 | )
54 | top_call <- cor_to_call(
55 |     res,
56 |     metadata = pbmc_meta,
57 |     cluster_col = "seurat_clusters",
58 |     collapse_to_cluster = FALSE,
59 |     threshold = 0.8
60 | )
61 | res_rank <- assess_rank_bias(
62 |     avg,
63 |     cbmc_ref,
64 |     res = top_call
65 | )
66 | }
67 | }
68 | 


--------------------------------------------------------------------------------
/man/assign_ident.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{assign_ident}
 4 | \alias{assign_ident}
 5 | \title{manually change idents as needed}
 6 | \usage{
 7 | assign_ident(
 8 |   metadata,
 9 |   cluster_col = "cluster",
10 |   ident_col = "type",
11 |   clusters,
12 |   idents
13 | )
14 | }
15 | \arguments{
16 | \item{metadata}{column of ident}
17 | 
18 | \item{cluster_col}{column in metadata containing cluster info}
19 | 
20 | \item{ident_col}{column in metadata containing identity assignment}
21 | 
22 | \item{clusters}{names of clusters to change, string or
23 | vector of strings}
24 | 
25 | \item{idents}{new idents to assign, must be length of 1 or
26 | same as clusters}
27 | }
28 | \value{
29 | new dataframe of metadata
30 | }
31 | \description{
32 | manually change idents as needed
33 | }
34 | 


--------------------------------------------------------------------------------
/man/average_clusters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{average_clusters}
 4 | \alias{average_clusters}
 5 | \title{Average expression values per cluster}
 6 | \usage{
 7 | average_clusters(
 8 |   mat,
 9 |   metadata,
10 |   cluster_col = "cluster",
11 |   if_log = TRUE,
12 |   cell_col = NULL,
13 |   low_threshold = 0,
14 |   method = "mean",
15 |   output_log = TRUE,
16 |   subclusterpower = 0,
17 |   cut_n = NULL
18 | )
19 | }
20 | \arguments{
21 | \item{mat}{expression matrix}
22 | 
23 | \item{metadata}{data.frame or vector containing cluster assignments per cell.
24 | Order must match column order in supplied matrix. If a data.frame
25 | provide the cluster_col parameters.}
26 | 
27 | \item{cluster_col}{column in metadata with cluster number}
28 | 
29 | \item{if_log}{input data is natural log,
30 | averaging will be done on unlogged data}
31 | 
32 | \item{cell_col}{if provided, will reorder matrix first}
33 | 
34 | \item{low_threshold}{option to remove clusters with too few cells}
35 | 
36 | \item{method}{whether to take mean (default), median, 10\% truncated mean, or trimean, max, min}
37 | 
38 | \item{output_log}{whether to report log results}
39 | 
40 | \item{subclusterpower}{whether to get multiple averages per original cluster}
41 | 
42 | \item{cut_n}{set a limit on the number of genes treated as expressed; lower ranked
43 | genes are set to 0 and considered unexpressed}
44 | }
45 | \value{
46 | average expression matrix, with genes for row names, and clusters
47 | for column names
48 | }
49 | \description{
50 | Average expression values per cluster
51 | }
52 | \examples{
53 | mat <- average_clusters(
54 |     mat = pbmc_matrix_small,
55 |     metadata = pbmc_meta,
56 |     cluster_col = "classified",
57 |     if_log = FALSE
58 | )
59 | mat[1:3, 1:3]
60 | }
61 | 


--------------------------------------------------------------------------------
/man/binarize_expr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_genelist.R
 3 | \name{binarize_expr}
 4 | \alias{binarize_expr}
 5 | \title{Binarize scRNAseq data}
 6 | \usage{
 7 | binarize_expr(mat, n = 1000, cut = 0)
 8 | }
 9 | \arguments{
10 | \item{mat}{single-cell expression matrix}
11 | 
12 | \item{n}{number of top expressing genes to keep}
13 | 
14 | \item{cut}{cut off to set to 0}
15 | }
16 | \value{
17 | matrix of 1s and 0s
18 | }
19 | \description{
20 | Binarize scRNAseq data
21 | }
22 | \examples{
23 | pbmc_avg <- average_clusters(
24 |     mat = pbmc_matrix_small,
25 |     metadata = pbmc_meta,
26 |     cluster_col = "classified"
27 | )
28 | 
29 | mat <- binarize_expr(pbmc_avg)
30 | mat[1:3, 1:3]
31 | }
32 | 


--------------------------------------------------------------------------------
/man/build_atlas.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{build_atlas}
 4 | \alias{build_atlas}
 5 | \title{Function to combine records into single atlas}
 6 | \usage{
 7 | build_atlas(matrix_fns = NULL, genes_fn, matrix_objs = NULL, output_fn = NULL)
 8 | }
 9 | \arguments{
10 | \item{matrix_fns}{character vector of paths to study matrices stored as .rds files.
11 | If a named character vector, then the name will be added as a suffix to the cell type
12 | name in the final matrix. If it is not named, then the filename will be used (without .rds)}
13 | 
14 | \item{genes_fn}{text file with a single column containing genes and the ordering desired
15 | in the output matrix}
16 | 
17 | \item{matrix_objs}{Checks to see whether .rds files will be read or R objects in a
18 | local environment. A list of environmental objects can be passed to
19 | matrix_objs, and their names will be used, otherwise defaults to numbers}
20 | 
21 | \item{output_fn}{output filename for .rds file. If NULL the matrix will be returned instead of
22 | saving}
23 | }
24 | \value{
25 | Combined matrix with all genes given
26 | }
27 | \description{
28 | Function to combine records into single atlas
29 | }
30 | \examples{
31 | pbmc_ref_matrix <- average_clusters(
32 |     mat = pbmc_matrix_small,
33 |     metadata = pbmc_meta,
34 |     cluster_col = "classified",
35 |     if_log = TRUE # whether the expression matrix is already log transformed
36 | )
37 | references_to_combine <- list(pbmc_ref_matrix, cbmc_ref)
38 | atlas <- build_atlas(NULL, human_genes_10x, references_to_combine, NULL)
39 | }
40 | 


--------------------------------------------------------------------------------
/man/calc_distance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{calc_distance}
 4 | \alias{calc_distance}
 5 | \title{Distance calculations for spatial coord}
 6 | \usage{
 7 | calc_distance(
 8 |   coord,
 9 |   metadata,
10 |   cluster_col = "cluster",
11 |   collapse_to_cluster = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{coord}{dataframe or matrix of spatial coordinates, cell barcode as rownames}
16 | 
17 | \item{metadata}{data.frame or vector containing cluster assignments per cell.
18 | Order must match column order in supplied matrix. If a data.frame
19 | provide the cluster_col parameters.}
20 | 
21 | \item{cluster_col}{column in metadata with cluster number}
22 | 
23 | \item{collapse_to_cluster}{instead of reporting min distance to cluster per cell, summarize to cluster level}
24 | }
25 | \value{
26 | min distance matrix
27 | }
28 | \description{
29 | Distance calculations for spatial coord
30 | }
31 | \examples{
32 | cbs <- paste0("cb_", 1:100)
33 | 
34 | spatial_coords <- data.frame(
35 |     row.names = cbs,
36 |     X = runif(100),
37 |     Y = runif(100)
38 | )
39 | group_ids <- sample(c("A", "B"), 100, replace = TRUE)
40 | dist_res <- calc_distance(
41 |     spatial_coords,
42 |     group_ids
43 | )
44 | }
45 | 


--------------------------------------------------------------------------------
/man/calc_similarity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_similarity.R
 3 | \name{calc_similarity}
 4 | \alias{calc_similarity}
 5 | \title{compute similarity}
 6 | \usage{
 7 | calc_similarity(query_mat, ref_mat, compute_method, rm0 = FALSE, ...)
 8 | }
 9 | \arguments{
10 | \item{query_mat}{query data matrix}
11 | 
12 | \item{ref_mat}{reference data matrix}
13 | 
14 | \item{compute_method}{method(s) for computing similarity scores}
15 | 
16 | \item{rm0}{consider 0 as missing data, recommended for per_cell}
17 | 
18 | \item{...}{additional parameters}
19 | }
20 | \value{
21 | matrix of numeric values
22 | }
23 | \description{
24 | compute similarity
25 | }
26 | 


--------------------------------------------------------------------------------
/man/calculate_pathway_gsea.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{calculate_pathway_gsea}
 4 | \alias{calculate_pathway_gsea}
 5 | \title{Convert expression matrix to GSEA pathway scores
 6 | (would take a similar place in workflow before average_clusters/binarize)}
 7 | \usage{
 8 | calculate_pathway_gsea(
 9 |   mat,
10 |   pathway_list,
11 |   n_perm = 1000,
12 |   scale = TRUE,
13 |   no_warnings = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{mat}{expression matrix}
18 | 
19 | \item{pathway_list}{a list of vectors, each named for a specific pathway,
20 | or dataframe}
21 | 
22 | \item{n_perm}{Number of permutations for the fgsea function. Defaults to 1000.}
23 | 
24 | \item{scale}{whether to convert mat into z-scores prior to running GSEA,
25 | default = TRUE}
26 | 
27 | \item{no_warnings}{suppress warnings from gsea ties}
28 | }
29 | \value{
30 | matrix of GSEA NES values, cell types as row names,
31 | pathways as column names
32 | }
33 | \description{
34 | Convert expression matrix to GSEA pathway scores
35 | (would take a similar place in workflow before average_clusters/binarize)
36 | }
37 | \examples{
38 | gl <- list(
39 |     "n" = c("PPBP", "LYZ", "S100A9"),
40 |     "a" = c("IGLL5", "GNLY", "FTL")
41 | )
42 | 
43 | pbmc_avg <- average_clusters(
44 |     mat = pbmc_matrix_small,
45 |     metadata = pbmc_meta,
46 |     cluster_col = "classified"
47 | )
48 | 
49 | calculate_pathway_gsea(
50 |     mat = pbmc_avg,
51 |     pathway_list = gl
52 | )
53 | }
54 | 


--------------------------------------------------------------------------------
/man/call_consensus.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/common_dplyr.R
 3 | \name{call_consensus}
 4 | \alias{call_consensus}
 5 | \title{get consensus calls for a list of cor calls}
 6 | \usage{
 7 | call_consensus(list_of_res)
 8 | }
 9 | \arguments{
10 | \item{list_of_res}{list of call dataframes from cor_to_call_rank}
11 | }
12 | \value{
13 | dataframe of cluster, new ident, and mean rank
14 | }
15 | \description{
16 | get consensus calls for a list of cor calls
17 | }
18 | \examples{
19 | res <- clustify(
20 |     input = pbmc_matrix_small,
21 |     metadata = pbmc_meta,
22 |     cluster_col = "classified",
23 |     ref_mat = cbmc_ref
24 | )
25 | 
26 | res2 <- cor_to_call_rank(res, threshold = "auto")
27 | res3 <- cor_to_call_rank(res)
28 | call_consensus(list(res2, res3))
29 | }
30 | 


--------------------------------------------------------------------------------
/man/call_to_metadata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/common_dplyr.R
 3 | \name{call_to_metadata}
 4 | \alias{call_to_metadata}
 5 | \title{Insert called ident results into metadata}
 6 | \usage{
 7 | call_to_metadata(
 8 |   res,
 9 |   metadata,
10 |   cluster_col,
11 |   per_cell = FALSE,
12 |   rename_prefix = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{res}{dataframe of idents, such as output of cor_to_call}
17 | 
18 | \item{metadata}{input metadata with tsne or umap coordinates and cluster ids}
19 | 
20 | \item{cluster_col}{metadata column, can be cluster or cellid}
21 | 
22 | \item{per_cell}{whether the res dataframe is listed per cell}
23 | 
24 | \item{rename_prefix}{prefix to add to type and r column names}
25 | }
26 | \value{
27 | new metadata with added columns
28 | }
29 | \description{
30 | Insert called ident results into metadata
31 | }
32 | \examples{
33 | res <- clustify(
34 |     input = pbmc_matrix_small,
35 |     metadata = pbmc_meta,
36 |     cluster_col = "classified",
37 |     ref_mat = cbmc_ref
38 | )
39 | 
40 | res2 <- cor_to_call(res, cluster_col = "classified")
41 | 
42 | call_to_metadata(
43 |     res = res2,
44 |     metadata = pbmc_meta,
45 |     cluster_col = "classified",
46 |     rename_prefix = "assigned"
47 | )
48 | }
49 | 


--------------------------------------------------------------------------------
/man/cbmc_m.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{cbmc_m}
 5 | \alias{cbmc_m}
 6 | \title{reference marker matrix from seurat citeseq CBMC tutorial}
 7 | \format{
 8 | An object of class \code{data.frame} with 3 rows and 13 columns.
 9 | }
10 | \source{
11 | \url{https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters}
12 | }
13 | \usage{
14 | cbmc_m
15 | }
16 | \description{
17 | reference marker matrix from seurat citeseq CBMC tutorial
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_ref}},
22 | \code{\link{downrefs}},
23 | \code{\link{human_genes_10x}},
24 | \code{\link{mouse_genes_10x}},
25 | \code{\link{pbmc_markers}},
26 | \code{\link{pbmc_markers_M3Drop}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/cbmc_ref.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{cbmc_ref}
 5 | \alias{cbmc_ref}
 6 | \title{reference matrix from seurat citeseq CBMC tutorial}
 7 | \format{
 8 | An object of class \code{matrix} (inherits from \code{array}) with 2000 rows and 13 columns.
 9 | }
10 | \source{
11 | \url{https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters}
12 | }
13 | \usage{
14 | cbmc_ref
15 | }
16 | \description{
17 | reference matrix from seurat citeseq CBMC tutorial
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{downrefs}},
23 | \code{\link{human_genes_10x}},
24 | \code{\link{mouse_genes_10x}},
25 | \code{\link{pbmc_markers}},
26 | \code{\link{pbmc_markers_M3Drop}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/check_raw_counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{check_raw_counts}
 4 | \alias{check_raw_counts}
 5 | \title{Given a count matrix, determine if the matrix has been either
 6 | log-normalized, normalized, or contains raw counts}
 7 | \usage{
 8 | check_raw_counts(counts_matrix, max_log_value = 50)
 9 | }
10 | \arguments{
11 | \item{counts_matrix}{Count matrix containing scRNA-seq read data}
12 | 
13 | \item{max_log_value}{Static value to determine if a matrix is normalized}
14 | }
15 | \value{
16 | String either raw counts, log-normalized or normalized
17 | }
18 | \description{
19 | Given a count matrix, determine if the matrix has been either
20 | log-normalized, normalized, or contains raw counts
21 | }
22 | \examples{
23 | check_raw_counts(pbmc_matrix_small)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/clustify.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/main.R
  3 | \name{clustify}
  4 | \alias{clustify}
  5 | \alias{clustify.default}
  6 | \alias{clustify.Seurat}
  7 | \alias{clustify.SingleCellExperiment}
  8 | \title{Compare scRNA-seq data to reference data.}
  9 | \usage{
 10 | clustify(input, ...)
 11 | 
 12 | \method{clustify}{default}(
 13 |   input,
 14 |   ref_mat,
 15 |   metadata = NULL,
 16 |   cluster_col = NULL,
 17 |   query_genes = NULL,
 18 |   n_genes = 1000,
 19 |   per_cell = FALSE,
 20 |   n_perm = 0,
 21 |   compute_method = "spearman",
 22 |   pseudobulk_method = "mean",
 23 |   verbose = TRUE,
 24 |   lookuptable = NULL,
 25 |   rm0 = FALSE,
 26 |   obj_out = TRUE,
 27 |   seurat_out = obj_out,
 28 |   vec_out = FALSE,
 29 |   rename_prefix = NULL,
 30 |   threshold = "auto",
 31 |   low_threshold_cell = 0,
 32 |   exclude_genes = c(),
 33 |   if_log = TRUE,
 34 |   organism = "hsapiens",
 35 |   plot_name = NULL,
 36 |   rds_name = NULL,
 37 |   expand_unassigned = FALSE,
 38 |   ...
 39 | )
 40 | 
 41 | \method{clustify}{Seurat}(
 42 |   input,
 43 |   ref_mat,
 44 |   cluster_col = NULL,
 45 |   query_genes = NULL,
 46 |   n_genes = 1000,
 47 |   per_cell = FALSE,
 48 |   n_perm = 0,
 49 |   compute_method = "spearman",
 50 |   pseudobulk_method = "mean",
 51 |   use_var_genes = TRUE,
 52 |   dr = "umap",
 53 |   obj_out = TRUE,
 54 |   seurat_out = obj_out,
 55 |   vec_out = FALSE,
 56 |   threshold = "auto",
 57 |   verbose = TRUE,
 58 |   rm0 = FALSE,
 59 |   rename_prefix = NULL,
 60 |   exclude_genes = c(),
 61 |   metadata = NULL,
 62 |   organism = "hsapiens",
 63 |   plot_name = NULL,
 64 |   rds_name = NULL,
 65 |   expand_unassigned = FALSE,
 66 |   ...
 67 | )
 68 | 
 69 | \method{clustify}{SingleCellExperiment}(
 70 |   input,
 71 |   ref_mat,
 72 |   cluster_col = NULL,
 73 |   query_genes = NULL,
 74 |   per_cell = FALSE,
 75 |   n_perm = 0,
 76 |   compute_method = "spearman",
 77 |   pseudobulk_method = "mean",
 78 |   use_var_genes = TRUE,
 79 |   dr = "umap",
 80 |   obj_out = TRUE,
 81 |   seurat_out = obj_out,
 82 |   vec_out = FALSE,
 83 |   threshold = "auto",
 84 |   verbose = TRUE,
 85 |   rm0 = FALSE,
 86 |   rename_prefix = NULL,
 87 |   exclude_genes = c(),
 88 |   metadata = NULL,
 89 |   organism = "hsapiens",
 90 |   plot_name = NULL,
 91 |   rds_name = NULL,
 92 |   expand_unassigned = FALSE,
 93 |   ...
 94 | )
 95 | }
 96 | \arguments{
 97 | \item{input}{single-cell expression matrix or Seurat object}
 98 | 
 99 | \item{...}{additional arguments to pass to compute_method function}
100 | 
101 | \item{ref_mat}{reference expression matrix}
102 | 
103 | \item{metadata}{cell cluster assignments,
104 | supplied as a vector or data.frame.
105 | If data.frame is supplied then \code{cluster_col} needs to be set.
106 | Not required if running correlation per cell.}
107 | 
108 | \item{cluster_col}{column in metadata that contains cluster ids per cell.
109 | Will default to first column of metadata if not supplied.
110 | Not required if running correlation per cell.}
111 | 
112 | \item{query_genes}{A vector of genes of interest to compare. If NULL, then
113 | common genes between the expr_mat and ref_mat
114 | will be used for comparison.}
115 | 
116 | \item{n_genes}{number of genes limit for Seurat variable genes, by default 1000,
117 | set to 0 to use all variable genes (generally not recommended)}
118 | 
119 | \item{per_cell}{if true run per cell, otherwise per cluster.}
120 | 
121 | \item{n_perm}{number of permutations, set to 0 by default}
122 | 
123 | \item{compute_method}{method(s) for computing similarity scores}
124 | 
125 | \item{pseudobulk_method}{method used for summarizing clusters, options are mean (default), median, truncate (10\% truncated mean), or trimean, max, min}
126 | 
127 | \item{verbose}{whether to report certain variables chosen and steps}
128 | 
129 | \item{lookuptable}{if not supplied, will look in built-in table
130 | for object parsing}
131 | 
132 | \item{rm0}{consider 0 as missing data, recommended for per_cell}
133 | 
134 | \item{obj_out}{whether to output object instead of cor matrix}
135 | 
136 | \item{seurat_out}{output cor matrix or called seurat object
137 | (deprecated, use obj_out instead)}
138 | 
139 | \item{vec_out}{only output a result vector in the same order as metadata}
140 | 
141 | \item{rename_prefix}{prefix to add to type and r column names}
142 | 
143 | \item{threshold}{identity calling minimum correlation score threshold,
144 | only used when obj_out = TRUE}
145 | 
146 | \item{low_threshold_cell}{option to remove clusters with too few cells}
147 | 
148 | \item{exclude_genes}{a vector of gene names to throw out of query}
149 | 
150 | \item{if_log}{input data is natural log,
151 | averaging will be done on unlogged data}
152 | 
153 | \item{organism}{for GO term analysis, organism name: human - 'hsapiens', mouse - 'mmusculus'}
154 | 
155 | \item{plot_name}{name for saved pdf, if NULL then no file is written (default)}
156 | 
157 | \item{rds_name}{name for saved rds of rank_diff, if NULL then no file is written (default)}
158 | 
159 | \item{expand_unassigned}{test all ref clusters for unassigned results}
160 | 
161 | \item{use_var_genes}{if providing a seurat object, use the variable genes
162 | (stored in seurat_object@var.genes) as the query_genes.}
163 | 
164 | \item{dr}{stored dimension reduction}
165 | }
166 | \value{
167 | single cell object with identity assigned in metadata,
168 | or matrix of correlation values, clusters from input as row names, cell
169 | types from ref_mat as column names
170 | }
171 | \description{
172 | Compare scRNA-seq data to reference data.
173 | }
174 | \examples{
175 | # Annotate a matrix and metadata
176 | clustify(
177 |     input = pbmc_matrix_small,
178 |     metadata = pbmc_meta,
179 |     ref_mat = cbmc_ref,
180 |     query_genes = pbmc_vargenes,
181 |     cluster_col = "RNA_snn_res.0.5",
182 |     verbose = TRUE
183 | )
184 | 
185 | # Annotate using a different method
186 | clustify(
187 |     input = pbmc_matrix_small,
188 |     metadata = pbmc_meta,
189 |     ref_mat = cbmc_ref,
190 |     query_genes = pbmc_vargenes,
191 |     cluster_col = "RNA_snn_res.0.5",
192 |     compute_method = "cosine"
193 | )
194 | 
195 | # Annotate a SingleCellExperiment object
196 | sce <- sce_pbmc()
197 | clustify(
198 |     sce,
199 |     cbmc_ref,
200 |     cluster_col = "clusters",
201 |     obj_out = TRUE,
202 |     per_cell = FALSE,
203 |     dr = "umap"
204 | )
205 | 
206 | # Annotate a Seurat object
207 | so <- so_pbmc()
208 | clustify(
209 |     so,
210 |     cbmc_ref,
211 |     cluster_col = "seurat_clusters",
212 |     obj_out = TRUE,
213 |     per_cell = FALSE,
214 |     dr = "umap"
215 | )
216 | 
217 | # Annotate (and return) a Seurat object per-cell
218 | clustify(
219 |     input = so,
220 |     ref_mat = cbmc_ref,
221 |     cluster_col = "seurat_clusters",
222 |     obj_out = TRUE,
223 |     per_cell = TRUE,
224 |     dr = "umap"
225 | )
226 | }
227 | 


--------------------------------------------------------------------------------
/man/clustify_lists.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/main.R
  3 | \name{clustify_lists}
  4 | \alias{clustify_lists}
  5 | \alias{clustify_lists.default}
  6 | \alias{clustify_lists.Seurat}
  7 | \alias{clustify_lists.SingleCellExperiment}
  8 | \title{Main function to compare scRNA-seq data to gene lists.}
  9 | \usage{
 10 | clustify_lists(input, ...)
 11 | 
 12 | \method{clustify_lists}{default}(
 13 |   input,
 14 |   marker,
 15 |   marker_inmatrix = TRUE,
 16 |   metadata = NULL,
 17 |   cluster_col = NULL,
 18 |   if_log = TRUE,
 19 |   per_cell = FALSE,
 20 |   topn = 800,
 21 |   cut = 0,
 22 |   genome_n = 30000,
 23 |   metric = "hyper",
 24 |   output_high = TRUE,
 25 |   lookuptable = NULL,
 26 |   obj_out = TRUE,
 27 |   seurat_out = obj_out,
 28 |   vec_out = FALSE,
 29 |   rename_prefix = NULL,
 30 |   threshold = 0,
 31 |   low_threshold_cell = 0,
 32 |   verbose = TRUE,
 33 |   input_markers = FALSE,
 34 |   details_out = FALSE,
 35 |   ...
 36 | )
 37 | 
 38 | \method{clustify_lists}{Seurat}(
 39 |   input,
 40 |   metadata = NULL,
 41 |   cluster_col = NULL,
 42 |   if_log = TRUE,
 43 |   per_cell = FALSE,
 44 |   topn = 800,
 45 |   cut = 0,
 46 |   marker,
 47 |   marker_inmatrix = TRUE,
 48 |   genome_n = 30000,
 49 |   metric = "hyper",
 50 |   output_high = TRUE,
 51 |   dr = "umap",
 52 |   obj_out = TRUE,
 53 |   seurat_out = obj_out,
 54 |   vec_out = FALSE,
 55 |   threshold = 0,
 56 |   rename_prefix = NULL,
 57 |   verbose = TRUE,
 58 |   details_out = FALSE,
 59 |   ...
 60 | )
 61 | 
 62 | \method{clustify_lists}{SingleCellExperiment}(
 63 |   input,
 64 |   metadata = NULL,
 65 |   cluster_col = NULL,
 66 |   if_log = TRUE,
 67 |   per_cell = FALSE,
 68 |   topn = 800,
 69 |   cut = 0,
 70 |   marker,
 71 |   marker_inmatrix = TRUE,
 72 |   genome_n = 30000,
 73 |   metric = "hyper",
 74 |   output_high = TRUE,
 75 |   dr = "umap",
 76 |   obj_out = TRUE,
 77 |   seurat_out = obj_out,
 78 |   vec_out = FALSE,
 79 |   threshold = 0,
 80 |   rename_prefix = NULL,
 81 |   verbose = TRUE,
 82 |   details_out = FALSE,
 83 |   ...
 84 | )
 85 | }
 86 | \arguments{
 87 | \item{input}{single-cell expression matrix, Seurat object, or SingleCellExperiment}
 88 | 
 89 | \item{...}{passed to matrixize_markers}
 90 | 
 91 | \item{marker}{matrix or dataframe of candidate genes for each cluster}
 92 | 
 93 | \item{marker_inmatrix}{whether markers genes are already in preprocessed
 94 | matrix form}
 95 | 
 96 | \item{metadata}{cell cluster assignments,
 97 | supplied as a vector or data.frame.
 98 | If data.frame is supplied then \code{cluster_col} needs to be set.
 99 | Not required if running correlation per cell.}
100 | 
101 | \item{cluster_col}{column in metadata with cluster number}
102 | 
103 | \item{if_log}{input data is natural log, averaging will be done on
104 | unlogged data}
105 | 
106 | \item{per_cell}{compare per cell or per cluster}
107 | 
108 | \item{topn}{number of top expressing genes to keep from input matrix}
109 | 
110 | \item{cut}{expression cut off from input matrix}
111 | 
112 | \item{genome_n}{number of genes in the genome}
113 | 
114 | \item{metric}{adjusted p-value for hypergeometric test, or jaccard index}
115 | 
116 | \item{output_high}{if true (by default to fit with rest of package),
117 | -log10 transform p-value}
118 | 
119 | \item{lookuptable}{if not supplied, will look in built-in table
120 | for object parsing}
121 | 
122 | \item{obj_out}{whether to output object instead of cor matrix}
123 | 
124 | \item{seurat_out}{output cor matrix or called seurat object
125 | (deprecated, use obj_out instead)}
126 | 
127 | \item{vec_out}{only output a result vector in the same order as metadata}
128 | 
129 | \item{rename_prefix}{prefix to add to type and r column names}
130 | 
131 | \item{threshold}{identity calling minimum correlation score threshold,
132 | only used when obj_out = TRUE}
133 | 
134 | \item{low_threshold_cell}{option to remove clusters with too few cells}
135 | 
136 | \item{verbose}{whether to report certain variables chosen and steps}
137 | 
138 | \item{input_markers}{whether input is marker data.frame of 0 and 1s (output of pos_neg_marker), and uses alternate enrichment mode}
139 | 
140 | \item{details_out}{whether to also output shared gene list from jaccard}
141 | 
142 | \item{dr}{stored dimension reduction}
143 | }
144 | \value{
145 | matrix of numeric values, clusters from input as row names,
146 | cell types from marker_mat as column names
147 | }
148 | \description{
149 | Main function to compare scRNA-seq data to gene lists.
150 | }
151 | \examples{
152 | # Annotate a matrix and metadata
153 | 
154 | # Annotate using a different method
155 | clustify_lists(
156 |     input = pbmc_matrix_small,
157 |     marker = cbmc_m,
158 |     metadata = pbmc_meta,
159 |     cluster_col = "classified",
160 |     verbose = TRUE,
161 |     metric = "jaccard"
162 | )
163 | }
164 | 


--------------------------------------------------------------------------------
/man/clustify_nudge.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/utils.R
  3 | \name{clustify_nudge}
  4 | \alias{clustify_nudge}
  5 | \alias{clustify_nudge.default}
  6 | \alias{clustify_nudge.Seurat}
  7 | \title{Combined function to compare scRNA-seq data to
  8 | bulk RNA-seq data and marker list}
  9 | \usage{
 10 | clustify_nudge(input, ...)
 11 | 
 12 | \method{clustify_nudge}{default}(
 13 |   input,
 14 |   ref_mat,
 15 |   marker,
 16 |   metadata = NULL,
 17 |   cluster_col = NULL,
 18 |   query_genes = NULL,
 19 |   compute_method = "spearman",
 20 |   weight = 1,
 21 |   threshold = -Inf,
 22 |   dr = "umap",
 23 |   norm = "diff",
 24 |   call = TRUE,
 25 |   marker_inmatrix = TRUE,
 26 |   mode = "rank",
 27 |   obj_out = FALSE,
 28 |   seurat_out = obj_out,
 29 |   rename_prefix = NULL,
 30 |   lookuptable = NULL,
 31 |   ...
 32 | )
 33 | 
 34 | \method{clustify_nudge}{Seurat}(
 35 |   input,
 36 |   ref_mat,
 37 |   marker,
 38 |   cluster_col = NULL,
 39 |   query_genes = NULL,
 40 |   compute_method = "spearman",
 41 |   weight = 1,
 42 |   obj_out = TRUE,
 43 |   seurat_out = obj_out,
 44 |   threshold = -Inf,
 45 |   dr = "umap",
 46 |   norm = "diff",
 47 |   marker_inmatrix = TRUE,
 48 |   mode = "rank",
 49 |   rename_prefix = NULL,
 50 |   ...
 51 | )
 52 | }
 53 | \arguments{
 54 | \item{input}{expression matrix or object}
 55 | 
 56 | \item{...}{passed to matrixize_markers}
 57 | 
 58 | \item{ref_mat}{reference expression matrix}
 59 | 
 60 | \item{marker}{matrix of markers}
 61 | 
 62 | \item{metadata}{cell cluster assignments, supplied as a vector
 63 | or data.frame. If
 64 | data.frame is supplied then \code{cluster_col} needs to be set.}
 65 | 
 66 | \item{cluster_col}{column in metadata that contains cluster ids per cell.
 67 | Will default to first
 68 | column of metadata if not supplied.
 69 | Not required if running correlation per cell.}
 70 | 
 71 | \item{query_genes}{A vector of genes of interest to compare.
 72 | If NULL, then common genes between
 73 | the expr_mat and ref_mat will be used for comparison.}
 74 | 
 75 | \item{compute_method}{method(s) for computing similarity scores}
 76 | 
 77 | \item{weight}{relative weight for the gene list scores,
 78 | when added to correlation score}
 79 | 
 80 | \item{threshold}{identity calling minimum score threshold,
 81 | only used when obj_out = T}
 82 | 
 83 | \item{dr}{stored dimension reduction}
 84 | 
 85 | \item{norm}{whether and how the results are normalized}
 86 | 
 87 | \item{call}{make call or just return score matrix}
 88 | 
 89 | \item{marker_inmatrix}{whether marker genes are already
 90 | in preprocessed matrix form}
 91 | 
 92 | \item{mode}{use marker expression pct or ranked cor score for nudging}
 93 | 
 94 | \item{obj_out}{whether to output object instead of cor matrix}
 95 | 
 96 | \item{seurat_out}{output cor matrix or called seurat object (deprecated, use obj_out)}
 97 | 
 98 | \item{rename_prefix}{prefix to add to type and r column names}
 99 | 
100 | \item{lookuptable}{if not supplied, will look in built-in
101 | table for object parsing}
102 | }
103 | \value{
104 | single cell object, or matrix of numeric values,
105 | clusters from input as row names, cell types from ref_mat as column names
106 | }
107 | \description{
108 | Combined function to compare scRNA-seq data to
109 | bulk RNA-seq data and marker list
110 | }
111 | \examples{
112 | 
113 | # Seurat
114 | so <- so_pbmc()
115 | clustify_nudge(
116 |     input = so,
117 |     ref_mat = cbmc_ref,
118 |     marker = cbmc_m,
119 |     cluster_col = "seurat_clusters",
120 |     threshold = 0.8,
121 |     obj_out = FALSE,
122 |     mode = "pct",
123 |     dr = "umap"
124 | )
125 | 
126 | # Matrix
127 | clustify_nudge(
128 |     input = pbmc_matrix_small,
129 |     ref_mat = cbmc_ref,
130 |     metadata = pbmc_meta,
131 |     marker = as.matrix(cbmc_m),
132 |     query_genes = pbmc_vargenes,
133 |     cluster_col = "classified",
134 |     threshold = 0.8,
135 |     call = FALSE,
136 |     marker_inmatrix = FALSE,
137 |     mode = "pct"
138 | )
139 | }
140 | 


--------------------------------------------------------------------------------
/man/clustifyr-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/clustifyR-package.R
 3 | \docType{package}
 4 | \name{clustifyr-package}
 5 | \alias{clustifyr}
 6 | \alias{clustifyr-package}
 7 | \title{clustifyr: Classifier for Single-cell RNA-seq Using Cell Clusters}
 8 | \description{
 9 | Package designed to aid in classifying cells from single-cell RNA sequencing data using external reference data (e.g., bulk RNA-seq, scRNA-seq, microarray, gene lists). A variety of correlation based methods and gene list enrichment methods are provided to assist cell type assignment.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/rnabioco/clustifyr}
15 |   \item \url{https://rnabioco.github.io/clustifyr/}
16 |   \item Report bugs at \url{https://github.com/rnabioco/clustifyr/issues}
17 | }
18 | 
19 | }
20 | \author{
21 | \strong{Maintainer}: Rui Fu \email{ray.sinensis@gmail.com}
22 | 
23 | Authors:
24 | \itemize{
25 |   \item Kent Riemondy \email{kent.riemondy@gmail.com}
26 | }
27 | 
28 | Other contributors:
29 | \itemize{
30 |   \item Austin Gillen \email{austin.gillen@ucdenver.edu} [contributor]
31 |   \item Chengzhe Tian \email{Chengzhe.Tian@colorado.edu} [contributor]
32 |   \item Jay Hesselberth \email{jay.hesselberth@gmail.com} [contributor]
33 |   \item Yue Hao \email{haoyuethink@gmail.com} [contributor]
34 |   \item Michelle Daya \email{michelle.daya@ucdenver.edu} [contributor]
35 |   \item Sidhant Puntambekar \email{sidhantnp@yahoo.com} [contributor]
36 |   \item RNA Bioscience Initiative [funder, copyright holder]
37 | }
38 | 
39 | }
40 | \keyword{internal}
41 | 


--------------------------------------------------------------------------------
/man/clustifyr_methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/main.R
 3 | \docType{data}
 4 | \name{clustifyr_methods}
 5 | \alias{clustifyr_methods}
 6 | \title{Correlation functions available in clustifyr}
 7 | \format{
 8 | An object of class \code{character} of length 5.
 9 | }
10 | \usage{
11 | clustifyr_methods
12 | }
13 | \description{
14 | Correlation functions available in clustifyr
15 | }
16 | \examples{
17 | clustifyr_methods
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/collapse_to_cluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/common_dplyr.R
 3 | \name{collapse_to_cluster}
 4 | \alias{collapse_to_cluster}
 5 | \title{From per-cell calls, take highest freq call in each cluster}
 6 | \usage{
 7 | collapse_to_cluster(res, metadata, cluster_col, threshold = 0)
 8 | }
 9 | \arguments{
10 | \item{res}{dataframe of idents, such as output of cor_to_call}
11 | 
12 | \item{metadata}{input metadata with tsne or umap coordinates and cluster ids}
13 | 
14 | \item{cluster_col}{metadata column for cluster}
15 | 
16 | \item{threshold}{minimum correlation coefficient cutoff for calling clusters}
17 | }
18 | \value{
19 | new metadata with added columns
20 | }
21 | \description{
22 | From per-cell calls, take highest freq call in each cluster
23 | }
24 | \examples{
25 | res <- clustify(
26 |     input = pbmc_matrix_small,
27 |     metadata = pbmc_meta,
28 |     cluster_col = "classified",
29 |     ref_mat = cbmc_ref,
30 |     per_cell = TRUE
31 | )
32 | 
33 | res2 <- cor_to_call(res)
34 | 
35 | collapse_to_cluster(
36 |     res2,
37 |     metadata = pbmc_meta,
38 |     cluster_col = "classified",
39 |     threshold = 0
40 | )
41 | }
42 | 


--------------------------------------------------------------------------------
/man/compare_lists.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_genelist.R
 3 | \name{compare_lists}
 4 | \alias{compare_lists}
 5 | \title{Calculate adjusted p-values for hypergeometric test of gene lists
 6 | or jaccard index}
 7 | \usage{
 8 | compare_lists(
 9 |   bin_mat,
10 |   marker_mat,
11 |   n = 30000,
12 |   metric = "hyper",
13 |   output_high = TRUE,
14 |   details_out = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{bin_mat}{binarized single-cell expression matrix,
19 | feed in by_cluster mat, if desired}
20 | 
21 | \item{marker_mat}{matrix or dataframe of candidate genes for each cluster}
22 | 
23 | \item{n}{number of genes in the genome}
24 | 
25 | \item{metric}{adjusted p-value for hypergeometric test, or jaccard index}
26 | 
27 | \item{output_high}{if true (by default to fit with rest of package),
28 | -log10 transform p-value}
29 | 
30 | \item{details_out}{whether to also output shared gene list from jaccard}
31 | }
32 | \value{
33 | matrix of numeric values, clusters from expr_mat as row names,
34 | cell types from marker_mat as column names
35 | }
36 | \description{
37 | Calculate adjusted p-values for hypergeometric test of gene lists
38 | or jaccard index
39 | }
40 | \examples{
41 | pbmc_mm <- matrixize_markers(pbmc_markers)
42 | 
43 | pbmc_avg <- average_clusters(
44 |     pbmc_matrix_small,
45 |     pbmc_meta,
46 |     cluster_col = "classified"
47 | )
48 | 
49 | pbmc_avgb <- binarize_expr(pbmc_avg)
50 | 
51 | compare_lists(
52 |     pbmc_avgb,
53 |     pbmc_mm,
54 |     metric = "spearman"
55 | )
56 | }
57 | 


--------------------------------------------------------------------------------
/man/cor_to_call.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/common_dplyr.R
 3 | \name{cor_to_call}
 4 | \alias{cor_to_call}
 5 | \title{get best calls for each cluster}
 6 | \usage{
 7 | cor_to_call(
 8 |   cor_mat,
 9 |   metadata = NULL,
10 |   cluster_col = "cluster",
11 |   collapse_to_cluster = FALSE,
12 |   threshold = 0,
13 |   rename_prefix = NULL,
14 |   carry_r = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{cor_mat}{input similarity matrix}
19 | 
20 | \item{metadata}{input metadata with tsne or umap coordinates and cluster ids}
21 | 
22 | \item{cluster_col}{metadata column, can be cluster or cellid}
23 | 
24 | \item{collapse_to_cluster}{if a column name is provided, takes the most
25 | frequent call of entire cluster to color in plot}
26 | 
27 | \item{threshold}{minimum correlation coefficient cutoff for calling clusters}
28 | 
29 | \item{rename_prefix}{prefix to add to type and r column names}
30 | 
31 | \item{carry_r}{whether to include threshold in unassigned names}
32 | }
33 | \value{
34 | dataframe of cluster, new ident, and r info
35 | }
36 | \description{
37 | get best calls for each cluster
38 | }
39 | \examples{
40 | res <- clustify(
41 |     input = pbmc_matrix_small,
42 |     metadata = pbmc_meta,
43 |     cluster_col = "classified",
44 |     ref_mat = cbmc_ref
45 | )
46 | 
47 | cor_to_call(res)
48 | }
49 | 


--------------------------------------------------------------------------------
/man/cor_to_call_rank.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/common_dplyr.R
 3 | \name{cor_to_call_rank}
 4 | \alias{cor_to_call_rank}
 5 | \title{get ranked calls for each cluster}
 6 | \usage{
 7 | cor_to_call_rank(
 8 |   cor_mat,
 9 |   metadata = NULL,
10 |   cluster_col = "cluster",
11 |   collapse_to_cluster = FALSE,
12 |   threshold = 0,
13 |   rename_prefix = NULL,
14 |   top_n = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{cor_mat}{input similarity matrix}
19 | 
20 | \item{metadata}{input metadata with tsne or umap coordinates
21 | and cluster ids}
22 | 
23 | \item{cluster_col}{metadata column, can be cluster or cellid}
24 | 
25 | \item{collapse_to_cluster}{if a column name is provided, takes the most
26 | frequent call of entire cluster to color in plot}
27 | 
28 | \item{threshold}{minimum correlation coefficient cutoff for calling clusters}
29 | 
30 | \item{rename_prefix}{prefix to add to type and r column names}
31 | 
32 | \item{top_n}{the number of ranks to keep, the rest will be set to 100}
33 | }
34 | \value{
35 | dataframe of cluster, new ident, and r info
36 | }
37 | \description{
38 | get ranked calls for each cluster
39 | }
40 | \examples{
41 | res <- clustify(
42 |     input = pbmc_matrix_small,
43 |     metadata = pbmc_meta,
44 |     cluster_col = "classified",
45 |     ref_mat = cbmc_ref
46 | )
47 | 
48 | cor_to_call_rank(res, threshold = "auto")
49 | }
50 | 


--------------------------------------------------------------------------------
/man/cor_to_call_topn.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{cor_to_call_topn}
 4 | \alias{cor_to_call_topn}
 5 | \title{get top calls for each cluster}
 6 | \usage{
 7 | cor_to_call_topn(
 8 |   cor_mat,
 9 |   metadata = NULL,
10 |   col = "cluster",
11 |   collapse_to_cluster = FALSE,
12 |   threshold = 0,
13 |   topn = 2
14 | )
15 | }
16 | \arguments{
17 | \item{cor_mat}{input similarity matrix}
18 | 
19 | \item{metadata}{input metadata with tsne or umap coordinates
20 | and cluster ids}
21 | 
22 | \item{col}{metadata column, can be cluster or cellid}
23 | 
24 | \item{collapse_to_cluster}{if a column name is provided,
25 | takes the most frequent call of entire cluster to color in plot}
26 | 
27 | \item{threshold}{minimum correlation coefficient cutoff for calling clusters}
28 | 
29 | \item{topn}{number of calls for each cluster}
30 | }
31 | \value{
32 | dataframe of cluster, new potential ident, and r info
33 | }
34 | \description{
35 | get top calls for each cluster
36 | }
37 | \examples{
38 | res <- clustify(
39 |     input = pbmc_matrix_small,
40 |     metadata = pbmc_meta,
41 |     ref_mat = cbmc_ref,
42 |     query_genes = pbmc_vargenes,
43 |     cluster_col = "classified"
44 | )
45 | 
46 | cor_to_call_topn(
47 |     cor_mat = res,
48 |     metadata = pbmc_meta,
49 |     col = "classified",
50 |     collapse_to_cluster = FALSE,
51 |     threshold = 0.5
52 | )
53 | }
54 | 


--------------------------------------------------------------------------------
/man/cosine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_similarity.R
 3 | \name{cosine}
 4 | \alias{cosine}
 5 | \title{Cosine distance}
 6 | \usage{
 7 | cosine(vec1, vec2)
 8 | }
 9 | \arguments{
10 | \item{vec1}{test vector}
11 | 
12 | \item{vec2}{reference vector}
13 | }
14 | \value{
15 | numeric value of cosine distance between the vectors
16 | }
17 | \description{
18 | Cosine distance
19 | }
20 | 


--------------------------------------------------------------------------------
/man/downrefs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{downrefs}
 5 | \alias{downrefs}
 6 | \title{table of references stored in clustifyrdata}
 7 | \format{
 8 | An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 9 rows and 6 columns.
 9 | }
10 | \source{
11 | various packages
12 | }
13 | \usage{
14 | downrefs
15 | }
16 | \description{
17 | table of references stored in clustifyrdata
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{human_genes_10x}},
24 | \code{\link{mouse_genes_10x}},
25 | \code{\link{pbmc_markers}},
26 | \code{\link{pbmc_markers_M3Drop}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/downsample_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{downsample_matrix}
 4 | \alias{downsample_matrix}
 5 | \title{downsample matrix by cluster or completely random}
 6 | \usage{
 7 | downsample_matrix(
 8 |   mat,
 9 |   n = 1,
10 |   keep_cluster_proportions = TRUE,
11 |   metadata = NULL,
12 |   cluster_col = "cluster"
13 | )
14 | }
15 | \arguments{
16 | \item{mat}{expression matrix}
17 | 
18 | \item{n}{number per cluster or fraction to keep}
19 | 
20 | \item{keep_cluster_proportions}{whether to subsample}
21 | 
22 | \item{metadata}{data.frame or
23 | vector containing cluster assignments per cell.
24 | Order must match column order in supplied matrix. If a data.frame
25 | provide the cluster_col parameters.}
26 | 
27 | \item{cluster_col}{column in metadata with cluster number}
28 | }
29 | \value{
30 | new smaller mat with fewer cell_id columns
31 | }
32 | \description{
33 | downsample matrix by cluster or completely random
34 | }
35 | \examples{
36 | set.seed(42)
37 | mat <- downsample_matrix(
38 |     mat = pbmc_matrix_small,
39 |     metadata = pbmc_meta$classified,
40 |     n = 10,
41 |     keep_cluster_proportions = TRUE
42 | )
43 | mat[1:3, 1:3]
44 | }
45 | 


--------------------------------------------------------------------------------
/man/feature_select_PCA.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{feature_select_PCA}
 4 | \alias{feature_select_PCA}
 5 | \title{Returns a list of variable genes based on PCA}
 6 | \usage{
 7 | feature_select_PCA(
 8 |   mat = NULL,
 9 |   pcs = NULL,
10 |   n_pcs = 10,
11 |   percentile = 0.99,
12 |   if_log = TRUE
13 | )
14 | }
15 | \arguments{
16 | \item{mat}{Expression matrix. Rownames are genes,
17 | colnames are single cell cluster name, and
18 | values are average single cell expression (log transformed).}
19 | 
20 | \item{pcs}{Precalculated pcs if available, will skip over processing on mat.}
21 | 
22 | \item{n_pcs}{Number of PCs to select gene loadings from.
23 | See the explore_PCA_corr.Rmd vignette for details.}
24 | 
25 | \item{percentile}{Select the percentile of absolute values of
26 | PCA loadings to select genes from. E.g. 0.999 would select the
27 | top 0.1 percent of genes with the largest loadings.}
28 | 
29 | \item{if_log}{whether the data is already log transformed}
30 | }
31 | \value{
32 | vector of genes
33 | }
34 | \description{
35 | Extract genes, i.e. "features", based on the top
36 | loadings of principal components
37 | formed from the bulk expression data set
38 | }
39 | \examples{
40 | feature_select_PCA(
41 |     cbmc_ref,
42 |     if_log = FALSE
43 | )
44 | }
45 | 


--------------------------------------------------------------------------------
/man/figures/README-example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/README-example-1.png


--------------------------------------------------------------------------------
/man/figures/README-example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/README-example-2.png


--------------------------------------------------------------------------------
/man/figures/example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/example-1.png


--------------------------------------------------------------------------------
/man/figures/example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/example-2.png


--------------------------------------------------------------------------------
/man/figures/readme_example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/readme_example-1.png


--------------------------------------------------------------------------------
/man/figures/readme_example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/readme_example-2.png


--------------------------------------------------------------------------------
/man/figures/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/man/figures/test.png


--------------------------------------------------------------------------------
/man/file_marker_parse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{file_marker_parse}
 4 | \alias{file_marker_parse}
 5 | \title{takes files with positive and negative markers, as described in garnett,
 6 | and returns list of markers}
 7 | \usage{
 8 | file_marker_parse(filename)
 9 | }
10 | \arguments{
11 | \item{filename}{txt file to load}
12 | }
13 | \value{
14 | list of positive and negative gene markers
15 | }
16 | \description{
17 | takes files with positive and negative markers, as described in garnett,
18 | and returns list of markers
19 | }
20 | \examples{
21 | marker_file <- system.file(
22 |     "extdata",
23 |     "hsPBMC_markers.txt",
24 |     package = "clustifyr"
25 | )
26 | 
27 | file_marker_parse(marker_file)
28 | }
29 | 


--------------------------------------------------------------------------------
/man/find_rank_bias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{find_rank_bias}
 4 | \alias{find_rank_bias}
 5 | \title{Find rank bias}
 6 | \usage{
 7 | find_rank_bias(avg_mat, ref_mat, query_genes = NULL)
 8 | }
 9 | \arguments{
10 | \item{avg_mat}{average expression matrix}
11 | 
12 | \item{ref_mat}{reference expression matrix}
13 | 
14 | \item{query_genes}{original vector of genes used to clustify}
15 | }
16 | \value{
17 | list of matrix of rank diff values
18 | }
19 | \description{
20 | Find rank bias
21 | }
22 | \examples{
23 | avg <- average_clusters(
24 |     mat = pbmc_matrix_small,
25 |     metadata = pbmc_meta,
26 |     cluster_col = "classified",
27 |     if_log = FALSE
28 | )
29 | 
30 | rankdiff <- find_rank_bias(
31 |     avg,
32 |     cbmc_ref,
33 |     query_genes = pbmc_vargenes
34 | )
35 | }
36 | 


--------------------------------------------------------------------------------
/man/gene_pct.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{gene_pct}
 4 | \alias{gene_pct}
 5 | \title{pct of cells in each cluster that express genelist}
 6 | \usage{
 7 | gene_pct(matrix, genelist, clusters, returning = "mean")
 8 | }
 9 | \arguments{
10 | \item{matrix}{expression matrix}
11 | 
12 | \item{genelist}{vector of marker genes for one identity}
13 | 
14 | \item{clusters}{vector of cluster identities}
15 | 
16 | \item{returning}{whether to return mean, min,
17 | or max of the gene pct in the gene list}
18 | }
19 | \value{
20 | vector of numeric values
21 | }
22 | \description{
23 | pct of cells in each cluster that express genelist
24 | }
25 | 


--------------------------------------------------------------------------------
/man/gene_pct_markerm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{gene_pct_markerm}
 4 | \alias{gene_pct_markerm}
 5 | \title{pct of cells in every cluster that express a series of genelists}
 6 | \usage{
 7 | gene_pct_markerm(matrix, marker_m, metadata, cluster_col = NULL, norm = NULL)
 8 | }
 9 | \arguments{
10 | \item{matrix}{expression matrix}
11 | 
12 | \item{marker_m}{matrixized markers}
13 | 
14 | \item{metadata}{data.frame or vector containing cluster
15 | assignments per cell.
16 | Order must match column order in supplied matrix. If a data.frame
17 | provide the cluster_col parameters.}
18 | 
19 | \item{cluster_col}{column in metadata with cluster number}
20 | 
21 | \item{norm}{whether and how the results are normalized}
22 | }
23 | \value{
24 | matrix of numeric values, clusters from mat as row names,
25 | cell types from marker_m as column names
26 | }
27 | \description{
28 | pct of cells in every cluster that express a series of genelists
29 | }
30 | \examples{
31 | gene_pct_markerm(
32 |     matrix = pbmc_matrix_small,
33 |     marker_m = cbmc_m,
34 |     metadata = pbmc_meta,
35 |     cluster_col = "classified"
36 | )
37 | }
38 | 


--------------------------------------------------------------------------------
/man/get_best_match_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_best_match_matrix}
 4 | \alias{get_best_match_matrix}
 5 | \title{Function to make best call from correlation matrix}
 6 | \usage{
 7 | get_best_match_matrix(cor_mat)
 8 | }
 9 | \arguments{
10 | \item{cor_mat}{correlation matrix}
11 | }
12 | \value{
13 | matrix of 1s and 0s
14 | }
15 | \description{
16 | Function to make best call from correlation matrix
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_best_str.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_best_str}
 4 | \alias{get_best_str}
 5 | \title{Function to make call and attach score}
 6 | \usage{
 7 | get_best_str(name, best_mat, cor_mat, carry_cor = TRUE)
 8 | }
 9 | \arguments{
10 | \item{name}{name of row to query}
11 | 
12 | \item{best_mat}{binarized call matrix}
13 | 
14 | \item{cor_mat}{correlation matrix}
15 | 
16 | \item{carry_cor}{whether the correlation score gets reported}
17 | }
18 | \value{
19 | string with ident call and possibly cor value
20 | }
21 | \description{
22 | Function to make call and attach score
23 | }
24 | 


--------------------------------------------------------------------------------
/man/get_common_elements.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_common_elements}
 4 | \alias{get_common_elements}
 5 | \title{Find entries shared in all vectors}
 6 | \usage{
 7 | get_common_elements(...)
 8 | }
 9 | \arguments{
10 | \item{...}{vectors}
11 | }
12 | \value{
13 | vector of shared elements
14 | }
15 | \description{
16 | return entries found in all supplied vectors.
17 | If the vector supplied is NULL or NA, then it will be excluded
18 | from the comparison.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_similarity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_similarity.R
 3 | \name{get_similarity}
 4 | \alias{get_similarity}
 5 | \title{Compute similarity of matrices}
 6 | \usage{
 7 | get_similarity(
 8 |   expr_mat,
 9 |   ref_mat,
10 |   cluster_ids,
11 |   compute_method,
12 |   pseudobulk_method = "mean",
13 |   per_cell = FALSE,
14 |   rm0 = FALSE,
15 |   if_log = TRUE,
16 |   low_threshold = 0,
17 |   ...
18 | )
19 | }
20 | \arguments{
21 | \item{expr_mat}{single-cell expression matrix}
22 | 
23 | \item{ref_mat}{reference expression matrix}
24 | 
25 | \item{cluster_ids}{vector of cluster ids for each cell}
26 | 
27 | \item{compute_method}{method(s) for computing similarity scores}
28 | 
29 | \item{pseudobulk_method}{method used for summarizing clusters, options are mean (default), median, truncate (10\% truncated mean), trimean, max, or min}
30 | 
31 | \item{per_cell}{run per cell?}
32 | 
33 | \item{rm0}{consider 0 as missing data, recommended for per_cell}
34 | 
35 | \item{if_log}{input data is natural log,
36 | averaging will be done on unlogged data}
37 | 
38 | \item{low_threshold}{option to remove clusters with too few cells}
39 | 
40 | \item{...}{additional parameters not used yet}
41 | }
42 | \value{
43 | matrix of numeric values, clusters from expr_mat as row names,
44 | cell types from ref_mat as column names
45 | }
46 | \description{
47 | Compute similarity of matrices
48 | }
49 | 


--------------------------------------------------------------------------------
/man/get_ucsc_reference.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cellbrowsers.R
 3 | \name{get_ucsc_reference}
 4 | \alias{get_ucsc_reference}
 5 | \title{Build reference atlases from external UCSC cellbrowsers}
 6 | \usage{
 7 | get_ucsc_reference(cb_url, cluster_col, ...)
 8 | }
 9 | \arguments{
10 | \item{cb_url}{URL of cellbrowser dataset (e.g. http://cells.ucsc.edu/?ds=cortex-dev).
11 | Note that the URL must contain the ds=dataset-name suffix.}
12 | 
13 | \item{cluster_col}{annotation field for summarizing gene expression (e.g. clustering,
14 | cell-type name, samples, etc.)}
15 | 
16 | \item{...}{additional args passed to average_clusters}
17 | }
18 | \value{
19 | reference matrix
20 | }
21 | \description{
22 | Build reference atlases from external UCSC cellbrowsers
23 | }
24 | \examples{
25 | \dontrun{
26 | 
27 | # many datasets hosted by UCSC have UMI counts in the expression matrix
28 | # set if_log = FALSE if the expression matrix has not been natural log transformed
29 | 
30 | get_ucsc_reference(
31 |     cb_url = "https://cells.ucsc.edu/?ds=evocell+mus-musculus+marrow",
32 |     cluster_col = "Clusters", if_log = FALSE
33 | )
34 | 
35 | get_ucsc_reference(
36 |     cb_url = "http://cells.ucsc.edu/?ds=muscle-cell-atlas",
37 |     cluster_col = "cell_annotation",
38 |     if_log = FALSE
39 | )
40 | }
41 | }
42 | 


--------------------------------------------------------------------------------
/man/get_unique_column.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_unique_column}
 4 | \alias{get_unique_column}
 5 | \title{Generate a unique column id for a dataframe}
 6 | \usage{
 7 | get_unique_column(df, id = NULL)
 8 | }
 9 | \arguments{
10 | \item{df}{dataframe with column names}
11 | 
12 | \item{id}{desired id if unique}
13 | }
14 | \value{
15 | character
16 | }
17 | \description{
18 | Generate a unique column id for a dataframe
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_vargenes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_genelist.R
 3 | \name{get_vargenes}
 4 | \alias{get_vargenes}
 5 | \title{Generate variable gene list from marker matrix}
 6 | \usage{
 7 | get_vargenes(marker_mat)
 8 | }
 9 | \arguments{
10 | \item{marker_mat}{matrix or dataframe of candidate genes for each cluster}
11 | }
12 | \value{
13 | vector of marker gene names
14 | }
15 | \description{
16 | Variable gene list is required for \code{clustify} main function.
17 | This function parses variable genes from a matrix input.
18 | }
19 | \examples{
20 | get_vargenes(cbmc_m)
21 | }
22 | 


--------------------------------------------------------------------------------
/man/gmt_to_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{gmt_to_list}
 4 | \alias{gmt_to_list}
 5 | \title{convert gmt format of pathways to list of vectors}
 6 | \usage{
 7 | gmt_to_list(
 8 |   path,
 9 |   cutoff = 0,
10 |   sep = "\\thttp://www.broadinstitute.org/gsea/msigdb/cards/.*?\\t"
11 | )
12 | }
13 | \arguments{
14 | \item{path}{gmt file path}
15 | 
16 | \item{cutoff}{remove pathways with fewer genes than this cutoff}
17 | 
18 | \item{sep}{sep used in file to split path and genes}
19 | }
20 | \value{
21 | list of genes in each pathway
22 | }
23 | \description{
24 | convert gmt format of pathways to list of vectors
25 | }
26 | \examples{
27 | gmt_file <- system.file(
28 |     "extdata",
29 |     "c2.cp.reactome.v6.2.symbols.gmt.gz",
30 |     package = "clustifyr"
31 | )
32 | 
33 | gene.lists <- gmt_to_list(path = gmt_file)
34 | length(gene.lists)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/human_genes_10x.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{human_genes_10x}
 5 | \alias{human_genes_10x}
 6 | \title{Vector of human genes for 10x cellranger pipeline}
 7 | \format{
 8 | An object of class \code{character} of length 33514.
 9 | }
10 | \source{
11 | \url{https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest}
12 | }
13 | \usage{
14 | human_genes_10x
15 | }
16 | \description{
17 | Vector of human genes for 10x cellranger pipeline
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{mouse_genes_10x}},
25 | \code{\link{pbmc_markers}},
26 | \code{\link{pbmc_markers_M3Drop}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/insert_meta_object.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{insert_meta_object}
 4 | \alias{insert_meta_object}
 5 | \title{more flexible metadata update of single cell objects}
 6 | \usage{
 7 | insert_meta_object(
 8 |   input,
 9 |   new_meta,
10 |   type = class(input),
11 |   meta_loc = NULL,
12 |   lookuptable = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{input}{input object}
17 | 
18 | \item{new_meta}{new metadata table to insert back into object}
19 | 
20 | \item{type}{look up predefined slots/loc}
21 | 
22 | \item{meta_loc}{metadata location}
23 | 
24 | \item{lookuptable}{if not supplied,
25 | will look in built-in table for object parsing}
26 | }
27 | \value{
28 | new object with new metadata inserted
29 | }
30 | \description{
31 | more flexible metadata update of single cell objects
32 | }
33 | \examples{
34 | so <- so_pbmc()
35 | insert_meta_object(so, seurat_meta(so, dr = "umap"))
36 | }
37 | 


--------------------------------------------------------------------------------
/man/kl_divergence.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_similarity.R
 3 | \name{kl_divergence}
 4 | \alias{kl_divergence}
 5 | \title{KL divergence}
 6 | \usage{
 7 | kl_divergence(vec1, vec2, if_log = FALSE, total_reads = 1000, max_KL = 1)
 8 | }
 9 | \arguments{
10 | \item{vec1}{Test vector}
11 | 
12 | \item{vec2}{Reference vector}
13 | 
14 | \item{if_log}{Whether the vectors are log-transformed. If so, the
15 | raw count should be computed before computing KL-divergence.}
16 | 
17 | \item{total_reads}{Pseudo-library size}
18 | 
19 | \item{max_KL}{Maximal allowed value of KL-divergence.}
20 | }
21 | \value{
22 | numeric value, with additional attributes, of kl divergence
23 | between the vectors
24 | }
25 | \description{
26 | Use package entropy to compute Kullback-Leibler divergence.
27 | The function first converts each vector's reads to pseudo-number of
28 | transcripts by normalizing the total reads to total_reads.
29 | The normalized read for each gene is then rounded to serve as the
30 | pseudo-number of transcripts.
31 | Function entropy::KL.shrink is called to compute the KL-divergence between
32 | the two vectors, and the maximal allowed divergence is set to max_KL.
33 | Finally, a linear transform is performed to convert the KL divergence,
34 | which is between 0 and max_KL, to a similarity score between -1 and 1.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/make_comb_ref.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{make_comb_ref}
 4 | \alias{make_comb_ref}
 5 | \title{make combination ref matrix to assess intermixing}
 6 | \usage{
 7 | make_comb_ref(ref_mat, if_log = TRUE, sep = "_and_")
 8 | }
 9 | \arguments{
10 | \item{ref_mat}{reference expression matrix}
11 | 
12 | \item{if_log}{whether input data is natural log}
13 | 
14 | \item{sep}{separator for name combinations}
15 | }
16 | \value{
17 | expression matrix
18 | }
19 | \description{
20 | make combination ref matrix to assess intermixing
21 | }
22 | \examples{
23 | ref <- make_comb_ref(
24 |     cbmc_ref,
25 |     sep = "_+_"
26 | )
27 | ref[1:3, 1:3]
28 | }
29 | 


--------------------------------------------------------------------------------
/man/marker_select.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{marker_select}
 4 | \alias{marker_select}
 5 | \title{decide for one gene whether it is a marker for a certain cell type}
 6 | \usage{
 7 | marker_select(row1, cols, cut = 1, compto = 1)
 8 | }
 9 | \arguments{
10 | \item{row1}{a numeric vector of expression values (row)}
11 | 
12 | \item{cols}{a vector of cell types (column)}
13 | 
14 | \item{cut}{an expression minimum cutoff}
15 | 
16 | \item{compto}{compare max expression to the value of next 1 or more}
17 | }
18 | \value{
19 | vector of cluster name and ratio value
20 | }
21 | \description{
22 | decide for one gene whether it is a marker for a certain cell type
23 | }
24 | \examples{
25 | pbmc_avg <- average_clusters(
26 |     mat = pbmc_matrix_small,
27 |     metadata = pbmc_meta,
28 |     cluster_col = "classified",
29 |     if_log = FALSE
30 | )
31 | 
32 | marker_select(
33 |     row1 = pbmc_avg["PPBP", ],
34 |     cols = names(pbmc_avg["PPBP", ])
35 | )
36 | }
37 | 


--------------------------------------------------------------------------------
/man/matrixize_markers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_genelist.R
 3 | \name{matrixize_markers}
 4 | \alias{matrixize_markers}
 5 | \title{Convert candidate genes list into matrix}
 6 | \usage{
 7 | matrixize_markers(
 8 |   marker_df,
 9 |   ranked = FALSE,
10 |   n = NULL,
11 |   step_weight = 1,
12 |   background_weight = 0,
13 |   unique = FALSE,
14 |   metadata = NULL,
15 |   cluster_col = "classified",
16 |   remove_rp = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{marker_df}{dataframe of candidate genes, must contain
21 | "gene" and "cluster" columns, or a matrix of gene names to
22 | convert to ranked}
23 | 
24 | \item{ranked}{unranked gene list feeds into hyperp, the ranked
25 | gene list feeds into regular corr_coef}
26 | 
27 | \item{n}{number of genes to use}
28 | 
29 | \item{step_weight}{ranked genes are transformed into pseudo
30 | expression by descending weight}
31 | 
32 | \item{background_weight}{ranked genes are transformed into pseudo
33 | expression with added weight}
34 | 
35 | \item{unique}{whether to use only unique markers to 1 cluster}
36 | 
37 | \item{metadata}{vector or dataframe of cluster names, should
38 | have column named cluster}
39 | 
40 | \item{cluster_col}{column for cluster names to replace original
41 | cluster, if metadata is dataframe}
42 | 
43 | \item{remove_rp}{do not include rps, rpl, rp1-9 in markers}
44 | }
45 | \value{
46 | matrix of unranked gene marker names, or matrix of
47 | ranked expression
48 | }
49 | \description{
50 | Convert candidate genes list into matrix
51 | }
52 | \examples{
53 | matrixize_markers(pbmc_markers)
54 | }
55 | 


--------------------------------------------------------------------------------
/man/mouse_genes_10x.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{mouse_genes_10x}
 5 | \alias{mouse_genes_10x}
 6 | \title{Vector of mouse genes for 10x cellranger pipeline}
 7 | \format{
 8 | An object of class \code{character} of length 31017.
 9 | }
10 | \source{
11 | \url{https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest}
12 | }
13 | \usage{
14 | mouse_genes_10x
15 | }
16 | \description{
17 | Vector of mouse genes for 10x cellranger pipeline
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{human_genes_10x}},
25 | \code{\link{pbmc_markers}},
26 | \code{\link{pbmc_markers_M3Drop}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/not_pretty_palette.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \docType{data}
 4 | \name{not_pretty_palette}
 5 | \alias{not_pretty_palette}
 6 | \title{black and white palette for plotting continuous variables}
 7 | \format{
 8 | An object of class \code{character} of length 9.
 9 | }
10 | \usage{
11 | not_pretty_palette
12 | }
13 | \value{
14 | vector of colors
15 | }
16 | \description{
17 | black and white palette for plotting continuous variables
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/object_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{object_data}
 4 | \alias{object_data}
 5 | \alias{object_data.Seurat}
 6 | \alias{object_data.SingleCellExperiment}
 7 | \title{Function to access object data}
 8 | \usage{
 9 | object_data(object, ...)
10 | 
11 | \method{object_data}{Seurat}(object, slot, n_genes = 1000, ...)
12 | 
13 | \method{object_data}{SingleCellExperiment}(object, slot, ...)
14 | }
15 | \arguments{
16 | \item{object}{object after tsne or umap projections
17 | and clustering}
18 | 
19 | \item{...}{additional arguments}
20 | 
21 | \item{slot}{data to access}
22 | 
23 | \item{n_genes}{number of genes limit for Seurat variable genes, by default 1000,
24 | set to 0 to use all variable genes (generally not recommended)}
25 | }
26 | \value{
27 | expression matrix, with genes as row names,
28 | and cell types as column names
29 | }
30 | \description{
31 | Function to access object data
32 | }
33 | \examples{
34 | so <- so_pbmc()
35 | mat <- object_data(
36 |     object = so,
37 |     slot = "data"
38 | )
39 | mat[1:3, 1:3]
40 | sce <- sce_pbmc()
41 | mat <- object_data(
42 |     object = sce,
43 |     slot = "data"
44 | )
45 | mat[1:3, 1:3]
46 | }
47 | 


--------------------------------------------------------------------------------
/man/object_loc_lookup.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{object_loc_lookup}
 4 | \alias{object_loc_lookup}
 5 | \title{lookup table for single cell object structures}
 6 | \usage{
 7 | object_loc_lookup()
 8 | }
 9 | \value{
10 | A list populated with standardized functions to
11 | access relevant data structures in multiple single cell
12 | data formats.
13 | }
14 | \description{
15 | lookup table for single cell object structures
16 | }
17 | 


--------------------------------------------------------------------------------
/man/object_ref.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{object_ref}
 4 | \alias{object_ref}
 5 | \alias{object_ref.default}
 6 | \alias{object_ref.Seurat}
 7 | \alias{object_ref.SingleCellExperiment}
 8 | \title{Function to convert labelled object to avg expression matrix}
 9 | \usage{
10 | object_ref(input, ...)
11 | 
12 | \method{object_ref}{default}(
13 |   input,
14 |   cluster_col = NULL,
15 |   var_genes_only = FALSE,
16 |   assay_name = NULL,
17 |   method = "mean",
18 |   lookuptable = NULL,
19 |   if_log = TRUE,
20 |   ...
21 | )
22 | 
23 | \method{object_ref}{Seurat}(
24 |   input,
25 |   cluster_col = NULL,
26 |   var_genes_only = FALSE,
27 |   assay_name = NULL,
28 |   method = "mean",
29 |   lookuptable = NULL,
30 |   if_log = TRUE,
31 |   ...
32 | )
33 | 
34 | \method{object_ref}{SingleCellExperiment}(
35 |   input,
36 |   cluster_col = NULL,
37 |   var_genes_only = FALSE,
38 |   assay_name = NULL,
39 |   method = "mean",
40 |   lookuptable = NULL,
41 |   if_log = TRUE,
42 |   ...
43 | )
44 | }
45 | \arguments{
46 | \item{input}{object after tsne or umap projections and clustering}
47 | 
48 | \item{...}{additional arguments}
49 | 
50 | \item{cluster_col}{column name where classified cluster names
51 | are stored in seurat meta data, cannot be "rn"}
52 | 
53 | \item{var_genes_only}{whether to keep only var.genes in the
54 | final matrix output, could also look up genes used for PCA}
55 | 
56 | \item{assay_name}{any additional assay data, such as ADT, to include.
57 | If more than 1, pass a vector of names}
58 | 
59 | \item{method}{whether to take mean (default) or median}
60 | 
61 | \item{lookuptable}{if not supplied, will look
62 | in built-in table for object parsing}
63 | 
64 | \item{if_log}{input data is natural log,
65 | averaging will be done on unlogged data}
66 | }
67 | \value{
68 | reference expression matrix, with genes as row names,
69 | and cell types as column names
70 | }
71 | \description{
72 | Function to convert labelled object to avg expression matrix
73 | }
74 | \examples{
75 | so <- so_pbmc()
76 | object_ref(
77 |     so,
78 |     cluster_col = "seurat_clusters"
79 | )
80 | }
81 | 


--------------------------------------------------------------------------------
/man/overcluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{overcluster}
 4 | \alias{overcluster}
 5 | \title{Overcluster by kmeans per cluster}
 6 | \usage{
 7 | overcluster(mat, cluster_id, power = 0.15)
 8 | }
 9 | \arguments{
10 | \item{mat}{expression matrix}
11 | 
12 | \item{cluster_id}{list of ids per cluster}
13 | 
14 | \item{power}{decides the number of clusters for kmeans}
15 | }
16 | \value{
17 | new cluster_id list of more clusters
18 | }
19 | \description{
20 | Overcluster by kmeans per cluster
21 | }
22 | \examples{
23 | res <- overcluster(
24 |     mat = pbmc_matrix_small,
25 |     cluster_id = split(colnames(pbmc_matrix_small), pbmc_meta$classified)
26 | )
27 | length(res)
28 | }
29 | 


--------------------------------------------------------------------------------
/man/overcluster_test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{overcluster_test}
 4 | \alias{overcluster_test}
 5 | \title{compare clustering parameters and classification outcomes}
 6 | \usage{
 7 | overcluster_test(
 8 |   expr,
 9 |   metadata,
10 |   ref_mat,
11 |   cluster_col,
12 |   x_col = "UMAP_1",
13 |   y_col = "UMAP_2",
14 |   n = 5,
15 |   ngenes = NULL,
16 |   query_genes = NULL,
17 |   threshold = 0,
18 |   do_label = TRUE,
19 |   do_legend = FALSE,
20 |   newclustering = NULL,
21 |   combine = TRUE
22 | )
23 | }
24 | \arguments{
25 | \item{expr}{expression matrix}
26 | 
27 | \item{metadata}{metadata including cluster info and
28 | dimension reduction plotting}
29 | 
30 | \item{ref_mat}{reference matrix}
31 | 
32 | \item{cluster_col}{column of clustering from metadata}
33 | 
34 | \item{x_col}{column of metadata for x axis plotting}
35 | 
36 | \item{y_col}{column of metadata for y axis plotting}
37 | 
38 | \item{n}{expand n-fold for over/under clustering}
39 | 
40 | \item{ngenes}{number of genes to use for feature selection,
41 | use all genes if NULL}
42 | 
43 | \item{query_genes}{vector, otherwise genes will be recalculated}
44 | 
45 | \item{threshold}{type calling threshold}
46 | 
47 | \item{do_label}{whether to label each cluster at median center}
48 | 
49 | \item{do_legend}{whether to draw legend}
50 | 
51 | \item{newclustering}{if NULL, use kmeans on dr;
52 | otherwise col name for second column of clustering}
53 | 
54 | \item{combine}{if TRUE return a single plot with combined panels, if
55 | FALSE return list of plots (default: TRUE)}
56 | }
57 | \value{
58 | faceted ggplot object
59 | }
60 | \description{
61 | compare clustering parameters and classification outcomes
62 | }
63 | \examples{
64 | set.seed(42)
65 | overcluster_test(
66 |     expr = pbmc_matrix_small,
67 |     metadata = pbmc_meta,
68 |     ref_mat = cbmc_ref,
69 |     cluster_col = "classified",
70 |     x_col = "UMAP_1",
71 |     y_col = "UMAP_2"
72 | )
73 | }
74 | 


--------------------------------------------------------------------------------
/man/parse_loc_object.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{parse_loc_object}
 4 | \alias{parse_loc_object}
 5 | \title{more flexible parsing of single cell objects}
 6 | \usage{
 7 | parse_loc_object(
 8 |   input,
 9 |   type = class(input),
10 |   expr_loc = NULL,
11 |   meta_loc = NULL,
12 |   var_loc = NULL,
13 |   cluster_col = NULL,
14 |   lookuptable = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{input}{input object}
19 | 
20 | \item{type}{look up predefined slots/loc}
21 | 
22 | \item{expr_loc}{function that extracts expression matrix}
23 | 
24 | \item{meta_loc}{function that extracts metadata}
25 | 
26 | \item{var_loc}{function that extracts variable genes}
27 | 
28 | \item{cluster_col}{column of clustering from metadata}
29 | 
30 | \item{lookuptable}{if not supplied, will use object_loc_lookup() for parsing.}
31 | }
32 | \value{
33 | list of expression, metadata, vargenes, cluster_col info from object
34 | }
35 | \description{
36 | more flexible parsing of single cell objects
37 | }
38 | \examples{
39 | so <- so_pbmc()
40 | obj <- parse_loc_object(so)
41 | length(obj)
42 | }
43 | 


--------------------------------------------------------------------------------
/man/pbmc_markers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{pbmc_markers}
 5 | \alias{pbmc_markers}
 6 | \title{Marker genes identified by Seurat from single-cell RNA-seq PBMCs.}
 7 | \format{
 8 | An object of class \code{data.frame} with 2304 rows and 7 columns.
 9 | }
10 | \source{
11 | \verb{[pbmc_matrix]} processed by Seurat
12 | }
13 | \usage{
14 | pbmc_markers
15 | }
16 | \description{
17 | Dataframe of markers from Seurat FindAllMarkers function
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{human_genes_10x}},
25 | \code{\link{mouse_genes_10x}},
26 | \code{\link{pbmc_markers_M3Drop}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/pbmc_markers_M3Drop.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{pbmc_markers_M3Drop}
 5 | \alias{pbmc_markers_M3Drop}
 6 | \title{Marker genes identified by M3Drop from single-cell RNA-seq PBMCs.}
 7 | \format{
 8 | A data frame with 3 variables:
 9 | }
10 | \source{
11 | \verb{[pbmc_matrix]} processed by \verb{[M3Drop]}
12 | }
13 | \usage{
14 | pbmc_markers_M3Drop
15 | }
16 | \description{
17 | Selected features of 3k pbmcs from Seurat3 tutorial
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{human_genes_10x}},
25 | \code{\link{mouse_genes_10x}},
26 | \code{\link{pbmc_markers}},
27 | \code{\link{pbmc_matrix_small}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/pbmc_matrix_small.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{pbmc_matrix_small}
 5 | \alias{pbmc_matrix_small}
 6 | \title{Matrix of single-cell RNA-seq PBMCs.}
 7 | \format{
 8 | A sparseMatrix with genes as rows and cells as columns.
 9 | }
10 | \source{
11 | \url{https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html}
12 | }
13 | \usage{
14 | pbmc_matrix_small
15 | }
16 | \description{
17 | Count matrix of 3k pbmcs from Seurat3 tutorial, with only var.features
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{human_genes_10x}},
25 | \code{\link{mouse_genes_10x}},
26 | \code{\link{pbmc_markers}},
27 | \code{\link{pbmc_markers_M3Drop}},
28 | \code{\link{pbmc_meta}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/pbmc_meta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{pbmc_meta}
 5 | \alias{pbmc_meta}
 6 | \title{Meta-data for single-cell RNA-seq PBMCs.}
 7 | \format{
 8 | An object of class \code{data.frame} with 2638 rows and 9 columns.
 9 | }
10 | \source{
11 | \verb{[pbmc_matrix]} processed by Seurat
12 | }
13 | \usage{
14 | pbmc_meta
15 | }
16 | \description{
17 | Metadata, including umap, of 3k pbmcs from Seurat3 tutorial
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{human_genes_10x}},
25 | \code{\link{mouse_genes_10x}},
26 | \code{\link{pbmc_markers}},
27 | \code{\link{pbmc_markers_M3Drop}},
28 | \code{\link{pbmc_matrix_small}},
29 | \code{\link{pbmc_vargenes}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/pbmc_vargenes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{pbmc_vargenes}
 5 | \alias{pbmc_vargenes}
 6 | \title{Variable genes identified by Seurat from single-cell RNA-seq PBMCs.}
 7 | \format{
 8 | An object of class \code{character} of length 2000.
 9 | }
10 | \source{
11 | \verb{[pbmc_matrix]} processed by Seurat
12 | }
13 | \usage{
14 | pbmc_vargenes
15 | }
16 | \description{
17 | Top 2000 variable genes from 3k pbmcs from Seurat3 tutorial
18 | }
19 | \seealso{
20 | Other data: 
21 | \code{\link{cbmc_m}},
22 | \code{\link{cbmc_ref}},
23 | \code{\link{downrefs}},
24 | \code{\link{human_genes_10x}},
25 | \code{\link{mouse_genes_10x}},
26 | \code{\link{pbmc_markers}},
27 | \code{\link{pbmc_markers_M3Drop}},
28 | \code{\link{pbmc_matrix_small}},
29 | \code{\link{pbmc_meta}}
30 | }
31 | \concept{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/percent_clusters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{percent_clusters}
 4 | \alias{percent_clusters}
 5 | \title{Percentage detected per cluster}
 6 | \usage{
 7 | percent_clusters(mat, metadata, cluster_col = "cluster", cut_num = 0.5)
 8 | }
 9 | \arguments{
10 | \item{mat}{expression matrix}
11 | 
12 | \item{metadata}{data.frame with cells}
13 | 
14 | \item{cluster_col}{column in metadata with cluster number}
15 | 
16 | \item{cut_num}{binary cutoff for detection}
17 | }
18 | \value{
19 | matrix of numeric values, with genes for row names,
20 | and clusters for column names
21 | }
22 | \description{
23 | Percentage detected per cluster
24 | }
25 | 


--------------------------------------------------------------------------------
/man/permute_similarity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_similarity.R
 3 | \name{permute_similarity}
 4 | \alias{permute_similarity}
 5 | \title{Compute a p-value for similarity using permutation}
 6 | \usage{
 7 | permute_similarity(
 8 |   expr_mat,
 9 |   ref_mat,
10 |   cluster_ids,
11 |   n_perm,
12 |   per_cell = FALSE,
13 |   compute_method,
14 |   pseudobulk_method = "mean",
15 |   rm0 = FALSE,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{expr_mat}{single-cell expression matrix}
21 | 
22 | \item{ref_mat}{reference expression matrix}
23 | 
24 | \item{cluster_ids}{clustering info of single-cell data assume that
25 | genes have ALREADY BEEN filtered}
26 | 
27 | \item{n_perm}{number of permutations}
28 | 
29 | \item{per_cell}{run per cell?}
30 | 
31 | \item{compute_method}{method(s) for computing similarity scores}
32 | 
33 | \item{pseudobulk_method}{method used for summarizing clusters, options are mean (default), median, truncate (10\% truncated mean), trimean, max, or min}
34 | 
35 | \item{rm0}{consider 0 as missing data, recommended for per_cell}
36 | 
37 | \item{...}{additional parameters}
38 | }
39 | \value{
40 | matrix of numeric values
41 | }
42 | \description{
43 | Permute cluster labels to calculate empirical p-value
44 | }
45 | 


--------------------------------------------------------------------------------
/man/plot_best_call.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_best_call}
 4 | \alias{plot_best_call}
 5 | \title{Plot best calls for each cluster on a tSNE or umap}
 6 | \usage{
 7 | plot_best_call(
 8 |   cor_mat,
 9 |   metadata,
10 |   cluster_col = "cluster",
11 |   collapse_to_cluster = FALSE,
12 |   threshold = 0,
13 |   x = "UMAP_1",
14 |   y = "UMAP_2",
15 |   plot_r = FALSE,
16 |   per_cell = FALSE,
17 |   ...
18 | )
19 | }
20 | \arguments{
21 | \item{cor_mat}{input similarity matrix}
22 | 
23 | \item{metadata}{input metadata with tsne or
24 | umap coordinates and cluster ids}
25 | 
26 | \item{cluster_col}{metadata column, can be cluster or cellid}
27 | 
28 | \item{collapse_to_cluster}{if a column name is provided,
29 | takes the most frequent call of entire cluster to color in plot}
30 | 
31 | \item{threshold}{minimum correlation coefficient cutoff for calling clusters}
32 | 
33 | \item{x}{x variable}
34 | 
35 | \item{y}{y variable}
36 | 
37 | \item{plot_r}{whether to include a second plot of the correlation coefficient for the best call}
38 | 
39 | \item{per_cell}{whether the cor_mat was generated per cell or per cluster}
40 | 
41 | \item{...}{passed to plot_dims}
42 | }
43 | \value{
44 | ggplot object, cells projected by dr,
45 | colored by cell type classification
46 | }
47 | \description{
48 | Plot best calls for each cluster on a tSNE or umap
49 | }
50 | \examples{
51 | res <- clustify(
52 |     input = pbmc_matrix_small,
53 |     metadata = pbmc_meta,
54 |     ref_mat = cbmc_ref,
55 |     query_genes = pbmc_vargenes,
56 |     cluster_col = "classified"
57 | )
58 | 
59 | plot_best_call(
60 |     cor_mat = res,
61 |     metadata = pbmc_meta,
62 |     cluster_col = "classified"
63 | )
64 | }
65 | 


--------------------------------------------------------------------------------
/man/plot_call.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_call}
 4 | \alias{plot_call}
 5 | \title{Plot called clusters on a tSNE or umap, for each reference cluster given}
 6 | \usage{
 7 | plot_call(cor_mat, metadata, data_to_plot = colnames(cor_mat), ...)
 8 | }
 9 | \arguments{
10 | \item{cor_mat}{input similarity matrix}
11 | 
12 | \item{metadata}{input metadata with tsne or
13 | umap coordinates and cluster ids}
14 | 
15 | \item{data_to_plot}{colname of data to plot, defaults to all}
16 | 
17 | \item{...}{passed to plot_dims}
18 | }
19 | \value{
20 | list of ggplot objects, cells projected by dr,
21 | colored by cell type classification
22 | }
23 | \description{
24 | Plot called clusters on a tSNE or umap, for each reference cluster given
25 | }
26 | 


--------------------------------------------------------------------------------
/man/plot_cor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_cor}
 4 | \alias{plot_cor}
 5 | \title{Plot similarity measures on a tSNE or umap}
 6 | \usage{
 7 | plot_cor(
 8 |   cor_mat,
 9 |   metadata,
10 |   data_to_plot = colnames(cor_mat),
11 |   cluster_col = NULL,
12 |   x = "UMAP_1",
13 |   y = "UMAP_2",
14 |   scale_legends = FALSE,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{cor_mat}{input similarity matrix}
20 | 
21 | \item{metadata}{input metadata with per cell tsne or
22 | umap coordinates and cluster ids}
23 | 
24 | \item{data_to_plot}{colname of data to plot, defaults to all}
25 | 
26 | \item{cluster_col}{colname of clustering data in metadata, defaults
27 | to rownames of the metadata if not supplied.}
28 | 
29 | \item{x}{metadata column name with 1st axis dimension.
30 | defaults to "UMAP_1".}
31 | 
32 | \item{y}{metadata column name with 2nd axis dimension.
33 | defaults to "UMAP_2".}
34 | 
35 | \item{scale_legends}{if TRUE scale all legends to maximum values in entire
36 | correlation matrix. if FALSE scale legends to maximum for each plot. A
37 | two-element numeric vector can also be passed to supply
38 | custom values i.e. c(0, 1)}
39 | 
40 | \item{...}{passed to plot_dims}
41 | }
42 | \value{
43 | list of ggplot objects, cells projected by dr,
44 | colored by cor values
45 | }
46 | \description{
47 | Plot similarity measures on a tSNE or umap
48 | }
49 | \examples{
50 | res <- clustify(
51 |     input = pbmc_matrix_small,
52 |     metadata = pbmc_meta,
53 |     ref_mat = cbmc_ref,
54 |     query_genes = pbmc_vargenes,
55 |     cluster_col = "classified"
56 | )
57 | 
58 | plot_cor(
59 |     cor_mat = res,
60 |     metadata = pbmc_meta,
61 |     data_to_plot = colnames(res)[1:2],
62 |     cluster_col = "classified",
63 |     x = "UMAP_1",
64 |     y = "UMAP_2"
65 | )
66 | }
67 | 


--------------------------------------------------------------------------------
/man/plot_cor_heatmap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_cor_heatmap}
 4 | \alias{plot_cor_heatmap}
 5 | \title{Plot similarity measures on heatmap}
 6 | \usage{
 7 | plot_cor_heatmap(
 8 |   cor_mat,
 9 |   metadata = NULL,
10 |   cluster_col = NULL,
11 |   col = not_pretty_palette,
12 |   legend_title = NULL,
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{cor_mat}{input similarity matrix}
18 | 
19 | \item{metadata}{input metadata with per cell tsne
20 | or umap coordinates and cluster ids}
21 | 
22 | \item{cluster_col}{colname of clustering data in metadata,
23 | defaults to rownames of the metadata if not supplied.}
24 | 
25 | \item{col}{color ramp to use}
26 | 
27 | \item{legend_title}{legend title to pass to Heatmap}
28 | 
29 | \item{...}{passed to Heatmap}
30 | }
31 | \value{
32 | complexheatmap object
33 | }
34 | \description{
35 | Plot similarity measures on heatmap
36 | }
37 | \examples{
38 | res <- clustify(
39 |     input = pbmc_matrix_small,
40 |     metadata = pbmc_meta,
41 |     ref_mat = cbmc_ref,
42 |     query_genes = pbmc_vargenes,
43 |     cluster_col = "classified",
44 |     per_cell = FALSE
45 | )
46 | 
47 | plot_cor_heatmap(res)
48 | }
49 | 


--------------------------------------------------------------------------------
/man/plot_dims.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_dims}
 4 | \alias{plot_dims}
 5 | \title{Plot a tSNE or umap colored by feature.}
 6 | \usage{
 7 | plot_dims(
 8 |   data,
 9 |   x = "UMAP_1",
10 |   y = "UMAP_2",
11 |   feature = NULL,
12 |   legend_name = "",
13 |   c_cols = pretty_palette2,
14 |   d_cols = NULL,
15 |   pt_size = 0.25,
16 |   alpha_col = NULL,
17 |   group_col = NULL,
18 |   scale_limits = NULL,
19 |   do_label = FALSE,
20 |   do_legend = TRUE,
21 |   do_repel = TRUE
22 | )
23 | }
24 | \arguments{
25 | \item{data}{input data}
26 | 
27 | \item{x}{x variable}
28 | 
29 | \item{y}{y variable}
30 | 
31 | \item{feature}{feature to color by}
32 | 
33 | \item{legend_name}{legend name to display, defaults to no name}
34 | 
35 | \item{c_cols}{character vector of colors to build color gradient
36 | for continuous values, defaults to \code{\link{pretty_palette2}}}
37 | 
38 | \item{d_cols}{character vector of colors for discrete values.
39 | defaults to RColorBrewer paired palette}
40 | 
41 | \item{pt_size}{point size}
42 | 
43 | \item{alpha_col}{whether to refer to data column for alpha values}
44 | 
45 | \item{group_col}{group by another column instead of feature,
46 | useful for labels}
47 | 
48 | \item{scale_limits}{defaults to min = 0, max = max(data$x),
49 | otherwise a two-element numeric vector indicating min and max to plot}
50 | 
51 | \item{do_label}{whether to label each cluster at median center}
52 | 
53 | \item{do_legend}{whether to draw legend}
54 | 
55 | \item{do_repel}{whether to use ggrepel on labels}
56 | }
57 | \value{
58 | ggplot object, cells projected by dr, colored by feature
59 | }
60 | \description{
61 | Plot a tSNE or umap colored by feature.
62 | }
63 | \examples{
64 | plot_dims(
65 |     pbmc_meta,
66 |     feature = "classified"
67 | )
68 | }
69 | 


--------------------------------------------------------------------------------
/man/plot_gene.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_gene}
 4 | \alias{plot_gene}
 5 | \title{Plot gene expression on to tSNE or umap}
 6 | \usage{
 7 | plot_gene(expr_mat, metadata, genes, cell_col = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{expr_mat}{input single cell matrix}
11 | 
12 | \item{metadata}{data.frame with tSNE or umap coordinates}
13 | 
14 | \item{genes}{gene(s) to color tSNE or umap}
15 | 
16 | \item{cell_col}{column name in metadata containing cell ids, defaults
17 | to rownames if not supplied}
18 | 
19 | \item{...}{additional arguments passed to \code{\link[=plot_dims]{plot_dims()}}}
20 | }
21 | \value{
22 | list of ggplot objects, cells projected by dr,
23 | colored by gene expression
24 | }
25 | \description{
26 | Plot gene expression on to tSNE or umap
27 | }
28 | \examples{
29 | genes <- c(
30 |     "RP11-314N13.3",
31 |     "ARF4"
32 | )
33 | 
34 | plot_gene(
35 |     expr_mat = pbmc_matrix_small,
36 |     metadata = tibble::rownames_to_column(pbmc_meta, "rn"),
37 |     genes = genes,
38 |     cell_col = "rn"
39 | )
40 | }
41 | 


--------------------------------------------------------------------------------
/man/plot_pathway_gsea.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{plot_pathway_gsea}
 4 | \alias{plot_pathway_gsea}
 5 | \title{plot GSEA pathway scores as heatmap,
 6 | returns a list containing results and plot.}
 7 | \usage{
 8 | plot_pathway_gsea(
 9 |   mat,
10 |   pathway_list,
11 |   n_perm = 1000,
12 |   scale = TRUE,
13 |   topn = 5,
14 |   returning = "both"
15 | )
16 | }
17 | \arguments{
18 | \item{mat}{expression matrix}
19 | 
20 | \item{pathway_list}{a list of vectors, each named for a specific pathway,
21 | or dataframe}
22 | 
23 | \item{n_perm}{Number of permutation for fgsea function. Defaults to 1000.}
24 | 
25 | \item{scale}{convert expr_mat into zscores prior to running GSEA?,
26 | default = TRUE}
27 | 
28 | \item{topn}{number of top pathways to plot}
29 | 
30 | \item{returning}{to return "both" list and plot, or either one}
31 | }
32 | \value{
33 | list of matrix and plot, or just plot, matrix of GSEA NES values,
34 | cell types as row names, pathways as column names
35 | }
36 | \description{
37 | plot GSEA pathway scores as heatmap,
38 | returns a list containing results and plot.
39 | }
40 | \examples{
41 | gl <- list(
42 |     "n" = c("PPBP", "LYZ", "S100A9"),
43 |     "a" = c("IGLL5", "GNLY", "FTL")
44 | )
45 | 
46 | pbmc_avg <- average_clusters(
47 |     mat = pbmc_matrix_small,
48 |     metadata = pbmc_meta,
49 |     cluster_col = "classified"
50 | )
51 | 
52 | plot_pathway_gsea(
53 |     pbmc_avg,
54 |     gl,
55 |     5
56 | )
57 | }
58 | 


--------------------------------------------------------------------------------
/man/plot_rank_bias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{plot_rank_bias}
 4 | \alias{plot_rank_bias}
 5 | \title{Plot rank bias results}
 6 | \usage{
 7 | plot_rank_bias(bias_df, organism = "hsapiens")
 8 | }
 9 | \arguments{
10 | \item{bias_df}{data.frame of rank diff matrix between cluster and reference cell types}
11 | 
12 | \item{organism}{for GO term analysis, organism name: human - 'hsapiens', mouse - 'mmusculus'}
13 | }
14 | \value{
15 | ggplot object of distribution and annotated GO terms
16 | }
17 | \description{
18 | Plot rank bias results
19 | }
20 | \examples{
21 | \dontrun{
22 | avg <- average_clusters(
23 |     mat = pbmc_matrix_small,
24 |     metadata = pbmc_meta,
25 |     cluster_col = "classified",
26 |     if_log = FALSE
27 | )
28 | 
29 | rankdiff <- find_rank_bias(
30 |     avg,
31 |     cbmc_ref,
32 |     query_genes = pbmc_vargenes
33 | )
34 | 
35 | qres <- query_rank_bias(
36 |     rankdiff,
37 |     "CD14+ Mono",
38 |     "CD14+ Mono"
39 | )
40 | 
41 | g <- plot_rank_bias(
42 |     qres
43 | )
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/man/pos_neg_marker.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{pos_neg_marker}
 4 | \alias{pos_neg_marker}
 5 | \title{generate pos and negative marker expression matrix from a
 6 | list/dataframe of positive markers}
 7 | \usage{
 8 | pos_neg_marker(mat)
 9 | }
10 | \arguments{
11 | \item{mat}{matrix or dataframe of markers}
12 | }
13 | \value{
14 | matrix of gene expression
15 | }
16 | \description{
17 | generate pos and negative marker expression matrix from a
18 | list/dataframe of positive markers
19 | }
20 | \examples{
21 | m1 <- pos_neg_marker(cbmc_m)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/pos_neg_select.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{pos_neg_select}
 4 | \alias{pos_neg_select}
 5 | \title{adapt clustify to tweak score for pos and neg markers}
 6 | \usage{
 7 | pos_neg_select(
 8 |   input,
 9 |   ref_mat,
10 |   metadata,
11 |   cluster_col = "cluster",
12 |   cutoff_n = 0,
13 |   cutoff_score = 0.5
14 | )
15 | }
16 | \arguments{
17 | \item{input}{single-cell expression matrix}
18 | 
19 | \item{ref_mat}{reference expression matrix with positive and
20 | negative markers(set expression at 0)}
21 | 
22 | \item{metadata}{cell cluster assignments,
23 | supplied as a vector or data.frame. If
24 | data.frame is supplied then \code{cluster_col} needs to be set.
25 | Not required if running correlation per cell.}
26 | 
27 | \item{cluster_col}{column in metadata that contains cluster ids per cell.
28 | Will default to first
29 | column of metadata if not supplied.
30 | Not required if running correlation per cell.}
31 | 
32 | \item{cutoff_n}{expression cutoff where genes ranked below n are
33 | considered non-expressing}
34 | 
35 | \item{cutoff_score}{positive score lower than this cutoff will be
36 | considered as 0 to not influence scores}
37 | }
38 | \value{
39 | matrix of numeric values, clusters from input as row names,
40 | cell types from ref_mat as column names
41 | }
42 | \description{
43 | adapt clustify to tweak score for pos and neg markers
44 | }
45 | \examples{
46 | pn_ref <- data.frame(
47 |     "Myeloid" = c(1, 0.01, 0),
48 |     row.names = c("CD74", "clustifyr0", "CD79A")
49 | )
50 | 
51 | pos_neg_select(
52 |     input = pbmc_matrix_small,
53 |     ref_mat = pn_ref,
54 |     metadata = pbmc_meta,
55 |     cluster_col = "classified",
56 |     cutoff_score = 0.8
57 | )
58 | }
59 | 


--------------------------------------------------------------------------------
/man/pretty_palette.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \docType{data}
 4 | \name{pretty_palette}
 5 | \alias{pretty_palette}
 6 | \title{Color palette for plotting continuous variables}
 7 | \format{
 8 | An object of class \code{character} of length 6.
 9 | }
10 | \usage{
11 | pretty_palette
12 | }
13 | \value{
14 | vector of colors
15 | }
16 | \description{
17 | Color palette for plotting continuous variables
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/pretty_palette2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \docType{data}
 4 | \name{pretty_palette2}
 5 | \alias{pretty_palette2}
 6 | \title{Color palette for plotting continuous variables, starting at gray}
 7 | \format{
 8 | An object of class \code{character} of length 9.
 9 | }
10 | \usage{
11 | pretty_palette2
12 | }
13 | \value{
14 | vector of colors
15 | }
16 | \description{
17 | Color palette for plotting continuous variables, starting at gray
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/pretty_palette_ramp_d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{pretty_palette_ramp_d}
 4 | \alias{pretty_palette_ramp_d}
 5 | \title{Expanded color palette ramp for plotting discrete variables}
 6 | \usage{
 7 | pretty_palette_ramp_d(n)
 8 | }
 9 | \arguments{
10 | \item{n}{number of colors to use}
11 | }
12 | \value{
13 | color ramp
14 | }
15 | \description{
16 | Expanded color palette ramp for plotting discrete variables
17 | }
18 | 


--------------------------------------------------------------------------------
/man/query_rank_bias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{query_rank_bias}
 4 | \alias{query_rank_bias}
 5 | \title{Query rank bias results}
 6 | \usage{
 7 | query_rank_bias(bias_list, id_mat, id_ref)
 8 | }
 9 | \arguments{
10 | \item{bias_list}{list of rank diff matrix between cluster and reference cell types}
11 | 
12 | \item{id_mat}{name of cluster from average cluster matrix}
13 | 
14 | \item{id_ref}{name of cell type in reference matrix}
15 | }
16 | \value{
17 | data.frame rank diff values
18 | }
19 | \description{
20 | Query rank bias results
21 | }
22 | \examples{
23 | avg <- average_clusters(
24 |     mat = pbmc_matrix_small,
25 |     metadata = pbmc_meta,
26 |     cluster_col = "classified",
27 |     if_log = FALSE
28 | )
29 | 
30 | rankdiff <- find_rank_bias(
31 |     avg,
32 |     cbmc_ref,
33 |     query_genes = pbmc_vargenes
34 | )
35 | 
36 | qres <- query_rank_bias(
37 |     rankdiff,
38 |     "CD14+ Mono",
39 |     "CD14+ Mono"
40 | )
41 | }
42 | 


--------------------------------------------------------------------------------
/man/ref_feature_select.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{ref_feature_select}
 4 | \alias{ref_feature_select}
 5 | \title{feature select from reference matrix}
 6 | \usage{
 7 | ref_feature_select(mat, n = 3000, mode = "var", rm.lowvar = TRUE)
 8 | }
 9 | \arguments{
10 | \item{mat}{reference matrix}
11 | 
12 | \item{n}{number of genes to return}
13 | 
14 | \item{mode}{the method of selecting features}
15 | 
16 | \item{rm.lowvar}{whether to remove lower variation genes first}
17 | }
18 | \value{
19 | vector of genes
20 | }
21 | \description{
22 | feature select from reference matrix
23 | }
24 | \examples{
25 | pbmc_avg <- average_clusters(
26 |     mat = pbmc_matrix_small,
27 |     metadata = pbmc_meta,
28 |     cluster_col = "classified"
29 | )
30 | 
31 | ref_feature_select(
32 |     mat = pbmc_avg[1:100, ],
33 |     n = 5
34 | )
35 | }
36 | 


--------------------------------------------------------------------------------
/man/ref_marker_select.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{ref_marker_select}
 4 | \alias{ref_marker_select}
 5 | \title{marker selection from reference matrix}
 6 | \usage{
 7 | ref_marker_select(mat, cut = 0.5, arrange = TRUE, compto = 1)
 8 | }
 9 | \arguments{
10 | \item{mat}{reference matrix}
11 | 
12 | \item{cut}{an expression minimum cutoff}
13 | 
14 | \item{arrange}{whether to arrange (lower means better)}
15 | 
16 | \item{compto}{compare max expression to the value of next 1 or more}
17 | }
18 | \value{
19 | dataframe, with gene, cluster, ratio columns
20 | }
21 | \description{
22 | marker selection from reference matrix
23 | }
24 | \examples{
25 | ref_marker_select(
26 |     cbmc_ref,
27 |     cut = 2
28 | )
29 | }
30 | 


--------------------------------------------------------------------------------
/man/reverse_marker_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{reverse_marker_matrix}
 4 | \alias{reverse_marker_matrix}
 5 | \title{generate negative markers from a list of exclusive positive markers}
 6 | \usage{
 7 | reverse_marker_matrix(mat)
 8 | }
 9 | \arguments{
10 | \item{mat}{matrix or dataframe of markers}
11 | }
12 | \value{
13 | matrix of gene names
14 | }
15 | \description{
16 | generate negative markers from a list of exclusive positive markers
17 | }
18 | \examples{
19 | reverse_marker_matrix(cbmc_m)
20 | }
21 | 


--------------------------------------------------------------------------------
/man/run_clustifyr_app.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shiny.R
 3 | \name{run_clustifyr_app}
 4 | \alias{run_clustifyr_app}
 5 | \title{Launch Shiny app version of clustifyr,
 6 | may need to run install_clustifyr_app() at first time to install packages}
 7 | \usage{
 8 | run_clustifyr_app()
 9 | }
10 | \value{
11 | instance of shiny app
12 | }
13 | \description{
14 | Launch Shiny app version of clustifyr,
15 | may need to run install_clustifyr_app() at first time to install packages
16 | }
17 | \examples{
18 | \dontrun{
19 | run_clustifyr_app()
20 | }
21 | }
22 | 


--------------------------------------------------------------------------------
/man/run_gsea.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/run_fgsea.R
 3 | \name{run_gsea}
 4 | \alias{run_gsea}
 5 | \title{Run GSEA to compare a gene list(s) to per cell or
 6 | per cluster expression data}
 7 | \usage{
 8 | run_gsea(
 9 |   expr_mat,
10 |   query_genes,
11 |   cluster_ids = NULL,
12 |   n_perm = 1000,
13 |   per_cell = FALSE,
14 |   scale = FALSE,
15 |   no_warnings = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{expr_mat}{single-cell expression matrix or Seurat object}
20 | 
21 | \item{query_genes}{A vector or named list of vectors of genesets of interest
22 | to compare via GSEA. If supplying a named list, then the gene set names
23 | will appear in the output.}
24 | 
25 | \item{cluster_ids}{vector of cell cluster assignments, supplied as a
26 | vector with order that
27 | matches columns in \code{expr_mat}. Not required if running per cell.}
28 | 
29 | \item{n_perm}{Number of permutation for fgsea function. Defaults to 1000.}
30 | 
31 | \item{per_cell}{if true run per cell, otherwise per cluster.}
32 | 
33 | \item{scale}{convert expr_mat into zscores prior to running GSEA?,
34 | default = FALSE}
35 | 
36 | \item{no_warnings}{suppress warnings from gsea ties}
37 | }
38 | \value{
39 | dataframe of gsea scores (pval, NES), with clusters as rownames
40 | }
41 | \description{
42 | Use fgsea algorithm to compute normalized enrichment
43 | scores and pvalues for gene
44 | set overlap
45 | }
46 | 


--------------------------------------------------------------------------------
/man/sce_pbmc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{sce_pbmc}
 4 | \alias{sce_pbmc}
 5 | \title{An example SingleCellExperiment object}
 6 | \usage{
 7 | sce_pbmc()
 8 | }
 9 | \value{
10 | a SingleCellExperiment object populated with data
11 | from the \link{pbmc_matrix_small} scRNA-seq dataset, additionally
12 | annotated with cluster assignments.
13 | }
14 | \description{
15 | An example SingleCellExperiment object
16 | }
17 | 


--------------------------------------------------------------------------------
/man/seurat_meta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{seurat_meta}
 4 | \alias{seurat_meta}
 5 | \alias{seurat_meta.Seurat}
 6 | \title{Function to convert labelled seurat object to fully prepared metadata}
 7 | \usage{
 8 | seurat_meta(seurat_object, ...)
 9 | 
10 | \method{seurat_meta}{Seurat}(seurat_object, dr = "umap", ...)
11 | }
12 | \arguments{
13 | \item{seurat_object}{seurat_object after tsne or
14 | umap projections and clustering}
15 | 
16 | \item{...}{additional arguments}
17 | 
18 | \item{dr}{dimension reduction method}
19 | }
20 | \value{
21 | dataframe of metadata, including dimension reduction plotting info
22 | }
23 | \description{
24 | Function to convert labelled seurat object to fully prepared metadata
25 | }
26 | \examples{
27 | so <- so_pbmc()
28 | m <- seurat_meta(so)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/seurat_ref.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{seurat_ref}
 4 | \alias{seurat_ref}
 5 | \alias{seurat_ref.Seurat}
 6 | \title{Function to convert labelled seurat object to avg expression matrix}
 7 | \usage{
 8 | seurat_ref(seurat_object, ...)
 9 | 
10 | \method{seurat_ref}{Seurat}(
11 |   seurat_object,
12 |   cluster_col = "classified",
13 |   var_genes_only = FALSE,
14 |   assay_name = NULL,
15 |   method = "mean",
16 |   subclusterpower = 0,
17 |   if_log = TRUE,
18 |   ...
19 | )
20 | }
21 | \arguments{
22 | \item{seurat_object}{seurat_object after tsne or umap projections
23 | and clustering}
24 | 
25 | \item{...}{additional arguments}
26 | 
27 | \item{cluster_col}{column name where classified cluster names
28 | are stored in seurat meta data, cannot be "rn"}
29 | 
30 | \item{var_genes_only}{whether to keep only var_genes in the final
31 | matrix output, could also look up genes used for PCA}
32 | 
33 | \item{assay_name}{any additional assay data, such as ADT, to include.
34 | If more than 1, pass a vector of names}
35 | 
36 | \item{method}{whether to take mean (default) or median}
37 | 
38 | \item{subclusterpower}{whether to get multiple averages per
39 | original cluster}
40 | 
41 | \item{if_log}{input data is natural log,
42 | averaging will be done on unlogged data}
43 | }
44 | \value{
45 | reference expression matrix, with genes as row names,
46 | and cell types as column names
47 | }
48 | \description{
49 | Function to convert labelled seurat object to avg expression matrix
50 | }
51 | \examples{
52 | so <- so_pbmc()
53 | ref <- seurat_ref(so, cluster_col = "seurat_clusters")
54 | }
55 | 


--------------------------------------------------------------------------------
/man/so_pbmc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{so_pbmc}
 4 | \alias{so_pbmc}
 5 | \title{An example Seurat object}
 6 | \usage{
 7 | so_pbmc()
 8 | }
 9 | \value{
10 | a Seurat object populated with data
11 | from the \link{pbmc_matrix_small} scRNA-seq dataset, additionally
12 | annotated with cluster assignments.
13 | }
14 | \description{
15 | An example Seurat object
16 | }
17 | 


--------------------------------------------------------------------------------
/man/vector_similarity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_similarity.R
 3 | \name{vector_similarity}
 4 | \alias{vector_similarity}
 5 | \title{Compute similarity between two vectors}
 6 | \usage{
 7 | vector_similarity(vec1, vec2, compute_method, ...)
 8 | }
 9 | \arguments{
10 | \item{vec1}{test vector}
11 | 
12 | \item{vec2}{reference vector}
13 | 
14 | \item{compute_method}{method to run i.e. corr_coef}
15 | 
16 | \item{...}{arguments to pass to compute_method function}
17 | }
18 | \value{
19 | numeric value of desired correlation or distance measurement
20 | }
21 | \description{
22 | Compute the similarity score between two vectors using a
23 | customized scoring function
24 | Two vectors may be from either scRNA-seq or bulk RNA-seq data.
25 | The lengths of vec1 and vec2 must match, and must be arranged in the
26 | same order of genes.
27 | Both vectors should be provided to this function after pre-processing,
28 | feature selection and dimension reduction.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/write_meta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/object_access.R
 3 | \name{write_meta}
 4 | \alias{write_meta}
 5 | \alias{write_meta.Seurat}
 6 | \alias{write_meta.SingleCellExperiment}
 7 | \title{Function to write metadata to object}
 8 | \usage{
 9 | write_meta(object, ...)
10 | 
11 | \method{write_meta}{Seurat}(object, meta, ...)
12 | 
13 | \method{write_meta}{SingleCellExperiment}(object, meta, ...)
14 | }
15 | \arguments{
16 | \item{object}{object after tsne or umap projections
17 | and clustering}
18 | 
19 | \item{...}{additional arguments}
20 | 
21 | \item{meta}{new metadata dataframe}
22 | }
23 | \value{
24 | object with newly inserted metadata columns
25 | }
26 | \description{
27 | Function to write metadata to object
28 | }
29 | \examples{
30 | so <- so_pbmc()
31 | obj <- write_meta(
32 |     object = so,
33 |     meta = seurat_meta(so)
34 | )
35 | sce <- sce_pbmc()
36 | obj <- write_meta(
37 |     object = sce,
38 |     meta = object_data(sce, "meta.data")
39 | )
40 | }
41 | 


--------------------------------------------------------------------------------
/pkgdown/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: http://rnabioco.github.io/clustifyr
 2 | 
 3 | development:
 4 |   mode: auto
 5 | 
 6 | template:
 7 |   package: rbitemplate
 8 |   bootstrap: 5
 9 |   
10 | home:
11 |   links:
12 |   - text: External data sets
13 |     href: http://github.com/rnabioco/clustifyrdata
14 | 
15 | reference:
16 | 
17 | - title: Main functions
18 |   desc:  Classify cell clusters by transcriptome profiles or gene lists, and build references
19 |   contents:
20 |   - starts_with("clustify")
21 |   - average_clusters
22 |   - matrixize_markers
23 |   - starts_with("cor_to_call")
24 |   - call_to_metadata
25 |   - call_consensus
26 | 
27 | - title: Object wrappers
28 |   desc:  Retrieving metadata and reference building from supported object types
29 |   contents:
30 |   - sce_pbmc
31 |   - so_pbmc
32 |   - seurat_meta
33 |   - seurat_ref
34 |   - object_ref
35 |   - object_loc_lookup
36 | 
37 | - title: Plotting
38 |   contents:
39 |   - starts_with("plot")
40 | 
41 | - title: Data sets
42 |   contents:
43 |   - has_concept("data")
44 |   
45 | - title: Shiny
46 |   contents:
47 |   - run_clustifyr_app
48 |   
49 | - title: Utilities
50 |   contents:
51 |   - run_gsea
52 |   - calculate_pathway_gsea
53 |   - starts_with("get")
54 |   - assess_rank_bias
55 |   - binarize_expr
56 |   - downsample_matrix
57 |   - feature_select_PCA
58 |   - file_marker_parse
59 |   - find_rank_bias
60 |   - gmt_to_list
61 |   - make_comb_ref
62 |   - marker_select
63 |   - overcluster_test
64 |   - parse_loc_object
65 |   - pos_neg_marker
66 |   - pos_neg_select
67 |   - query_rank_bias
68 |   - ref_feature_select
69 |   - ref_marker_select
70 |   - reverse_marker_matrix
71 |   - append_genes
72 |   - assign_ident
73 |   - build_atlas
74 |   - calc_similarity
75 |   - calc_distance
76 |   - check_raw_counts
77 |   - collapse_to_cluster
78 |   - compare_lists
79 |   - cosine
80 |   - gene_pct
81 |   - gene_pct_markerm
82 |   - insert_meta_object
83 |   - kl_divergence
84 |   - not_pretty_palette
85 |   - object_data
86 |   - overcluster
87 |   - percent_clusters
88 |   - permute_similarity
89 |   - pretty_palette
90 |   - pretty_palette2
91 |   - pretty_palette_ramp_d
92 |   - vector_similarity
93 |   - write_meta
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rnabioco/clustifyr/d3521c26008fff720aac6b0135218e66fed1bf84/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat) # test framework
2 | library(clustifyr) # package under test
3 | 
4 | test_check("clustifyr") # run the package's testthat suite
5 | 


--------------------------------------------------------------------------------
/tests/testthat/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | 


--------------------------------------------------------------------------------
/tests/testthat/test_gsea.R:
--------------------------------------------------------------------------------
  1 | context("run_gsea")
  2 | 
  3 | # Helper: pass the given expressions to capture.output() and write the captured output to nullfile() (quiets the fgsea progress bar)
  4 | shush <- function(...) {
  5 |   capture.output(..., file = nullfile())
  6 | }
  7 | 
  8 | test_that("output is correctly formatted", { # run_gsea result shape and p-value range
  9 |   data("pbmc_vargenes")
 10 | 
 11 |   shush(
 12 |     res <- run_gsea(
 13 |       pbmc_matrix_small,
 14 |       query_genes = pbmc_vargenes[1:100], # first 100 entries of the variable-gene fixture
 15 |       n_perm = 10,
 16 |       cluster_ids = pbmc_meta$classified,
 17 |       no_warnings = TRUE
 18 |     )
 19 |   )
 20 | 
 21 |   expect_equal(nrow(res), length(unique(pbmc_meta$classified))) # one row per distinct cluster label
 22 |   expect_true(all(res$pval >= 0 & res$pval <= 1)) # every p-value lies in [0, 1]
 23 | })
 24 | 
 25 | test_that("run_gsea checks for matching number of clusters", {
 26 |   data("pbmc_vargenes")
 27 |   expect_error(
 28 |     res <- run_gsea(
 29 |       pbmc_matrix_small,
 30 |       query_genes = pbmc_vargenes[1:100],
 31 |       n_perm = 10,
 32 |       cluster_ids = pbmc_meta$classified[1:3], # only three ids for the full matrix; the call is expected to error
 33 |       no_warnings = TRUE
 34 |     )
 35 |   )
 36 | })
 37 | 
 38 | test_that("run_gsea warns slow runs", {
 39 |   data("pbmc_vargenes")
 40 | 
 41 |   expect_warning(
 42 |     shush(
 43 |       res <- run_gsea(
 44 |         pbmc_matrix_small[, 1:3], # first three columns only
 45 |         query_genes = pbmc_vargenes[1:2],
 46 |         n_perm = 10001, # presumably just above the slow-run threshold; confirm against run_gsea
 47 |         per_cell = TRUE,
 48 |         cluster_ids = pbmc_meta$classified,
 49 |         no_warnings = TRUE # NOTE(review): a warning is still expected with this flag set
 50 |       )
 51 |     )
 52 |   )
 53 | })
 54 | 
 55 | test_that("run_gsea warning suppression", {
 56 |   data("pbmc_vargenes")
 57 |   expect_warning(
 58 |     shush(
 59 |       res <- run_gsea(
 60 |         pbmc_matrix_small[, 1:3],
 61 |         query_genes = pbmc_vargenes[1:2],
 62 |         n_perm = 1, # a single permutation
 63 |         per_cell = TRUE,
 64 |         cluster_ids = pbmc_meta$classified,
 65 |         no_warnings = FALSE # suppression off: a warning is expected to reach the caller
 66 |       )
 67 |     )
 68 |   )
 69 | })
 70 | 
 71 | test_that("calculate_pathway_gsea gives appropriate output", {
 72 |   gl <- list( # two small named gene sets
 73 |     "n" = c("PPBP", "LYZ", "S100A9"),
 74 |     "a" = c("IGLL5", "GNLY", "FTL")
 75 |   )
 76 |   pbmc_avg <- average_clusters( # per-cluster averages keyed by the "classified" column
 77 |     pbmc_matrix_small,
 78 |     pbmc_meta,
 79 |     cluster_col = "classified"
 80 |   )
 81 |   shush(res <- calculate_pathway_gsea(pbmc_avg, gl, scale = TRUE))
 82 | 
 83 |   expect_equal(nrow(res), length(unique(pbmc_meta$classified))) # one row per distinct cluster label
 84 | })
 85 | 
 86 | test_that("plot_pathway_gsea gives appropriate output", {
 87 |   gl <- list( # two small named gene sets
 88 |     "n" = c("PPBP", "LYZ", "S100A9"),
 89 |     "a" = c("IGLL5", "GNLY", "FTL")
 90 |   )
 91 |   pbmc_avg <- average_clusters(
 92 |     pbmc_matrix_small,
 93 |     pbmc_meta,
 94 |     cluster_col = "classified"
 95 |   )
 96 |   shush(g <- plot_pathway_gsea(pbmc_avg, gl, 5))
 97 |   expect_equal(length(g), 2) # the default return value is expected to have two elements
 98 | })
 99 | 
100 | test_that("plot_pathway_gsea gives output depending on returning option", {
101 |   gl <- list( # two small named gene sets
102 |     "n" = c("PPBP", "LYZ", "S100A9"),
103 |     "a" = c("IGLL5", "GNLY", "FTL")
104 |   )
105 |   pbmc_avg <- average_clusters(
106 |     pbmc_matrix_small,
107 |     pbmc_meta,
108 |     cluster_col = "classified"
109 |   )
110 |   shush(g <- plot_pathway_gsea(pbmc_avg, gl, 5, returning = "plot"))
111 |   shush(g2 <- plot_pathway_gsea(pbmc_avg, gl, 5, returning = "res"))
112 |   expect_true(is(g, "Heatmap")) # returning = "plot"; asserted separately so a failure names the mode
113 |   expect_true(is.data.frame(g2)) # returning = "res"
114 | })
115 | 


--------------------------------------------------------------------------------
/tests/testthat/test_list.R:
--------------------------------------------------------------------------------
  1 | context("compare_list")
  2 | # Helper: pass the given expressions to capture.output() and write the captured output to nullfile() (quiets the fgsea progress bar)
  3 | shush <- function(...) {
  4 |   capture.output(..., file = nullfile())
  5 | }
  6 | 
  7 | test_that("warning if matrix is not binarized", {
  8 |   pbmc_mm <- matrixize_markers(pbmc_markers)
  9 |   pbmc_avg <- average_clusters(
 10 |     pbmc_matrix_small,
 11 |     pbmc_meta,
 12 |     cluster_col = "classified"
 13 |   )
 14 |   pbmc_avgb <- binarize_expr(pbmc_avg) # NOTE(review): not used further in this test
 15 |   gene_list_methods <- c("hyper")
 16 |   expect_warning(shush(
 17 |     results <- lapply(
 18 |       gene_list_methods,
 19 |       function(x) {
 20 |         compare_lists(pbmc_avg, pbmc_mm, metric = x) # pbmc_avg (not pbmc_avgb) is passed; this is the input expected to warn
 21 |       }
 22 |     )
 23 |   ))
 24 | })
 25 | 
 26 | 
 27 | test_that("run all gene list functions", { # smoke test: each metric runs on binarized averages
 28 |   pbmc_mm <- matrixize_markers(pbmc_markers)
 29 |   pbmc_avg <- average_clusters(
 30 |     pbmc_matrix_small,
 31 |     pbmc_meta,
 32 |     cluster_col = "classified"
 33 |   )
 34 |   pbmc_avgb <- binarize_expr(pbmc_avg)
 35 |   gene_list_methods <- c("spearman", "hyper", "jaccard", "gsea")
 36 |   shush(
 37 |     results <- lapply(
 38 |       gene_list_methods,
 39 |       function(x) {
 40 |         compare_lists(pbmc_avgb, pbmc_mm, metric = x)
 41 |       }
 42 |     )
 43 |   )
 44 | 
 45 |   expect_equal(4, length(results)) # one result per metric; contents are not inspected
 46 | })
 47 | 
 48 | test_that("output intersected genes with details_out option with hyper/jaccard", {
 49 |   pbmc_mm <- matrixize_markers(pbmc_markers)
 50 |   pbmc_avg <- average_clusters(
 51 |     pbmc_matrix_small,
 52 |     pbmc_meta,
 53 |     cluster_col = "classified"
 54 |   )
 55 |   pbmc_avgb <- binarize_expr(pbmc_avg)
 56 |   gene_list_methods <- c("hyper", "jaccard")
 57 |   shush(
 58 |     results <- lapply(
 59 |       gene_list_methods,
 60 |       function(x) {
 61 |         compare_lists(pbmc_avgb, pbmc_mm, metric = x, details_out = TRUE)
 62 |       }
 63 |     )
 64 |   )
 65 | 
 66 |   expect_equal(2, length(results)) # one result per metric; the details themselves are not inspected
 67 | })
 68 | 
 69 | test_that("gene list function options", {
 70 |   pbmc_mm <- matrixize_markers(pbmc_markers)
 71 |   pbmc_avg <- average_clusters(
 72 |     pbmc_matrix_small,
 73 |     pbmc_meta,
 74 |     cluster_col = "classified"
 75 |   )
 76 |   pbmc_avgb <- binarize_expr(pbmc_avg)
 77 |   expect_error(suppressWarnings( # warnings are silenced; only the error is asserted
 78 |     res <- compare_lists(
 79 |       pbmc_avgb,
 80 |       pbmc_mm,
 81 |       metric = "hyper",
 82 |       output_high = FALSE, # NOTE(review): this option combination is expected to error; the reason is not visible here
 83 |       n = 5
 84 |     )
 85 |   ))
 86 | })
 87 | 
 88 | test_that("run all gene list functions in clustify_lists", { # smoke test: each metric runs through clustify_lists
 89 |   gene_list_methods <- c("spearman", "hyper", "jaccard", "gsea")
 90 |   shush(
 91 |     results <- lapply(
 92 |       gene_list_methods,
 93 |       function(x) {
 94 |         clustify_lists(
 95 |           pbmc_matrix_small,
 96 |           per_cell = FALSE,
 97 |           metadata = pbmc_meta,
 98 |           cluster_col = "classified",
 99 |           marker = pbmc_markers,
100 |           marker_inmatrix = FALSE,
101 |           metric = x
102 |         )
103 |       }
104 |     )
105 |   )
106 | 
107 |   expect_equal(4, length(results)) # one result per metric; contents are not inspected
108 | })
109 | 
110 | test_that("gsea outputs in cor matrix format", {
111 |   shush(
112 |     res <- clustify_lists(
113 |       pbmc_matrix_small,
114 |       per_cell = FALSE,
115 |       metadata = pbmc_meta,
116 |       cluster_col = "classified",
117 |       marker = pbmc_markers,
118 |       marker_inmatrix = FALSE,
119 |       metric = "gsea"
120 |     )
121 |   )
122 | 
123 |   res2 <- cor_to_call(res) # the gsea result must be accepted by cor_to_call
124 | 
125 |   expect_equal(9, nrow(res2)) # nine rows expected (presumably one per cluster in the fixture)
126 | })
127 | 
128 | so <- so_pbmc() # shared fixture for the object-based tests below (presumably a Seurat object)
129 | test_that("seurat3 object clustify_lists-ing", {
130 |   res <- clustify_lists( # NOTE(review): this result is overwritten below without being asserted
131 |     so,
132 |     per_cell = FALSE,
133 |     marker = pbmc_markers,
134 |     marker_inmatrix = FALSE,
135 |     metric = "jaccard",
136 |     cluster_col = "seurat_clusters",
137 |     seurat_out = TRUE,
138 |     dr = "tsne"
139 |   )
140 |   res <- clustify_lists( # same call with seurat_out = FALSE; this result is passed to plot_best_call
141 |     so,
142 |     per_cell = FALSE,
143 |     marker = pbmc_markers,
144 |     marker_inmatrix = FALSE,
145 |     metric = "jaccard",
146 |     cluster_col = "seurat_clusters",
147 |     seurat_out = FALSE,
148 |     dr = "tsne"
149 |   )
150 |   g <- plot_best_call(
151 |     res,
152 |     seurat_meta(so, dr = "tsne"), # metadata taken from the object via seurat_meta
153 |     cluster_col = "seurat_clusters",
154 |     plot_r = TRUE,
155 |     x = "tSNE_1",
156 |     y = "tSNE_2"
157 |   )
158 |   expect_true(ggplot2::is.ggplot(g[[1]])) # with plot_r = TRUE the first element of the result is checked
159 | })
160 | 
161 | test_that("clustify_lists inserts seurat3 metadata correctly", {
162 |   res <- clustify_lists( # cluster-level call; this result is asserted below
163 |     so,
164 |     per_cell = FALSE,
165 |     marker = pbmc_markers,
166 |     marker_inmatrix = FALSE,
167 |     metric = "jaccard",
168 |     cluster_col = "seurat_clusters",
169 |     seurat_out = TRUE,
170 |     dr = "tsne"
171 |   )
172 |   res2 <- clustify_lists( # per-cell call; NOTE(review): only checked for running without error
173 |     so,
174 |     per_cell = TRUE,
175 |     marker = pbmc_markers,
176 |     marker_inmatrix = FALSE,
177 |     metric = "jaccard",
178 |     cluster_col = "seurat_clusters",
179 |     seurat_out = TRUE,
180 |     dr = "tsne"
181 |   )
182 |   if ("SeuratObject" %in% loadedNamespaces()) {
183 |     expect_true(is(res, "Seurat")) # is() gives one TRUE/FALSE even if class(res) has several elements
184 |   } else {
185 |     expect_true(is.matrix(res)) # without SeuratObject loaded a plain matrix is expected
186 |   }
187 | })
188 | 
189 | test_that("run all gene list functions and then use consensus_call", {
190 |   pbmc_mm <- matrixize_markers(pbmc_markers)
191 |   pbmc_avg <- average_clusters(
192 |     pbmc_matrix_small,
193 |     pbmc_meta,
194 |     cluster_col = "classified"
195 |   )
196 |   pbmc_avgb <- binarize_expr(pbmc_avg)
197 |   gene_list_methods <- c("spearman", "hyper", "jaccard", "gsea")
198 |   shush(
199 |     results <- lapply(
200 |       gene_list_methods,
201 |       function(x) {
202 |         compare_lists(pbmc_avgb, pbmc_mm, metric = x)
203 |       }
204 |     )
205 |   )
206 |   call_list <- lapply( # convert each metric's result with cor_to_call_rank
207 |     results,
208 |     cor_to_call_rank
209 |   )
210 |   calls <- call_consensus(call_list) # NOTE(review): the consensus output itself is not asserted
211 |   expect_equal(4, length(results)) # only the number of per-metric results is checked
212 | })
213 | 
214 | test_that("clustify_lists consensus metric with matrix input", { # renamed: the old description duplicated an earlier test
215 |   res <- clustify_lists(
216 |     pbmc_matrix_small,
217 |     cbmc_m, # marker input passed positionally
218 |     metadata = pbmc_meta,
219 |     cluster_col = "classified",
220 |     metric = "consensus"
221 |   )
222 |   expect_equal(9, nrow(res)) # nine rows expected (presumably one per cluster in the fixture)
223 | })
224 | 
225 | test_that("run all gene list functions in clustify_lists and seurat object", {
226 |   res <- clustify_lists(
227 |     so,
228 |     marker = cbmc_m,
229 |     dr = "tsne",
230 |     cluster_col = "seurat_clusters",
231 |     metric = "consensus",
232 |     seurat_out = TRUE
233 |   )
234 |   expect_true(is.data.frame(res) || is(res, "Seurat")) # scalar ||; accepts a data frame or a Seurat object
235 | })
236 | 
237 | test_that("lists of genes will work with posneg", {
238 |   lst_of_markers <- split(pbmc_markers$gene, pbmc_markers$cluster) # list of gene vectors keyed by cluster
239 |   res <- clustify_lists(
240 |     input = pbmc_matrix_small,
241 |     per_cell = FALSE,
242 |     cluster_col = "classified",
243 |     metadata = pbmc_meta,
244 |     marker = lst_of_markers,
245 |     marker_inmatrix = TRUE,
246 |     metric = "posneg",
247 |     seurat_out = FALSE
248 |   )
249 |   expect_true(ncol(res) == length(lst_of_markers)) # one result column per marker list
250 | })
251 | 
252 | test_that("clustify_lists input_markers mode", {
253 |   pbmc_mm <- matrixize_markers(pbmc_markers)
254 |   pbmc_input_mm <- pos_neg_marker(pbmc_mm[1:3, ]) # built from the first three rows of the marker matrix
255 |   results <- lapply(
256 |     c("hyper", "spearman"),
257 |     function(x) {
258 |       clustify_lists(pbmc_input_mm, pbmc_mm, metric = x, input_markers = TRUE)
259 |     }
260 |   )
261 |   expect_equal(2, length(results)) # one result per metric; contents are not inspected
262 | })
263 | 
264 | test_that("clustify_lists input_markers mode with uneven number of marker per cluster", {
265 |   pbmc_mm <- matrixize_markers(pbmc_markers)
266 |   pbmc_input_mm <- pos_neg_marker(pbmc_mm[1:3, ]) # built from the first three rows of the marker matrix
267 |   results <- lapply(
268 |     c("jaccard"),
269 |     function(x) {
270 |       clustify_lists(
271 |         pbmc_input_mm,
272 |         split(pbmc_markers$gene, pbmc_markers$cluster), # list of gene vectors keyed by cluster
273 |         metric = x,
274 |         input_markers = TRUE
275 |       )
276 |     }
277 |   )
278 |   expect_equal(1, length(results)) # single metric, single result; contents are not inspected
279 | })
280 | 


--------------------------------------------------------------------------------
/tests/testthat/test_plots.R:
--------------------------------------------------------------------------------
  1 | context("plotting")
  2 | 
  3 | res <- clustify( # file-level result reused by the tests below
  4 |   input = pbmc_matrix_small,
  5 |   metadata = pbmc_meta,
  6 |   ref_mat = cbmc_ref,
  7 |   query_genes = pbmc_vargenes,
  8 |   cluster_col = "classified"
  9 | )
 10 | 
 11 | res2 <- clustify( # same call with per_cell = TRUE
 12 |   input = pbmc_matrix_small,
 13 |   metadata = pbmc_meta,
 14 |   ref_mat = cbmc_ref,
 15 |   query_genes = pbmc_vargenes,
 16 |   cluster_col = "classified",
 17 |   per_cell = TRUE
 18 | )
 19 | 
 20 | test_that("plots can be generated", {
 21 |   plts <- plot_best_call(res, pbmc_meta, cluster_col = "classified")
 22 |   expect_true(ggplot2::is.ggplot(plts))
 23 |   expect_true(ggplot2::is.ggplot(plot_dims(pbmc_meta))) # plot_dims output was previously computed but never asserted
 24 | })
 25 | 
 26 | test_that("plot_best_call warns about colnames", {
 27 |   pbmc_meta2 <- pbmc_meta
 28 |   pbmc_meta2$type <- 1 # add a column named "type" (presumably clashes with a name plot_best_call uses; confirm)
 29 |   expect_warning(plts <- plot_best_call(res, pbmc_meta2))
 30 | })
 31 | 
 32 | test_that("call plots can be generated", {
 33 |   plts <- plot_cor(
 34 |     res,
 35 |     pbmc_meta,
 36 |     cluster_col = "classified"
 37 |   )
 38 | 
 39 |   expect_error( # an unrecognised data_to_plot value is expected to error
 40 |     plts <- plot_cor(
 41 |       res,
 42 |       pbmc_meta,
 43 |       data_to_plot = "nonsense",
 44 |       cluster_col = "classified"
 45 |     )
 46 |   )
 47 | 
 48 |   expect_true(is.list(plts)) # plts still holds the first call's result; the erroring call never assigns
 49 |   expect_true(ggplot2::is.ggplot(plts[[1]]))
 50 | })
 51 | 
 52 | test_that("plot_cor for all clusters by default", {
 53 |   plts <- plot_cor(
 54 |     res,
 55 |     pbmc_meta,
 56 |     cluster_col = "classified",
 57 |     x = "UMAP_1",
 58 |     y = "UMAP_2"
 59 |   )
 60 | 
 61 |   plts2 <- plot_cor( # per-cell result; NOTE(review): only checked for running without error
 62 |     res2,
 63 |     pbmc_meta %>% tibble::rownames_to_column("rn"), # row names moved into an "rn" column
 64 |     cluster_col = "rn",
 65 |     x = "UMAP_1",
 66 |     y = "UMAP_2"
 67 |   )
 68 | 
 69 |   expect_true(length(plts) == ncol(cbmc_ref)) # one plot per column of the reference matrix
 70 | })
 71 | 
 72 | test_that("plot_cor works with scale_legends option", {
 73 |   plts <- plot_cor(
 74 |     res,
 75 |     pbmc_meta,
 76 |     cluster_col = "classified",
 77 |     scale_legends = TRUE
 78 |   )
 79 | 
 80 |   plts2 <- plot_cor( # numeric scale_legends; NOTE(review): only checked for running without error
 81 |     res,
 82 |     pbmc_meta,
 83 |     cluster_col = "classified",
 84 |     scale_legends = c(0, 1)
 85 |   )
 86 |   expect_true(length(plts) == ncol(cbmc_ref)) # one plot per column of the reference matrix
 87 | })
 88 | 
 89 | test_that("plot_gene can handle strange and normal genenames", {
 90 |   genes <- c(
 91 |     "RP11-314N13.3", # name containing a hyphen and a dot
 92 |     "ARF4"
 93 |   )
 94 |   plts <- plot_gene(
 95 |     pbmc_matrix_small,
 96 |     pbmc_meta %>% tibble::rownames_to_column("rn"), # row names moved into an "rn" column
 97 |     genes = genes,
 98 |     cell_col = "rn"
 99 |   )
100 | 
101 |   expect_true(is.list(plts))
102 |   expect_true(all(vapply(plts, ggplot2::is.ggplot, FUN.VALUE = logical(1)))) # every element is a ggplot
103 | })
104 | 
105 | test_that("plot_gene automatically plots all cells", {
106 |   genes <- c("ZYX")
107 |   expect_error( # the error must come from plot_gene rejecting the unknown cell_col
108 |     plts <- plot_gene(
109 |       pbmc_matrix_small,
110 |       pbmc_meta %>% tibble::rownames_to_column("rn"), # valid metadata; previously this argument itself errored
111 |       genes = genes,
112 |       cell_col = "nonsense"
113 |     )
114 |   )
115 | 
116 |   plts <- plot_gene(pbmc_matrix_small, pbmc_meta, genes = genes) # no cell_col supplied
117 | 
118 |   expect_true(all(vapply(plts, ggplot2::is.ggplot, FUN.VALUE = logical(1)))) # every element is a ggplot
119 | })
120 | 
121 | test_that("plot_best_call threshold works as intended, on per cell and collapsing", {
122 |   res <- clustify( # local per-cell result; shadows the file-level res inside this test
123 |     input = pbmc_matrix_small,
124 |     metadata = pbmc_meta,
125 |     ref_mat = cbmc_ref,
126 |     query_genes = pbmc_vargenes,
127 |     cluster_col = "classified",
128 |     per_cell = TRUE
129 |   )
130 |   call1 <- plot_best_call(
131 |     res,
132 |     metadata = pbmc_meta,
133 |     per_cell = TRUE,
134 |     collapse_to_cluster = "classified",
135 |     threshold = 0.3
136 |   )
137 | 
138 |   expect_true(ggplot2::is.ggplot(call1)) # only the return type is checked, not the threshold's effect
139 | })
140 | 
141 | test_that("plot_gene checks for presence of gene name", {
142 |   expect_message(plot_gene( # a message is expected (presumably because ZFP36L3 is absent from the matrix; confirm)
143 |     pbmc_matrix_small,
144 |     pbmc_meta %>% tibble::rownames_to_column("rn"),
145 |     c("INIP", "ZFP36L3"),
146 |     cell_col = "rn",
147 |     do_label = TRUE,
148 |     do_legend = FALSE,
149 |     x = "UMAP_1",
150 |     y = "UMAP_2"
151 |   ))
152 |   expect_error(expect_warning(plot_gene( # requesting only ZFP36L3 is expected to end in an error
153 |     pbmc_matrix_small,
154 |     pbmc_meta %>% tibble::rownames_to_column("rn"),
155 |     c("ZFP36L3"),
156 |     cell_col = "rn",
157 |     x = "UMAP_1",
158 |     y = "UMAP_2"
159 |   )))
160 | })
161 | 
162 | test_that("plot_cor_heatmap returns a ggplot object", {
163 |   res <- clustify( # local cluster-level result; shadows the file-level res inside this test
164 |     input = pbmc_matrix_small,
165 |     metadata = pbmc_meta,
166 |     ref_mat = cbmc_ref,
167 |     query_genes = pbmc_vargenes,
168 |     cluster_col = "classified",
169 |     per_cell = FALSE
170 |   )
171 |   g <- plot_cor_heatmap(res)
172 |   expect_true(is(g, "Heatmap")) # NOTE(review): the assertion checks for class "Heatmap", not ggplot as the description says
173 | })
174 | 
175 | test_that("plot_call works on defaults", {
176 |   g <- plot_call(res, pbmc_meta, cluster_col = "classified") # uses the file-level res
177 | 
178 |   expect_true(ggplot2::is.ggplot(g[[1]])) # only the first element of the result is checked
179 | })
180 | 
181 | test_that("plot_dims works with alpha_col", {
182 |   pbmc_meta2 <- pbmc_meta
183 |   pbmc_meta2$al <- 0 # "al" column: 0 for every row ...
184 |   pbmc_meta2$al[1] <- 1 # ... except the first, which is set to 1
185 |   g <- plot_dims(
186 |     pbmc_meta2,
187 |     feature = "classified",
188 |     alpha_col = "al",
189 |     do_legend = FALSE
190 |   )
191 |   g2 <- plot_dims( # labelled/repelled variant; NOTE(review): only checked for running without error
192 |     pbmc_meta2,
193 |     feature = "classified",
194 |     alpha_col = "al",
195 |     do_legend = FALSE,
196 |     do_repel = TRUE,
197 |     do_label = TRUE
198 |   )
199 |   expect_true(ggplot2::is.ggplot(g))
200 | })
201 | 
202 | test_that("plot_dims works with group_col", {
203 |   pbmc_meta2 <- pbmc_meta
204 |   pbmc_meta2$al <- 1 # "al" column: 1 for every row ...
205 |   pbmc_meta2$al[1:1500] <- 0 # ... except the first 1500, which are set to 0
206 |   pbmc_meta2$b <- pbmc_meta2$classified # grouping column is a copy of the cluster labels
207 |   g <- plot_dims( # NOTE(review): this plot is only checked for running without error
208 |     pbmc_meta2,
209 |     feature = "classified",
210 |     group_col = "b",
211 |     do_legend = FALSE,
212 |     do_repel = TRUE,
213 |     do_label = FALSE
214 |   )
215 | 
216 |   g2 <- plot_dims( # group_col combined with alpha_col and labels
217 |     pbmc_meta2,
218 |     feature = "classified",
219 |     alpha_col = "al",
220 |     group_col = "b",
221 |     do_legend = FALSE,
222 |     do_repel = TRUE,
223 |     do_label = TRUE
224 |   )
225 |   expect_true(ggplot2::is.ggplot(g2))
226 | })
227 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/geo-annotations.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Improving NCBI GEO submissions of scRNA-seq data"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{geo-annotations}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 |     collapse = TRUE,
13 |     comment = "#>"
14 | )
15 | ```
16 | 
17 | ## Code and full report
18 | https://github.com/rnabioco/someta
19 | 
20 | ## The problem
21 | 
22 | To improve reproducibility and usability, we suggest the Gene Expression Omnibus (GEO) improve its submission guidelines for processed data stemming from single-cell RNA sequencing. 
23 | 
24 | Current GEO guidelines require that a count matrix is included with a [submission](https://www.ncbi.nlm.nih.gov/geo/info/seq.html). But because the current GEO requirements surrounding supplemental files are vague, many studies do not include downstream analysis results (e.g., cluster assignments, UMAP/t-SNE projection coordinates, and cell type classification metadata), which vary due to algorithm parameter settings and non-deterministic steps in the processing pipeline. 
25 | 
26 | For single-cell RNA-seq, cell type assignments are not labeled by sample names, as would be the case in bulk RNA-seq data. Hence, a count matrix is simply an intermediate file format, similar to other intermediate formats for bulk RNA-seq experiments (BAM/SAM/BED), which also do not enable reproduction of conclusions. This is counter to GEO’s own stance on processed data, “defined as the data on which the conclusions in the related manuscript are based”. The lack of a clear GEO guideline leads to reproducibility issues, as studies that only provide count matrices require significant domain-specific expertise to associate per-cell mRNA counts with a cell type or phenotype. 
27 | 
28 | We suggest that in addition to count matrices, a metadata table containing relevant information (e.g., sample, clustering, cell-type, pseudo-time information) should be a required component of single-cell RNA-seq submissions. These metadata tables are already generated by commonly used single-cell RNA-seq analysis workflows (Seurat, SingleCellExperiment, scanpy) and could easily be included with other submission components.
29 | 
30 | For example, we compare the usability and reproducibility of two single-cell mRNA-seq submissions in GEO:
31 | 
32 | GEO accession GSE137710 contains a metadata file for each sample (e.g., “GSE137710_human_melanoma_cell_metadata_9315x14.tsv.gz”), enabling rapid downstream analyses to find expression patterns and markers for newly described cell types.
33 | 
34 | In contrast, GEO accession GSE124494 only includes count matrix information, requiring a non-expert to recreate (i.e., guess) the published associations between cell-type and gene expression. This is also a particularly challenging example as the associated publication used now-outdated versions of Seurat and its integration algorithm, requiring even more work to reproduce. This metadata file was certainly created and could have easily been attached to the submission.
35 | 
36 | ## Our proposal
37 | 
38 | Moving forward, we suggest two remedies to this issue:
39 | 
40 | 1. Updating the GEO submission guidelines to require metadata file submissions. This would be done on https://www.ncbi.nlm.nih.gov/geo/info/seq.html by updating the “Processed data files” with a section specifically outlining required data types for single-cell mRNA-seq submissions.
41 | 
 42 |     > For single-cell mRNA-seq data, in addition to standard count matrices (genes-by-cells), we expect users to deposit metadata annotations generated during the course of analysis.
 43 | 
 44 | 2. Encouraging previous depositors of single-cell mRNA-seq data to update their GEO records with metadata, if it was not included in the original submission.
45 | 
46 | # Session info
47 | 
48 | ```{r}
49 | sessionInfo()
50 | ```
51 | 


--------------------------------------------------------------------------------