├── .github ├── .gitignore ├── .DS_Store ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ └── rworkflows.yml ├── inst ├── .DS_Store ├── hex │ ├── hex.png │ ├── 2BIgqrmzc0GSaaAGOqSeAu.jpg │ └── hexSticker.Rmd ├── extdata │ ├── tt_results.rda │ └── bootstrap_results.rda ├── CITATION └── cit │ └── EWCE.bib ├── tests ├── testthat.R └── testthat │ ├── test-list_species.R │ ├── test-check_percent_hits.R │ ├── test-filter_ctd_genes.R │ ├── test-bin_columns_into_quantiles.r │ ├── test-ewce_plot.r │ ├── test-generate_celltype_data.r │ ├── test-run_DGE.R │ ├── test-DelayedArray.R │ └── test-get_celltype_table.r ├── R ├── cells_in_ctd.R ├── check_mtc_method.R ├── message_parallel.R ├── max_ctd_depth.R ├── is_matrix.R ├── is_32bit.R ├── messager.R ├── is_celltypedataset.R ├── is_delayed_array.R ├── myScalesComma.R ├── rNorm.R ├── check_numeric.R ├── check_nas.R ├── read_ctd.R ├── package.R ├── load_rdata.R ├── is_sparse_matrix.R ├── list_species.R ├── dt_to_df.R ├── check_group_name.R ├── is_ctd_standardised.R ├── check_annotLevels.R ├── report_dge.R ├── convert_new_ewce_to_old.r ├── get_ctd_levels.R ├── check_ewce_expression_data_args.R ├── to_delayed_array.R ├── drop_nonexpressed_genes.R ├── cell_list_dist.r ├── filter_genes_without_1to1_homolog.r ├── run_limma.r ├── get_sig_results.R ├── create_quadrants.R ├── theme_graph.R ├── to_dataframe.R ├── to_sparse_matrix.R ├── calculate_specificity_for_level.R ├── get_ctd_matrix_names.R ├── drop_nonexpressed_cells.R ├── sct_normalize.R ├── create_list_network.R ├── fix_celltype_names.R ├── fix_celltype_names_full_results.R ├── report_results.R ├── check_full_results.R ├── filter_ctd_genes.R ├── sce_merged_apply.R ├── delayedarray_normalize.R ├── sce_lists_apply.R ├── get_summed_proportions_iterate.R ├── check_species.R ├── prepare_tt.R ├── assign_cores.r ├── run_deseq2.R ├── bin_specificity_into_quantiles.r ├── calculate_meanexp_for_level.R ├── check_generate_controlled_bootstrapped_geneset.R ├── 
compute_gene_counts.R ├── check_bootstrap_args.R ├── convert_old_ewce_to_new.r ├── check_sce.R ├── plot_with_bootstrap_distributions.R ├── prep_dendro.r ├── bin_columns_into_quantiles.r ├── check_args_for_bootstrap_plot_generation.R ├── run_mast.r ├── filter_variance_quantiles.r ├── get_exp_data_for_bootstrapped_genes.R ├── plot_ctd.R ├── check_controlled_args.R ├── example_bootstrap_results.R ├── ctd_to_sce.R ├── example_transcriptome_results.R └── check_percent_hits.R ├── man ├── is_32bit.Rd ├── check_nas.Rd ├── is_matrix.Rd ├── theme_graph.Rd ├── sce_merged_apply.Rd ├── dt_to_df.Rd ├── message_parallel.Rd ├── messager.Rd ├── check_sce.Rd ├── check_numeric.Rd ├── is_celltypedataset.Rd ├── is_delayed_array.Rd ├── is_sparse_matrix.Rd ├── create_list_network.Rd ├── max_ctd_depth.Rd ├── myScalesComma.Rd ├── report_results.Rd ├── to_dataframe.Rd ├── prep.dendro.Rd ├── sce_lists_apply.Rd ├── calculate_meanexp_for_level.Rd ├── check_group_name.Rd ├── list_species.Rd ├── is_ctd_standardised.Rd ├── drop_nonexpressed_genes.Rd ├── assign_cores.Rd ├── prep_dendro.Rd ├── to_sparse_matrix.Rd ├── compute_gene_counts.Rd ├── load_rdata.Rd ├── check_annotLevels.Rd ├── check_full_results.Rd ├── convert_new_ewce_to_old.Rd ├── to_delayed_array.Rd ├── drop_nonexpressed_cells.Rd ├── get_ctd_levels.Rd ├── plot_with_bootstrap_distributions.Rd ├── plot_log_bootstrap_distributions.Rd ├── bootstrap_plots_for_transcriptome.Rd ├── report_dge.Rd ├── sct_normalize.Rd ├── cell_list_dist.Rd ├── filter_ctd_genes.Rd ├── fix_celltype_names_full_results.Rd ├── get_ctd_matrix_names.Rd ├── calculate_specificity_for_level.Rd ├── ctd_to_sce.Rd ├── get_sig_results.Rd ├── convert_old_ewce_to_new.Rd ├── delayedarray_normalize.Rd ├── plot_ctd.Rd ├── filter_variance_quantiles.Rd ├── get_celltype_table.Rd ├── check_ewce_expression_data_args.Rd ├── fix_celltype_names.Rd ├── merge_sce_list.Rd ├── check_generate_controlled_bootstrap_geneset.Rd ├── get_exp_data_for_bootstrapped_genes.Rd ├── check_bootstrap_args.Rd 
├── check_percent_hits.Rd ├── check_species.Rd ├── bin_specificity_into_quantiles.Rd ├── bin_columns_into_quantiles.Rd ├── compute_gene_scores.Rd ├── run_limma.Rd ├── bootstrap_plot.Rd ├── run_mast.Rd ├── generate_controlled_bootstrap_geneset.Rd ├── filter_genes_without_1to1_homolog.Rd ├── prepare_tt.Rd ├── check_controlled_args.Rd ├── example_bootstrap_results.Rd ├── run_deseq2.Rd ├── example_transcriptome_results.Rd ├── add_res_to_merging_list.Rd ├── EWCE-package.Rd ├── check_args_for_bootstrap_plot_generation.Rd ├── create_background_multilist.Rd ├── fix_bad_hgnc_symbols.Rd ├── ewce_plot.Rd ├── merged_ewce.Rd ├── merge_sce.Rd ├── merge_two_expfiles.Rd ├── prepare_genesize_control_network.Rd ├── fix_bad_mgi_symbols.Rd └── merge_ctd.Rd ├── .Rbuildignore ├── .gitignore └── DESCRIPTION /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /inst/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanSkene/EWCE/HEAD/inst/.DS_Store -------------------------------------------------------------------------------- /.github/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanSkene/EWCE/HEAD/.github/.DS_Store -------------------------------------------------------------------------------- /inst/hex/hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanSkene/EWCE/HEAD/inst/hex/hex.png -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(EWCE) 3 | 4 | test_check("EWCE") 5 | 
-------------------------------------------------------------------------------- /inst/extdata/tt_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanSkene/EWCE/HEAD/inst/extdata/tt_results.rda -------------------------------------------------------------------------------- /inst/extdata/bootstrap_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanSkene/EWCE/HEAD/inst/extdata/bootstrap_results.rda -------------------------------------------------------------------------------- /inst/hex/2BIgqrmzc0GSaaAGOqSeAu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanSkene/EWCE/HEAD/inst/hex/2BIgqrmzc0GSaaAGOqSeAu.jpg -------------------------------------------------------------------------------- /R/cells_in_ctd.R: -------------------------------------------------------------------------------- 1 | #' Check whether cells are in a CellTypeDataset level 2 | #' 3 | #' Check whether every entry of \code{cells} is a column name of the 4 | #' \code{specificity} matrix in \code{ctdIN}. 5 | #' 6 | #' @param ctdIN Object with a \code{specificity} matrix 7 | #' (presumably one level of a CellTypeDataset). 8 | #' @param cells Vector of names to look up. 9 | #' 10 | #' @return 1 if all \code{cells} are present, otherwise 0. 11 | #' 12 | #' @keywords internal 13 | cells_in_ctd <- function(ctdIN, 14 | cells) { 15 | # Count the missing cells first, then compare that count to zero. 16 | # With "== 0" inside sum() the test passed whenever any cell matched. 17 | n_missing <- sum(!cells %in% colnames(ctdIN$specificity)) 18 | if (n_missing == 0) { 19 | return(1) 20 | } else { 21 | return(0) 22 | } 23 | } -------------------------------------------------------------------------------- /tests/testthat/test-list_species.R: -------------------------------------------------------------------------------- 1 | test_that("list_species works", { 2 | if (!is_32bit()) { 3 | species <- EWCE::list_species() 4 | testthat::expect_true(methods::is(species, "data.frame")) 5 | testthat::expect_gte(nrow(species), 21) 6 | } 7 | }) 8 | -------------------------------------------------------------------------------- /R/check_mtc_method.R: -------------------------------------------------------------------------------- 1 | check_mtc_method <- function(mtc_method){ 2 | err_msg <- paste0( 3 | "ERROR: Invalid mtc_method argument. Please see", 4 | " '?p.adjust' for valid methods." 
5 | ) 6 | if (!mtc_method %in% c( 7 | stats::p.adjust.methods 8 | )) { 9 | stop(err_msg) 10 | } 11 | } -------------------------------------------------------------------------------- /R/message_parallel.R: -------------------------------------------------------------------------------- 1 | #' Print messages 2 | #' 3 | #' Print messages even from within parallelised functions. 4 | #' 5 | #' @param ... Message input. 6 | #' 7 | #' @return Null output. 8 | #' 9 | #' @keywords internal 10 | message_parallel <- function(...) { 11 | system(sprintf('echo "%s"', paste0(..., collapse = ""))) 12 | } 13 | -------------------------------------------------------------------------------- /R/max_ctd_depth.R: -------------------------------------------------------------------------------- 1 | #' Get max CTD depth 2 | #' 3 | #' Get the maximum level depth from a list of CellTypeDataset objects. 4 | #' 5 | #' @param CTD_list A list of CellTypeDataset objects. 6 | #' 7 | #' @return integer 8 | #' 9 | #' @keywords internal 10 | max_ctd_depth <- function(CTD_list) { 11 | max(unlist(lapply(CTD_list, length))) 12 | } 13 | -------------------------------------------------------------------------------- /R/is_matrix.R: -------------------------------------------------------------------------------- 1 | #' Assess whether an object is a Matrix 2 | #' 3 | #' Assess whether an object is a Matrix or one of 4 | #' its derived object types. 5 | #' 6 | #' @param X Object. 7 | #' 8 | #' @return boolean 9 | #' 10 | #' @importFrom methods is 11 | is_matrix <- function(X) { 12 | methods::is(X, "Matrix") || methods::is(X, "matrix") 13 | } 14 | -------------------------------------------------------------------------------- /R/is_32bit.R: -------------------------------------------------------------------------------- 1 | #' Checks whether OS is a 32-bit Windows 2 | #' 3 | #' Helper function to avoid duplicate test runs on Windows OS. 
4 | #' 5 | #' @return boolean 6 | #' 7 | #' @keywords internal 8 | is_32bit <- function() { 9 | is_32bit_windows <- .Platform$OS.type == "windows" && 10 | .Platform$r_arch == "i386" 11 | return(is_32bit_windows) 12 | } 13 | -------------------------------------------------------------------------------- /R/messager.R: -------------------------------------------------------------------------------- 1 | #' Print messages 2 | #' 3 | #' Print messages with option to silence. 4 | #' 5 | #' @param ... Message input. 6 | #' @param v Whether to print messages. 7 | #' 8 | #' @return Null output. 9 | #' 10 | #' @keywords internal 11 | messager <- function(..., v = TRUE) { 12 | if (v) { 13 | msg <- paste(...) 14 | message(msg) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /man/is_32bit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_32bit.R 3 | \name{is_32bit} 4 | \alias{is_32bit} 5 | \title{Checks whether OS is a 32-bit Windows} 6 | \usage{ 7 | is_32bit() 8 | } 9 | \value{ 10 | boolean 11 | } 12 | \description{ 13 | Helper function to avoid duplicate test runs on Windows OS. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /R/is_celltypedataset.R: -------------------------------------------------------------------------------- 1 | #' Check whether object is a CellTypeDataset 2 | #' 3 | #' Check whether an object is a CellTypeDataset. 4 | #' 5 | #' @param ctd Object. 
6 | #' 7 | #' @return boolean 8 | #' 9 | #' @keywords internal 10 | is_celltypedataset <- function(ctd) { 11 | (!is.function(ctd)) && 12 | all(c("annot", "mean_exp", "specificity") %in% names(ctd[[1]])) 13 | } 14 | -------------------------------------------------------------------------------- /R/is_delayed_array.R: -------------------------------------------------------------------------------- 1 | #' Assess whether an object is a DelayedArray. 2 | #' 3 | #' Assess whether an object is a DelayedArray or one of 4 | #' its derived object types. 5 | #' 6 | #' @param X Object. 7 | #' 8 | #' @return boolean 9 | #' 10 | #' @importFrom methods is 11 | is_delayed_array <- function(X) { 12 | methods::is(X, "DelayedMatrix") | 13 | methods::is(X, "DelayedArray") 14 | } 15 | -------------------------------------------------------------------------------- /R/myScalesComma.R: -------------------------------------------------------------------------------- 1 | #' \code{myScalesComma} 2 | #' 3 | #' Adjusts \pkg{ggplot2} label display. See \link[scales]{comma} for details. 4 | #' Support function for \link[EWCE]{plot_log_bootstrap_distributions}. 
5 | #' 6 | #' @return Numeric vector 7 | #' 8 | #' @keywords internal 9 | myScalesComma <- function(x) { 10 | requireNamespace("scales") 11 | return(scales::comma(x = x, accuracy = 0.01)) 12 | } 13 | -------------------------------------------------------------------------------- /R/rNorm.R: -------------------------------------------------------------------------------- 1 | # Use the rank norm transformation on specificity 2 | rNorm <- function(ctdIN, 3 | as_sparse = TRUE, 4 | verbose = TRUE) { 5 | spec <- apply(ctdIN$specificity, 2, RNOmni::RankNorm) 6 | ctdIN$specificity <- to_sparse_matrix( 7 | exp = spec, 8 | as_sparse = as_sparse, 9 | verbose = verbose 10 | ) 11 | return(ctdIN) 12 | } 13 | -------------------------------------------------------------------------------- /man/check_nas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_nas.R 3 | \name{check_nas} 4 | \alias{check_nas} 5 | \title{Check NAs} 6 | \usage{ 7 | check_nas(exp) 8 | } 9 | \arguments{ 10 | \item{exp}{Expression matrix.} 11 | } 12 | \value{ 13 | Null output. 14 | } 15 | \description{ 16 | Check for any NAs in an expression matrix. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/is_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_matrix.R 3 | \name{is_matrix} 4 | \alias{is_matrix} 5 | \title{Assess whether an object is a Matrix} 6 | \usage{ 7 | is_matrix(X) 8 | } 9 | \arguments{ 10 | \item{X}{Object.} 11 | } 12 | \value{ 13 | boolean 14 | } 15 | \description{ 16 | Assess whether an object is a Matrix or one of 17 | its derived object types. 
18 | } 19 | -------------------------------------------------------------------------------- /R/check_numeric.R: -------------------------------------------------------------------------------- 1 | #' Check numeric 2 | #' 3 | #' Ensure that a matrix is numeric. If not, it will be converted to numeric. 4 | #' @param exp Input matrix. 5 | #' @return Numeric expression matrix. 6 | #' 7 | #' @keywords internal 8 | #' @importFrom methods is 9 | check_numeric <- function(exp) { 10 | if (methods::is(exp[1, 1], "character")) { 11 | storage.mode(exp) <- "numeric" 12 | } 13 | return(exp) 14 | } 15 | -------------------------------------------------------------------------------- /man/theme_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/theme_graph.R 3 | \name{theme_graph} 4 | \alias{theme_graph} 5 | \title{Get graph theme} 6 | \usage{ 7 | theme_graph() 8 | } 9 | \value{ 10 | \code{ggplot2} graph theme. 11 | } 12 | \description{ 13 | Get graph theme for plots created by 14 | \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/sce_merged_apply.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sce_merged_apply.R 3 | \name{sce_merged_apply} 4 | \alias{sce_merged_apply} 5 | \title{sce_merged_apply} 6 | \usage{ 7 | sce_merged_apply(SCE_merged, as_sparse = TRUE, as_DelayedArray = FALSE) 8 | } 9 | \value{ 10 | Merged SingleCellExperiment. 11 | } 12 | \description{ 13 | Merge a list of SingleCellExperiments. 
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/dt_to_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dt_to_df.R 3 | \name{dt_to_df} 4 | \alias{dt_to_df} 5 | \title{Convert a \code{data.table} to a \code{data.frame}.} 6 | \usage{ 7 | dt_to_df(exp) 8 | } 9 | \value{ 10 | \link[base]{data.frame} 11 | } 12 | \description{ 13 | Converts a \code{data.table} to a \code{data.frame} by setting the 14 | first column as the rownames. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/message_parallel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/message_parallel.R 3 | \name{message_parallel} 4 | \alias{message_parallel} 5 | \title{Print messages} 6 | \usage{ 7 | message_parallel(...) 8 | } 9 | \arguments{ 10 | \item{...}{Message input.} 11 | } 12 | \value{ 13 | Null output. 14 | } 15 | \description{ 16 | Print messages even from within parallelised functions. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/messager.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/messager.R 3 | \name{messager} 4 | \alias{messager} 5 | \title{Print messages} 6 | \usage{ 7 | messager(..., v = TRUE) 8 | } 9 | \arguments{ 10 | \item{...}{Message input.} 11 | 12 | \item{v}{Whether to print messages.} 13 | } 14 | \value{ 15 | Null output. 16 | } 17 | \description{ 18 | Print messages with option to silence. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/check_sce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_sce.R 3 | \name{check_sce} 4 | \alias{check_sce} 5 | \title{Check SingleCellExperiment} 6 | \usage{ 7 | check_sce(exp, verbose = TRUE) 8 | } 9 | \value{ 10 | List of extracted SCE components. 11 | } 12 | \description{ 13 | Check whether \code{exp} is a SingleCellExperiment (SCE) object and extract 14 | the relevant components. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /R/check_nas.R: -------------------------------------------------------------------------------- 1 | #' Check NAs 2 | #' 3 | #' Check for any NAs in an expression matrix. 4 | #' 5 | #' @param exp Expression matrix. 6 | #' @return Null output. Stops with an error if any NA is present. 7 | #' 8 | #' @keywords internal 9 | check_nas <- function(exp) { 10 | err_msg <- paste0( 11 | "NA values detected in expression matrix. All NA values", 12 | " should be removed before running EWCE." 13 | ) 14 | if (sum(is.na(exp)) > 0) { 15 | stop(err_msg) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /man/check_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_numeric.R 3 | \name{check_numeric} 4 | \alias{check_numeric} 5 | \title{Check numeric} 6 | \usage{ 7 | check_numeric(exp) 8 | } 9 | \arguments{ 10 | \item{exp}{Input matrix.} 11 | } 12 | \value{ 13 | Numeric expression matrix. 14 | } 15 | \description{ 16 | Ensure that a matrix is numeric. If not, it will be converted to numeric. 
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/is_celltypedataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_celltypedataset.R 3 | \name{is_celltypedataset} 4 | \alias{is_celltypedataset} 5 | \title{Check whether object is a CellTypeDataset} 6 | \usage{ 7 | is_celltypedataset(ctd) 8 | } 9 | \arguments{ 10 | \item{ctd}{Object.} 11 | } 12 | \value{ 13 | boolean 14 | } 15 | \description{ 16 | Check whether an object is a CellTypeDataset. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/is_delayed_array.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_delayed_array.R 3 | \name{is_delayed_array} 4 | \alias{is_delayed_array} 5 | \title{Assess whether an object is a DelayedArray.} 6 | \usage{ 7 | is_delayed_array(X) 8 | } 9 | \arguments{ 10 | \item{X}{Object.} 11 | } 12 | \value{ 13 | boolean 14 | } 15 | \description{ 16 | Assess whether an object is a DelayedArray or one of 17 | its derived object types. 18 | } 19 | -------------------------------------------------------------------------------- /man/is_sparse_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_sparse_matrix.R 3 | \name{is_sparse_matrix} 4 | \alias{is_sparse_matrix} 5 | \title{Assess whether an object is a sparse matrix} 6 | \usage{ 7 | is_sparse_matrix(X) 8 | } 9 | \arguments{ 10 | \item{X}{Object.} 11 | } 12 | \value{ 13 | boolean 14 | } 15 | \description{ 16 | Assess whether an object is a sparse matrix or one of 17 | its derived object types. 
18 | } 19 | -------------------------------------------------------------------------------- /man/create_list_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create_list_network.R 3 | \name{create_list_network} 4 | \alias{create_list_network} 5 | \title{\code{create_list_network}} 6 | \usage{ 7 | create_list_network(data_byGene2, hits_NEW, reps = 10000, no_cores = 1) 8 | } 9 | \value{ 10 | List network 11 | } 12 | \description{ 13 | Support function for \code{prepare_genesize_control_network}. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/max_ctd_depth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/max_ctd_depth.R 3 | \name{max_ctd_depth} 4 | \alias{max_ctd_depth} 5 | \title{Get max CTD depth} 6 | \usage{ 7 | max_ctd_depth(CTD_list) 8 | } 9 | \arguments{ 10 | \item{CTD_list}{A list of CellTypeDataset objects.} 11 | } 12 | \value{ 13 | integer 14 | } 15 | \description{ 16 | Get the maximum level depth from a list of CellTypeDataset objects. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/myScalesComma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/myScalesComma.R 3 | \name{myScalesComma} 4 | \alias{myScalesComma} 5 | \title{\code{myScalesComma}} 6 | \usage{ 7 | myScalesComma(x) 8 | } 9 | \value{ 10 | Numeric vector 11 | } 12 | \description{ 13 | Adjusts \pkg{ggplot2} label display. See \link[scales]{comma} for details. 14 | Support function for \link[EWCE]{plot_log_bootstrap_distributions}. 
15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/report_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/report_results.R 3 | \name{report_results} 4 | \alias{report_results} 5 | \title{Report cell type enrichment results} 6 | \usage{ 7 | report_results(results, sig_thresh = 0.05, verbose = TRUE) 8 | } 9 | \value{ 10 | NULL output. 11 | } 12 | \description{ 13 | Report cell type enrichment results generated by 14 | \link[EWCE]{bootstrap_enrichment_test}. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/to_dataframe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_dataframe.R 3 | \name{to_dataframe} 4 | \alias{to_dataframe} 5 | \title{Convert object to data.frame} 6 | \usage{ 7 | to_dataframe(X, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{X}{Object.} 11 | 12 | \item{verbose}{Print messages.} 13 | } 14 | \value{ 15 | \link[base]{data.frame}. 16 | } 17 | \description{ 18 | Convert a variety of object types to data.frame format. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /R/read_ctd.R: -------------------------------------------------------------------------------- 1 | # read_ctd <- function(CTD_meta_row){ 2 | # sourceRData <- function(fileName){ 3 | # repmis::source_data(fileName) 4 | # get(ls()[ls() != "fileName"]) 5 | # } 6 | # ROW <- CTD_meta_row 7 | # if(endsWith(tolower(ROW$url),".rds")){ 8 | # print("Reading in RDS file...") 9 | # ctd <- readRDS(url(ROW$url,"rb")) 10 | # }else { 11 | # print("Reading in RDA file...") 12 | # ctd <- sourceRData(ROW$url) 13 | # } 14 | # return(ctd) 15 | # } 16 | -------------------------------------------------------------------------------- /man/prep.dendro.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prep_dendro.r 3 | \name{prep.dendro} 4 | \alias{prep.dendro} 5 | \title{prep.dendro} 6 | \usage{ 7 | prep.dendro(ctdIN) 8 | } 9 | \arguments{ 10 | \item{ctdIN}{A single annotLevel of a ctd, i.e. ctd[[1]] (the function is 11 | intended to be used via apply).} 12 | } 13 | \value{ 14 | A CellTypeDataset with dendrogram plotting info added. 15 | } 16 | \description{ 17 | \code{prep_dendro} adds a dendrogram to a CellTypeDataset (CTD). 18 | } 19 | -------------------------------------------------------------------------------- /man/sce_lists_apply.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sce_lists_apply.R 3 | \name{sce_lists_apply} 4 | \alias{sce_lists_apply} 5 | \title{sce_lists_apply} 6 | \usage{ 7 | sce_lists_apply( 8 | SCE_lists, 9 | return_genes = FALSE, 10 | level = 2, 11 | as_matrix = FALSE, 12 | as_DelayedArray = FALSE 13 | ) 14 | } 15 | \value{ 16 | List of \code{SingleCellExperiment}s. 
17 | } 18 | \description{ 19 | Support function for \code{EWCE::merge_sce_list}. 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | #' @details 2 | #' EWCE: Expression Weighted Celltype Enrichment 3 | #' 4 | #' Used to determine which cell types are enriched within gene lists. 5 | #' The package provides tools for testing enrichments within simple gene lists 6 | #' (such as human disease associated genes) and those resulting from 7 | #' differential expression studies. 8 | #' 9 | #' The package does not depend upon any particular Single Cell Transcriptome 10 | #' dataset and user defined datasets can be loaded in and used in the analyses. 11 | "_PACKAGE" 12 | -------------------------------------------------------------------------------- /man/calculate_meanexp_for_level.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_meanexp_for_level.R 3 | \name{calculate_meanexp_for_level} 4 | \alias{calculate_meanexp_for_level} 5 | \title{calculate_meanexp_for_level} 6 | \usage{ 7 | calculate_meanexp_for_level( 8 | ctd_oneLevel, 9 | expMatrix, 10 | as_sparse = TRUE, 11 | verbose = TRUE 12 | ) 13 | } 14 | \value{ 15 | One level of a CellTypeDataset. 
16 | } 17 | \description{ 18 | calculate_meanexp_for_level 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/check_group_name.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_group_name.R 3 | \name{check_group_name} 4 | \alias{check_group_name} 5 | \title{Check group name} 6 | \usage{ 7 | check_group_name(groupName) 8 | } 9 | \arguments{ 10 | \item{groupName}{A human readable name for referring to the dataset 11 | being used.} 12 | } 13 | \value{ 14 | Null output. 15 | } 16 | \description{ 17 | Ensure \code{groupName} argument is provided to 18 | \link[EWCE]{generate_celltype_data}. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /R/load_rdata.R: -------------------------------------------------------------------------------- 1 | #' \code{load_rdata} 2 | #' 3 | #' Load processed data (\emph{.rda} format) using a function that assigns it 4 | #' to a specific variable 5 | #' (so you don't have to guess what the loaded variable name is). 6 | #' 7 | #' @param fileName Name of the file to load. 8 | #' 9 | #' @return Data object. 
10 | #' 11 | #' @export 12 | #' @examples 13 | #' tmp <- tempfile() 14 | #' save(mtcars, file = tmp) 15 | #' mtcars2 <- load_rdata(tmp) 16 | load_rdata <- function(fileName) { 17 | load(fileName) 18 | get(ls()[ls() != "fileName"]) 19 | } 20 | -------------------------------------------------------------------------------- /man/list_species.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/list_species.R 3 | \name{list_species} 4 | \alias{list_species} 5 | \title{List all species} 6 | \usage{ 7 | list_species(verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{verbose}{Print messages.} 11 | } 12 | \value{ 13 | List of species EWCE can input/output genes as. 14 | } 15 | \description{ 16 | List all species that EWCE can convert genes from/to. 17 | Wrapper function for \link[orthogene]{map_species}. 18 | } 19 | \examples{ 20 | list_species() 21 | } 22 | -------------------------------------------------------------------------------- /R/is_sparse_matrix.R: -------------------------------------------------------------------------------- 1 | #' Assess whether an object is a sparse matrix 2 | #' 3 | #' Assess whether an object is a sparse matrix or one of 4 | #' its derived object types. 5 | #' 6 | #' @param X Object. 7 | #' 8 | #' @return boolean 9 | #' 10 | #' @importFrom methods is 11 | is_sparse_matrix <- function(X) { 12 | methods::is(X, "sparseMatrix") | 13 | methods::is(X, "dgCMatrix") | 14 | methods::is(X, "dgRMatrix") | 15 | methods::is(X, "dgTMatrix") | 16 | methods::is(X, "dgeMatrix") | 17 | methods::is(X, "lgCMatrix") 18 | } 19 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | Icon? 
2 | .*\.rda 3 | BootstrapPlots 4 | .*\.csv 5 | .*\.txt 6 | .*\.log 7 | \.travis\.yml 8 | BootstrapPlots/* 9 | README_files/* 10 | vignettes/.build.timestamp 11 | vignettes/EWCE_cache/* 12 | vignettes/EWCE_files/* 13 | Dockerfile 14 | DOCKERFILE 15 | \.dockerignore 16 | 17 | ^.*\.Rproj$ 18 | ^\.Rproj\.user$ 19 | ^LICENSE\.md$ 20 | ^\.Rproj\.user$ 21 | ^README.Rmd 22 | ^\.github$ 23 | ^Meta$ 24 | ^codecov\.yml$ 25 | ^_pkgdown\.yml$ 26 | ^docs$ 27 | /docs/ 28 | ^doc$ 29 | ^pkgdown$ 30 | 31 | node_modules$ 32 | package-lock\.json$ 33 | package\.json$ 34 | -------------------------------------------------------------------------------- /R/list_species.R: -------------------------------------------------------------------------------- 1 | #' List all species 2 | #' 3 | #' List all species that EWCE can convert genes from/to. 4 | #' Wrapper function for \link[orthogene]{map_species}. 5 | #' 6 | #' @param verbose Print messages. 7 | #' 8 | #' @return List of species EWCE can input/output genes as. 9 | #' 10 | #' @export 11 | #' @importFrom orthogene map_species 12 | #' @examples 13 | #' list_species() 14 | list_species <- function(verbose = TRUE) { 15 | orthogene::map_species( 16 | species = NULL, 17 | method = "homologene", 18 | verbose = verbose 19 | ) 20 | } 21 | -------------------------------------------------------------------------------- /man/is_ctd_standardised.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_ctd_standardised.R 3 | \name{is_ctd_standardised} 4 | \alias{is_ctd_standardised} 5 | \title{Check whether a CellTypeDataset is standardised} 6 | \usage{ 7 | is_ctd_standardised(ctd) 8 | } 9 | \arguments{ 10 | \item{ctd}{CellTypeDataset.} 11 | } 12 | \value{ 13 | Whether the \code{ctd} is standardised. 14 | } 15 | \description{ 16 | Check whether a CellTypeDataset was previously standardised 17 | using \link[EWCE]{standardise_ctd}. 
18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /R/dt_to_df.R: -------------------------------------------------------------------------------- 1 | #' Convert a \code{data.table} to a \code{data.frame}. 2 | #' 3 | #' Converts a \code{data.table} to a \code{data.frame} by setting the 4 | #' first column as the rownames. 5 | #' @param exp Object to convert; anything that is not a \code{data.table} is returned unchanged. 6 | #' @return \link[base]{data.frame} 7 | #' 8 | #' @keywords internal 9 | dt_to_df <- function(exp) { 10 | if (methods::is(exp, "data.table")) { 11 | col1 <- colnames(exp)[1] 12 | exp <- data.frame(exp, 13 | row.names = col1, 14 | check.rows = FALSE, 15 | check.names = FALSE 16 | ) 17 | } 18 | return(exp) 19 | } 20 | -------------------------------------------------------------------------------- /man/drop_nonexpressed_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/drop_nonexpressed_genes.R 3 | \name{drop_nonexpressed_genes} 4 | \alias{drop_nonexpressed_genes} 5 | \title{Drop genes with zero counts} 6 | \usage{ 7 | drop_nonexpressed_genes(exp, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{exp}{Gene expression matrix.} 11 | 12 | \item{verbose}{Print messages.} 13 | } 14 | \value{ 15 | List of filtered \code{exp}. 16 | } 17 | \description{ 18 | Remove rows (genes) in which counts sum to zero.
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/assign_cores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assign_cores.r 3 | \name{assign_cores} 4 | \alias{assign_cores} 5 | \title{Assign cores} 6 | \usage{ 7 | assign_cores(worker_cores = 0.9, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{worker_cores}{Number (>1) or proportion (<1) of worker cores to use.} 11 | 12 | \item{verbose}{Print messages.} 13 | } 14 | \value{ 15 | List of core allocations. 16 | } 17 | \description{ 18 | Assign cores automatically for parallel processing, while reserving some. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/prep_dendro.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prep_dendro.r 3 | \name{prep_dendro} 4 | \alias{prep_dendro} 5 | \title{Prepare dendrogram} 6 | \usage{ 7 | prep_dendro(ctdIN, expand = c(0, 0.66)) 8 | } 9 | \arguments{ 10 | \item{ctdIN}{A single annotLevel of a ctd, i.e. ctd[[1]] (the function is 11 | intended to be used via apply).} 12 | } 13 | \value{ 14 | A CellTypeDataset with dendrogram plotting info added. 15 | } 16 | \description{ 17 | \code{prep_dendro} adds a dendrogram to a CellTypeDataset (CTD). 
18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/to_sparse_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_sparse_matrix.R 3 | \name{to_sparse_matrix} 4 | \alias{to_sparse_matrix} 5 | \title{Convert object to sparse matrix} 6 | \usage{ 7 | to_sparse_matrix(exp, as_sparse = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{exp}{Object.} 11 | 12 | \item{as_sparse}{Whether to convert \code{exp} to sparse matrix} 13 | 14 | \item{verbose}{Print messages.} 15 | } 16 | \value{ 17 | Sparse matrix. 18 | } 19 | \description{ 20 | Convert a variety of object types to sparse matrix format. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /R/check_group_name.R: -------------------------------------------------------------------------------- 1 | #' Check group name 2 | #' 3 | #' Ensure \code{groupName} argument is provided to 4 | #' \link[EWCE]{generate_celltype_data}. 5 | #' @inheritParams generate_celltype_data 6 | #' @return Null output. 7 | #' 8 | #' @keywords internal 9 | check_group_name <- function(groupName) { 10 | err_msg3 <- paste0( 11 | "ERROR: groupName must be set. groupName is used to", 12 | " label the files created by this function." 
13 | ) 14 | # NULL, zero-length, NA and empty-string values all count as "unset". 15 | is_unset <- is.null(groupName) || length(groupName) == 0 || 16 | anyNA(groupName) || !all(nzchar(groupName)) 17 | if (is_unset) { 18 | stop(err_msg3) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /man/compute_gene_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compute_gene_counts.R 3 | \name{compute_gene_counts} 4 | \alias{compute_gene_counts} 5 | \title{Compute gene counts} 6 | \usage{ 7 | compute_gene_counts(bootstrap_list, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{bootstrap_list}{The output of \code{get_summed_proportions_iterate}.} 11 | 12 | \item{verbose}{Print messages.} 13 | } 14 | \value{ 15 | \link[data.table]{data.table} 16 | } 17 | \description{ 18 | Counts the number of times each gene appeared in 19 | the randomly sampled gene lists. 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/load_rdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/load_rdata.R 3 | \name{load_rdata} 4 | \alias{load_rdata} 5 | \title{\code{load_rdata}} 6 | \usage{ 7 | load_rdata(fileName) 8 | } 9 | \arguments{ 10 | \item{fileName}{Name of the file to load.} 11 | } 12 | \value{ 13 | Data object. 14 | } 15 | \description{ 16 | Load processed data (\emph{.rda} format) using a function that assigns it 17 | to a specific variable 18 | (so you don't have to guess what the loaded variable name is). 
19 | } 20 | \examples{ 21 | tmp <- tempfile() 22 | save(mtcars, file = tmp) 23 | mtcars2 <- load_rdata(tmp) 24 | } 25 | -------------------------------------------------------------------------------- /man/check_annotLevels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_annotLevels.R 3 | \name{check_annotLevels} 4 | \alias{check_annotLevels} 5 | \title{check_annotLevels 6 | 7 | First, check the number of annotations equals the number of columns 8 | in the expression data.} 9 | \usage{ 10 | check_annotLevels(annotLevels, exp) 11 | } 12 | \arguments{ 13 | \item{exp}{exp (#fix).} 14 | } 15 | \value{ 16 | Null output. 17 | } 18 | \description{ 19 | check_annotLevels 20 | 21 | First, check the number of annotations equals the number of columns 22 | in the expression data. 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Icon? 2 | *.rda 3 | BootstrapPlots 4 | *.csv 5 | *.txt 6 | *.log 7 | docs 8 | /doc/ 9 | doc 10 | /doc/ 11 | Meta 12 | /Meta/ 13 | vignettes/.build.timestamp 14 | vignettes/EWCE_cache/* 15 | vignettes/EWCE_files/* 16 | tests/testthat/Rplots.pdf 17 | 18 | *.Rproj 19 | .Rproj.user 20 | .Rhistory 21 | ./**/.Rhistory 22 | .RData 23 | .Ruserdata 24 | 25 | vignettes/*.R$ 26 | vignettes/*.html$ 27 | 28 | # find . 
-name .DS_Store -print0 | xargs -0 git rm -f --ignore-unmatch 29 | ./.DS_Store 30 | ./**/.DS_Store 31 | ./**/**/.DS_Store 32 | ./**/**/**/.DS_Store 33 | ./**/**/**/**/.DS_Store 34 | ./**/**/**/**/**/.DS_Store 35 | ./**/**/**/**/**/**/.DS_Store 36 | -------------------------------------------------------------------------------- /man/check_full_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_full_results.R 3 | \name{check_full_results} 4 | \alias{check_full_results} 5 | \title{check_full_results} 6 | \usage{ 7 | check_full_results(full_results, sct_data) 8 | } 9 | \arguments{ 10 | \item{full_results}{The full output of 11 | \link[EWCE]{bootstrap_enrichment_test} for the same gene list.} 12 | 13 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 14 | } 15 | \value{ 16 | Null output. 17 | } 18 | \description{ 19 | Check full results generated by \link[EWCE]{bootstrap_enrichment_test}. 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/convert_new_ewce_to_old.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_new_ewce_to_old.r 3 | \name{convert_new_ewce_to_old} 4 | \alias{convert_new_ewce_to_old} 5 | \title{convert_new_ewce_to_old} 6 | \usage{ 7 | convert_new_ewce_to_old(ctd, lvl) 8 | } 9 | \arguments{ 10 | \item{ctd}{A cell type data structure containing 11 | "mean_exp" and "specificity".} 12 | 13 | \item{lvl}{The annotation level to extract.} 14 | } 15 | \value{ 16 | CellTypeData in the old data structure style. 
17 | } 18 | \description{ 19 | \code{convert_new_ewce_to_old} Used to get an old style EWCE ctd file 20 | from a new one 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /R/is_ctd_standardised.R: -------------------------------------------------------------------------------- 1 | #' Check whether a CellTypeDataset is standardised 2 | #' 3 | #' Check whether a CellTypeDataset was previously standardised 4 | #' using \link[EWCE]{standardise_ctd}. 5 | #' 6 | #' @param ctd CellTypeDataset. 7 | #' 8 | #' @return Whether the \code{ctd} is standardised. 9 | #' 10 | #' @keywords internal 11 | is_ctd_standardised <- function(ctd) { 12 | std_list <- lapply(ctd, function(x) { 13 | if ("standardised" %in% names(x)) { 14 | return(x[["standardised"]]) 15 | } else { 16 | return(FALSE) 17 | } 18 | }) 19 | is_standardised <- all(unlist(std_list) == TRUE) 20 | return(is_standardised) 21 | } 22 | -------------------------------------------------------------------------------- /man/to_delayed_array.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_delayed_array.R 3 | \name{to_delayed_array} 4 | \alias{to_delayed_array} 5 | \title{Convert object to DelayedArray} 6 | \usage{ 7 | to_delayed_array(exp, as_DelayedArray = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{exp}{Object.} 11 | 12 | \item{as_DelayedArray}{Whether to convert \code{exp} to 13 | \link[DelayedArray]{DelayedArray}.} 14 | 15 | \item{verbose}{Print messages.} 16 | } 17 | \value{ 18 | \link[DelayedArray]{DelayedArray}. 19 | } 20 | \description{ 21 | Convert a variety of object types to 22 | \link[DelayedArray]{DelayedArray} format. 
23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /tests/testthat/test-check_percent_hits.R: -------------------------------------------------------------------------------- 1 | test_that("check_percent_hits works", { 2 | if (!is_32bit()) { 3 | full_results <- EWCE::example_bootstrap_results() 4 | testthat::expect_true(length(full_results) %in% c(3,4) ) 5 | testthat::expect_true(methods::is(full_results$results, "data.frame")) 6 | 7 | report <- EWCE::check_percent_hits( 8 | full_results = full_results, 9 | target_celltype = "microglia" 10 | ) 11 | testthat::expect_true( 12 | all(c("target_hits", "percent_hits", "target_celltype") 13 | %in% names(report)) 14 | ) 15 | testthat::expect_equal(report$percent_hits, 14.3) 16 | } 17 | }) 18 | -------------------------------------------------------------------------------- /R/check_annotLevels.R: -------------------------------------------------------------------------------- 1 | #' check_annotLevels 2 | #' 3 | #' First, check the number of annotations equals the number of columns 4 
| #' in the expression data. 5 | #' 6 | #' @param exp exp (#fix). 7 | #' @inheritParams bootstrap_enrichment_test 8 | #' @return Null output. 9 | #' 10 | #' @keywords internal 11 | check_annotLevels <- function(annotLevels, 12 | exp) { 13 | err_msg2 <- paste0( 14 | "Error: length of all annotation levels must equal", 15 | " the number of columns in exp matrix" 16 | ) 17 | out <- lapply(annotLevels, test <- function(x, exp) { 18 | if (length(x) != dim(exp)[2]) { 19 | stop(err_msg2) 20 | } 21 | }, exp) 22 | } 23 | -------------------------------------------------------------------------------- /man/drop_nonexpressed_cells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/drop_nonexpressed_cells.R 3 | \name{drop_nonexpressed_cells} 4 | \alias{drop_nonexpressed_cells} 5 | \title{Drop cells with zero gene counts} 6 | \usage{ 7 | drop_nonexpressed_cells(exp, annotLevels, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{exp}{Gene expression matrix.} 11 | 12 | \item{annotLevels}{Cell-wise annotations to be subset 13 | if some cells are dropped.} 14 | 15 | \item{verbose}{Print messages.} 16 | } 17 | \value{ 18 | List of filtered \code{exp} and \code{annotLevels}. 19 | } 20 | \description{ 21 | Remove columns (cells) in which (gene) counts sum to zero. 
22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/get_ctd_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_ctd_levels.R 3 | \name{get_ctd_levels} 4 | \alias{get_ctd_levels} 5 | \title{Get the names of CellTypeDataset levels} 6 | \usage{ 7 | get_ctd_levels(ctd, max_only = FALSE) 8 | } 9 | \arguments{ 10 | \item{ctd}{CellTypeDataset.} 11 | 12 | \item{max_only}{Only return the level with the greatest depth 13 | (e.g. \code{"level3"} in \code{c("level1","level2","level3")}).} 14 | } 15 | \value{ 16 | List of levels in \code{ctd}. 17 | } 18 | \description{ 19 | Returns the level names of a CellTypeDataset. If none are available, 20 | will instead return a vector of numbers (one number per level). 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /R/report_dge.R: -------------------------------------------------------------------------------- 1 | #' Report DGE 2 | #' 3 | #' Report differential gene expression (DGE) results 4 | #' 5 | #' @param exp Gene expression matrix. 6 | #' @param keep_genes Genes kept after DGE. 7 | #' @inheritParams drop_uninformative_genes 8 | #' 9 | #' @return Null output. 
10 | #' 11 | #' @keywords internal 12 | report_dge <- function(exp, 13 | keep_genes, 14 | adj_pval_thresh = .05, 15 | verbose = TRUE) { 16 | messager(paste( 17 | formatC(nrow(exp) - length(keep_genes), big.mark = ","), 18 | "/", 19 | formatC(nrow(exp), big.mark = ","), 20 | "genes dropped @ DGE adj_pval_thresh <", adj_pval_thresh 21 | ), v = verbose) 22 | } 23 | -------------------------------------------------------------------------------- /man/plot_with_bootstrap_distributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_with_bootstrap_distributions.R 3 | \name{plot_with_bootstrap_distributions} 4 | \alias{plot_with_bootstrap_distributions} 5 | \title{Plot with bootstrap distributions} 6 | \usage{ 7 | plot_with_bootstrap_distributions( 8 | exp_mats, 9 | cc, 10 | hit_exp, 11 | tag, 12 | listFileName, 13 | graph_theme, 14 | save_dir = file.path(tempdir(), paste0("BootstrapPlots", "_for_transcriptome")), 15 | height = 3.5, 16 | width = 3.5 17 | ) 18 | } 19 | \value{ 20 | Null result. 21 | } 22 | \description{ 23 | Plot results of \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 
24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/plot_log_bootstrap_distributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_log_bootstrap_distributions.R 3 | \name{plot_log_bootstrap_distributions} 4 | \alias{plot_log_bootstrap_distributions} 5 | \title{Plot log bootstrap distributions} 6 | \usage{ 7 | plot_log_bootstrap_distributions( 8 | dat, 9 | exp_mats, 10 | cc, 11 | hit_exp, 12 | tag, 13 | listFileName, 14 | graph_theme, 15 | save_dir = file.path(tempdir(), paste0("BootstrapPlots", "_for_transcriptome")), 16 | height = 3.5, 17 | width = 3.5 18 | ) 19 | } 20 | \value{ 21 | Null result. 22 | } 23 | \description{ 24 | Plot results of \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/bootstrap_plots_for_transcriptome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bootstrap_plots_for_transcriptome.R 3 | \name{bootstrap_plots_for_transcriptome} 4 | \alias{bootstrap_plots_for_transcriptome} 5 | \title{Bootstrap plot} 6 | \usage{ 7 | bootstrap_plots_for_transcriptome( 8 | dat, 9 | tag, 10 | listFileName, 11 | cc, 12 | showGNameThresh, 13 | graph_theme, 14 | maxX, 15 | save_dir = file.path(tempdir(), paste0("BootstrapPlots", "_for_transcriptome")), 16 | height = 3.5, 17 | width = 3.5, 18 | show_plot = TRUE 19 | ) 20 | } 21 | \value{ 22 | Null result. 23 | } 24 | \description{ 25 | Plot results of \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 
26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /man/report_dge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/report_dge.R 3 | \name{report_dge} 4 | \alias{report_dge} 5 | \title{Report DGE} 6 | \usage{ 7 | report_dge(exp, keep_genes, adj_pval_thresh = 0.05, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{exp}{Gene expression matrix.} 11 | 12 | \item{keep_genes}{Genes kept after DGE.} 13 | 14 | \item{adj_pval_thresh}{Minimum differential expression significance 15 | that a gene must demonstrate across \code{level2annot} (i.e. cell types).} 16 | 17 | \item{verbose}{Print messages. 18 | #' @inheritParams orthogene::convert_orthologs} 19 | } 20 | \value{ 21 | Null output. 22 | } 23 | \description{ 24 | Report differential gene expression (DGE) results 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/sct_normalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sct_normalize.R 3 | \name{sct_normalize} 4 | \alias{sct_normalize} 5 | \title{Normalize expression matrix} 6 | \usage{ 7 | sct_normalize(exp, as_sparse = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{exp}{Gene x cell expression matrix.} 11 | 12 | \item{as_sparse}{Convert \code{exp} to sparse matrix.} 13 | 14 | \item{verbose}{Print messages.} 15 | } 16 | \value{ 17 | Normalised expression matrix. 18 | } 19 | \description{ 20 | Normalize expression matrix by accounting for library size. 21 | Uses \pkg{sctransform}. 
22 | } 23 | \examples{ 24 | cortex_mrna <- ewceData::cortex_mrna() 25 | exp_sct_normed <- EWCE::sct_normalize(exp = cortex_mrna$exp[1:300, ]) 26 | } 27 | -------------------------------------------------------------------------------- /R/convert_new_ewce_to_old.r: -------------------------------------------------------------------------------- 1 | #' convert_new_ewce_to_old 2 | #' 3 | #' \code{convert_new_ewce_to_old} Used to get an old style EWCE ctd file 4 | #' from a new one 5 | #' 6 | #' @param ctd A cell type data structure containing 7 | #' "mean_exp" and "specificity". 8 | #' @param lvl The annotation level to extract. 9 | #' 10 | #' @return CellTypeData in the old data structure style. 11 | #' 12 | #' @keywords internal 13 | convert_new_ewce_to_old <- function(ctd, 14 | lvl) { 15 | celltype_data <- list() 16 | celltype_data[[1]] <- list() 17 | celltype_data[[1]]$cell_dists <- ctd[[lvl]]$specificity 18 | celltype_data[[1]]$all_scts <- ctd[[lvl]]$mean_exp 19 | celltype_data[[1]]$annot <- ctd[[lvl]]$annot 20 | return(celltype_data) 21 | } 22 | -------------------------------------------------------------------------------- /man/cell_list_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cell_list_dist.r 3 | \name{cell_list_dist} 4 | \alias{cell_list_dist} 5 | \title{cell_list_dist} 6 | \usage{ 7 | cell_list_dist(hits, sct_data, annotLevel) 8 | } 9 | \arguments{ 10 | \item{hits}{List of gene symbols containing the target gene list.} 11 | 12 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 13 | 14 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 15 | analyse (\emph{Default: 1}).} 16 | } 17 | \value{ 18 | The summed specificity of each celltype 19 | across a set of \code{hits}. 
20 | } 21 | \description{ 22 | specificity is generated in the main_CellTypeAnalysis_Preperation.r file 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/filter_ctd_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter_ctd_genes.R 3 | \name{filter_ctd_genes} 4 | \alias{filter_ctd_genes} 5 | \title{Filter genes in a CellTypeDataset} 6 | \usage{ 7 | filter_ctd_genes(ctd, gene_subset) 8 | } 9 | \arguments{ 10 | \item{ctd}{CellTypeDataset.} 11 | 12 | \item{gene_subset}{Genes to subset to.} 13 | } 14 | \value{ 15 | Filtered CellTypeDataset. 16 | } 17 | \description{ 18 | Removes rows from each matrix within a CellTypeDataset (CTD) that are not 19 | within \code{gene_subset}. 20 | } 21 | \examples{ 22 | 23 | ctd <- ewceData::ctd() 24 | ctd <- standardise_ctd(ctd, input_species="mouse") 25 | gene_subset <- rownames(ctd[[1]]$mean_exp)[1:100] 26 | ctd_subset <- EWCE::filter_ctd_genes(ctd = ctd, gene_subset = gene_subset) 27 | } 28 | -------------------------------------------------------------------------------- /man/fix_celltype_names_full_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fix_celltype_names_full_results.R 3 | \name{fix_celltype_names_full_results} 4 | \alias{fix_celltype_names_full_results} 5 | \title{Fix celltype name in full results} 6 | \usage{ 7 | fix_celltype_names_full_results(full_results, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{full_results}{Cell-type enrichment results generated by 11 | \link[EWCE]{bootstrap_enrichment_test}.} 12 | 13 | \item{verbose}{Print messages.} 14 | } 15 | \value{ 16 | Fixed full results. 
17 | } 18 | \description{ 19 | Aligns celltype names in full results generated by 20 | \link[EWCE]{bootstrap_enrichment_test} with the standardised 21 | CellTypeDataset (CTD) produced by \link[EWCE]{standardise_ctd}. 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/get_ctd_matrix_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_ctd_matrix_names.R 3 | \name{get_ctd_matrix_names} 4 | \alias{get_ctd_matrix_names} 5 | \title{Get CTD matrix names} 6 | \usage{ 7 | get_ctd_matrix_names( 8 | ctd = NULL, 9 | matrices = c("mean_exp", "median_exp", "specificity", "median_specificity", 10 | "specificity_quantiles"), 11 | verbose = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{ctd}{CellTypeDataset. If set to \code{NULL} (default), 16 | will simply return all possible matrix names.} 17 | 18 | \item{matrices}{Matrix names to search for.} 19 | 20 | \item{verbose}{Print messages.} 21 | } 22 | \value{ 23 | List of matrix names. 24 | } 25 | \description{ 26 | Find the names of all data matrices in a CellTypeDataset. 
27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man/calculate_specificity_for_level.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_specificity_for_level.R 3 | \name{calculate_specificity_for_level} 4 | \alias{calculate_specificity_for_level} 5 | \title{Calculate specificity for one CTD level} 6 | \usage{ 7 | calculate_specificity_for_level( 8 | ctd_oneLevel, 9 | matrix_name = "mean_exp", 10 | as_sparse = TRUE, 11 | verbose = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{ctd_oneLevel}{One level from a CTD.} 16 | 17 | \item{matrix_name}{Name of the matrix to extract.} 18 | 19 | \item{as_sparse}{Whether to convert \code{exp} to sparse matrix} 20 | 21 | \item{verbose}{Print messages.} 22 | } 23 | \value{ 24 | One CTD level. 25 | } 26 | \description{ 27 | Calculate specificity for one CellTypeDataset (CTD) level. 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/ctd_to_sce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ctd_to_sce.R 3 | \name{ctd_to_sce} 4 | \alias{ctd_to_sce} 5 | \title{CellTypeDataset to SingleCellExperiment} 6 | \usage{ 7 | ctd_to_sce(object, as_sparse = TRUE, as_DelayedArray = FALSE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{object}{CellTypeDataset object.} 11 | 12 | \item{as_sparse}{Store SingleCellExperiment matrices as sparse.} 13 | 14 | \item{as_DelayedArray}{Store SingleCellExperiment matrices as DelayedArray.} 15 | 16 | \item{verbose}{Print messages.} 17 | } 18 | \value{ 19 | SingleCellExperiment 20 | } 21 | \description{ 22 | Copied from \href{https://github.com/neurogenomics/scKirby}{scKirby}, 23 | which is not yet on CRAN or Bioconductor. 
24 | } 25 | \examples{ 26 | ctd <- ewceData::ctd() 27 | sce <- EWCE::ctd_to_sce(ctd) 28 | } 29 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite this package, please use:") 2 | 3 | citEntry( 4 | entry = "Article", 5 | title = "Identification of Vulnerable Cell 6 | Types in Major Brain Disorders Using Single Cell Transcriptomes 7 | and Expression Weighted Cell Type Enrichment", 8 | author = "Nathan G. Skene, Seth G. N. Grant", 9 | journal = "Frontiers in Neuroscience", 10 | year = "2016", 11 | volume = "10", 12 | number = NULL, 13 | pages = "16", 14 | url = "https://doi.org/10.3389/fnins.2016.00016", 15 | textVersion = paste("Nathan G. Skene, Seth G. N. Grant (2016) Identification of Vulnerable Cell Types in Major Brain Disorders Using Single Cell Transcriptomes and Expression Weighted Cell Type Enrichment, *Frontiers in Neuroscience*; 10, [https://doi.org/10.3389/fnins.2016.00016](https://doi.org/10.3389/fnins.2016.00016)" 16 | ) 17 | ) 18 | -------------------------------------------------------------------------------- /man/get_sig_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_sig_results.R 3 | \name{get_sig_results} 4 | \alias{get_sig_results} 5 | \title{Extract significant results} 6 | \usage{ 7 | get_sig_results( 8 | full_results, 9 | mtc_method = "BH", 10 | q_threshold = 0.05, 11 | verbose = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{full_results}{Output of \link[EWCE]{bootstrap_enrichment_test}.} 16 | 17 | \item{mtc_method}{Multiple-testing correction method 18 | (passed to \link[stats]{p.adjust}).} 19 | 20 | \item{q_threshold}{Maximum multiple-testing-corrected p-value to include.} 21 | 22 | \item{verbose}{Print messages.} 23 | } 24 | \value{ 25 | Filtered enrichment 
results table. 26 | } 27 | \description{ 28 | Extract significant results from output of 29 | \link[EWCE]{bootstrap_enrichment_test}. 30 | } 31 | \keyword{internal} 32 | -------------------------------------------------------------------------------- /R/get_ctd_levels.R: -------------------------------------------------------------------------------- 1 | #' Get the names of CellTypeDataset levels 2 | #' 3 | #' Returns the level names of a CellTypeDataset. If none are available, 4 | #' will instead return a vector of numbers (one number per level). 5 | #' 6 | #' @param ctd CellTypeDataset. 7 | #' @param max_only Only return the level with the greatest depth 8 | #' (e.g. \code{"level3"} in \code{c("level1","level2","level3")}). 9 | #' 10 | #' @return List of levels in \code{ctd}. 11 | #' 12 | #' @keywords internal 13 | get_ctd_levels <- function(ctd, 14 | max_only = FALSE) { 15 | # This is necessary in case further meta-data such as $name is used 16 | if (!is.null(names(ctd))) { 17 | lvls <- names(ctd) 18 | } else { 19 | lvls <- seq(1, length(ctd)) 20 | } 21 | if (max_only) { 22 | return(max(lvls)) 23 | } else { 24 | return(lvls) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /R/check_ewce_expression_data_args.R: -------------------------------------------------------------------------------- 1 | #' check_ewce_expression_data_args 2 | #' 3 | #' Check the input arguments of the 4 | #' \link[EWCE]{ewce_expression_data}. 5 | #' 6 | #' @inheritParams ewce_expression_data 7 | #' @return Null output. 
8 | #' 9 | #' @keywords internal 10 | check_ewce_expression_data_args <- function(sortBy, 11 | tt, 12 | thresh){ 13 | err_msg <- paste0( 14 | "ERROR: tt does not contain a column with value", 15 | " passed in sortBy argument" 16 | ) 17 | # Check the arguments 18 | if (!sortBy %in% colnames(tt)) { 19 | stop(err_msg) 20 | } 21 | err_msg2 <- paste0( 22 | "ERROR: length of table is less than", 23 | " twice the size of threshold" 24 | ) 25 | if (dim(tt)[1] < (thresh * 2)) { 26 | stop(err_msg2) 27 | } 28 | } -------------------------------------------------------------------------------- /tests/testthat/test-filter_ctd_genes.R: -------------------------------------------------------------------------------- 1 | test_that("filter_ctd_genes works", { 2 | 3 | ctd <- ewceData::ctd() 4 | n_genes <- 100 5 | gene_subset <- rownames(ctd[[1]]$mean_exp)[seq_len(n_genes)] 6 | 7 | #### Works with old CTD format #### 8 | ctd_subset <- filter_ctd_genes(ctd = ctd, 9 | gene_subset = gene_subset) 10 | testthat::expect_true( 11 | all(lapply(ctd_subset, function(x)nrow(x$mean_exp))==n_genes) 12 | ) 13 | 14 | #### Works with new CTD format #### 15 | ctd <- standardise_ctd(ctd, input_species="mouse") 16 | gene_subset <- rownames(ctd[[1]]$mean_exp)[seq_len(n_genes)] 17 | ctd_subset <- filter_ctd_genes(ctd = ctd, 18 | gene_subset = gene_subset) 19 | testthat::expect_true( 20 | all(lapply(ctd_subset, function(x)nrow(x$mean_exp))==n_genes) 21 | ) 22 | }) 23 | -------------------------------------------------------------------------------- /R/to_delayed_array.R: -------------------------------------------------------------------------------- 1 | #' Convert object to DelayedArray 2 | #' 3 | #' Convert a variety of object types to 4 | #' \link[DelayedArray]{DelayedArray} format. 5 | #' 6 | #' @param exp Object. 7 | #' @param as_DelayedArray Whether to convert \code{exp} to 8 | #' \link[DelayedArray]{DelayedArray}. 9 | #' @param verbose Print messages. 
#'
#' @return \link[DelayedArray]{DelayedArray}.
#'
#' @keywords internal
#' @importFrom DelayedArray DelayedArray
to_delayed_array <- function(exp,
                             as_DelayedArray = TRUE,
                             verbose = TRUE) {
    # Return the input untouched when conversion is switched off
    # or when it already is a DelayedArray.
    if (!as_DelayedArray || is_delayed_array(exp)) {
        return(exp)
    }
    messager("Converting to DelayedArray.", v = verbose)
    # Anything that is not already a matrix type is coerced with
    # as.matrix() before being wrapped.
    mat <- if (is_matrix(exp)) exp else as.matrix(exp)
    return(DelayedArray::DelayedArray(mat))
}
#'
#' @return Filtered \code{exp}.
#'
#' @keywords internal
#' @importFrom Matrix rowSums
drop_nonexpressed_genes <- function(exp,
                                    verbose = TRUE) {
    messager("Checking for non-expressed genes.", v = verbose)
    orig.dims <- dim(exp)
    row.sums <- Matrix::rowSums(exp) # MUST be from Matrix
    #### Identify genes to keep ####
    # Only rows whose counts provably sum to <= 0 are dropped.
    # Rows with an NA sum are kept, so NA never reaches the row index
    # (an NA logical index would otherwise yield NA rows).
    keep <- unname(is.na(row.sums) | row.sums > 0)
    n_dropped <- sum(!keep)
    #### Drop genes ####
    if (n_dropped > 0) {
        # drop = FALSE keeps the matrix shape when a single gene remains.
        exp <- exp[keep, , drop = FALSE]
        # Report dropped / original (previously the difference was
        # inverted and divided by the post-filter row count).
        messager(n_dropped, "/", orig.dims[1],
            "non-expressed genes dropped",
            v = verbose
        )
    }
    return(exp)
}
#' cell_list_dist
#'
#' Sum the specificity of each cell type across a set of hit genes.
#' The specificity matrix is read from
#' \code{sct_data[[annotLevel]]$specificity}; hits that are not among its
#' row names are ignored.
#'
#' @param hits List of gene symbols containing the target gene list.
#' @inheritParams bootstrap_enrichment_test
#' @returns The summed specificity of each celltype
#' across a set of \code{hits}.
#'
#' @keywords internal
cell_list_dist <- function(hits,
                           sct_data,
                           annotLevel) {
    spec <- sct_data[[annotLevel]]$specificity
    all_genes <- rownames(spec)
    # Keep the matrix's own gene order; only genes present in hits remain.
    hit_genes <- all_genes[all_genes %in% hits]
    # drop = FALSE so a single hit still yields a one-row matrix.
    hit_spec <- spec[hit_genes, , drop = FALSE]
    # Column-wise total: one value per cell type.
    return(apply(hit_spec, 2, sum))
}
#'
#' @inherit filter_nonorthologs
#' @export
filter_genes_without_1to1_homolog <- function(filenames,
                                              input_species = "mouse",
                                              convert_nonhuman_genes = TRUE,
                                              annot_levels = NULL,
                                              suffix = "_orthologs",
                                              verbose = TRUE) {
    .Deprecated("filter_nonorthologs")
    # Forward the caller's arguments. Previously the defaults were
    # hard-coded in this call, so any non-default value passed to the
    # deprecated wrapper was silently ignored.
    newFilenames <- filter_nonorthologs(filenames,
        input_species = input_species,
        convert_nonhuman_genes = convert_nonhuman_genes,
        annot_levels = annot_levels,
        suffix = suffix,
        verbose = verbose
    )
    return(newFilenames)
}
test_that("bin_columns_into_quantiles works", {
    if (!is_32bit()) {
        # Fixed seed so the sample() call in Test 2 is reproducible.
        set.seed(1234)
        #### Test 1: CTD ####
        # Bin every column of the level-1 specificity matrix into 40 bins
        # and expect every bin index 0..40 to occur somewhere.
        ctd <- ewceData::ctd()
        ctd[[1]]$specificity_quantiles <- apply(ctd[[1]]$specificity, 2,
            FUN = bin_columns_into_quantiles,
            numberOfBins = 40
        )
        all_values <- unlist(as.list(ctd[[1]]$specificity_quantiles))
        testthat::expect_equal(sort(unique(all_values)), seq(0, 40))

        #### Test 2: When <2 unique non-zero values ####
        # Column 1 is overwritten with 0/1 only, i.e. a single unique
        # non-zero value; the test expects those entries to land in bin 20.
        mat <- ctd[[1]]$specificity_quantiles
        mat[, 1] <- sample(c(0, 1), size = nrow(mat), replace = TRUE)
        # (A stray bare `apply` statement that did nothing was removed here.)
        mat2 <- apply(mat, 2,
            FUN = bin_columns_into_quantiles,
            numberOfBins = 40
        )
        testthat::expect_equal(sort(unique(mat2[, 1])), c(0, 20))
    }
})
#' Extract significant results
#'
#' Extract significant results from output of
#' \link[EWCE]{bootstrap_enrichment_test}.
#'
#' @param full_results Output of \link[EWCE]{bootstrap_enrichment_test}.
#' @param q_threshold Maximum multiple-testing-corrected p-value to include.
#' @inheritParams bootstrap_enrichment_test
#'
#' @return Filtered enrichment results table: only rows with
#' \code{q <= q_threshold} are kept (rows with a missing \code{q} are
#' dropped).
#'
#' @keywords internal
#' @importFrom stats p.adjust
get_sig_results <- function(full_results,
                            mtc_method = "BH",
                            q_threshold = .05,
                            verbose = TRUE) {
    res <- full_results$results
    # Compute corrected p-values only when the table has no "q" column.
    # NOTE(review): when "q" is already present, mtc_method is used for
    # the message only and may not be the method that produced "q".
    if (!"q" %in% colnames(res)) {
        res$q <- stats::p.adjust(res$p, method = mtc_method)
    }
    # Apply the threshold. Previously the unfiltered table was returned
    # while being reported as the significant results.
    keep <- !is.na(res$q) & res$q <= q_threshold
    res <- res[keep, , drop = FALSE]
    messager(nrow(res), "significant enrichment results @", mtc_method, "<=",
        q_threshold,
        v = verbose
    )
    return(res)
}
quadrant <- matrix(0, 11 | nrow = dim(data_byGene2)[1], 12 | ncol = 2 13 | ) 14 | colnames(quadrant) <- c("TL", "GC") 15 | for (i in seq_len(dim(data_byGene2)[1])) { 16 | quadrant[i, 1] <- which(data_byGene2[i, 2] < tl_quants)[1] 17 | quadrant[i, 2] <- which(data_byGene2[i, 3] < gc_quants)[1] 18 | } 19 | data_byGene2$uniq_quad <- sprintf("%s_%s", quadrant[, 1], quadrant[, 2]) 20 | data_byGene2 <- data_byGene2[ 21 | !data_byGene2$uniq_quad %in% c("2_NA", "NA_2", "3_NA"), 22 | ] 23 | return(data_byGene2) 24 | } 25 | -------------------------------------------------------------------------------- /man/filter_variance_quantiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter_variance_quantiles.r 3 | \name{filter_variance_quantiles} 4 | \alias{filter_variance_quantiles} 5 | \title{Filter variance quantiles} 6 | \usage{ 7 | filter_variance_quantiles( 8 | exp, 9 | log10_norm = TRUE, 10 | n_quantiles = 10, 11 | min_variance_quantile = as.integer(n_quantiles/2), 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{exp}{Gene expression matrix.} 17 | 18 | \item{log10_norm}{Log10-normalise \code{exp} before computing variance.} 19 | 20 | \item{n_quantiles}{Number of quantile bins to use. 21 | Defaults to deciles (\code{n_quantiles=10}).} 22 | 23 | \item{min_variance_quantile}{The minimum variance quantile 24 | to keep values from.} 25 | 26 | \item{verbose}{Print messages.} 27 | } 28 | \value{ 29 | Filtered \code{exp}. 30 | } 31 | \description{ 32 | Remove rows in \code{exp} that do not vary substantially across rows. 
33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/get_celltype_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_celltype_table.r 3 | \name{get_celltype_table} 4 | \alias{get_celltype_table} 5 | \title{get_celltype_table} 6 | \usage{ 7 | get_celltype_table(annot) 8 | } 9 | \arguments{ 10 | \item{annot}{An annotation dataframe, which columns named 'level1class', 11 | 'level2class' and 'dataset_name'} 12 | } 13 | \value{ 14 | A dataframe with columns 'name', 'level', 'freq' and 'dataset_name' 15 | } 16 | \description{ 17 | \code{ get_celltype_table} Generates a table that can be used for 18 | supplemenary tables of publications. 19 | The table lists how many cells are associated with each cell type, the level 20 | of annotation, and the dataset from which it was generated. 21 | } 22 | \examples{ 23 | # See PrepLDSC.Rmd for origin of merged_ALLCELLS$annot 24 | cortex_mrna <- ewceData::cortex_mrna() 25 | cortex_mrna$annot$dataset_name <- "cortex_mrna" 26 | celltype_table <- EWCE::get_celltype_table(cortex_mrna$annot) 27 | } 28 | -------------------------------------------------------------------------------- /R/theme_graph.R: -------------------------------------------------------------------------------- 1 | #' Get graph theme 2 | #' 3 | #' Get graph theme for plots created by 4 | #' \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 5 | #' @return \code{ggplot2} graph theme. 
#' @keywords internal
theme_graph <- function(){
    requireNamespace("ggplot2")
    #### Base theme ####
    base_theme <- ggplot2::theme_bw(
        base_size = 12,
        base_family = "Helvetica"
    )
    #### Element overrides ####
    grid_line <- ggplot2::element_line(linewidth = .25, color = "grey")
    axis_line <- ggplot2::element_line(linewidth = .35, color = "black")
    overrides <- ggplot2::theme(
        panel.grid.major = grid_line,
        axis.line = axis_line,
        text = ggplot2::element_text(size = 14),
        axis.title.x = ggplot2::element_text(vjust = -0.35),
        axis.title.y = ggplot2::element_text(vjust = 0.6),
        # The legend title is deliberately left at the theme default.
        strip.background = ggplot2::element_rect(fill="transparent")
    )
    return(base_theme + overrides)
}
#'
#' @keywords internal
#' @importFrom methods is
to_dataframe <- function(X,
                         verbose = TRUE) {
    # Already a data.frame (or subclass): return unchanged.
    if (methods::is(X, "data.frame")) {
        return(X)
    }
    # Only matrix-like inputs can be converted.
    if (!is_matrix(X)) {
        stop("Format ", methods::is(X)[1], " is not supported.")
    }
    messager("Converting to data.frame", v = verbose)
    # Remember the dimnames before any coercion.
    nn <- colnames(X)
    rr <- rownames(X)
    # DelayedArray / sparse inputs are made dense first.
    if (is_delayed_array(X) || is_sparse_matrix(X)) {
        X <- as.matrix(X)
    }
    X <- data.frame(X,
        stringsAsFactors = FALSE,
        check.rows = FALSE,
        check.names = FALSE
    )
    # Restore dimnames only when the input had them: assigning NULL
    # colnames would strip the data.frame of its column names.
    if (!is.null(nn)) {
        colnames(X) <- nn
    }
    if (!is.null(rr)) {
        rownames(X) <- rr
    }
    return(X)
}
#'
#' @keywords internal
#' @importFrom methods as
to_sparse_matrix <- function(exp,
                             as_sparse = TRUE,
                             verbose = TRUE) {
    if (as_sparse) {
        if (!is_sparse_matrix(exp)) {
            # Message only when a conversion actually happens.
            messager("Converting to sparse matrix.", v = verbose)
            if (!is_matrix(exp)) {
                exp <- as.matrix(exp)
            }
            exp <- methods::as(exp, "sparseMatrix")
        }
    } else {
        #### Convert to dense matrix ####
        # The former second positional argument ("matrix") was not a class
        # name to as.matrix(); for data.frame inputs it was bound to
        # `rownames.force`. It is dropped here.
        exp <- as.matrix(exp)
        #### Convert characters to numbers ####
        exp <- check_numeric(exp = exp)
    }
    return(exp)
}
28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/fix_celltype_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fix_celltype_names.R 3 | \name{fix_celltype_names} 4 | \alias{fix_celltype_names} 5 | \title{Fix celltype names} 6 | \usage{ 7 | fix_celltype_names( 8 | celltypes, 9 | replace_chars = "[-]|[.]|[ ]|[//]|[\\\\/]", 10 | make_unique = TRUE 11 | ) 12 | } 13 | \arguments{ 14 | \item{celltypes}{Character vector of celltype names.} 15 | 16 | \item{replace_chars}{Regex string of characters to replace 17 | with "_" when renaming columns.} 18 | 19 | \item{make_unique}{Make all entries unique.} 20 | } 21 | \value{ 22 | Fixed celltype names. 23 | } 24 | \description{ 25 | Make sure celltypes don't contain characters that could interfere with 26 | downstream analyses. For example, the R package 27 | \href{https://github.com/neurogenomics/MAGMA_Celltyping}{MAGMA.Celltyping} 28 | cannot have spaces in celltype names because spaces are used as a delimiter 29 | in later steps. 
30 | } 31 | \examples{ 32 | ct <- c("microglia", "astryocytes", "Pyramidal SS") 33 | ct_fixed <- fix_celltype_names(celltypes = ct) 34 | } 35 | -------------------------------------------------------------------------------- /man/merge_sce_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/merge_sce_list.R 3 | \name{merge_sce_list} 4 | \alias{merge_sce_list} 5 | \title{Merge of list of SingleCellExperiment objects} 6 | \usage{ 7 | merge_sce_list( 8 | SCE_lists = NULL, 9 | parent_folder = NULL, 10 | pattern = ".rds$", 11 | merge_levels = seq(1, 5), 12 | gene_union = TRUE, 13 | as_sparse = TRUE, 14 | as_DelayedArray = TRUE, 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{SCE_lists}{A list of \link[SingleCellExperiment]{SingleCellExperiment} 20 | objects.} 21 | 22 | \item{parent_folder}{Can supply the path to a folder 23 | instead of \code{SCE_lists}. 24 | Any \link[SingleCellExperiment]{SingleCellExperiment} 25 | objects matching \code{pattern} will be imported.} 26 | 27 | \item{merge_levels}{CellTypeDataset levels to merge.} 28 | } 29 | \value{ 30 | SingleCellExperiment 31 | } 32 | \description{ 33 | Merge of list of CellTypeDatasets stored as 34 | \link[SingleCellExperiment]{SingleCellExperiment} objects 35 | into one \link[SingleCellExperiment]{SingleCellExperiment} object. 
36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/check_generate_controlled_bootstrap_geneset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 3 | % R/check_generate_controlled_bootstrapped_geneset.R 4 | \name{check_generate_controlled_bootstrap_geneset} 5 | \alias{check_generate_controlled_bootstrap_geneset} 6 | \title{generate_controlled_bootstrap_geneset} 7 | \usage{ 8 | check_generate_controlled_bootstrap_geneset( 9 | controlledCT, 10 | annotLevel, 11 | sct_data, 12 | hits 13 | ) 14 | } 15 | \arguments{ 16 | \item{controlledCT}{[Optional] If not NULL, and instead is the name of a 17 | cell type, then the bootstrapping controls for expression within that 18 | cell type.} 19 | 20 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 21 | analyse (\emph{Default: 1}).} 22 | 23 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 24 | 25 | \item{hits}{List of gene symbols containing the target gene list. 26 | Will automatically be converted to human gene symbols 27 | if \code{geneSizeControl=TRUE}.} 28 | } 29 | \value{ 30 | Null output. 31 | } 32 | \description{ 33 | Check input arguments to \link[EWCE]{generate_controlled_bootstrap_geneset}. 
34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/get_exp_data_for_bootstrapped_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_exp_data_for_bootstrapped_genes.R 3 | \name{get_exp_data_for_bootstrapped_genes} 4 | \alias{get_exp_data_for_bootstrapped_genes} 5 | \title{get_exp_data_for_bootstrapped_genes} 6 | \usage{ 7 | get_exp_data_for_bootstrapped_genes( 8 | results, 9 | signif_res, 10 | sct_data, 11 | hits, 12 | combinedGenes, 13 | annotLevel, 14 | nReps = 100, 15 | as_sparse = TRUE, 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{signif_res}{signif_res (#fix).} 21 | 22 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 23 | 24 | \item{hits}{Gene hits.} 25 | 26 | \item{combinedGenes}{Combined list of genes from \code{sct_data}, 27 | \code{hits}, and background \code{bg}.} 28 | 29 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 30 | analyse (\emph{Default: 1}).} 31 | 32 | \item{verbose}{Print messages.} 33 | 34 | \item{full_results}{full_results (#fix).} 35 | } 36 | \value{ 37 | exp_mats 38 | } 39 | \description{ 40 | Support function for 41 | \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 
test_that("ewce_plot works", {

    # Pre-computed bootstrap results and the example CTD are used as input.
    full_results <- EWCE::example_bootstrap_results()
    ctd <- ewceData::ctd()
    #### ewce_plot: CTD as shipped ####
    ewce_plot_res <- ewce_plot(
        total_res = full_results$results,
        ctd = ctd,
        make_dendro = TRUE
    )
    # Both returned plots must be ggplot ("gg") objects.
    testthat::expect_true(methods::is(ewce_plot_res$plain, "gg"))
    testthat::expect_true(methods::is(ewce_plot_res$withDendro, "gg"))
    # Remove the `plotting` entry from level 1 of the CTD and rerun;
    # the cell type order should be unchanged.
    ctd_basic <- ctd
    ctd_basic[[1]]$plotting <- NULL
    #### ewce_plot: CTD without the `plotting` entry ####
    ewce_plot_res_basic <- ewce_plot(
        total_res = full_results$results,
        ctd = ctd_basic,
        make_dendro = TRUE
    )
    # The CellType column of all 4 plots (plain / withDendro, with and
    # without `plotting`) must be identical to that of the first
    # withDendro plot.
    testthat::expect_true(
        all(sapply(list(ewce_plot_res_basic$withDendro$data$CellType,
                        ewce_plot_res$plain$data$CellType,
                        ewce_plot_res_basic$plain$data$CellType),
                   FUN = identical, ewce_plot_res$withDendro$data$CellType))
    )
})
#' Calculate specificity for one CTD level
#'
#' Calculate specificity for one CellTypeDataset (CTD) level.
#' Each column of the selected matrix is first scaled to sum to 1;
#' each row is then divided by its row total, and the result is stored
#' as \code{specificity}.
#'
#' @param ctd_oneLevel One level from a CTD.
#' @param matrix_name Name of the matrix to extract.
#' @inheritParams to_sparse_matrix
#' @return One CTD level, with the \code{specificity} element
#' added or replaced.
#'
#' @keywords internal
#' @importFrom Matrix t colSums
calculate_specificity_for_level <- function(ctd_oneLevel,
                                            matrix_name = "mean_exp",
                                            as_sparse = TRUE,
                                            verbose = TRUE) {
    # Fail early and clearly. Previously this only printed a message and
    # the function then failed on the NULL matrix a few lines below.
    if (!matrix_name %in% names(ctd_oneLevel)) {
        stop(matrix_name, " not found in ctd_oneLevel.")
    }
    expMatrix <- ctd_oneLevel[[matrix_name]]
    # Scale every column to sum to 1. colSums is called from Matrix
    # explicitly rather than relying on an import declared in another file.
    normalised_meanExp <- Matrix::t(Matrix::t(expMatrix) *
        (1 / Matrix::colSums(expMatrix)))
    # Divide each row by its total; 1e-12 avoids division by zero
    # for rows that are zero in every column.
    spec <- normalised_meanExp /
        (apply(normalised_meanExp, 1, sum, na.rm = TRUE) + 1e-12)
    ctd_oneLevel$specificity <- to_sparse_matrix(
        exp = spec,
        as_sparse = as_sparse,
        verbose = verbose
    )
    return(ctd_oneLevel)
}
#'
#' @param ctd CellTypeDataset. If set to \code{NULL} (default),
#' will simply return all possible matrix names.
#' @param matrices Matrix names to search for.
#' @param verbose Print messages.
#' @keywords internal
#' @returns List of matrix names.
get_ctd_matrix_names <- function(ctd=NULL,
                                 matrices=c("mean_exp",
                                            "median_exp",
                                            "specificity",
                                            "median_specificity",
                                            "specificity_quantiles"),
                                 verbose=TRUE){
    #### No CTD supplied: return the full candidate set ####
    if(is.null(ctd)){
        messager("Returning all possible matrix names.",v=verbose)
        return(matrices)
    }
    #### Collect the candidate names present in each level ####
    names_in_level <- function(ctd_lvl){
        lvl_names <- names(ctd_lvl)
        lvl_names[lvl_names %in% matrices]
    }
    # Flatten across levels and de-duplicate, keeping first-seen order.
    nms <- unique(unlist(lapply(ctd, names_in_level)))
    messager("Found",length(nms),"matrix types across",
             length(ctd),"CTD levels.",v=verbose)
    return(nms)
}
#' Drop cells with zero gene counts
#'
#' Remove columns (cells) in which (gene) counts sum to zero.
#'
#' @param exp Gene expression matrix.
#' @param annotLevels Cell-wise annotations to be subset
#' if some cells are dropped.
#' @param verbose Print messages.
#'
#' @return List of filtered \code{exp} and \code{annotLevels}.
#'
#' @keywords internal
#' @importFrom Matrix colSums
drop_nonexpressed_cells <- function(exp,
                                    annotLevels,
                                    verbose = TRUE) {
    messager("Checking for cells with no expressed genes.", v = verbose)
    orig.dims <- dim(exp)
    col.sums <- Matrix::colSums(exp) # MUST be from Matrix
    #### Identify cells to keep ####
    # Only columns whose counts provably sum to <= 0 are dropped.
    # Columns with an NA sum are kept, so NA never reaches the index.
    keep <- unname(is.na(col.sums) | col.sums > 0)
    n_dropped <- sum(!keep)
    #### Drop cells ####
    if (n_dropped > 0) {
        # drop = FALSE keeps the matrix shape when a single cell remains.
        exp <- exp[, keep, drop = FALSE]
        ### Make sure annotLevels are also subset appropriately.
        annotLevels <- lapply(annotLevels, function(x) x[keep])
        # Cells are columns: report dropped / original column count
        # (previously this used row counts and an inverted difference).
        messager(n_dropped, "/", orig.dims[2],
            "cells dropped",
            v = verbose
        )
    }
    return(list(
        exp = exp,
        annotLevels = annotLevels
    ))
}
of target cell type hits} 6 | \usage{ 7 | check_percent_hits( 8 | full_results, 9 | target_celltype, 10 | mtc_method = "bonferroni", 11 | q_threshold = 0.05, 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{full_results}{\code{bootstrap_enrichment_test} results.} 17 | 18 | \item{target_celltype}{Substring to search for when matching 19 | cell types (case-insensitive).} 20 | 21 | \item{mtc_method}{Multiple-testing correction method.} 22 | 23 | \item{q_threshold}{Corrected significance threshold.} 24 | 25 | \item{verbose}{Print messages.} 26 | } 27 | \value{ 28 | Report list. 29 | } 30 | \description{ 31 | After you run \link[EWCE]{bootstrap_enrichment_test}, 32 | check what percentage of significantly enriched 33 | cell types match an expected cell type. 34 | } 35 | \examples{ 36 | ## Bootstrap significance test, 37 | ## no control for transcript length or GC content 38 | ## Use pre-computed results to speed up example 39 | full_results <- EWCE::example_bootstrap_results() 40 | 41 | report <- EWCE::check_percent_hits( 42 | full_results = full_results, 43 | target_celltype = "microglia" 44 | ) 45 | } 46 | -------------------------------------------------------------------------------- /R/create_list_network.R: -------------------------------------------------------------------------------- 1 | #' \code{create_list_network} 2 | #' 3 | #' Support function for \code{prepare_genesize_control_network}. 
#' \code{create_list_network}
#'
#' Support function for \code{prepare_genesize_control_network}.
#' For every hit gene, draws \code{reps} genes (with replacement) from the
#' set of genes that share the hit gene's \code{uniq_quad} value.
#'
#' @param data_byGene2 Table with (at least) the columns \code{HGNC.symbol}
#'  and \code{uniq_quad}. Rows where either value is \code{NA} are ignored.
#' @param hits_NEW Vector of hit gene symbols. Every entry must occur in
#'  \code{data_byGene2$HGNC.symbol}.
#' @param reps Number of genes to draw per hit gene.
#' @param no_cores Passed to \link[parallel]{mclapply} as \code{mc.cores}.
#'
#' @return List network: a character matrix with \code{reps} rows and one
#'  column per hit gene (no column names).
#'
#' @keywords internal
#' @importFrom parallel mclapply
#' @importFrom data.table data.table
create_list_network <- function(data_byGene2,
                                hits_NEW,
                                reps = 10000,
                                no_cores = 1) {
    #### Work on plain character vectors ####
    # Coercing to character avoids indexing the quadrant list by a factor's
    # integer codes, which can silently select the wrong quadrant.
    symbols <- as.character(data_byGene2$HGNC.symbol)
    quads <- as.character(data_byGene2$uniq_quad)
    # Rows without a symbol or quadrant cannot take part in the sampling.
    annotated <- !is.na(symbols) & !is.na(quads)
    symbols <- symbols[annotated]
    quads <- quads[annotated]

    #### Nothing to sample for an empty hit list ####
    if (length(hits_NEW) == 0) {
        return(matrix(character(0), nrow = reps, ncol = 0))
    }

    #### Get all genes in each quadrant ####
    quad_genes <- lapply(split(symbols, quads), unique)

    #### Quadrant of each hit gene (first matching row) ####
    hit_quads <- unname(quads[match(as.character(hits_NEW), symbols)])
    if (anyNA(hit_quads)) {
        stop(
            "The following hit genes are not present in ",
            "data_byGene2$HGNC.symbol (or have no uniq_quad): ",
            paste(unique(hits_NEW[is.na(hit_quads)]), collapse = ", ")
        )
    }

    #### Draw `reps` quadrant-matched genes per hit gene ####
    list_network <- parallel::mclapply(hit_quads, function(quad) {
        sample(
            x = quad_genes[[quad]],
            size = reps,
            replace = TRUE
        )
    }, mc.cores = no_cores)
    list_network <- do.call(what = "cbind", args = list_network)
    colnames(list_network) <- NULL
    return(list_network)
}
#' Fix celltype names
#'
#' Make sure celltypes don't contain characters that could interfere with
#' downstream analyses. For example, the R package
#' \href{https://github.com/neurogenomics/MAGMA_Celltyping}{MAGMA.Celltyping}
#' cannot have spaces in celltype names because spaces are used as a delimiter
#' in later steps.
#'
#' @param celltypes Character vector of celltype names.
#' @param replace_chars Regex string of characters to replace
#'  with "_" when renaming columns.
#' @param make_unique Make all entries unique
#'  (via \link[base]{make.unique}).
#' @returns Fixed celltype names, or \code{NULL} when \code{celltypes}
#'  is \code{NULL}.
#'
#' @export
#' @examples
#' ct <- c("microglia", "astrocytes", "Pyramidal SS")
#' ct_fixed <- fix_celltype_names(celltypes = ct)
fix_celltype_names <- function(celltypes,
                               replace_chars = "[-]|[.]|[ ]|[//]|[\\/]",
                               make_unique = TRUE) {
    if (is.null(celltypes)) {
        return(NULL)
    }
    #### Swap every offending character for an underscore ####
    substituted <- gsub(
        pattern = replace_chars,
        replacement = "_",
        x = celltypes
    )
    #### Collapse runs of underscores into one ####
    collapsed <- gsub(pattern = "[_]+", replacement = "_", x = substituted)
    #### Optionally de-duplicate ####
    if (!isTRUE(make_unique)) {
        return(collapsed)
    }
    make.unique(collapsed)
}
27 | This is used for computing an appropriate background.} 28 | 29 | \item{sctSpecies_origin_default}{Default value for \code{sctSpecies_origin}.} 30 | 31 | \item{verbose}{Print messages.} 32 | } 33 | \value{ 34 | List of corrected species names. 35 | } 36 | \description{ 37 | If species arguments are \code{NULL}, set default species. 38 | } 39 | \keyword{internal} 40 | -------------------------------------------------------------------------------- /R/fix_celltype_names_full_results.R: -------------------------------------------------------------------------------- 1 | #' Fix celltype name in full results 2 | #' 3 | #' Aligns celltype names in full results generated by 4 | #' \link[EWCE]{bootstrap_enrichment_test} with the standardised 5 | #' CellTypeDataset (CTD) produced by \link[EWCE]{standardise_ctd}. 6 | #' 7 | #' @param full_results Cell-type enrichment results generated by 8 | #' \link[EWCE]{bootstrap_enrichment_test}. 9 | #' @param verbose Print messages. 10 | #' 11 | #' @return Fixed full results. 
#' Fix celltype name in full results
#'
#' Aligns celltype names in full results generated by
#' \link[EWCE]{bootstrap_enrichment_test} with the standardised
#' CellTypeDataset (CTD) produced by \link[EWCE]{standardise_ctd}.
#' The row names and \code{CellType} column of \code{$results}, the names of
#' \code{$hit.cells} and the column names of \code{$bootstrap_data} are each
#' passed through \code{fix_celltype_names}.
#'
#' @param full_results Cell-type enrichment results generated by
#'  \link[EWCE]{bootstrap_enrichment_test}.
#' @param verbose Print messages.
#'
#' @return Fixed full results. \code{NULL} input is returned as \code{NULL}
#'  and all-\code{NA} input is returned as \code{NA}.
#'
#' @keywords internal
fix_celltype_names_full_results <- function(full_results,
                                            verbose = TRUE) {
    # NULL must be tested first: all(is.na(NULL)) is TRUE, so testing for NA
    # first would turn NULL input into NA and make this branch unreachable.
    if (is.null(full_results)) return(NULL)
    if (all(is.na(full_results))) return(NA)

    messager("Aligning celltype names with standardise_ctd format.",
        v = verbose
    )
    rownames(full_results$results) <- fix_celltype_names(
        rownames(full_results$results)
    )
    full_results$results$CellType <- fix_celltype_names(
        full_results$results$CellType
    )
    names(full_results$hit.cells) <- fix_celltype_names(
        names(full_results$hit.cells)
    )
    colnames(full_results$bootstrap_data) <- fix_celltype_names(
        colnames(full_results$bootstrap_data)
    )
    return(full_results)
}
31 | } 32 | \description{ 33 | \code{bin_specificity_into_quantiles} is an internal function used to 34 | add '$specificity_quantiles' to a ctd 35 | } 36 | \examples{ 37 | ctd <- ewceData::ctd() 38 | ctd <- lapply(ctd, EWCE::bin_specificity_into_quantiles, numberOfBins = 40) 39 | print(ctd[[1]]$specificity_quantiles[1:3, ]) 40 | } 41 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 1. Bug description 11 | 12 | (A clear and concise description of what the bug is.) 13 | 14 | ### Console output 15 | 16 | ``` 17 | # Paste console output here (e.g. from R/python/command line) 18 | 19 | ``` 20 | 21 | ### Expected behaviour 22 | 23 | (A clear and concise description of what you expected to happen.) 24 | 25 | 26 | ## 2. Reproducible example 27 | 28 | ### Code 29 | 30 | (Please add the steps to reproduce the bug here. See [here](https://www.r-bloggers.com/2020/10/how-to-make-a-reprex/) for an intro to making a reproducible example (i.e. reprex) and why they're important! __This will help us to help you much faster.__) 31 | 32 | ```R 33 | # Paste example here 34 | 35 | ``` 36 | 37 | ### Data 38 | 39 | (If possible, upload a small sample of your data so that we can reproduce the bug on our end. If that's not possible, please at least include a screenshot of your data and other relevant details.) 40 | 41 | 42 | ## 3. Session info 43 | 44 | (Add output of the R function `utils::sessionInfo()` below. This helps us assess version/OS conflicts which could be causing bugs.) 45 | 46 |
47 | 48 | ``` 49 | # Paste utils::sessionInfo() output 50 | 51 | ``` 52 |
#' Report cell type enrichment results
#'
#' Report cell type enrichment results generated by
#' \link[EWCE]{bootstrap_enrichment_test}.
#' Rows are filtered on the \code{q} column when it is present and on the
#' \code{p} column otherwise; the number of remaining rows and (if any) the
#' rows themselves are reported through \code{messager}.
#'
#' @param results Results table with a \code{q} and/or \code{p} column.
#' @param sig_thresh Rows with a value strictly below this threshold
#'  are reported.
#' @param verbose Print messages.
#'
#' @returns No meaningful return value; called for its messages.
#'
#' @keywords internal
#' @importFrom utils capture.output
report_results <- function(results,
                           sig_thresh = 0.05,
                           verbose = TRUE) {
    # Local placeholders for the column names evaluated inside subset()
    # (presumably to keep R CMD check from flagging undefined globals).
    p <- q <- NULL

    #### Identify significant results ####
    use_q <- "q" %in% colnames(results)
    if (use_q) {
        sig_results <- subset(results, q < sig_thresh)
        threshold_label <- paste0("q<", sig_thresh, " :")
    } else {
        sig_results <- subset(results, p < sig_thresh)
        threshold_label <- paste0("p<", sig_thresh, " :")
    }
    messager(nrow(sig_results),
        "significant cell type enrichment results @",
        threshold_label, "\n",
        v = verbose
    )
    #### Print significant results table ####
    rownames(sig_results) <- NULL
    if (nrow(sig_results) > 0) {
        table_text <- paste0(
            utils::capture.output(sig_results),
            collapse = "\n"
        )
        messager(table_text, v = verbose)
    }
}
#' check_full_results
#'
#' Check full results generated by \link[EWCE]{bootstrap_enrichment_test}.
#' Stops with an error when \code{full_results} is not a list of length
#' 3 or 4, or when any cell type in \code{full_results$results$CellType} is
#' missing from the column names of the specificity matrix at the chosen
#' annotation level of \code{sct_data}.
#'
#' @inheritParams bootstrap_enrichment_test
#' @inheritParams generate_bootstrap_plots
#' @param annotLevel Level of \code{sct_data} whose \code{specificity}
#'  column names are compared against the cell types in
#'  \code{full_results} (\emph{Default: 1}).
#' @return Null output.
#'
#' @keywords internal
check_full_results <- function(full_results,
                               sct_data,
                               annotLevel = 1) {
    #### Nothing to check ####
    if (is.null(full_results)) {
        return(invisible(NULL))
    }
    #### Check list length ####
    n_items <- length(full_results)
    if (!n_items %in% c(3, 4)) {
        stop(
            "ERROR: full_results must be a list of length 3 or 4 ",
            "(not ", n_items, ") ",
            "to be considered a valid output from bootstrap_enrichment_test."
        )
    }
    #### Check cell types against the CTD ####
    result_celltypes <- as.character(unique(full_results$results$CellType))
    ctd_celltypes <- colnames(sct_data[[annotLevel]]$specificity)
    missing_celltypes <- result_celltypes[
        !result_celltypes %in% ctd_celltypes
    ]
    # No overlap at all (this also covers results without any cell types).
    if (length(missing_celltypes) == length(result_celltypes)) {
        stop(
            "ERROR: No cell types in full_results are found in",
            " sct_data. Perhaps the wrong annotLevel was used?"
        )
    }
    # Partial overlap is still an error, but say which cell types are missing.
    if (length(missing_celltypes) > 0) {
        stop(
            "ERROR: ", length(missing_celltypes),
            " cell type(s) in full_results are not found in sct_data: ",
            paste(missing_celltypes, collapse = ", "),
            ". Perhaps the wrong annotLevel was used?"
        )
    }
    invisible(NULL)
}
/man/compute_gene_scores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compute_gene_scores.R 3 | \name{compute_gene_scores} 4 | \alias{compute_gene_scores} 5 | \title{Compute gene counts} 6 | \usage{ 7 | compute_gene_scores( 8 | sct_data, 9 | annotLevel, 10 | bootstrap_list = NULL, 11 | hits, 12 | combinedGenes, 13 | reps = NULL, 14 | exp_mats = NULL, 15 | return_hit_exp = FALSE, 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 21 | 22 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 23 | analyse (\emph{Default: 1}).} 24 | 25 | \item{bootstrap_list}{The output of \code{get_summed_proportions_iterate}.} 26 | 27 | \item{hits}{list of gene names. The target gene set.} 28 | 29 | \item{reps}{Number of random gene lists to generate (\emph{Default: 100}, 30 | but should be >=10,000 for publication-quality results).} 31 | 32 | \item{return_hit_exp}{Return the expression of each hit gene.} 33 | 34 | \item{verbose}{Print messages.} 35 | } 36 | \value{ 37 | \link[data.table]{data.table} 38 | } 39 | \description{ 40 | Aggregate gene-level scores across all bootstrap iterations. 41 | \itemize{ 42 | \item{boot: }{Mean specificity of all genes within a given cell type.} 43 | \item{hit: }{Mean specificity of a hit gene within a given cell type.} 44 | } 45 | } 46 | \keyword{internal} 47 | -------------------------------------------------------------------------------- /man/run_limma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_limma.r 3 | \name{run_limma} 4 | \alias{run_limma} 5 | \title{Run DGE: \pkg{limma}} 6 | \usage{ 7 | run_limma(exp, level2annot, mtc_method = "BH", verbose = TRUE, ...) 
8 | } 9 | \arguments{ 10 | \item{exp}{Expression matrix with gene names as rownames.} 11 | 12 | \item{level2annot}{Array of cell types, with each sequentially corresponding 13 | to a column in the expression matrix.} 14 | 15 | \item{mtc_method}{Multiple-testing correction method used by DGE step. 16 | See \link[stats]{p.adjust} for more details.} 17 | 18 | \item{verbose}{Print messages.} 19 | 20 | 21 | \item{...}{Additional arguments to be passed to 22 | \link[gprofiler2]{gorth} or \link[homologene]{homologene}.\cr\cr 23 | \emph{NOTE}: To return only the most "popular" 24 | interspecies ortholog mappings, 25 | supply \code{mthreshold=1} here AND set \code{method="gprofiler"} above. 26 | This procedure tends to yield a greater number of returned genes but at 27 | the cost of many of them not being true biological 1:1 orthologs.\cr\cr 28 | For more details, please see 29 | \href{https://cran.r-project.org/web/packages/gprofiler2/vignettes/gprofiler2.html}{ 30 | here}.} 31 | } 32 | \value{ 33 | \code{limma} results. 34 | } 35 | \description{ 36 | Run Differential Gene Expression with \pkg{limma}. 37 | } 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /R/filter_ctd_genes.R: -------------------------------------------------------------------------------- 1 | #' Filter genes in a CellTypeDataset 2 | #' 3 | #' Removes rows from each matrix within a CellTypeDataset (CTD) that are not 4 | #' within \code{gene_subset}. 5 | #' 6 | #' @param ctd CellTypeDataset. 7 | #' @param gene_subset Genes to subset to. 8 | #' 9 | #' @returns Filtered CellTypeDataset. 
#' Filter genes in a CellTypeDataset
#'
#' Removes rows from each matrix within a CellTypeDataset (CTD) that are not
#' within \code{gene_subset}. Elements of a level that are not reported as
#' matrices by \code{get_ctd_matrix_names} are copied over unchanged.
#'
#' @param ctd CellTypeDataset.
#' @param gene_subset Genes to subset to.
#'
#' @returns Filtered CellTypeDataset. Matrices keep their two dimensions
#'  even when only a single gene (or a single column) remains.
#'
#' @export
#' @examples
#' ctd <- ewceData::ctd()
#' ctd <- standardise_ctd(ctd, input_species="mouse")
#' gene_subset <- rownames(ctd[[1]]$mean_exp)[1:100]
#' ctd_subset <- EWCE::filter_ctd_genes(ctd = ctd, gene_subset = gene_subset)
filter_ctd_genes <- function(ctd,
                             gene_subset) {
    message("Filtering CTD to ",
            formatC(length(gene_subset),big.mark = ","), " genes.")

    new_ctd <- lapply(get_ctd_levels(ctd),
                      function(lvl) {
        message("level: ", lvl)
        mat_names <- get_ctd_matrix_names(ctd[lvl])
        ctd_lvl <- ctd[[lvl]]
        other_names <- names(ctd_lvl)[!names(ctd_lvl) %in% mat_names]
        new_ctd_lvl <- lapply(mat_names, function(mat_nm) {
            message(" - ", mat_nm)
            mat <- ctd_lvl[[mat_nm]]
            # drop = FALSE: without it a single matching gene (or a
            # single-column matrix) would be collapsed into a plain vector.
            mat[rownames(mat) %in% gene_subset, , drop = FALSE]
        }) |> `names<-`(mat_names)
        #### Carry over the non-matrix elements untouched ####
        for (nm in other_names) {
            new_ctd_lvl[nm] <- ctd_lvl[nm]
        }
        return(new_ctd_lvl)
    })
    return(new_ctd)
}
| 31 | \item{facets}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{rows} 32 | and \code{cols} instead.} 33 | 34 | \item{scales}{Are scales shared across all facets (the default, 35 | \code{"fixed"}), or do they vary across rows (\code{"free_x"}), 36 | columns (\code{"free_y"}), or both rows and columns (\code{"free"})?} 37 | 38 | \item{show_plot}{Print the plot.} 39 | } 40 | \value{ 41 | Null output. 42 | } 43 | \description{ 44 | Plot bootstrap enrichment results. 45 | Support function for \link[EWCE]{generate_bootstrap_plots}. 46 | } 47 | \keyword{internal} 48 | -------------------------------------------------------------------------------- /man/run_mast.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_mast.r 3 | \name{run_mast} 4 | \alias{run_mast} 5 | \title{Run DGE: \pkg{MAST}} 6 | \source{ 7 | \href{https://www.bioconductor.org/packages/release/bioc/vignettes/MAST/inst/doc/MAITAnalysis.html}{MAST tutorial} 8 | } 9 | \usage{ 10 | run_mast(exp, level2annot, test = "LRT", mtc_method = "BH", no_cores = 1, ...) 11 | } 12 | \arguments{ 13 | \item{exp}{Expression matrix with gene names as rownames.} 14 | 15 | \item{level2annot}{Array of cell types, with each sequentially corresponding 16 | a column in the expression matrix.} 17 | 18 | \item{mtc_method}{Multiple-testing correction method used by DGE step. 
19 | See \link[stats]{p.adjust} for more details.} 20 | 21 | \item{no_cores}{Number of cores to parallelise DGE across.} 22 | 23 | \item{...}{Additional arguments to be passed to 24 | \link[gprofiler2]{gorth} or \link[homologene]{homologene}.\cr\cr 25 | \emph{NOTE}: To return only the most "popular" 26 | interspecies ortholog mappings, 27 | supply \code{mthreshold=1} here AND set \code{method="gprofiler"} above. 28 | This procedure tends to yield a greater number of returned genes but at 29 | the cost of many of them not being true biological 1:1 orthologs.\cr\cr 30 | For more details, please see 31 | \href{https://cran.r-project.org/web/packages/gprofiler2/vignettes/gprofiler2.html}{ 32 | here}.} 33 | } 34 | \value{ 35 | \code{MAST} results 36 | } 37 | \description{ 38 | Run Differential Gene Expression with \pkg{MAST}. 39 | } 40 | \keyword{internal} 41 | -------------------------------------------------------------------------------- /man/generate_controlled_bootstrap_geneset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_controlled_bootstrap_geneset.r 3 | \name{generate_controlled_bootstrap_geneset} 4 | \alias{generate_controlled_bootstrap_geneset} 5 | \title{generate_controlled_bootstrap_geneset} 6 | \usage{ 7 | generate_controlled_bootstrap_geneset( 8 | hits, 9 | sct_data, 10 | annotLevel, 11 | reps, 12 | controlledCT = FALSE, 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{hits}{List of gene symbols containing the target gene list. 
18 | Will automatically be converted to human gene symbols 19 | if \code{geneSizeControl=TRUE}.} 20 | 21 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 22 | 23 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 24 | analyse (\emph{Default: 1}).} 25 | 26 | \item{reps}{Number of random gene lists to generate (\emph{Default: 100}, 27 | but should be >=10,000 for publication-quality results).} 28 | 29 | \item{controlledCT}{[Optional] If not NULL, and instead is the name of a 30 | cell type, then the bootstrapping controls for expression within that 31 | cell type.} 32 | 33 | \item{verbose}{Print messages.} 34 | } 35 | \value{ 36 | Matrix of genes 37 | (such that \code{nrows=length(hits)} and \code{ncols=reps}), where each 38 | column is a gene list. 39 | } 40 | \description{ 41 | Used to generate cell type-controlled bootstrapped gene sets. 42 | } 43 | \details{ 44 | See \link[EWCE]{controlled_geneset_enrichment} for examples. 45 | } 46 | \keyword{internal} 47 | -------------------------------------------------------------------------------- /R/sce_merged_apply.R: -------------------------------------------------------------------------------- 1 | #' sce_merged_apply 2 | #' 3 | #' Merge a list of SingleCellExperiments. 4 | #' 5 | #' @return Merged SingleCellExperiment. 
#' sce_merged_apply
#'
#' Convert the assays of every element in a named list of
#' SingleCellExperiments to sparse matrices and/or DelayedArrays.
#' The name of each element is printed as it is processed.
#'
#' @param SCE_merged Named list of SingleCellExperiments. Elements are
#'  looked up by name, so an unnamed list yields an empty result.
#' @param as_sparse Convert every assay to \code{"sparseMatrix"}.
#' @param as_DelayedArray Wrap every assay (after coercion to
#'  \code{"sparseMatrix"}) in a \code{DelayedArray}.
#'
#' @return List of converted objects, named like \code{SCE_merged}.
#'
#' @keywords internal
#' @importFrom methods as
#' @importFrom SummarizedExperiment assayNames assay
sce_merged_apply <- function(SCE_merged,
                             as_sparse = TRUE,
                             as_DelayedArray = FALSE) {
    #### Assay converters ####
    to_sparse <- function(mat) {
        methods::as(mat, "sparseMatrix")
    }
    to_delayed <- function(mat) {
        DelayedArray::DelayedArray(to_sparse(mat))
    }
    #### Apply a converter to every assay of one object ####
    convert_assays <- function(sce, converter) {
        for (assay_nm in SummarizedExperiment::assayNames(sce)) {
            SummarizedExperiment::assay(sce, assay_nm) <-
                converter(SummarizedExperiment::assay(sce, assay_nm))
        }
        sce
    }

    level_names <- names(SCE_merged)
    converted <- lapply(level_names, function(level_nm) {
        print(level_nm)
        sce_lvl <- SCE_merged[[level_nm]]
        if (as_sparse) {
            sce_lvl <- convert_assays(sce_lvl, to_sparse)
        }
        if (as_DelayedArray) {
            sce_lvl <- convert_assays(sce_lvl, to_delayed)
        }
        sce_lvl
    })
    names(converted) <- level_names
    converted
}
\item{annot_levels}{[Optional] Names of each annotation level.} 25 | 26 | \item{suffix}{Suffix to add to the file name (right before \emph{.rda}).} 27 | 28 | \item{verbose}{Print messages.} 29 | } 30 | \value{ 31 | List of the filtered CellTypeData file names. 32 | } 33 | \description{ 34 | Deprecated function. Please use \link[EWCE]{filter_nonorthologs} instead. 35 | } 36 | \details{ 37 | \bold{Note:} This function replaces the original 38 | \code{filter_genes_without_1to1_homolog} function. 39 | \code{filter_genes_without_1to1_homolog} is 40 | now a wrapper for \code{filter_nonorthologs}. 41 | } 42 | \examples{ 43 | # Load the single cell data 44 | ctd <- ewceData::ctd() 45 | tmp <- tempfile() 46 | save(ctd, file = tmp) 47 | fNames_ALLCELLS_orths <- EWCE::filter_nonorthologs(filenames = tmp) 48 | } 49 | -------------------------------------------------------------------------------- /R/delayedarray_normalize.R: -------------------------------------------------------------------------------- 1 | #' Efficiently normalize a DelayedArray 2 | #' 3 | #' The following is a matrix normalization procedure that takes advantage of 4 | #' functions designed to be more efficient for DelayedArray objects. 5 | #' 6 | #' @param exp Input matrix (e.g. gene expression). 7 | #' @param log_norm Whether to first log-normalise \code{exp} 8 | #' with \link[base]{log1p}. 9 | #' @param min_max Whether to min/max-normalise \code{exp}. 10 | #' @param no_cores Number of cores to parallelise across. 11 | #' 12 | #' @returns Normalised matrix. 
#' Efficiently normalize a DelayedArray
#'
#' The following is a matrix normalization procedure that takes advantage of
#' functions designed to be more efficient for DelayedArray objects.
#'
#' @param exp Input matrix (e.g. gene expression).
#' @param log_norm Whether to first log-normalise \code{exp}
#'  with \link[base]{log1p}.
#' @param min_max Whether to min/max-normalise \code{exp}
#'  (applied per column, after the optional log step).
#' @param plot_hists Whether to draw a histogram of the column means of the
#'  original (un-normalised) \code{exp}.
#' @param no_cores Number of cores to parallelise across.
#'
#' @returns Normalised matrix.
#'
#' @keywords internal
#' @importFrom methods show
delayedarray_normalize <- function(exp,
                                   log_norm = TRUE,
                                   min_max = TRUE,
                                   plot_hists = FALSE,
                                   no_cores = 1) {
    requireNamespace("DelayedArray")
    requireNamespace("graphics")
    # The result is not used below; the call is kept as in the original
    # (presumably for its side effect of configuring workers - TODO confirm).
    core_allocation <- assign_cores(worker_cores = no_cores)
    normed <- exp
    #### Optional log1p transform ####
    if (isTRUE(log_norm)) {
        normed <- log1p(normed)
    }
    #### Optional per-column min/max scaling ####
    if (isTRUE(min_max)) {
        col_max <- DelayedArray::colMaxs(normed, na.rm = TRUE)
        col_min <- DelayedArray::colMins(normed, na.rm = TRUE)
        col_range <- col_max - col_min
        # Transposing lets the per-column vectors recycle along the rows.
        shifted <- DelayedArray::t(normed) - col_min
        normed <- DelayedArray::t(shifted / col_range)
    }
    #### Optional histogram of the raw column means ####
    if (plot_hists) {
        methods::show(
            graphics::hist(DelayedArray::colMeans(exp, na.rm = TRUE))
        )
    }
    return(normed)
}
28 | See \link[EWCE]{list_species} for all available species.} 29 | 30 | \item{method}{R package to use for gene mapping: 31 | \describe{ 32 | \item{\code{"gprofiler"}}{Slower but more species and genes.} 33 | \item{\code{"homologene"}}{Faster but fewer species and genes.} 34 | \item{\code{"babelgene"}}{Faster but fewer species and genes. 35 | Also gives consensus scores for each gene mapping based on 36 | several different data sources.} 37 | }} 38 | 39 | \item{verbose}{Print messages.} 40 | } 41 | \value{ 42 | List of 3 items 43 | } 44 | \description{ 45 | Prepare differential gene expression table for 46 | \link[EWCE]{generate_bootstrap_plots_for_transcriptome} or 47 | \link[EWCE]{ewce_expression_data}. 48 | } 49 | \keyword{internal} 50 | -------------------------------------------------------------------------------- /R/sce_lists_apply.R: -------------------------------------------------------------------------------- 1 | #' sce_lists_apply 2 | #' 3 | #' Support function for \code{EWCE::merge_sce_list}. 4 | #' 5 | #' @return List of \code{SingleCellExperiment}s. 
6 | #' 7 | #' @keywords internal 8 | sce_lists_apply <- function(SCE_lists, 9 | return_genes = FALSE, 10 | level = 2, 11 | as_matrix = FALSE, 12 | as_DelayedArray = FALSE) { 13 | lapply(names(SCE_lists), function(x, lvl = level, genes = return_genes) { 14 | print(x) 15 | sce_list <- SCE_lists[[x]] 16 | if (length(sce_list) < lvl) lvl <- length(sce_list) 17 | sce_lvl <- sce_list[[lvl]] 18 | print(paste(dim(sce_lvl), collapse = " x ")) 19 | if (as_matrix) { 20 | for (ass in SummarizedExperiment::assayNames(sce_lvl)) { 21 | SummarizedExperiment::assay(sce_lvl, ass) <- 22 | as(SummarizedExperiment::assay(sce_lvl, ass), "matrix") 23 | } 24 | } 25 | if (as_DelayedArray) { 26 | for (ass in SummarizedExperiment::assayNames(sce_lvl)) { 27 | SummarizedExperiment::assay(sce_lvl, ass) <- 28 | DelayedArray::DelayedArray( 29 | methods::as(SummarizedExperiment::assay( 30 | sce_lvl, ass 31 | ), "sparseMatrix") 32 | ) 33 | } 34 | } 35 | if (genes) { 36 | return(rownames(sce_lvl)) 37 | } else { 38 | return(sce_lvl) 39 | } 40 | }) |> `names<-`(names(SCE_lists)) 41 | } 42 | -------------------------------------------------------------------------------- /R/get_summed_proportions_iterate.R: -------------------------------------------------------------------------------- 1 | get_summed_proportions_iterate <- function(reps, 2 | geneSizeControl, 3 | control_network, 4 | controlledCT, 5 | controlled_bootstrap_set, 6 | combinedGenes, 7 | hits, 8 | sct_data, 9 | annotLevel, 10 | no_cores){ 11 | 12 | parallel::mclapply(seq_len(reps), function(s) { 13 | # Get 'bootstrap_set'...a list of genes of equivalent length as hits 14 | if (isTRUE(geneSizeControl)) { 15 | bootstrap_set <- control_network[s, ] 16 | } else { 17 | if (is.null(controlledCT)) { 18 | bootstrap_set <- sample( 19 | combinedGenes, 20 | length(hits) 21 | ) 22 | } else { 23 | bootstrap_set <- controlled_bootstrap_set[, s] 24 | } 25 | } 26 | # 'bootstrap_data' is a matrix of the summed proportions 27 | bootstrap_res <- 
cell_list_dist( 28 | hits = bootstrap_set, 29 | sct_data = sct_data, 30 | annotLevel = annotLevel 31 | ) 32 | 33 | return(list(celltypes=data.table::data.table(t(bootstrap_res)), 34 | genes=bootstrap_set 35 | ) 36 | ) 37 | }, mc.cores = no_cores) 38 | } 39 | -------------------------------------------------------------------------------- /man/check_controlled_args.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_controlled_args.R 3 | \name{check_controlled_args} 4 | \alias{check_controlled_args} 5 | \title{check_controlled_args} 6 | \usage{ 7 | check_controlled_args( 8 | bg, 9 | sct_data, 10 | annotLevel, 11 | disease_genes, 12 | hits, 13 | functional_genes, 14 | funcGenes, 15 | combinedGenes 16 | ) 17 | } 18 | \arguments{ 19 | \item{bg}{List of gene symbols containing the background gene list 20 | (including hit genes). If \code{bg=NULL}, 21 | an appropriate gene background will be created automatically.} 22 | 23 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 24 | 25 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 26 | analyse (\emph{Default: 1}).} 27 | 28 | \item{disease_genes}{Array of gene symbols containing the disease gene list. 29 | Does not have to be disease genes. Must be from same species as the single 30 | cell transcriptome dataset.} 31 | 32 | \item{hits}{Hit genes.} 33 | 34 | \item{functional_genes}{Array of gene symbols containing the functional gene 35 | list. The enrichment of this gene set within the disease_genes is tested. 36 | Must be from same species as the single cell transcriptome dataset.} 37 | 38 | \item{funcGenes}{\code{functional_genes} that are within 39 | \code{combinedGenes}.} 40 | 41 | \item{combinedGenes}{\code{sct_data} genes that are in the background 42 | \code{bg}.} 43 | } 44 | \value{ 45 | Null output. 
46 | } 47 | \description{ 48 | Check the input arguments of the 49 | \link[EWCE]{controlled_geneset_enrichment}. 50 | } 51 | \keyword{internal} 52 | -------------------------------------------------------------------------------- /R/check_species.R: -------------------------------------------------------------------------------- 1 | #' Check species 2 | #' 3 | #' If species arguments are \code{NULL}, set default species. 4 | #' 5 | #' @param sctSpecies_origin_default Default value for \code{sctSpecies_origin}. 6 | #' @inheritParams bootstrap_enrichment_test 7 | #' @returns List of corrected species names. 8 | #' 9 | #' @keywords internal 10 | check_species <- function(genelistSpecies = NULL, 11 | sctSpecies = NULL, 12 | sctSpecies_origin = NULL, 13 | sctSpecies_origin_default="mouse", 14 | verbose = TRUE) { 15 | if (is.null(genelistSpecies)) { 16 | messager( 17 | "Warning: genelistSpecies not provided.", 18 | "Setting to 'human' by default.", 19 | v = verbose 20 | ) 21 | genelistSpecies <- "human" 22 | } 23 | if (is.null(sctSpecies)) { 24 | messager( 25 | "Warning: sctSpecies not provided.", 26 | "Setting to 'mouse' by default.", 27 | v = verbose 28 | ) 29 | sctSpecies <- "mouse" 30 | } 31 | if (is.null(sctSpecies_origin) && 32 | !is.null(sctSpecies_origin_default)) { 33 | messager( 34 | "Warning: sctSpecies_origin not provided.", 35 | "Setting to",shQuote(sctSpecies_origin_default), 36 | "by default.", 37 | v = verbose 38 | ) 39 | sctSpecies_origin <- sctSpecies_origin_default 40 | } 41 | return(list( 42 | genelistSpecies = genelistSpecies, 43 | sctSpecies = sctSpecies, 44 | sctSpecies_origin = sctSpecies_origin 45 | )) 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test-run_DGE.R: -------------------------------------------------------------------------------- 1 | test_that("DGE works", { 2 | if (!is_32bit()) { 3 | set.seed(1234) 4 | cortex_mrna <- ewceData::cortex_mrna() 5 | n_genes <- 100 6 | exp <- 
cortex_mrna$exp[seq(1, n_genes), ] 7 | level2annot <- cortex_mrna$annot$level2class 8 | 9 | #### limma #### 10 | limma_res <- EWCE:::run_limma( 11 | exp = exp, 12 | level2annot = level2annot 13 | ) 14 | testthat::expect_true( 15 | all(c("coefficients", "rank", "assign", "F", "p.value") %in% names(limma_res)) 16 | ) 17 | testthat::expect_true(EWCE:::is_matrix(limma_res$coefficients)) 18 | testthat::expect_equal(sum(limma_res$q < .05), 100) 19 | 20 | #### DESeq2 #### 21 | deqseq2_res <- EWCE:::run_deseq2( 22 | exp = exp, 23 | level2annot = level2annot, 24 | test = "LRT" 25 | ) 26 | testthat::expect_true( 27 | all(c("baseMean", "log2FoldChange", "pvalue", "padj") %in% 28 | names(deqseq2_res)) 29 | ) 30 | testthat::expect_length(deqseq2_res$padj, n_genes) 31 | testthat::expect_equal(nrow(subset(deqseq2_res, padj < .05)), 80) 32 | 33 | #### MAST #### 34 | mast_res <- EWCE:::run_mast( 35 | exp = exp, 36 | level2annot = level2annot 37 | ) 38 | testthat::expect_true( 39 | all(c("Class", "lrstat", "p.value") %in% names(mast_res)) 40 | ) 41 | testthat::expect_true(is.data.frame(mast_res)) 42 | testthat::expect_equal(nrow(subset(mast_res, q < .05)), 1917) 43 | } 44 | }) 45 | -------------------------------------------------------------------------------- /.github/workflows/rworkflows.yml: -------------------------------------------------------------------------------- 1 | name: rworkflows 2 | 'on': 3 | push: 4 | branches: 5 | - master 6 | - main 7 | - RELEASE_** 8 | pull_request: 9 | branches: 10 | - master 11 | - main 12 | - RELEASE_** 13 | jobs: 14 | rworkflows: 15 | permissions: write-all 16 | runs-on: ${{ matrix.config.os }} 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | container: ${{ matrix.config.cont }} 19 | strategy: 20 | fail-fast: ${{ false }} 21 | matrix: 22 | config: 23 | - os: ubuntu-latest 24 | bioc: devel 25 | r: auto 26 | cont: bioconductor/bioconductor_docker:devel 27 | - os: macOS-latest 28 | bioc: devel 29 | r: auto 30 | cont: ~ 31 | 
rspm: ~ 32 | - os: windows-latest 33 | bioc: devel 34 | r: auto 35 | cont: ~ 36 | rspm: ~ 37 | steps: 38 | - uses: neurogenomics/rworkflows@master 39 | with: 40 | run_bioccheck: ${{ true }} 41 | run_rcmdcheck: ${{ true }} 42 | as_cran: ${{ true }} 43 | run_vignettes: ${{ true }} 44 | has_testthat: ${{ true }} 45 | run_covr: ${{ true }} 46 | run_pkgdown: ${{ true }} 47 | has_runit: ${{ false }} 48 | has_latex: ${{ false }} 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | run_docker: ${{ true }} 51 | docker_registry: docker.io 52 | docker_user: bschilder 53 | docker_org: neurogenomicslab 54 | DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} 55 | runner_os: ${{ runner.os }} 56 | cache_version: cache-v1 57 | -------------------------------------------------------------------------------- /R/prepare_tt.R: -------------------------------------------------------------------------------- 1 | #' Prepare differential gene expression table 2 | #' 3 | #' Prepare differential gene expression table for 4 | #' \link[EWCE]{generate_bootstrap_plots_for_transcriptome} or 5 | #' \link[EWCE]{ewce_expression_data}. 
6 | #' 7 | #' @inheritParams ewce_expression_data 8 | #' @inheritParams orthogene::convert_orthologs 9 | #' 10 | #' @returns List of 3 items 11 | #' 12 | #' @keywords internal 13 | prepare_tt <- function(tt, 14 | tt_genecol = NULL, 15 | ttSpecies, 16 | output_species, 17 | method = "homologene", 18 | verbose = TRUE){ 19 | #### Infer gene column in tt #### 20 | if(is.null(tt_genecol)){ 21 | tt_genecol <- colnames(tt)[1] 22 | messager("Using 1st column of tt as gene column:",tt_genecol, 23 | v=verbose) 24 | } 25 | #### Convert orthologs if needed #### 26 | if(ttSpecies!=output_species){ 27 | tt <- orthogene::convert_orthologs(gene_df = tt, 28 | gene_input = tt_genecol, 29 | gene_output = "columns", 30 | input_species = ttSpecies, 31 | output_species = output_species, 32 | method = method, 33 | verbose = verbose) 34 | tt_genecol <- "ortholog_gene" 35 | ttSpecies <- output_species 36 | } 37 | return(list(tt = tt, 38 | tt_genecol = tt_genecol, 39 | ttSpecies = ttSpecies)) 40 | } 41 | -------------------------------------------------------------------------------- /R/assign_cores.r: -------------------------------------------------------------------------------- 1 | #' Assign cores 2 | #' 3 | #' Assign cores automatically for parallel processing, while reserving some. 4 | #' 5 | #' @param worker_cores Number (>=1) or proportion (<1) of worker cores to use. 6 | #' @param verbose Print messages. 7 | #' 8 | #' @return List of core allocations.
9 | #' 10 | #' @importFrom DelayedArray setAutoBPPARAM 11 | #' @importFrom parallel detectCores 12 | #' @keywords internal 13 | assign_cores <- function(worker_cores = .90, 14 | verbose = TRUE) { 15 | # Enable parallelization of HDF5 functions 16 | ## Allocate ~10% of your available cores to non-parallelized processes, but never reserve so many that no worker is left 17 | worker_cores <- if (is.null(worker_cores)) .90 else worker_cores 18 | total_cores <- parallel::detectCores() 19 | if (worker_cores < 1) { 20 | reserved_cores <- min(total_cores - 1, ceiling(total_cores * (1 - worker_cores))) 21 | workers <- total_cores - reserved_cores 22 | } else { 23 | workers <- worker_cores 24 | reserved_cores <- total_cores - workers 25 | } 26 | messager(workers, "core(s) assigned as workers", 27 | paste0("(",reserved_cores, " reserved)."), 28 | v = verbose 29 | ) 30 | #### Handle Windows #### 31 | if (.Platform$OS.type == "windows") { 32 | params <- BiocParallel::SnowParam(workers) 33 | } else { 34 | params <- BiocParallel::MulticoreParam(workers) 35 | } 36 | DelayedArray::setAutoBPPARAM(params) 37 | #### Not allowed to use internal functions #### 38 | # DelayedArray:::set_verbose_block_processing(verbose) 39 | return(list( 40 | worker_cores = workers, 41 | reserved_cores = reserved_cores, 42 | total_cores = total_cores 43 | )) 44 | } 45 | -------------------------------------------------------------------------------- /R/run_deseq2.R: -------------------------------------------------------------------------------- 1 | #' Run DGE: \pkg{DESeq2} 2 | #' 3 | #' Run Differential Gene Expression with \pkg{DESeq2}. 4 | #' 5 | #' @inheritParams drop_uninformative_genes 6 | #' @inheritParams DESeq2::DESeq 7 | #' 8 | #' @return \code{DESeq} results 9 | #' 10 | #' @keywords internal 11 | #' @importFrom stats formula 12 | run_deseq2 <- function(exp, 13 | level2annot, 14 | test = "LRT", 15 | no_cores = 1, 16 | verbose = TRUE, 17 | ...)
{ 18 | requireNamespace("DESeq2") 19 | messager("DGE:: DESeq2...", v = verbose) 20 | core_allocation <- assign_cores(worker_cores = no_cores) 21 | # NOTE:: When you're running DESeq2 on sparse exp data, 22 | ## there are two ways to avoid issues when DESeq() tries to log your data: 23 | ## 1) add 1 to your expression matrix (much faster). 24 | ## 2) set sfType = "iterate" to 25 | ## enable iterative size factor estimation (very slow). 26 | dds <- DESeq2::DESeqDataSetFromMatrix(exp + 1, 27 | colData = data.frame(level2annot = level2annot), 28 | design = stats::formula(paste("~", "level2annot")) 29 | ) 30 | dds <- DESeq2::DESeq(dds, 31 | # Best for scRNAseq data. 32 | test = test, 33 | reduced = ~1, 34 | # DESeq2 v1.31.10 (not yet released on BioC) 35 | # now has glmGamPoi integrated directly! 36 | ## https://github.com/mikelove/DESeq2/issues/29 37 | ## default="parametric" 38 | # fitType="glmGamPoi", 39 | # sfType = "iterate", 40 | parallel = core_allocation$worker_cores > 1, # resolved worker count: well-defined when no_cores is NULL or a proportion 41 | ... 42 | ) 43 | dds_res <- DESeq2::results(dds) 44 | return(dds_res) 45 | } 46 | -------------------------------------------------------------------------------- /man/example_bootstrap_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/example_bootstrap_results.R 3 | \name{example_bootstrap_results} 4 | \alias{example_bootstrap_results} 5 | \title{Example bootstrap enrichment results} 6 | \source{ 7 | # Load the single cell data 8 | 9 | ctd <- ewceData::ctd() 10 | 11 | # Set the parameters for the analysis 12 | 13 | # Use 3 bootstrap lists for speed, for publishable analysis use >=10,000 14 | 15 | reps <- 3 16 | 17 | # Load gene list from Alzheimer's disease GWAS 18 | 19 | example_genelist <- ewceData::example_genelist() 20 | 21 | # Bootstrap significance test, no control for transcript length or GC content 22 | 23 | full_results <- EWCE::bootstrap_enrichment_test( 24 | sct_data
= ctd, 25 | hits = example_genelist, 26 | reps = reps, 27 | annotLevel = 1, 28 | sctSpecies = "mouse", 29 | genelistSpecies = "human" 30 | ) 31 | 32 | bootstrap_results <- full_results 33 | 34 | save(bootstrap_results,file = "inst/extdata/bootstrap_results.rda") 35 | } 36 | \usage{ 37 | example_bootstrap_results(verbose = TRUE, localHub = FALSE) 38 | } 39 | \arguments{ 40 | \item{verbose}{Print messages.} 41 | 42 | \item{localHub}{If working offline, add argument localHub=TRUE to work 43 | with a local, non-updated hub; It will only have resources available that 44 | have previously been downloaded. If offline, Please also see BiocManager 45 | vignette section on offline use to ensure proper functionality.} 46 | } 47 | \value{ 48 | List with 3 items. 49 | } 50 | \description{ 51 | Example cell type enrichment 52 | results produced by \link[EWCE]{bootstrap_enrichment_test}. 53 | } 54 | \examples{ 55 | full_results <- example_bootstrap_results() 56 | } 57 | -------------------------------------------------------------------------------- /man/run_deseq2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_deseq2.R 3 | \name{run_deseq2} 4 | \alias{run_deseq2} 5 | \title{Run DGE: \pkg{DESeq2}} 6 | \usage{ 7 | run_deseq2(exp, level2annot, test = "LRT", no_cores = 1, verbose = TRUE, ...) 
8 | } 9 | \arguments{ 10 | \item{exp}{Expression matrix with gene names as rownames.} 11 | 12 | \item{level2annot}{Array of cell types, with each sequentially corresponding 13 | a column in the expression matrix.} 14 | 15 | \item{test}{either "Wald" or "LRT", which will then use either 16 | Wald significance tests (defined by \code{\link[DESeq2]{nbinomWaldTest}}), 17 | or the likelihood ratio test on the difference in deviance between a 18 | full and reduced model formula (defined by \code{\link[DESeq2]{nbinomLRT}})} 19 | 20 | \item{no_cores}{Number of cores to parallelise across. 21 | Set to \code{NULL} to automatically optimise.} 22 | 23 | \item{verbose}{Print messages. 24 | #' @inheritParams orthogene::convert_orthologs} 25 | 26 | \item{...}{Additional arguments to be passed to 27 | \link[gprofiler2]{gorth} or \link[homologene]{homologene}.\cr\cr 28 | \emph{NOTE}: To return only the most "popular" 29 | interspecies ortholog mappings, 30 | supply \code{mthreshold=1} here AND set \code{method="gprofiler"} above. 31 | This procedure tends to yield a greater number of returned genes but at 32 | the cost of many of them not being true biological 1:1 orthologs.\cr\cr 33 | For more details, please see 34 | \href{https://cran.r-project.org/web/packages/gprofiler2/vignettes/gprofiler2.html}{ 35 | here}.} 36 | } 37 | \value{ 38 | \code{DESeq} results 39 | } 40 | \description{ 41 | Run Differential Gene Expression with \pkg{DESeq2}. 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /R/bin_specificity_into_quantiles.r: -------------------------------------------------------------------------------- 1 | #' bin_specificity_into_quantiles 2 | #' 3 | #' \code{bin_specificity_into_quantiles} is a function used to 4 | #' add '$specificity_quantiles' to a ctd. 5 | #' 6 | #' @param ctdIN A single annotLevel of a ctd, i.e. \code{ctd[[1]]} 7 | #' (the function is intended to be used via \code{lapply}).
8 | #' @param numberOfBins Number of quantile 'bins' to use (40 is recommended). 9 | #' @param matrix_name Name of the specificity matrix to create 10 | #' (default: "specificity_quantiles"). 11 | #' @param as_sparse Convert to sparseMatrix. 12 | #' @param verbose Print messages. 13 | #' 14 | #' @returns A ctd with "specificity_quantiles" matrix in each level 15 | #' (or whatever \code{matrix_name} was set to.). 16 | #' @examples 17 | #' ctd <- ewceData::ctd() 18 | #' ctd <- lapply(ctd, EWCE::bin_specificity_into_quantiles, numberOfBins = 40) 19 | #' print(ctd[[1]]$specificity_quantiles[1:3, ]) 20 | #' @export 21 | #' @importFrom methods as 22 | bin_specificity_into_quantiles <- function(ctdIN, 23 | numberOfBins, 24 | matrix_name = 25 | "specificity_quantiles", 26 | as_sparse = TRUE, 27 | verbose = TRUE) { 28 | specQ <- apply(ctdIN$specificity, 2, 29 | FUN = bin_columns_into_quantiles, 30 | numberOfBins = numberOfBins 31 | ) 32 | ctdIN[[matrix_name]] <- to_sparse_matrix( 33 | exp = specQ, 34 | as_sparse = as_sparse, 35 | verbose = verbose 36 | ) 37 | rownames(ctdIN[[matrix_name]]) <- rownames(ctdIN$specificity) 38 | return(ctdIN) 39 | } 40 | -------------------------------------------------------------------------------- /R/calculate_meanexp_for_level.R: -------------------------------------------------------------------------------- 1 | #' calculate_meanexp_for_level 2 | #' 3 | #' @return One level of a CellTypeDataset. 
4 | #' 5 | #' @keywords internal 6 | #' @importFrom stats model.matrix 7 | calculate_meanexp_for_level <- function(ctd_oneLevel, 8 | expMatrix, 9 | as_sparse = TRUE, 10 | verbose = TRUE) { 11 | err_msg <- paste0( 12 | "There are an equal number of cell types in expMatrix", 13 | " and ctd_oneLevel but the names do not match" 14 | ) 15 | if (dim(expMatrix)[2] == length(unique(ctd_oneLevel$annot))) { 16 | message(dim(expMatrix)[2]) 17 | message(length(ctd_oneLevel$annot)) 18 | if (sum(!colnames(expMatrix) == ctd_oneLevel$annot) != 0) { 19 | stop(err_msg) 20 | } 21 | ctd_oneLevel$mean_exp <- to_sparse_matrix( 22 | exp = expMatrix, 23 | as_sparse = as_sparse, 24 | verbose = verbose 25 | ) 26 | } else { 27 | # Sum reads in each cell type 28 | mm <- stats::model.matrix(~ 0 + ctd_oneLevel$annot) 29 | colnames(mm) <- names(table(ctd_oneLevel$annot)) 30 | mat.summary.mm1 <- expMatrix %*% mm 31 | 32 | # Divide by the number of cells to get the mean 33 | cellCounts <- table(ctd_oneLevel$annot) 34 | for (i in seq_len(dim(mat.summary.mm1)[2])) { 35 | mat.summary.mm1[, i] <- mat.summary.mm1[, i] / cellCounts[i] 36 | } 37 | ctd_oneLevel$mean_exp <- to_sparse_matrix( 38 | exp = mat.summary.mm1, 39 | as_sparse = as_sparse, 40 | verbose = verbose 41 | ) 42 | } 43 | return(ctd_oneLevel) 44 | } 45 | -------------------------------------------------------------------------------- /R/check_generate_controlled_bootstrapped_geneset.R: -------------------------------------------------------------------------------- 1 | #' generate_controlled_bootstrap_geneset 2 | #' 3 | #' Check input arguments to \link[EWCE]{generate_controlled_bootstrap_geneset}. 4 | #' 5 | #' @inheritParams generate_controlled_bootstrap_geneset 6 | #' @inheritParams bootstrap_enrichment_test 7 | #' @return Null output. 
8 | #' 9 | #' @keywords internal 10 | check_generate_controlled_bootstrap_geneset <- function(controlledCT, 11 | annotLevel, 12 | sct_data, 13 | hits) { 14 | err_msg <- paste0( 15 | "ERROR: controlledCT cannot be NULL in", 16 | " generate_controlled_bootstrap_geneset" 17 | ) 18 | if (is.null(controlledCT)) { 19 | stop(err_msg) 20 | } 21 | err_msg2 <- paste0( 22 | "ERROR: annotLevel cannot be greater than the number", 23 | " of annotation levels in sct_data" 24 | ) 25 | if (annotLevel > length(sct_data)) { 26 | stop(err_msg2) 27 | } 28 | # Check all controlledCT are in single cell data 29 | err_msg3 <- paste0( 30 | "ERROR: not all controlledCT are in", 31 | " colnames(sct_data[[annotLevel]]$specificity)" 32 | ) 33 | if (sum(!controlledCT %in% 34 | colnames(sct_data[[annotLevel]]$specificity)) != 0) { 35 | stop(err_msg3) 36 | } 37 | err_msg4 <- paste0( 38 | "ERROR: length(hits)==0. Perhaps your gene list is", 39 | " from the wrong species? It should be converted to", 40 | " orthologs of the same species as the single cell", 41 | " dataset." 42 | ) 43 | if (length(hits) == 0) { 44 | stop(err_msg4) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /R/compute_gene_counts.R: -------------------------------------------------------------------------------- 1 | #' Compute gene counts 2 | #' 3 | #' Counts the number of times each gene appeared in 4 | #' the randomly sampled gene lists. 5 | #' @param bootstrap_list The output of \code{get_summed_proportions_iterate}. 
6 | #' @inheritParams get_summed_proportions 7 | #' @inheritParams bootstrap_enrichment_test 8 | #' @returns \link[data.table]{data.table} 9 | #' 10 | #' @keywords internal 11 | #' @importFrom data.table data.table as.data.table setorderv := 12 | compute_gene_counts<- function(bootstrap_list, 13 | # hits, 14 | verbose=TRUE){ 15 | 16 | count <- gene <- proportion_reps <- is_hit_gene <- reps <- NULL; 17 | 18 | messager("Computing gene counts.",v=verbose) 19 | gene_counts <- lapply(bootstrap_list,function(x){x$genes}) |> 20 | unlist() |> table() 21 | gene_agg <- data.table::as.data.table(gene_counts) |> 22 | `colnames<-`(c("gene","count")) 23 | gene_agg[,reps:=length(bootstrap_list)] 24 | #### Add genes that didn't appear in any randomly sampled list #### 25 | # extra_genes <- unname(combinedGenes[!combinedGenes %in% gene_agg$gene]) 26 | # if(length(extra_genes)>0){ 27 | # gene_agg <- rbind( 28 | # gene_agg, 29 | # data.table::data.table( 30 | # gene=extra_genes, 31 | # count=0, 32 | # reps=length(bootstrap_list)) 33 | # ) 34 | # } 35 | 36 | gene_agg[,proportion_reps:=count/reps,] 37 | # gene_agg[,is_hit_gene:=gene %in% hits] 38 | # data.table::setorderv(gene_agg, 39 | # cols = c("is_hit_gene","proportion_reps"), 40 | # order = c(-1,-1)) 41 | return(gene_agg) 42 | } 43 | -------------------------------------------------------------------------------- /man/example_transcriptome_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/example_transcriptome_results.R 3 | \name{example_transcriptome_results} 4 | \alias{example_transcriptome_results} 5 | \title{Example bootstrap celltype enrichment test for transcriptome data} 6 | \source{ 7 | ## Load the single cell data 8 | 9 | ctd <- ewceData::ctd() 10 | 11 | ## Set the parameters for the analysis 12 | 13 | ## Use 3 bootstrap lists for speed, for publishable analysis use >10,000 14 | 15 | reps <- 3 
16 | 17 | annotLevel <- 1 # <- Use cell level annotations (i.e. Interneurons) 18 | 19 | ## Use 5 up/down regulated genes (thresh) for speed, default is 250 20 | 21 | thresh <- 5 22 | 23 | ## Load the top table 24 | 25 | tt_alzh <- ewceData::tt_alzh() 26 | 27 | tt_results <- EWCE::ewce_expression_data( 28 | sct_data = ctd, 29 | tt = tt_alzh, 30 | annotLevel = 1, 31 | thresh = thresh, 32 | reps = reps, 33 | ttSpecies = "human", 34 | sctSpecies = "mouse" 35 | ) 36 | 37 | save(tt_results, file = "inst/extdata/tt_results.rda") 38 | } 39 | \usage{ 40 | example_transcriptome_results(verbose = TRUE, localHub = FALSE) 41 | } 42 | \arguments{ 43 | \item{verbose}{Print messages.} 44 | 45 | \item{localHub}{If working offline, add argument localHub=TRUE to work 46 | with a local, non-updated hub; It will only have resources available that 47 | have previously been downloaded. If offline, Please also see BiocManager 48 | vignette section on offline use to ensure proper functionality.} 49 | } 50 | \value{ 51 | List with 5 items. 52 | } 53 | \description{ 54 | Example celltype enrichment 55 | results produced by \link[EWCE]{ewce_expression_data}. 56 | } 57 | \examples{ 58 | tt_results <- EWCE::example_transcriptome_results() 59 | } 60 | -------------------------------------------------------------------------------- /man/add_res_to_merging_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_res_to_merging_list.r 3 | \name{add_res_to_merging_list} 4 | \alias{add_res_to_merging_list} 5 | \title{Add to results to merging list} 6 | \usage{ 7 | add_res_to_merging_list(full_res, existing_results = NULL) 8 | } 9 | \arguments{ 10 | \item{full_res}{Results list generated using 11 | \link[EWCE]{bootstrap_enrichment_test} 12 | or \link[EWCE]{ewce_expression_data} functions. 
13 | Multiple results tables can be merged into one 14 | results table, as long as the 'list' column is set to distinguish them.} 15 | 16 | \item{existing_results}{Output of previous rounds from adding results to 17 | list. Leave empty if this is the first item in the list.} 18 | } 19 | \value{ 20 | Merged results list. 21 | } 22 | \description{ 23 | \code{add_res_to_merging_list} adds EWCE results to a list 24 | for merging analysis. 25 | } 26 | \examples{ 27 | # Load the single cell data 28 | ctd <- ewceData::ctd() 29 | 30 | # Load the data 31 | tt_alzh <- ewceData::tt_alzh() 32 | # tt_alzh_BA36 <- ewceData::tt_alzh_BA36() 33 | # Use 3 bootstrap lists for speed, for publishable analysis use >10000 34 | reps <- 3 35 | # Use 5 up/down regulated genes (thresh) for speed, default is 250 36 | thresh <- 5 37 | # Run EWCE analysis 38 | # tt_results <- ewce_expression_data( 39 | # sct_data = ctd, tt = tt_alzh, annotLevel = 1, thresh = thresh, 40 | # reps = reps, ttSpecies = "human", sctSpecies = "mouse" 41 | # ) 42 | # tt_results_36 <- ewce_expression_data( 43 | # sct_data = ctd, tt = tt_alzh_BA36, annotLevel = 1, thresh = thresh, 44 | # reps = reps, ttSpecies = "human", sctSpecies = "mouse" 45 | # ) 46 | 47 | # Fill a list with the results 48 | results <- add_res_to_merging_list(tt_alzh) 49 | # results <- add_res_to_merging_list(tt_alzh_BA36, results) 50 | } 51 | -------------------------------------------------------------------------------- /man/EWCE-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{package} 4 | \name{EWCE-package} 5 | \alias{EWCE} 6 | \alias{EWCE-package} 7 | \title{EWCE: Expression Weighted Celltype Enrichment} 8 | \description{ 9 | Used to determine which cell types are enriched within gene lists. 
The package provides tools for testing enrichments within simple gene lists (such as human disease associated genes) and those resulting from differential expression studies. The package does not depend upon any particular Single Cell Transcriptome dataset and user defined datasets can be loaded in and used in the analyses. 10 | } 11 | \details{ 12 | EWCE: Expression Weighted Celltype Enrichment 13 | 14 | Used to determine which cell types are enriched within gene lists. 15 | The package provides tools for testing enrichments within simple gene lists 16 | (such as human disease associated genes) and those resulting from 17 | differential expression studies. 18 | 19 | The package does not depend upon any particular Single Cell Transcriptome 20 | dataset and user defined datasets can be loaded in and used in the analyses. 21 | } 22 | \seealso{ 23 | Useful links: 24 | \itemize{ 25 | \item \url{https://github.com/NathanSkene/EWCE} 26 | \item Report bugs at \url{https://github.com/NathanSkene/EWCE/issues} 27 | } 28 | 29 | } 30 | \author{ 31 | \strong{Maintainer}: Hiranyamaya Dash \email{hdash.work@gmail.com} (\href{https://orcid.org/0009-0005-5514-505X}{ORCID}) 32 | 33 | Authors: 34 | \itemize{ 35 | \item Alan Murphy \email{alanmurph94@hotmail.com} (\href{https://orcid.org/0000-0002-2487-8753}{ORCID}) 36 | \item Brian Schilder \email{brian_schilder@alumni.brown.edu} (\href{https://orcid.org/0000-0001-5949-2191}{ORCID}) 37 | \item Nathan Skene \email{nathan.skene@gmail.com} (\href{https://orcid.org/0000-0002-6807-3180}{ORCID}) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/testthat/test-DelayedArray.R: -------------------------------------------------------------------------------- 1 | test_that("DelayedArray works", { 2 | 3 | if (!is_32bit()) { 4 | #### Setup data #### 5 | cortex_mrna <- ewceData::cortex_mrna() 6 | expMatrix <- DelayedArray::DelayedArray(cortex_mrna$exp) 7 | # ctd <- ewceData::ctd() 8 | 
ctd <- list( 9 | level1 = list(), 10 | level2 = list() 11 | ) 12 | ctd[[1]][["annot"]] <- cortex_mrna$annot$level1class 13 | #### Set DelayedArray parameters #### 14 | EWCE:::assign_cores(worker_cores = 1) 15 | DelayedArray:::set_verbose_block_processing(verbose = TRUE) 16 | 17 | #### Test calculate_meanexp_for_level #### 18 | ctd_oneLevel <- EWCE:::calculate_meanexp_for_level( 19 | ctd_oneLevel = ctd[[1]], 20 | expMatrix = expMatrix 21 | ) 22 | testthat::expect_length(ctd_oneLevel, 2) 23 | testthat::expect_true(all(c("annot", "mean_exp") %in% names(ctd_oneLevel))) 24 | 25 | #### Test calculate_specificity_for_level #### 26 | ctd_oneLevel_mod <- EWCE:::calculate_specificity_for_level( 27 | ctd_oneLevel = ctd_oneLevel 28 | ) 29 | testthat::expect_length(ctd_oneLevel_mod, 3) 30 | testthat::expect_true( 31 | all(c("annot", "mean_exp", "specificity") %in% names(ctd_oneLevel_mod)) 32 | ) 33 | 34 | #### Test delayedarray_normalize #### 35 | exp_norm <- EWCE:::delayedarray_normalize( 36 | exp = expMatrix, 37 | log_norm = TRUE, 38 | min_max = TRUE 39 | ) 40 | testthat::expect_equal(dim(exp_norm), dim(expMatrix)) 41 | 42 | #### Test DelayedArray as input to sct_normalize #### 43 | exp_norm_sct <- EWCE::sct_normalize(expMatrix) 44 | testthat::expect_true(EWCE:::is_sparse_matrix(exp_norm_sct)) 45 | } 46 | }) 47 | -------------------------------------------------------------------------------- /R/check_bootstrap_args.R: -------------------------------------------------------------------------------- 1 | #' check_bootstrap_args 2 | #' 3 | #' Check the input arguments of the 4 | #' \link[EWCE]{bootstrap_enrichment_test}. 5 | #' 6 | #' @inheritParams bootstrap_enrichment_test 7 | #' @return Null output. 
8 | #' 9 | #' @keywords internal 10 | check_bootstrap_args <- function(sct_data, 11 | hits, 12 | annotLevel, 13 | reps, 14 | controlledCT = NULL, 15 | fix_celltypes = TRUE) { 16 | #### Check an SCT dataset was provided #### 17 | if (unique(is.null(sct_data)) || 18 | (!is_celltypedataset(ctd = sct_data))) { 19 | stop("Must provide valid CellTypeDataset to sct_data.") 20 | } 21 | #### Check a gene list (hits) was provided #### 22 | if ((!exists("hits")) || unique(is.null(hits))) { 23 | stop("Must provide a gene list to hits.") 24 | } 25 | #### Check annotLevel indexes an existing level of sct_data, and reps > 0 #### 26 | if ((!exists("annotLevel")) || 27 | (annotLevel < 1) || 28 | (annotLevel > length(sct_data))) { 29 | stop("Must provide a valid annotLevel <= ", length(sct_data), ".") 30 | } 31 | if ((!exists("reps")) || 32 | (reps < 1)) { 33 | stop("Must provide a valid reps > 0 .") 34 | } 35 | #### Check if controlling for another celltype ### 36 | if (!is.null(controlledCT)) { 37 | ct_names <- colnames(sct_data[[annotLevel]]$specificity) 38 | if(fix_celltypes){ 39 | ct_names <- fix_celltype_names(ct_names) 40 | } 41 | if (!all(controlledCT %in% ct_names)) { # all(): keeps the condition length-1 even when controlledCT holds several names 42 | err_msg <- paste0( 43 | "invalid celltype name passed in controlledCT.", 44 | " This argument is optional. Leave empty if you do not", 45 | " wish to control for a celltypes expression."
46 | ) 47 | stop(err_msg) 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /man/check_args_for_bootstrap_plot_generation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_args_for_bootstrap_plot_generation.R 3 | \name{check_args_for_bootstrap_plot_generation} 4 | \alias{check_args_for_bootstrap_plot_generation} 5 | \title{check_args_for_bootstrap_plot_generation} 6 | \usage{ 7 | check_args_for_bootstrap_plot_generation( 8 | sct_data, 9 | tt, 10 | thresh, 11 | annotLevel, 12 | reps, 13 | full_results, 14 | listFileName, 15 | showGNameThresh, 16 | sortBy 17 | ) 18 | } 19 | \arguments{ 20 | \item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.} 21 | 22 | \item{tt}{Differential expression table. 23 | Can be output of \link[limma]{topTable} function. 24 | Minimum requirement is that one column stores a metric of 25 | increased/decreased expression (i.e. log fold change, t-statistic for 26 | differential expression etc) and another contains gene symbols.} 27 | 28 | \item{thresh}{The number of up- and down- regulated genes to be included in 29 | each analysis (Default: 250).} 30 | 31 | \item{annotLevel}{An integer indicating which level of \code{sct_data} to 32 | analyse (\emph{Default: 1}).} 33 | 34 | \item{reps}{Number of random gene lists to generate (\emph{Default: 100}, 35 | but should be >=10,000 for publication-quality results).} 36 | 37 | \item{full_results}{The full output of 38 | \link[EWCE]{ewce_expression_data} for the same gene list.} 39 | 40 | \item{listFileName}{String used as the root for files saved using 41 | this function.} 42 | 43 | \item{showGNameThresh}{Integer.
If a gene has over X percent of its 44 | expression proportion in a cell type, then list the gene name.} 45 | 46 | \item{sortBy}{Column name of metric in \code{tt} 47 | which should be used to sort up- from down- regulated genes (Default: "t").} 48 | } 49 | \value{ 50 | Null output. 51 | } 52 | \description{ 53 | Check the input arguments of the 54 | \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 55 | } 56 | \keyword{internal} 57 | -------------------------------------------------------------------------------- /R/convert_old_ewce_to_new.r: -------------------------------------------------------------------------------- 1 | #' convert_old_ewce_to_new 2 | #' 3 | #' \code{convert_old_ewce_to_new} Used to get a new-style EWCE ctd file 4 | #' (mean_exp/specificity) from old ones (all_scts). 5 | #' 6 | #' If you've already loaded it and want to pass it as a celltype_data 7 | #' structure, then don't set level1 or level2. 8 | #' 9 | #' @param level1 File path to old level1 of EWCE ctd. 10 | #' @param level2 File path to old level2 of EWCE ctd. 11 | #' @param celltype_data The ctd to be converted. 12 | #' 13 | #' @return CellTypeData in the new data structure style.
14 | #' 15 | #' @keywords internal 16 | convert_old_ewce_to_new <- function(level1 = NA, 17 | level2 = NA, 18 | celltype_data = NA) { 19 | ctd <- list() 20 | if (!is.na(level1)) { 21 | celltype_data <- load_rdata(level1) 22 | # Old -> new field names: cell_dists -> specificity, all_scts -> mean_exp 23 | ctd[[1]] <- list() 24 | ctd[[1]]$specificity <- celltype_data[[1]]$cell_dists 25 | ctd[[1]]$mean_exp <- celltype_data[[1]]$all_scts 26 | if ("annot" %in% names(celltype_data[[1]])) { 27 | ctd[[1]]$annot <- celltype_data[[1]]$annot 28 | } 29 | # NOTE: level2 is only read here, i.e. only when level1 was also supplied 30 | if (!is.na(level2)) { 31 | celltype_data <- load_rdata(level2) 32 | ctd[[2]] <- list() 33 | ctd[[2]]$specificity <- celltype_data[[1]]$cell_dists 34 | ctd[[2]]$mean_exp <- celltype_data[[1]]$all_scts 35 | if ("annot" %in% names(celltype_data[[1]])) { 36 | ctd[[2]]$annot <- celltype_data[[1]]$annot 37 | } 38 | } 39 | } else { 40 | for (i in seq_len(length(celltype_data))) { 41 | ctd[[i]] <- list() 42 | ctd[[i]]$specificity <- celltype_data[[i]]$cell_dists 43 | ctd[[i]]$mean_exp <- celltype_data[[i]]$all_scts 44 | if ("annot" %in% names(celltype_data[[i]])) { 45 | ctd[[i]]$annot <- celltype_data[[i]]$annot 46 | } 47 | } 48 | } 49 | return(ctd) 50 | } 51 | -------------------------------------------------------------------------------- /man/create_background_multilist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create_background_multilist.R 3 | \name{create_background_multilist} 4 | \alias{create_background_multilist} 5 | \title{Create background gene list for multiple species} 6 | \usage{ 7 | create_background_multilist( 8 | gene_list1, 9 | gene_list2, 10 | gene_list1_species, 11 | gene_list2_species, 12 | output_species = "human", 13 | bg = NULL, 14 | use_intersect = FALSE, 15 | method = "homologene", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{output_species}{Species to convert all genes from 21 | \code{species1} and \code{species2} to first.
22 | \code{Default="human"}, but can be to either any species 23 | supported by \pkg{orthogene}, including 24 | \code{species1} or \code{species2}.} 25 | 26 | \item{bg}{User supplied background list that will be returned to the 27 | user after removing duplicate genes.} 28 | 29 | \item{use_intersect}{When \code{species1} and \code{species2} are both 30 | different from \code{output_species}, this argument will determine whether 31 | to use the intersect (\code{TRUE}) or union (\code{FALSE}) of all genes 32 | from \code{species1} and \code{species2}.} 33 | 34 | \item{method}{R package to use for gene mapping: 35 | \describe{ 36 | \item{\code{"gprofiler"}}{Slower but more species and genes.} 37 | \item{\code{"homologene"}}{Faster but fewer species and genes.} 38 | \item{\code{"babelgene"}}{Faster but fewer species and genes. 39 | Also gives consensus scores for each gene mapping based on a 40 | several different data sources.} 41 | }} 42 | 43 | \item{verbose}{Print messages.} 44 | } 45 | \value{ 46 | Background and gene list. 47 | } 48 | \description{ 49 | Create background gene list for the 50 | intersection/union between multiple species 51 | (\code{gene_list1_species}, \code{gene_list2_species}, and 52 | \code{sctSpecies}), and then filter the gene lists to only include genes 53 | within the background. 
54 | } 55 | \keyword{internal} 56 | -------------------------------------------------------------------------------- /tests/testthat/test-get_celltype_table.r: -------------------------------------------------------------------------------- 1 | test_that("get_celltype_table works", { 2 | if (!is_32bit()) { 3 | set.seed(1234) 4 | cortex_mrna <- ewceData::cortex_mrna() 5 | 6 | #### Test 1: get_celltype_table #### 7 | cortex_mrna$annot$dataset_name <- "cortex_mrna" 8 | celltype_table <- EWCE::get_celltype_table(cortex_mrna$annot) 9 | total_celltypes <- length(unique(cortex_mrna$annot$level1class)) + 10 | length(unique(cortex_mrna$annot$level2class)) 11 | testthat::expect_true(methods::is(celltype_table, "data.frame")) 12 | testthat::expect_equal(nrow(celltype_table), total_celltypes) 13 | testthat::expect_equal(ncol(celltype_table), 5) 14 | 15 | #### Test 2: filter_variance_quantiles #### 16 | exp <- cortex_mrna$exp[seq(1, 300), ] 17 | ## No normalization 18 | exp_filt1 <- EWCE:::filter_variance_quantiles( 19 | exp = exp, 20 | log10_norm = FALSE 21 | ) 22 | testthat::expect_equal(nrow(exp_filt1), 180) 23 | ## SCT normalization 24 | exp_norm <- EWCE::sct_normalize(exp = exp) 25 | exp_filt2 <- EWCE:::filter_variance_quantiles( 26 | exp = exp_norm, 27 | log10_norm = FALSE 28 | ) 29 | testthat::expect_equal(nrow(exp_filt2), 180) 30 | ## Log normalization 31 | exp_filt3 <- EWCE:::filter_variance_quantiles( 32 | exp = exp, 33 | log10_norm = TRUE 34 | ) 35 | testthat::expect_equal(nrow(exp_filt3), 180) 36 | ## SCT normalization + Log normalization 37 | exp_filt4 <- EWCE:::filter_variance_quantiles( 38 | exp = exp_norm, 39 | log10_norm = TRUE 40 | ) 41 | testthat::expect_equal(nrow(exp_filt4), 180) 42 | 43 | ## CONCLUSION: 44 | ## log normalisation has no effect using computing quantiles 45 | ## using the "EWCE" method (default), 46 | ## but is essential when computing quantiles using 47 | ## the stats::ecdf method. 
48 | } 49 | }) 50 | -------------------------------------------------------------------------------- /man/fix_bad_hgnc_symbols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fix_bad_hgnc_symbols.r 3 | \name{fix_bad_hgnc_symbols} 4 | \alias{fix_bad_hgnc_symbols} 5 | \title{fix_bad_hgnc_symbols} 6 | \usage{ 7 | fix_bad_hgnc_symbols( 8 | exp, 9 | dropNonHGNC = FALSE, 10 | as_sparse = TRUE, 11 | verbose = TRUE, 12 | localHub = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{exp}{An expression matrix where the rows are HGNC symbols or a 17 | SingleCellExperiment (SCE) or other 18 | Ranged Summarized Experiment (SE) type object.} 19 | 20 | \item{dropNonHGNC}{Boolean. Should symbols not recognised as HGNC symbols 21 | be dropped?} 22 | 23 | \item{as_sparse}{Convert \code{exp} to sparse matrix.} 24 | 25 | \item{verbose}{Print messages.} 26 | 27 | \item{localHub}{If working offline, add argument localHub=TRUE to work 28 | with a local, non-updated hub; It will only have resources available that 29 | have previously been downloaded. If offline, Please also see BiocManager 30 | vignette section on offline use to ensure proper functionality.} 31 | } 32 | \value{ 33 | Returns the expression matrix with the rownames corrected and rows 34 | representing the same gene merged. If a SingleCellExperiment (SCE) or other 35 | Ranged Summarized Experiment (SE) type object was inputted this will be 36 | returned with the corrected expression matrix under counts. 37 | } 38 | \description{ 39 | Given an expression matrix, wherein the rows are supposed to be HGNC 40 | symbols, find those symbols which are not official HGNC symbols, then 41 | correct them if possible. Return the expression matrix with corrected 42 | symbols. 
43 | } 44 | \examples{ 45 | # create example expression matrix, could be part of a exp, annot list obj 46 | exp <- matrix(data = runif(70), ncol = 10) 47 | # Add HGNC gene names but add with an error: 48 | # MARCH8 is a HGNC symbol which if opened in excel will convert to Mar-08 49 | rownames(exp) <- 50 | c("MT-TF", "MT-RNR1", "MT-TV", "MT-RNR2", "MT-TL1", "MT-ND1", "Mar-08") 51 | exp <- fix_bad_hgnc_symbols(exp) 52 | # fix_bad_hgnc_symbols warns the user of this possible issue 53 | } 54 | -------------------------------------------------------------------------------- /man/ewce_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ewce_plot.r 3 | \name{ewce_plot} 4 | \alias{ewce_plot} 5 | \title{Plot EWCE results} 6 | \usage{ 7 | ewce_plot( 8 | total_res, 9 | mtc_method = "bonferroni", 10 | q_threshold = 0.05, 11 | ctd = NULL, 12 | annotLevel = 1, 13 | heights = c(0.3, 1), 14 | make_dendro = FALSE, 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{total_res}{Results data.frame generated using 20 | \link[EWCE]{bootstrap_enrichment_test} or 21 | \link[EWCE]{ewce_expression_data} functions. 22 | Multiple results tables can be 23 | merged into one results table, as long as the 'list' column is set to 24 | distinguish them. 25 | Multiple testing correction is then applied across all merged results.} 26 | 27 | \item{mtc_method}{Method to be used for multiple testing correction. 28 | Argument is passed to \link[stats]{p.adjust} (DEFAULT: "bonferroni).} 29 | 30 | \item{q_threshold}{Corrected significance threshold.} 31 | 32 | \item{ctd}{CellTypeDataset object. 33 | Should be provided so that the dendrogram can be taken from it 34 | and added to plots.} 35 | 36 | \item{annotLevel}{An integer indicating which level of \code{ctd} to 37 | analyse (\emph{Default: 1}).} 38 | 39 | \item{heights}{The relative heights row in the grid. 
40 | Will get repeated to match the dimensions of the grid. 41 | Passed to \link[patchwork]{wrap_plots}.} 42 | 43 | \item{make_dendro}{Add a dendrogram (requires \code{ctd}).} 44 | 45 | \item{verbose}{Print messages.} 46 | } 47 | \value{ 48 | A named list containing versions of the \link[ggplot2]{ggplot} 49 | with and without the dendrogram. Note that cell type order on the x-axis is 50 | based on hierarchical clustering for both plots if make_dendro = TRUE. 51 | } 52 | \description{ 53 | \code{ewce_plot} generates plots of EWCE enrichment results 54 | } 55 | \examples{ 56 | ## Bootstrap significance test, 57 | ## no control for transcript length or GC content 58 | ## Use pre-computed results to speed up example 59 | total_res <- EWCE::example_bootstrap_results()$results 60 | plt <- ewce_plot(total_res = total_res) 61 | } 62 | -------------------------------------------------------------------------------- /man/merged_ewce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/merged_ewce.r 3 | \name{merged_ewce} 4 | \alias{merged_ewce} 5 | \title{Multiple EWCE results from multiple studies} 6 | \usage{ 7 | merged_ewce(results, reps = 100) 8 | } 9 | \arguments{ 10 | \item{results}{a list of EWCE results generated using 11 | \link[EWCE]{add_res_to_merging_list}.} 12 | 13 | \item{reps}{Number of random gene lists to generate (Default=100 but should 14 | be >=10,000 for publication-quality results).} 15 | } 16 | \value{ 17 | dataframe in which each row gives the statistics (p-value, fold 18 | change and number of standard deviations from the mean) associated with the 19 | enrichment of the stated cell type in the gene list. 
20 | } 21 | \description{ 22 | \code{merged_ewce} combines enrichment results from multiple studies 23 | targetting the same scientific problem 24 | } 25 | \examples{ 26 | # Load the single cell data 27 | ctd <- ewceData::ctd() 28 | 29 | # Use 3 bootstrap lists for speed, for publishable analysis use >10000 30 | reps <- 3 31 | # Use 5 up/down regulated genes (thresh) for speed, default is 250 32 | thresh <- 5 33 | 34 | # Load the data 35 | tt_alzh_BA36 <- ewceData::tt_alzh_BA36() 36 | tt_alzh_BA44 <- ewceData::tt_alzh_BA44() 37 | 38 | # Run EWCE analysis 39 | tt_results_36 <- EWCE::ewce_expression_data( 40 | sct_data = ctd, 41 | tt = tt_alzh_BA36, 42 | thresh = thresh, 43 | annotLevel = 1, 44 | reps = reps, 45 | ttSpecies = "human", 46 | sctSpecies = "mouse" 47 | ) 48 | tt_results_44 <- EWCE::ewce_expression_data( 49 | sct_data = ctd, 50 | tt = tt_alzh_BA44, 51 | thresh = thresh, 52 | annotLevel = 1, 53 | reps = reps, 54 | ttSpecies = "human", 55 | sctSpecies = "mouse" 56 | ) 57 | 58 | # Fill a list with the results 59 | results <- EWCE::add_res_to_merging_list(tt_results_36) 60 | results <- EWCE::add_res_to_merging_list(tt_results_44, results) 61 | 62 | # Perform the merged analysis 63 | # For publication reps should be higher 64 | merged_res <- EWCE::merged_ewce( 65 | results = results, 66 | reps = 2 67 | ) 68 | print(merged_res) 69 | } 70 | -------------------------------------------------------------------------------- /man/merge_sce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/merge_sce.R 3 | \name{merge_sce} 4 | \alias{merge_sce} 5 | \title{Merge multiple \code{SingleCellExperiment} objects} 6 | \source{ 7 | \href{https://bioconductor.org/packages/release/bioc/html/scMerge.html}{ 8 | scMerge}. 
9 | } 10 | \usage{ 11 | merge_sce( 12 | sce_list, 13 | method = "intersect", 14 | cut_off_batch = 0.01, 15 | cut_off_overall = 0.01, 16 | use_assays = NULL, 17 | colData_names = NULL, 18 | batch_names = NULL, 19 | verbose = TRUE 20 | ) 21 | } 22 | \arguments{ 23 | \item{sce_list}{A list contains the \code{SingleCellExperiment} 24 | Object from each batch.} 25 | 26 | \item{method}{A string indicates the method of combining the 27 | gene expression matrix, either \code{union} or \code{intersect}. 28 | Default to \code{intersect}. \code{union} only supports matrix class.} 29 | 30 | \item{cut_off_batch}{A numeric vector indicating the cut-off for 31 | the proportion of a gene is expressed within each batch.} 32 | 33 | \item{cut_off_overall}{A numeric vector indicating the cut-off for 34 | the proportion of a gene is expressed overall data.} 35 | 36 | \item{use_assays}{A string vector indicating the expression matrices 37 | to be combined. 38 | The first assay named will be used to determine the proportion of zeros.} 39 | 40 | \item{colData_names}{A string vector indicating the \code{colData} 41 | that are combined.} 42 | 43 | \item{batch_names}{A string vector indicating the batch names for 44 | the output SCE object.} 45 | 46 | \item{verbose}{Print messages.} 47 | } 48 | \value{ 49 | A \code{SingleCellExperiment} object with the list of SCE 50 | objects combined. 51 | } 52 | \description{ 53 | Merge several \code{SingleCellExperiment} (SCE) objects from 54 | different batches/experiments. 55 | Extracted from the 56 | \href{https://bioconductor.org/packages/release/bioc/html/scMerge.html}{ 57 | scMerge} package. 
58 | } 59 | \examples{ 60 | ctd <- ewceData::ctd() 61 | sce_list <- EWCE::ctd_to_sce(object = ctd) 62 | sce_combine <- merge_sce(sce_list = sce_list) 63 | } 64 | \author{ 65 | Yingxin Lin (modified by Brian Schilder) 66 | } 67 | -------------------------------------------------------------------------------- /R/check_sce.R: -------------------------------------------------------------------------------- 1 | #' Check SingleCellExperiment 2 | #' 3 | #' Check whether \code{exp} is a SingleCellExperiment (SCE) object and extract 4 | #' the relevant components. 5 | #' @param exp Expression data: a matrix, a data.frame, or a SummarizedExperiment-derived object. 6 | #' @return List of extracted SCE components. 7 | #' @param verbose Print messages. 8 | #' @keywords internal 9 | #' @importFrom methods is 10 | #' @importFrom SummarizedExperiment assays assayNames colData 11 | check_sce <- function(exp, 12 | verbose = TRUE) { 13 | requireNamespace("SummarizedExperiment") 14 | if (methods::is(exp, "SummarizedExperiment")) { 15 | # update exp to hold the counts from the SCE 16 | SE_exp <- exp 17 | if (!"counts" %in% names(SummarizedExperiment::assays(SE_exp))) { 18 | if ("raw" %in% names(SummarizedExperiment::assays(SE_exp))) { 19 | messager("Renaming assay: raw --> counts", v = verbose) 20 | SummarizedExperiment::assayNames(SE_exp) <- 21 | gsub( 22 | "^raw$", "counts", 23 | SummarizedExperiment::assayNames(SE_exp) 24 | ) 25 | } else { 26 | stop( 27 | "Please ensure counts is the assay name for your raw ", 28 | "experiment data in your SE/SCE object" 29 | ) 30 | } 31 | } 32 | exp <- SummarizedExperiment::assays(SE_exp)$counts 33 | metadata <- SummarizedExperiment::colData(SE_exp) 34 | # set boolean for later operations 35 | SE_obj <- TRUE 36 | } else { 37 | SE_exp <- NULL 38 | SE_obj <- FALSE 39 | metadata <- NULL 40 | } 41 | #also check exp is a matrix not a DF as this will cause a strange error, see 42 | # https://github.com/NathanSkene/EWCE/issues/92 43 | # test if a DF rather than not a matrix to include all types of matrix as 44 | # sparse matrices won't return TRUE with is.matrix() but this will
catch 45 | # DT's/DF's/Tibbles 46 | if (is.data.frame(exp)){ 47 | exp <- as.matrix(exp) 48 | } 49 | return(list( 50 | exp = exp, 51 | metadata = metadata, 52 | SE_exp = SE_exp, 53 | SE_obj = SE_obj 54 | )) 55 | } 56 | -------------------------------------------------------------------------------- /man/merge_two_expfiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/merge_two_expfiles.r 3 | \name{merge_two_expfiles} 4 | \alias{merge_two_expfiles} 5 | \title{Merge two exp files} 6 | \usage{ 7 | merge_two_expfiles( 8 | exp1, 9 | exp2, 10 | annot1, 11 | annot2, 12 | name1 = "", 13 | name2 = "", 14 | as_sparse = TRUE, 15 | as_DelayedArray = FALSE, 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{exp1}{Numerical expression matrix for dataset1 with row for each gene 21 | and column for each cell. Row names are gene symbols. Column names 22 | are cell IDs which can be cross referenced against the annot data frame.} 23 | 24 | \item{exp2}{Numerical expression matrix for dataset2 with row for each gene 25 | and column for each cell. Row names are gene symbols. Column names 26 | are cell IDs which can be cross referenced against the annot data frame.} 27 | 28 | \item{annot1}{Annotation data frame for dataset1 which contains three 29 | columns at least: cell_id, level1class and level2class} 30 | 31 | \item{annot2}{Annotation data frame for dataset2 which contains three 32 | columns at least: cell_id, level1class and level2class} 33 | 34 | \item{name1}{Name used to refer to dataset 1. Leave blank if it's already a 35 | merged dataset.} 36 | 37 | \item{name2}{Name used to refer to dataset 2.
Leave blank if it's already a 38 | merged dataset.} 39 | 40 | \item{as_sparse}{Convert the merged \code{exp} to a sparse matrix.} 41 | 42 | \item{as_DelayedArray}{Convert the merged \code{exp} to 43 | a \code{DelayedArray}.} 44 | 45 | \item{verbose}{Print messages.} 46 | } 47 | \value{ 48 | List containing merged exp and annot. 49 | } 50 | \description{ 51 | \code{merge_two_expfiles} Used to combine two single cell type datasets. 52 | } 53 | \examples{ 54 | cortex_mrna <- ewceData::cortex_mrna() 55 | exp1 <- cortex_mrna$exp[, 1:50] 56 | exp2 <- cortex_mrna$exp[, 51:100] 57 | annot1 <- cortex_mrna$annot[1:50, ] 58 | annot2 <- cortex_mrna$annot[51:100, ] 59 | merged_res <- EWCE::merge_two_expfiles( 60 | exp1 = exp1, 61 | exp2 = exp2, 62 | annot1 = annot1, 63 | annot2 = annot2, 64 | name1 = "dataset1", 65 | name2 = "dataset2" 66 | ) 67 | } 68 | -------------------------------------------------------------------------------- /R/plot_with_bootstrap_distributions.R: -------------------------------------------------------------------------------- 1 | #' Plot with bootstrap distributions 2 | #' 3 | #' Plot results of \link[EWCE]{generate_bootstrap_plots_for_transcriptome}. 4 | #' 5 | #' @return Null result. 
6 | #' 7 | #' @keywords internal 8 | plot_with_bootstrap_distributions <- function(exp_mats, 9 | cc, 10 | hit_exp, 11 | tag, 12 | listFileName, 13 | graph_theme, 14 | save_dir = file.path( 15 | tempdir(), 16 | paste0("BootstrapPlots", 17 | "_for_transcriptome")), 18 | height=3.5, 19 | width=3.5) { 20 | requireNamespace("ggplot2") 21 | requireNamespace("reshape2") 22 | 23 | messager(cc,": Saving bootstrap plot with distributions.") 24 | melt_boot <- reshape2::melt(as.matrix(exp_mats[[cc]])) 25 | colnames(melt_boot) <- c("Rep", "Pos", "Exp") 26 | melt_boot$Pos <- as.factor(melt_boot$Pos) # must follow the rename: the Pos column only exists after colnames<- 27 | actVals <- data.frame( 28 | pos = as.factor(seq_len(length(hit_exp))), 29 | vals = hit_exp 30 | ) 31 | #### Save path #### 32 | pdf_path <- file.path( 33 | save_dir, 34 | sprintf("bootDists_%s___%s____%s.pdf", 35 | tag, listFileName, cc 36 | )) 37 | dir.create(dirname(pdf_path),showWarnings = FALSE, recursive = TRUE) 38 | #### Make plot (ggplot2 is only loaded via requireNamespace, so qualify every call) #### 39 | gp <- ggplot2::ggplot(melt_boot) + 40 | ggplot2::geom_boxplot(ggplot2::aes(x = .data$Pos, y = .data$Exp), outlier.size = 0) + 41 | ggplot2::geom_point(ggplot2::aes(x = .data$pos, y = .data$vals), 42 | col = "red", data = actVals 43 | ) + 44 | ggplot2::ylab("Expression in cell type (%)\n") + 45 | ggplot2::xlab("Least specific --> Most specific") + 46 | ggplot2::scale_x_discrete(breaks = NULL) + 47 | graph_theme 48 | return(list(plot=gp, 49 | path=pdf_path)) 50 | } 51 | -------------------------------------------------------------------------------- /R/prep_dendro.r: -------------------------------------------------------------------------------- 1 | #' Prepare dendrogram 2 | #' 3 | #' \code{prep_dendro} adds a dendrogram to a CellTypeDataset (CTD). 4 | #' 5 | #' @param ctdIN A single annotLevel of a ctd, i.e. ctd[[1]] (the function is 6 | #' intended to be used via apply). 7 | #' @return A CellTypeDataset with dendrogram plotting info added.
8 | #' @param expand Passed to \link[ggplot2]{scale_x_continuous} for both dendrograms; set to \code{NULL} to skip. 9 | #' @keywords internal 10 | #' @importFrom stats hclust dist 11 | #' @importFrom Matrix t 12 | prep_dendro <- function(ctdIN, 13 | expand=c(0, 0.66)) { 14 | requireNamespace("ggplot2") 15 | requireNamespace("ggdendro") 16 | # euclidean distances between the rows of the transposed matrix (i.e. its original columns) 17 | binned_file_dist <- stats::dist(Matrix::t(ctdIN$specificity_quantiles)) 18 | binned_file_dist_hclust <- stats::hclust(binned_file_dist) 19 | ddata <- ggdendro::dendro_data(binned_file_dist_hclust, 20 | type = "rectangle" 21 | ) 22 | ordered_cells <- as.character(ddata$labels$label) 23 | #### Vertical dendrogram #### 24 | a1 <- ggplot2::ggplot(ggdendro::segment(ddata)) + 25 | ggplot2::geom_segment( 26 | ggplot2::aes( 27 | x = .data$x, y = .data$y, 28 | xend = .data$xend, yend = .data$yend 29 | )) + 30 | ggplot2::coord_flip() + 31 | ggdendro::theme_dendro() 32 | if(!is.null(expand)){ 33 | a1 <- a1 + ggplot2::scale_x_continuous(expand = expand) 34 | } 35 | #### Horizontal dendrogram #### 36 | b1 <- ggplot2::ggplot(ggdendro::segment(ddata)) + 37 | ggplot2::geom_segment(ggplot2::aes( 38 | x = .data$x, y = .data$y, 39 | xend = .data$xend, yend = .data$yend 40 | )) + 41 | ggdendro::theme_dendro() 42 | if(!is.null(expand)){ 43 | b1 <- b1 + ggplot2::scale_x_continuous(expand = expand) 44 | } 45 | #### Make nested list #### 46 | ctdIN$plotting <- list() 47 | ctdIN$plotting$ggdendro_vertical <- a1 48 | ctdIN$plotting$ggdendro_horizontal <- b1 49 | ctdIN$plotting$cell_ordering <- ordered_cells 50 | return(ctdIN) 51 | } 52 | 53 | #' prep.dendro 54 | #' 55 | #' @inherit prep_dendro 56 | prep.dendro <- function(ctdIN) { 57 | .Deprecated("prep_dendro") 58 | ctdIN <- prep_dendro(ctdIN = ctdIN) 59 | return(ctdIN) 60 | } 61 | -------------------------------------------------------------------------------- /R/bin_columns_into_quantiles.r: -------------------------------------------------------------------------------- 1 | #' \code{bin_columns_into_quantiles} 2 | #' 3 | #' \code{bin_columns_into_quantiles} is an internal
#' \code{bin_columns_into_quantiles}
#'
#' \code{bin_columns_into_quantiles} is an internal function used to convert a
#' vector of specificity into a vector of specificity quantiles.
#' This function can be iterated across a matrix using \link[base]{apply}
#' to create a matrix of specificity quantiles.
#' @param vec The vector of gene specificity values.
#' Values that are zero, negative or \code{NA} are assigned to bin 0.
#' @param numberOfBins Number of quantile bins to use (40 is recommended).
#' @param defaultBin Which bin to assign when there's only one
#' non-zero quantile. In situations where there's only one non-zero quantile,
#' \link[base]{cut} throws an error. Avoid these situations by
#' using a default quantile.
#' @returns A vector with same length as \code{vec} but with columns storing
#' quantiles instead of specificity.
#' @examples
#' ctd <- ewceData::ctd()
#' ctd[[1]]$specificity_quantiles <- apply(ctd[[1]]$specificity, 2,
#'     FUN = bin_columns_into_quantiles)
#' @export
#' @importFrom stats quantile
bin_columns_into_quantiles <- function(vec,
                                       numberOfBins = 40,
                                       defaultBin = as.integer(
                                           numberOfBins / 2)
                                       ) {

    quantileValues <- rep(0, length(vec))
    # Build the mask once and make it NA-free: an NA in a logical subscript
    # makes `x[mask] <- <vector>` fail with
    # "NAs are not allowed in subscripted assignments".
    is_positive <- !is.na(vec) & vec > 0
    positive_vals <- vec[is_positive]
    breaks <- unique(stats::quantile(positive_vals,
        probs = seq(0, 1, by = 1 / numberOfBins),
        na.rm = TRUE
    ))
    if (length(breaks) > 1) {
        quantileValues[is_positive] <- as.numeric(cut(positive_vals,
            breaks = breaks,
            include.lowest = TRUE
        ))
    } else {
        ## In situations where there's only one non-zero quantile,
        ## cut() throws an error.
        ## Avoid these situations by using a default quantile.
        messager(
            "+ <2 non-zero quantile bins detected in column.",
            "Assigning these values to default quantile ",
            "(", defaultBin, ")"
        )
        quantileValues[is_positive] <- defaultBin
    }
    return(quantileValues)
}
#' Run DGE: \pkg{MAST}
#'
#' Run Differential Gene Expression with \pkg{MAST}.
#'
#' @param test Which \pkg{MAST} test to run.
#' Only \code{"LRT"} is currently implemented; any other value is an error.
#' @param no_cores Number of cores to parallelise DGE across.
#' @inheritParams drop_uninformative_genes
#' @inheritParams MAST::zlm
#'
#' @source \href{https://www.bioconductor.org/packages/release/bioc/vignettes/MAST/inst/doc/MAITAnalysis.html}{MAST tutorial}
#'
#' @return \code{MAST} results
#'
#' @keywords internal
run_mast <- function(exp,
                     level2annot,
                     test = "LRT",
                     mtc_method = "BH",
                     no_cores = 1,
                     ...) {
    # Fail early and clearly: previously any other value fell through to
    # "object 'mast_res' not found" after the input had been converted.
    # (A zlm/hurdle-model branch was sketched in commented-out code but was
    # never enabled.)
    if (!isTRUE(test == "LRT")) {
        stop(paste0(
            "Unsupported test: \"", paste(test, collapse = ", "), "\". ",
            "Only test = \"LRT\" is currently implemented."
        ))
    }
    requireNamespace("MAST")
    sca <- MAST::FromMatrix(
        exprsArray = exp,
        cData = data.frame(Class = level2annot),
        fData = data.frame(Gene = rownames(exp)),
        check_sanity = FALSE
    )
    # Set mc.cores only for the duration of this call and restore the
    # caller's value on exit (including on error).
    old_opts <- options(mc.cores = no_cores)
    on.exit(options(old_opts), add = TRUE)
    mast_res <- MAST::LRT(
        sca = sca,
        comparison = "Class",
        ...
    )
    # Multiple-testing correction of the LRT p-values.
    mast_res$q <- stats::p.adjust(
        p = mast_res$p.value,
        method = mtc_method
    )
    return(mast_res)
}
If \code{bg=NULL}, 25 | an appropriate gene background will be created automatically.} 26 | 27 | \item{reps}{Number of gene lists to sample.} 28 | 29 | \item{no_cores}{Number of cores to parallelise 30 | bootstrapping \code{reps} over.} 31 | 32 | \item{sctSpecies}{Species that \code{sct_data} is currently formatted as 33 | (no longer limited to just "mouse" and "human"). 34 | See \link[EWCE]{list_species} for all available species.} 35 | 36 | \item{genelistSpecies}{Species that \code{hits} genes came from 37 | (no longer limited to just "mouse" and "human"). 38 | See \link[EWCE]{list_species} for all available species.} 39 | 40 | \item{verbose}{Print messages.} 41 | 42 | \item{localHub}{If working offline, add argument localHub=TRUE to work 43 | with a local, non-updated hub; It will only have resources available that 44 | have previously been downloaded. If offline, Please also see BiocManager 45 | vignette section on offline use to ensure proper functionality.} 46 | } 47 | \value{ 48 | A list containing three data frames: 49 | \itemize{ 50 | \item \code{hits}: Array of HGNC symbols containing the hit genes. 51 | May be slightly reduced if gene length / GC content could not be found 52 | for all genes. 53 | \item \code{list_network}: The control gene lists as a data frame of HGNC 54 | symbols 55 | } 56 | } 57 | \description{ 58 | \code{prepare_genesize_control_network} takes a gene list and finds 59 | semi-randomly selected gene lists which are matched for gene length and 60 | GC content. 61 | } 62 | \keyword{internal} 63 | -------------------------------------------------------------------------------- /R/filter_variance_quantiles.r: -------------------------------------------------------------------------------- 1 | #' Filter variance quantiles 2 | #' 3 | #' Remove rows in \code{exp} that do not vary substantially across rows. 4 | #' 5 | #' @param exp Gene expression matrix. 6 | #' @param log10_norm Log10-normalise \code{exp} before computing variance. 
#' Filter variance quantiles
#'
#' Remove rows in \code{exp} that do not vary substantially across columns.
#' Row variances are binned into quantiles and only rows whose quantile is
#' at least \code{min_variance_quantile} are kept.
#'
#' @param exp Gene expression matrix.
#' @param log10_norm Log10-normalise \code{exp} before computing variance.
#' @param n_quantiles Number of quantile bins to use.
#' Defaults to deciles (\code{n_quantiles=10}).
#' @param min_variance_quantile The minimum variance quantile
#' to keep values from.
#' @param verbose Print messages.
#'
#' @returns Filtered \code{exp}; rows keep their original order and the
#' result keeps both dimensions even when a single row remains.
#' @keywords internal
filter_variance_quantiles <- function(exp,
                                      log10_norm = TRUE,
                                      n_quantiles = 10,
                                      min_variance_quantile = as.integer(
                                          n_quantiles / 2
                                      ),
                                      verbose = TRUE) {
    # Keep the untouched input: filtering is applied to it at the end, so
    # the log transform below only affects the variance estimate.
    exp_orig <- exp
    messager("Filtering by variance quantiles.", v = verbose)
    #### Log normalise to avoid skewed quantiles ####
    if (isTRUE(log10_norm)) {
        # Small offset added before the log transform.
        exp <- log10(exp + 1e-12)
    }
    #### Convert to DelayedArray to take advantage of rowVars func #####
    exp <- to_delayed_array(exp, verbose = verbose)
    #### Calculate gene variance across cell types means ####
    gene_variance <- DelayedMatrixStats::rowVars(exp)
    #### Convert to quantiles ####
    quant <- bin_columns_into_quantiles(vec = gene_variance,
                                        numberOfBins = n_quantiles)
    #### Remove genes below the min_variance_quantile ####
    # Select by position rather than by row name, so missing or duplicated
    # rownames cannot lose or mis-select rows.
    keep <- quant >= min_variance_quantile
    messager(paste(
        formatC(sum(!keep), big.mark = ","),
        "/",
        formatC(nrow(exp), big.mark = ","),
        "genes dropped @ DGE min_variance_quantile >=", min_variance_quantile
    ), v = verbose)
    #### Return filtered original data ####
    # drop = FALSE: a single surviving gene must not collapse to a vector.
    return(exp_orig[keep, , drop = FALSE])
}
#' get_exp_data_for_bootstrapped_genes
#'
#' Support function for
#' \link[EWCE]{generate_bootstrap_plots_for_transcriptome}.
#' For each of \code{nReps} repetitions, samples \code{length(hits)} genes
#' from \code{combinedGenes} and stores their sorted specificity values for
#' every entry of \code{signif_res}.
#'
#' @param results Currently unused by this function; kept so existing
#' callers continue to work.
#' @param signif_res Character vector of column names of the specificity
#' matrix to collect values for.
#' @param hits Gene hits.
#' @param combinedGenes Combined list of genes from \code{sct_data},
#' \code{hits}, and background \code{bg}.
#' @param nReps Number of bootstrap repetitions (rows per output matrix).
#' @param as_sparse Convert each output matrix to a sparse matrix.
#' @param verbose Print messages.
#' @inheritParams generate_bootstrap_plots_for_transcriptome
#' @returns exp_mats: a named list with one \code{nReps} x
#' \code{length(hits)} matrix per entry of \code{signif_res}.
#'
#' @keywords internal
get_exp_data_for_bootstrapped_genes <- function(results,
                                                signif_res,
                                                sct_data,
                                                hits,
                                                combinedGenes,
                                                annotLevel,
                                                nReps = 100,
                                                as_sparse = TRUE,
                                                verbose = TRUE) {
    messager("Generating exp data for bootstrap genes.",v=verbose)
    #### Extract specificity matrix ####
    spec <- sct_data[[annotLevel]]$specificity
    sct_genes <- rownames(spec)
    n_hits <- length(hits)
    #### initialize empty matrices ####
    exp_mats <- list()
    for(cc in signif_res){
        exp_mats[[cc]] <- matrix(0, nrow = nReps, ncol = n_hits)
        rownames(exp_mats[[cc]]) <- sprintf("Rep%s", seq_len(nReps))
    }
    #### populate matrices ####
    for(s in seq_len(nReps)){
        bootstrap_set <- sample(combinedGenes, n_hits)
        ValidGenes <- sct_genes[sct_genes %in% bootstrap_set]
        # A sampled gene that is absent from the specificity matrix would
        # leave too few values for the row; R would then either error
        # cryptically or silently recycle them, so stop explicitly.
        if (length(ValidGenes) != n_hits) {
            stop(paste0(
                "Sampled ", n_hits, " genes from combinedGenes but ",
                length(ValidGenes), " matched rows of the specificity",
                " matrix. combinedGenes must be unique genes that are all",
                " present in sct_data."
            ))
        }
        # drop = FALSE keeps a matrix when there is a single hit, so the
        # column lookup below still works.
        expD <- spec[ValidGenes, , drop = FALSE]
        for (cc in signif_res) {
            exp_mats[[cc]][s, ] <- sort(expD[, cc])
        }
    }
    #### Convert to sparse matrices ####
    if(as_sparse){
        messager("Converting data for bootstrap tests to sparse matrices.",
                 v=verbose)
        for(cc in signif_res){
            exp_mats[[cc]] <- to_sparse_matrix(exp = exp_mats[[cc]],
                                               verbose = FALSE)
        }
    }
    return(exp_mats)
}
#' Plot \emph{CellTypeData} metrics
#'
#' Plot \emph{CellTypeData} metrics such as mean_exp, specificity and/or
#' specificity_quantiles.
#'
#' @param ctd CellTypeDataset.
#' @param genes Which genes in \code{ctd} to plot. Genes that are not
#' row names of the selected matrix are ignored; at least one must match.
#' @param level Annotation level in \code{ctd} to plot.
#' @param metric Which metric in the \code{ctd} to plot:
#' \itemize{
#' \item \code{"mean_exp"}
#' \item \code{"specificity"}
#' \item \code{"specificity_quantiles"}
#' }
#' @param show_plot Whether to print the plot or simply return it.
#'
#' @return ggplot object.
#'
#' @examples
#' ctd <- ewceData::ctd()
#' plt <- EWCE::plot_ctd(ctd, genes = c("Apoe", "Gfap", "Gapdh"))
#' @export
#' @import ggplot2
#' @importFrom stringr str_to_sentence
#' @importFrom reshape2 melt
plot_ctd <- function(ctd,
                     genes,
                     level = 1,
                     metric = "specificity",
                     show_plot = TRUE) {
    #### Standardise metric name ####
    if (tolower(metric) %in% c(
        "expr", "exp", "expression",
        "mean_exp", "avgexp"
    )) {
        metric <- "mean_exp"
    }
    # Sentence case is used for the axis/legend label; the lower-case form
    # is the name of the matrix inside the CTD level.
    metric <- stringr::str_to_sentence(metric)
    ## convert to dense matrix so reshape2::melt can recognize it.
    mat <- as.matrix(ctd[[level]][[tolower(metric)]])
    genes <- genes[genes %in% rownames(mat)]
    if (length(genes) == 0) {
        stop("None of the requested genes are present in the selected ",
             "CellTypeDataset matrix.")
    }
    # drop = FALSE: with a single gene the subset must stay a matrix,
    # otherwise melt() does not return the three columns renamed below.
    plot_data <- reshape2::melt(mat[genes, , drop = FALSE],
        id.vars = "genes")
    colnames(plot_data) <- c("Gene", "Celltype", metric)

    gp <- ggplot(
        plot_data,
        aes(x = .data$Celltype, y = .data[[metric]], fill = .data[[metric]])
    ) +
        scale_fill_gradient(low = "blue", high = "red") +
        geom_bar(stat = "identity") +
        facet_grid(rows = vars(.data$Gene)) +
        theme_bw() +
        theme(
            axis.text.x = element_text(angle = 45, hjust = 1),
            strip.background = element_rect(fill = "white"),
            strip.text = element_text(color = "black")
        )

    # The y axis is pinned to [0, 1] only for the "specificity" metric.
    if (metric == "Specificity") {
        gp <- gp + scale_y_continuous(breaks = c(0, .5, 1), limits = c(0, 1))
    }
    if (show_plot) print(gp)
    return(gp)
}
Where a symbol is found to be an aliases for a gene that is already 10 | in the dataset, the combined reads are summed together.} 11 | \usage{ 12 | fix_bad_mgi_symbols( 13 | exp, 14 | mrk_file_path = NULL, 15 | printAllBadSymbols = FALSE, 16 | as_sparse = TRUE, 17 | verbose = TRUE, 18 | localHub = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{exp}{An expression matrix where the rows are MGI symbols, or a 23 | SingleCellExperiment (SCE) or 24 | other Ranged Summarized Experiment (SE) type object.} 25 | 26 | \item{mrk_file_path}{Path to the MRK_List2 file which can be downloaded 27 | from www.informatics.jax.org/downloads/reports/index.html} 28 | 29 | \item{printAllBadSymbols}{Output to console all the bad gene symbols} 30 | 31 | \item{as_sparse}{Convert \code{exp} to sparse matrix.} 32 | 33 | \item{verbose}{Print messages.} 34 | 35 | \item{localHub}{If working offline, add argument localHub=TRUE to work 36 | with a local, non-updated hub; It will only have resources available that 37 | have previously been downloaded. If offline, Please also see BiocManager 38 | vignette section on offline use to ensure proper functionality.} 39 | } 40 | \value{ 41 | Returns the expression matrix with the rownames corrected and rows 42 | representing the same gene merged. If no corrections are necessary, input 43 | expression matrix is returned. If a SingleCellExperiment (SCE) or other 44 | Ranged Summarized Experiment (SE) type object was inputted this will be 45 | returned with the corrected expression matrix under counts. 46 | } 47 | \description{ 48 | Also checks whether any gene names contain "Sep", "Mar" or "Feb". 49 | These should be checked for any suggestion that excel has corrupted the 50 | gene names. 
#' check_controlled_args
#'
#' Validate the inputs of \link[EWCE]{controlled_geneset_enrichment}.
#' Five overlap checks are run in order and the first one that fails stops
#' with an error.
#'
#' @param hits Hit genes.
#' @param funcGenes \code{functional_genes} that are within
#' \code{combinedGenes}.
#' @param combinedGenes \code{sct_data} genes that are in the background
#' \code{bg}.
#' @inheritParams controlled_geneset_enrichment
#' @return Null output.
#'
#' @keywords internal
check_controlled_args <- function(bg,
                                  sct_data,
                                  annotLevel,
                                  disease_genes,
                                  hits,
                                  functional_genes,
                                  funcGenes,
                                  combinedGenes) {
    # Number of entries of `candidates` found in `reference`.
    n_shared <- function(candidates, reference) {
        sum(candidates %in% reference, na.rm = TRUE)
    }
    sc_genes <- rownames(sct_data[[annotLevel]]$mean_exp)

    # 1. The background must overlap the single-cell dataset at all.
    if (n_shared(bg, sc_genes) == 0) {
        stop(paste0(
            "ERROR: no bg are present in the single cell",
            " dataset. Perhaps it is from the wrong species?"
        ))
    }
    # 2. At least one disease gene must be usable.
    if (n_shared(disease_genes, combinedGenes) == 0) {
        stop(paste0(
            "ERROR: no disease_genes are present in the single cell",
            " dataset. Perhaps it is from the wrong species?"
        ))
    }
    # 3. At least five hits must be usable.
    if (n_shared(hits, combinedGenes) < 5) {
        stop(paste0(
            "ERROR: insufficient disease_genes. Must provide at",
            " least five that are present in the background",
            " gene set & single cell dataset"
        ))
    }
    # 4. At least one functional gene must be usable.
    if (n_shared(functional_genes, combinedGenes) == 0) {
        stop(paste0(
            "ERROR: no functional_genes are present in the",
            " single cell dataset. Perhaps it is from the",
            " wrong species?"
        ))
    }
    # 5. At least five of the pre-filtered functional genes must be usable.
    if (n_shared(funcGenes, combinedGenes) < 5) {
        stop(paste0(
            "ERROR: insufficient functional_genes Must provide",
            " at least five that are present in the background",
            " gene set & single cell dataset"
        ))
    }
    invisible(NULL)
}
#' Example bootstrap enrichment results
#'
#' Example cell type enrichment
#' results produced by \link[EWCE]{bootstrap_enrichment_test}.
#' The copy shipped in \code{inst/extdata} is loaded when the file can be
#' found; otherwise the results are recomputed.
#'
#' @param verbose Print messages.
#' @param localHub If working offline, add argument localHub=TRUE to work
#' with a local, non-updated hub; It will only have resources available that
#' have previously been downloaded. If offline, Please also see BiocManager
#' vignette section on offline use to ensure proper functionality.
#' @source
#' # Load the single cell data
#'
#' ctd <- ewceData::ctd()
#'
#' # Set the parameters for the analysis
#'
#' # Use 3 bootstrap lists for speed, for publishable analysis use >=10,000
#'
#' reps <- 3
#'
#' # Load gene list from Alzheimer's disease GWAS
#'
#' example_genelist <- ewceData::example_genelist()
#'
#' # Bootstrap significance test, no control for transcript length or GC content
#'
#' full_results <- EWCE::bootstrap_enrichment_test(
#'     sct_data = ctd,
#'     hits = example_genelist,
#'     reps = reps,
#'     annotLevel = 1,
#'     sctSpecies = "mouse",
#'     genelistSpecies = "human"
#' )
#'
#' bootstrap_results <- full_results
#'
#' save(bootstrap_results,file = "inst/extdata/bootstrap_results.rda")
#' @returns List with 3 items.
#' @export
#' @examples
#' full_results <- example_bootstrap_results()
example_bootstrap_results <- function(verbose = TRUE,
                                      localHub = FALSE) {
    rda_path <- system.file("extdata", "bootstrap_results.rda",
                            package = "EWCE")
    ## Fallback: the shipped file could not be located, so recompute.
    if (!file.exists(rda_path)) {
        messager("Recomputing example bootstrap results.", v = verbose)
        celltype_data <- ewceData::ctd(localHub = localHub)
        gene_hits <- ewceData::example_genelist(localHub = localHub)
        recomputed <- bootstrap_enrichment_test(
            sct_data = celltype_data,
            hits = gene_hits,
            reps = 100,
            annotLevel = 1,
            sctSpecies = "mouse",
            genelistSpecies = "human",
            verbose = verbose,
            localHub = localHub
        )
        return(recomputed)
    }
    ## Usual path: load the precomputed results.
    messager("Loading precomputed example bootstrap results.", v = verbose)
    precomputed <- load_rdata(rda_path)
    return(precomputed)
}
#' CellTypeDataset to SingleCellExperiment
#'
#' Copied from \href{https://github.com/neurogenomics/scKirby}{scKirby},
#' which is not yet on CRAN or Bioconductor.
#'
#' @param object CellTypeDataset object.
#' @param as_sparse Store SingleCellExperiment matrices as sparse.
#' @param as_DelayedArray Store SingleCellExperiment matrices as DelayedArray.
#' @param verbose Print messages.
#'
#' @return A named list of SingleCellExperiment objects, one per level of
#' \code{object}. Unnamed levels are named \code{level_1}, \code{level_2}, ...
#'
#' @examples
#' ctd <- ewceData::ctd()
#' sce <- EWCE::ctd_to_sce(ctd)
#' @export
ctd_to_sce <- function(object,
                       as_sparse = TRUE,
                       as_DelayedArray = FALSE,
                       verbose = TRUE) {
    messager("+ CTD ==> SingleCellExperiment", v = verbose)
    ctd <- object
    #### Name CTD levels ####
    if (is.null(names(ctd))) {
        # seq_along() rather than seq(1, length(ctd)): the latter yields
        # c(1, 0) for an empty list, which makes the names<- call fail.
        names(ctd) <- paste0("level_", seq_along(ctd))
    }
    sce_list <- lapply(names(ctd), function(lvl) {
        messager("Converting level: ", lvl, v = verbose)
        ctd_lvl <- ctd[[lvl]]
        #### Use matrices that are present ###
        matrix_list <- list()
        for (mtx_name in get_ctd_matrix_names()) {
            if (mtx_name %in% names(ctd_lvl)) {
                mtx <- ctd_lvl[[mtx_name]]
                mtx <- to_sparse_matrix(
                    exp = mtx,
                    as_sparse = as_sparse,
                    verbose = FALSE
                )
                mtx <- to_delayed_array(
                    exp = mtx,
                    as_DelayedArray = as_DelayedArray,
                    verbose = FALSE
                )
                matrix_list[[mtx_name]] <- mtx
            }
        }
        # Row/column metadata are taken from the first matrix found; the
        # single colData column is named after the level.
        SingleCellExperiment::SingleCellExperiment(
            assays = matrix_list,
            colData = data.frame(colnames(matrix_list[[1]])) |>
                `colnames<-`(lvl),
            rowData = data.frame(
                gene = row.names(matrix_list[[1]]),
                row.names = row.names(matrix_list[[1]])
            )
        )
    }) |> `names<-`(names(ctd))
    ## "SCE_list" class messes up other functions that expect class "list"
    # class(sce_list) <- "SCE_list"
    return(sce_list)
}
#' Example bootstrap celltype enrichment test for transcriptome data
#'
#' Example celltype enrichment
#' results produced by \link[EWCE]{ewce_expression_data}.
#' The copy shipped in \code{inst/extdata} is loaded when the file can be
#' found; otherwise the results are recomputed.
#'
#' @param verbose Print messages.
#' @param localHub If working offline, add argument localHub=TRUE to work
#' with a local, non-updated hub; It will only have resources available that
#' have previously been downloaded. If offline, Please also see BiocManager
#' vignette section on offline use to ensure proper functionality.
#'
#' @source
#' ## Load the single cell data
#'
#' ctd <- ewceData::ctd()
#'
#' ## Set the parameters for the analysis
#'
#' ## Use 3 bootstrap lists for speed, for publishable analysis use >10,000
#'
#' reps <- 3
#'
#' annotLevel <- 1 # <- Use cell level annotations (i.e. Interneurons)
#'
#' ## Use 5 up/down regulated genes (thresh) for speed, default is 250
#'
#' thresh <- 5
#'
#' ## Load the top table
#'
#' tt_alzh <- ewceData::tt_alzh()
#'
#' tt_results <- EWCE::ewce_expression_data(
#'     sct_data = ctd,
#'     tt = tt_alzh,
#'     annotLevel = 1,
#'     thresh = thresh,
#'     reps = reps,
#'     ttSpecies = "human",
#'     sctSpecies = "mouse"
#' )
#'
#' save(tt_results, file = "inst/extdata/tt_results.rda")
#' @returns List with 5 items.
#' @export
#' @examples
#' tt_results <- EWCE::example_transcriptome_results()
example_transcriptome_results <- function(verbose = TRUE, localHub=FALSE) {
    rda_path <- system.file("extdata/tt_results.rda",
                            package = "EWCE")
    ## Fallback: the shipped file could not be located, so recompute with
    ## the same small settings used to create it.
    if (!file.exists(rda_path)) {
        messager("Recomputing example transcriptome results.",
                 v = verbose)
        celltype_data <- ewceData::ctd(localHub = localHub)
        top_table <- ewceData::tt_alzh(localHub = localHub)
        recomputed <- ewce_expression_data(
            sct_data = celltype_data,
            tt = top_table,
            annotLevel = 1, # cell level annotations (i.e. Interneurons)
            thresh = 5,
            reps = 3,
            ttSpecies = "human",
            sctSpecies = "mouse",
            localHub = localHub
        )
        return(recomputed)
    }
    ## Usual path: load the precomputed results.
    messager("Loading precomputed example transcriptome results.",
             v = verbose)
    precomputed <- load_rdata(rda_path)
    return(precomputed)
}
determine which cell types are enriched within gene lists. The package 27 | provides tools for testing enrichments within simple gene lists (such as human disease 28 | associated genes) and those resulting from differential expression studies. The package does 29 | not depend upon any particular Single Cell Transcriptome dataset and user defined datasets 30 | can be loaded in and used in the analyses. 31 | URL: https://github.com/NathanSkene/EWCE 32 | BugReports: https://github.com/NathanSkene/EWCE/issues 33 | License: GPL-3 34 | Depends: R (>= 4.2), 35 | RNOmni (>= 1.0) 36 | VignetteBuilder: knitr 37 | Imports: 38 | stats, 39 | utils, 40 | methods, 41 | ewceData (>= 1.7.1), 42 | dplyr, 43 | ggplot2, 44 | reshape2, 45 | limma, 46 | stringr, 47 | HGNChelper, 48 | Matrix, 49 | parallel, 50 | SingleCellExperiment, 51 | SummarizedExperiment, 52 | DelayedArray, 53 | BiocParallel, 54 | orthogene (>= 0.99.8), 55 | data.table 56 | Suggests: 57 | rworkflows, 58 | remotes, 59 | knitr, 60 | BiocStyle, 61 | rmarkdown, 62 | testthat (>= 3.0.0), 63 | readxl, 64 | memoise, 65 | markdown, 66 | sctransform, 67 | DESeq2, 68 | MAST, 69 | DelayedMatrixStats, 70 | ggdendro, 71 | scales, 72 | patchwork 73 | biocViews: GeneExpression, Transcription, DifferentialExpression, 74 | GeneSetEnrichment, Genetics, Microarray, 75 | mRNAMicroarray, OneChannel, RNASeq, 76 | BiomedicalInformatics, Proteomics, Visualization, 77 | FunctionalGenomics, SingleCell 78 | RoxygenNote: 7.3.3 79 | Encoding: UTF-8 80 | Config/testthat/edition: 3 81 | -------------------------------------------------------------------------------- /R/check_percent_hits.R: -------------------------------------------------------------------------------- 1 | #' Get percentage of target cell type hits 2 | #' 3 | #' After you run \link[EWCE]{bootstrap_enrichment_test}, 4 | #' check what percentage of significantly enriched 5 | #' cell types match an expected cell type. 
#' Get percentage of target cell type hits
#'
#' After you run \link[EWCE]{bootstrap_enrichment_test},
#' check what percentage of significantly enriched
#' cell types match an expected cell type.
#'
#' @param full_results \code{bootstrap_enrichment_test} results.
#' @param target_celltype Substring used to search for matching
#' cell types (case-insensitive).
#' @param mtc_method Multiple-testing correction method.
#' @param q_threshold Corrected significance threshold.
#' @param verbose Print messages.
#'
#' @returns Report list with \code{target_hits}, \code{percent_hits}
#' (0 when no cell type is significant), \code{target_celltype} and
#' \code{z18.terms} (always \code{NULL}).
#'
#' @export
#' @examples
#' ## Bootstrap significance test,
#' ##  no control for transcript length or GC content
#' ## Use pre-computed results to speed up example
#' full_results <- EWCE::example_bootstrap_results()
#'
#' report <- EWCE::check_percent_hits(
#'     full_results = full_results,
#'     target_celltype = "microglia"
#' )
check_percent_hits <- function(full_results,
                               target_celltype,
                               mtc_method = "bonferroni",
                               q_threshold = .05,
                               verbose = TRUE) {
    #### Align celltype names to standardise_ctd standards ####
    full_results <- fix_celltype_names_full_results(
        full_results = full_results
    )
    target_celltype <- fix_celltype_names(celltypes = target_celltype)
    #### Extract sig results ####
    sig_results <- get_sig_results(
        full_results = full_results,
        mtc_method = mtc_method,
        q_threshold = q_threshold,
        verbose = verbose
    )
    # grep() treats target_celltype as a regular expression.
    target_hits <- grep(target_celltype, sig_results$CellType,
        ignore.case = TRUE, value = TRUE
    )
    # Kept in the output for backward compatibility; the Zeisel2018-specific
    # lookup that used to fill it only exists as disabled code.
    z18.terms <- NULL
    n_sig_celltypes <- length(unique(sig_results$CellType))
    # With no significant cell types the ratio is 0/0 (NaN), which was
    # previously returned and printed as "NaN%"; report 0 instead.
    percent_hits <- if (n_sig_celltypes == 0) {
        0
    } else {
        round(length(unique(target_hits)) / n_sig_celltypes * 100, 1)
    }
    msg <- paste(
        paste0(percent_hits, "%"),
        "of hits are of the target cell type."
    )
    messager(msg, v = verbose)
    return(list(
        target_hits = target_hits,
        percent_hits = percent_hits,
        target_celltype = target_celltype,
        z18.terms = z18.terms
    ))
}

Updated: `r format( Sys.Date(), '%b-%d-%Y')`

" 4 | output: 5 | BiocStyle::html_document 6 | vignette: > 7 | %\VignetteIndexEntry{hexSticker} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, echo=FALSE, include=TRUE} 13 | pkg <- read.dcf(here::here("DESCRIPTION"), fields = "Package")[1] 14 | description <- read.dcf(here::here("DESCRIPTION"), fields = "Description")[1] 15 | 16 | # If you're using R<4.1.1, need this version of rvcheck 17 | # devtools::install_version('rvcheck',version='0.1.8') 18 | library(hexSticker) 19 | library(dplyr) 20 | library(ggplot2) 21 | library(ggimage) 22 | # library(ggpattern)# remotes::install_github("coolbutuseless/ggpattern") 23 | ``` 24 | 25 | You can make awesome hex stickers for your R packages using: 26 | 27 | - [hexSticker](https://github.com/GuangchuangYu/hexSticker) 28 | - [ggimage](https://github.com/GuangchuangYu/ggimage) 29 | lets you render images as data points. 30 | - [ggpattern](https://coolbutuseless.github.io/package/ggpattern/) 31 | lets you fill objects with patterns or images. 32 | - [magick](https://cran.r-project.org/web/packages/magick/vignettes/intro.html) 33 | lets you modify PNGs. 34 | 35 | # `r pkg` 36 | 37 | ## File path 38 | 39 | Create the file path to save the hex sticker to. 40 | 41 | ```{r} 42 | filename <- here::here("inst/hex/hex.png") 43 | dir.create(dirname(filename), showWarnings = FALSE, recursive = TRUE) 44 | ``` 45 | 46 | ## Subplot 47 | 48 | ```{r} 49 | # URL <- "https://github.com/NathanSkene/EWCE/releases/download/v1.1/DALL.E.2023-03-10.01.14.58.-.cell.type.specific.gene.enrichment.png" 50 | # tmp <- tempfile(fileext = ".png") 51 | # utils::download.file(URL, tmp) 52 | 53 | tmp <- here::here("inst/hex/2BIgqrmzc0GSaaAGOqSeAu.jpg") 54 | ``` 55 | 56 | 57 | 58 | ## hexSticker 59 | 60 | ```{r} 61 | # pkg <- paste("E xpression","W eighted","C elltype","E nrichment", sep = "\n") 62 | 63 | s_size = .35 64 | stick <- hexSticker::sticker( 65 | subplot = tmp, 66 | #### Package name #### 67 | package = paste0(strsplit(pkg, " ")[[1]],collapse = " "), 68 | p_size=20, p_y = 1.5, p_color = ggplot2::alpha("white",.95), 69 | #### Subplot ##### 70 | s_x=1, s_y=1, s_height = s_size, s_width = s_size*2.4, 71 | #### Fill & border #### 72 | h_fill = "#060b1a", h_color =ggplot2::alpha("white",.5),h_size = 2, 73 | #### Spotlight #### 74 | spotlight = TRUE, l_alpha = .2, l_width = 5, l_x = .5, 75 | #### File output #### 76 | white_around_sticker = TRUE, 77 | filename = filename, dpi = 300) 78 | print(stick) 79 | ``` 80 | 81 | # Session Info 82 | 83 |
84 | 85 | ```{r Session Info} 86 | utils::sessionInfo() 87 | ``` 88 | 89 |
90 | -------------------------------------------------------------------------------- /man/merge_ctd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/merge_ctd.R 3 | \name{merge_ctd} 4 | \alias{merge_ctd} 5 | \title{Merge multiple CellTypeDataset references} 6 | \usage{ 7 | merge_ctd( 8 | CTD_list, 9 | save_dir = tempdir(), 10 | standardise_CTD = FALSE, 11 | as_SCE = FALSE, 12 | gene_union = TRUE, 13 | merge_levels = seq(1, 5), 14 | save_split_SCE = FALSE, 15 | save_split_CTD = FALSE, 16 | save_merged_SCE = TRUE, 17 | force_new_quantiles = FALSE, 18 | numberOfBins = 40, 19 | as_sparse = TRUE, 20 | as_DelayedArray = FALSE, 21 | verbose = TRUE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{CTD_list}{(Named) list of \code{CellTypeDatasets}.} 27 | 28 | \item{save_dir}{The directory to save merged files in.} 29 | 30 | \item{standardise_CTD}{Whether to run \code{standardise_ctd}.} 31 | 32 | \item{as_SCE}{If \code{TRUE}, 33 | returns the merged results as a named list of 34 | \link[SingleCellExperiment]{SingleCellExperiment}s. 35 | If \code{FALSE} (default), returns as a CTD object.} 36 | 37 | \item{gene_union}{Whether to take the gene union or intersection 38 | when merging matrices (mean_exp, specificity, etc.).} 39 | 40 | \item{merge_levels}{Which CTD levels you want to merge. 41 | Can be a single value (e.g. \code{merge_levels=5}) 42 | or a vector (e.g. \code{merge_levels=c(1:5)}).
43 | If some CTDs don't have the same number of levels, 44 | the maximum level depth available in that CTD will be used instead.} 45 | 46 | \item{save_split_SCE}{Whether to save individual SCE files 47 | in the subdirectory \emph{standardized_CTD_SCE}.} 48 | 49 | \item{save_split_CTD}{Whether to save individual CTD files 50 | in the subdirectory \emph{standardized_CTD}.} 51 | 52 | \item{save_merged_SCE}{Whether to save the final merged SCE object, or simply 53 | return it.} 54 | 55 | \item{force_new_quantiles}{If a specificity quantiles matrix already exists, 56 | create a new one anyway.} 57 | 58 | \item{numberOfBins}{Number of bins to compute specificity quantiles with.} 59 | 60 | \item{as_sparse}{Convert matrices to sparse matrices.} 61 | 62 | \item{as_DelayedArray}{Convert matrices to \code{DelayedArray}.} 63 | 64 | \item{verbose}{Print messages.} 65 | 66 | \item{...}{Additional arguments to be passed to \code{standardise_ctd}.} 67 | } 68 | \value{ 69 | List of CellTypeDatasets or SingleCellExperiments. 70 | } 71 | \description{ 72 | Import CellTypeDataset (CTD) references from a remote repository, 73 | standardize each, and then merge into one CTD. 74 | Optionally, can return these as a merged 75 | \link[SingleCellExperiment]{SingleCellExperiment}. 76 | } 77 | \examples{ 78 | ## Let's pretend these are different CTD datasets 79 | ctd1 <- ewceData::ctd() 80 | ctd2 <- ctd1 81 | CTD_list <- list(ctd1, ctd2) 82 | CTD_merged <- EWCE::merge_ctd(CTD_list = CTD_list) 83 | } 84 | --------------------------------------------------------------------------------