├── .Rbuildignore
├── .github
    ├── README.html
    └── README.md
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── R
    ├── contrasting_functions.r
    ├── data.r
    ├── group_labelling_functions.r
    ├── loading_helper_functions.r
    └── plotting_functions.R
├── data
    ├── de_table.demo_query.rda
    ├── de_table.demo_ref.rda
    ├── demo_cell_info_table.rda
    ├── demo_counts_matrix.rda
    ├── demo_gene_info_table.rda
    ├── demo_microarray_expr.rda
    ├── demo_microarray_sample_sheet.rda
    ├── demo_query_se.rda
    └── demo_ref_se.rda
├── inst
    └── extdata
    │   ├── demo_microarray_expression.tab
    │   ├── demo_microarray_info.tab
    │   ├── larger_doco_examples.rdata
    │   ├── sim_cr_dataset
    │       ├── analysis
    │       │   └── clustering
    │       │   │   └── kmeans_4_clusters
    │       │   │       └── clusters.csv
    │       └── filtered_gene_bc_matrices
    │       │   └── GRCh38
    │       │       ├── barcodes.tsv
    │       │       ├── genes.tsv
    │       │       └── matrix.mtx
    │   ├── sim_query_cell_info.tab
    │   ├── sim_query_counts.tab
    │   ├── sim_query_gene_info.tab
    │   ├── sim_ref_cell_info.tab
    │   ├── sim_ref_counts.tab
    │   └── sim_ref_gene_info.tab
├── man
    ├── contrast_each_group_to_the_rest.Rd
    ├── contrast_each_group_to_the_rest_for_norm_ma_with_limma.Rd
    ├── contrast_the_group_to_the_rest.Rd
    ├── contrast_the_group_to_the_rest_with_limma_for_microarray.Rd
    ├── convert_se_gene_ids.Rd
    ├── de_table.demo_query.Rd
    ├── de_table.demo_ref.Rd
    ├── demo_cell_info_table.Rd
    ├── demo_counts_matrix.Rd
    ├── demo_gene_info_table.Rd
    ├── demo_microarray_expr.Rd
    ├── demo_microarray_sample_sheet.Rd
    ├── demo_query_se.Rd
    ├── demo_ref_se.Rd
    ├── find_within_match_differences.Rd
    ├── get_counts_index.Rd
    ├── get_inner_or_outer_ci.Rd
    ├── get_limma_top_table_with_ci.Rd
    ├── get_matched_stepped_mwtest_res_table.Rd
    ├── get_ranking_and_test_results.Rd
    ├── get_rankstat_table.Rd
    ├── get_reciprocal_matches.Rd
    ├── get_stepped_pvals_str.Rd
    ├── get_the_up_genes_for_all_possible_groups.Rd
    ├── get_the_up_genes_for_group.Rd
    ├── get_vs_random_pval.Rd
    ├── load_dataset_10Xdata.Rd
    ├── load_se_from_tables.Rd
    ├── make_ranking_violin_plot.Rd
    ├── make_ref_similarity_names.Rd
    ├── make_ref_similarity_names_for_group.Rd
    ├── run_pair_test_stats.Rd
    ├── subset_cells_by_group.Rd
    ├── subset_se_cells_for_group_test.Rd
    └── trim_small_groups_and_low_expression_genes.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-contrasting_functions.R
    │   └── test-loading_helper_functions.R
└── vignettes
    ├── celaref.bib
    ├── celaref_doco.Rmd
    └── images
        ├── pbmc4k_cloupe_kmeans7.png
        ├── violin_plot_example.png
        └── workflow_diagram.png


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.github/README.md:
--------------------------------------------------------------------------------
 1 | # celaref
 2 | 
 3 | 
 4 | ### Function  
 5 | 
 6 | Single cell RNA sequencing (scRNAseq) has made it possible to examine the 
 7 | cellular heterogeneity within a tissue or sample, and observe changes and 
 8 | characteristics in specific cell types. To do this, we need to group the cells
 9 | into clusters and figure out what they are.
10 | 
11 | The celaref (*ce*ll *la*belling by *ref*erence) package aims to streamline the cell-type identification step, by 
12 | suggesting cluster labels on the basis of similarity to an already-characterised
13 | reference dataset - whether that's from a similar experiment performed 
14 | previously in the same lab, or from a public dataset from a similar sample. 
15 | 
16 | ### Input
17 | 
18 | To look for cluster similarities celaref needs:
19 | 
20 | * The query dataset :
21 |     - a table of read counts per cell per gene
22 |     - a list of which cells belong in which cluster
23 |    
24 | * A reference dataset:
25 |     - a table of read counts per cell per gene
26 |     - a list of which cells belong in which *annotated* cluster
27 |    
28 | ### Output
29 | 
30 | 
31 | 
32 | ![](../vignettes/images/violin_plot_example.png) 
33 | 
34 | 
35 | Query Group | Short Label                        | pval    |
36 | ------------|------------------------------------|---------|
37 | cluster_1   |cluster_1:astrocytes_ependymal      |2.98e-23 |
38 | cluster_2   |cluster_2:endothelial-mural         |8.44e-10 |
39 | cluster_3   |cluster_3:no_similarity             |NA       |
40 | cluster_4   |cluster_4:microglia                 |2.71e-19 |
41 | cluster_5   |cluster_5:pyramidal SS\|interneurons|3.49e-10 |
42 | cluster_6   |cluster_6:oligodendrocytes          |2.15e-28 |
43 | 
44 | 
45 | 
46 | 
47 | This is a comparison of brain scRNAseq data from :
48 | 
49 |  * Zeisel, A., Manchado, A. B. M., Codeluppi, S., Lonnerberg, P., La Manno, G., Jureus, A., … Linnarsson, S. (2015). *Cell types in the mouse cortex and hippocampus revealed by single-cell RNA-seq.* Science, 347(6226), 1138–42. http://doi.org/10.1126/science.aaa1934
50 |  * Darmanis, S., Sloan, S. A., Zhang, Y., Enge, M., Caneda, C., Shuer, L. M., … Quake, S. R. (2015). *A survey of human brain transcriptome diversity at the single cell level.* Proceedings of the National Academy of Sciences, 112(23), 201507125. http://doi.org/10.1073/pnas.1507125112
51 | 
52 | 
53 | ### More information?
54 | 
55 | Full details in the vignette [html](http://bioinformatics.erc.monash.edu/home/sarah.williams/projects/cell_groupings/doco/celaref_doco.html) - method description, manual and example analyses.
56 | 
57 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | inst/doc
 5 | github
 6 | vignettes/celaref_doco_files
 7 | vignettes/celaref_doco_cache
 8 | vignettes/celaref_doco.html
 9 | github/README.html
10 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: celaref
 2 | Title: Single-cell RNAseq cell cluster labelling by reference
 3 | Version: 1.3.0
 4 | Authors@R: person("Sarah", "Williams", email = "sarah.williams1@monash.edu", role = c("aut", "cre"))
 5 | Description: After the clustering step of a single-cell RNAseq experiment, this
 6 |    package aims to suggest labels/cell types for the clusters, on the basis of 
 7 |    similarity to a reference dataset. It requires a table of read counts per 
 8 |    cell per gene, and a list of the cells belonging to each of the clusters, 
 9 |    (for both test and reference data). 
10 | Depends: R (>= 3.5.0),
11 |    SummarizedExperiment
12 | Imports:
13 |    MAST,
14 |    ggplot2,
15 |    Matrix,
16 |    dplyr,
17 |    magrittr,
18 |    stats,
19 |    utils,
20 |    rlang,
21 |    BiocGenerics,
22 |    S4Vectors,
23 |    readr,
24 |    tibble,
25 |    DelayedArray
26 | Suggests:
27 |     limma,
28 |     parallel,
29 |     knitr,
30 |     rmarkdown,
31 |     ExperimentHub,
32 |     testthat
33 | biocViews: SingleCell
34 | VignetteBuilder: knitr
35 | License: GPL-3
36 | Encoding: UTF-8
37 | LazyData: true
38 | RoxygenNote: 6.1.1
39 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(contrast_each_group_to_the_rest)
 4 | export(contrast_each_group_to_the_rest_for_norm_ma_with_limma)
 5 | export(convert_se_gene_ids)
 6 | export(get_rankstat_table)
 7 | export(get_the_up_genes_for_all_possible_groups)
 8 | export(get_the_up_genes_for_group)
 9 | export(load_dataset_10Xdata)
10 | export(load_se_from_files)
11 | export(load_se_from_tables)
12 | export(make_ranking_violin_plot)
13 | export(make_ref_similarity_names)
14 | export(make_ref_similarity_names_using_marked)
15 | export(subset_cells_by_group)
16 | export(trim_small_groups_and_low_expression_genes)
17 | import(MAST)
18 | import(SummarizedExperiment)
19 | importFrom(dplyr,n)
20 | importFrom(magrittr,"%>%")
21 | importFrom(rlang,.data)
22 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
  1 | CHANGES IN VERSION 1.3.x 
  2 | -----------------------
  3 | 
  4 | UPDATES
  5 | 
  6 |     o Added factors_to_rm option. 
  7 | 
  8 | 
  9 | CHANGES IN VERSION 1.2.0 
 10 | -----------------------
 11 | 
 12 | UPDATES
 13 | 
 14 |     o Support for passing hdf5-backed SummarizedExperiment objects 
 15 |       (internal conversion to sparse)
 16 |     o Explicitly handle multiple assay() in the summarizedExperiment. Must have
 17 |       a named 'counts' assay, or,  just the one unnamed assay.
 18 |     o Same as 1.1.10
 19 |     
 20 | 
 21 | BUG FIXES
 22 | 
 23 |     o Subsampling per group should actually be used now.
 24 |     
 25 | 
 26 | CHANGES IN VERSION 1.1.8
 27 | -----------------------
 28 | 
 29 | UPDATES
 30 | 
 31 |     o Updated vignette with large data handling.
 32 |     o Doco updates
 33 |     o make_ranking_violin_plot can pass parameters (e.g. rankmetrics) with ...
 34 |     
 35 | 
 36 | CHANGES IN VERSION 1.1.4
 37 | -----------------------
 38 | 
 39 | UPDATES
 40 | 
 41 |     o Methods/options to subset large datasets. 
 42 | 
 43 | 
 44 | CHANGES IN VERSION 1.1.3
 45 | -----------------------
 46 | 
 47 | UPDATES
 48 | 
 49 |     o None. Version bump for build only.
 50 | 
 51 | 
 52 | CHANGES IN VERSION 1.1.2
 53 | -----------------------
 54 | 
 55 | UPDATES
 56 | 
 57 |     o Testing tests
 58 | 
 59 | 
 60 | 
 61 | CHANGES IN VERSION 1.1.1
 62 | -----------------------
 63 | 
 64 | BUG FIXES
 65 | 
 66 |     o Internal SingleCellAssay coercion bugfix. Should no longer require 
 67 |       library(MAST) call to work.
 68 |     
 69 | 
 70 | UPDATES
 71 | 
 72 |     o Partying hard with unit tests woo
 73 | 
 74 | 
 75 | CHANGES IN VERSION 1.1.0
 76 | -----------------------
 77 | 
 78 | UPDATES
 79 | 
 80 |     o Option to change the gene ranking metric. Maybe useful for similar cell 
 81 |       types like PBMCs.
 82 |     o Use sparse matrices to support larger datasets in less RAM.
 83 |  
 84 | 
 85 | CHANGES IN VERSION 1.0.1
 86 | -----------------------
 87 | 
 88 | UPDATES
 89 | 
 90 |     o First bioconductor version
 91 | 
 92 | 
 93 | CHANGES IN VERSION 0.99.1
 94 | -----------------------
 95 | 
 96 | BUG FIXES
 97 | 
 98 |     o Code style. 
 99 |     o Do not attempt to multithread on windows (suggest multithread on linux). 
100 | 
101 | 
102 | CHANGES IN VERSION 0.99.0
103 | -----------------------
104 | 
105 | NEW FEATURES
106 | 
107 |     o Initial Version. 
108 | 


--------------------------------------------------------------------------------
/R/data.r:
--------------------------------------------------------------------------------
 1 | #' Demo query de table
 2 | #' 
 3 | #' Small example dataset that is the output of
 4 | #' \link{contrast_each_group_to_the_rest}. It contains the results
 5 | #' of each group compared to the rest of the sample (ie within sample 
 6 | #' differential expression)
 7 | #'
 8 | #' @return An example de_table from 
 9 | #' \link{contrast_each_group_to_the_rest} (for demo query dataset)
10 | "de_table.demo_query"
11 | 
12 | 
13 | #' Demo ref de table
14 | #' 
15 | #' Small example dataset that is the output of
16 | #' \link{contrast_each_group_to_the_rest}. It contains the results
17 | #' of each group compared to the rest of the sample (ie within sample 
18 | #' differential expression)
19 | #'
20 | #' @return An example de_table from 
21 | #' \link{contrast_each_group_to_the_rest} (for demo ref dataset)
22 | "de_table.demo_ref" 
23 | 
24 | 
25 | #' Demo cell info table
26 | #' 
27 | #' Sample sheet table listing each cell, its assigned cluster/group, and 
28 | #' any other information that might be interesting (replicate, individual etc.)
29 | #'
30 | #' @return An example cell info table
31 | "demo_cell_info_table"
32 | 
33 | 
34 | 
35 | #' Demo count matrix
36 | #' 
37 | #' Counts matrix for a small, demo example dataset. Raw counts of 
38 | #' reads per gene (row) per cell (column).
39 | #' @return An example counts matrix.
40 | "demo_counts_matrix" 
41 | 
42 | 
43 | #' Demo gene info table
44 | #'
45 | #' Extra table of gene-level information for the demo example dataset.
46 | #' Can contain anything as long as there's a unique gene id.
47 | #' @return An example table of genes.
48 | "demo_gene_info_table" 
49 | 
50 | #' Demo microarray expression table
51 | #' 
52 | #' Microarray-style expression table for the demo example dataset. 
53 | #' Rows are genes, columns are samples, as per counts matrix.
54 | #' @return An example table of (fake) microarray data.
55 | "demo_microarray_expr"
56 | 
57 | 
58 | #' Demo microarray sample sheet table
59 | #' 
60 | #' Microarray sample sheet table for the demo example dataset. 
61 | #' Contains array identifiers, their group and any other information that could
62 | #' be useful.
63 | #' @return An example microarray sample sheet
64 | "demo_microarray_sample_sheet" 
65 | 
66 | 
67 | #' Demo query se (summarizedExperiment)
68 | #' 
69 | #' A summarisedExperiment object loaded from demo info tables, for a query set. 
70 | #' @return An example summarised experiment (for demo query dataset)
71 | "demo_query_se" 
72 | 
73 | #' Demo reference se (summarizedExperiment)
74 | #' 
75 | #' A summarisedExperiment object loaded from demo info tables, for a reference
76 | #' set. 
77 | #' @return An example summarised experiment (for demo reference dataset)
78 | "demo_ref_se"
79 | 
80 | 
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/R/loading_helper_functions.r:
--------------------------------------------------------------------------------
  1 | #' load_se_from_tables
  2 | #'
  3 | #' Create a SummarizedExperiment object (dataset_se) from a count matrix, cell 
  4 | #' information and optionally gene information.
  5 | #'
  6 | #' This function makes a SummarizedExperiment object in a form that
  7 | #' should work for celaref functions. Specifically, that means it will have an
  8 | #' 'ID' field for genes (view with \code{rowData(dataset_se)}), and both
  9 | #' 'cell_sample' and 'group' fields for cells (view with
 10 | #' \code{colData(dataset_se)}). See parameters for detail.
 11 | #' Additionally, the counts will be an integer matrix (not a
 12 | #' sparse matrix), and the \emph{group} field (but not \emph{cell_sample}
 13 | #' or \emph{ID}) will be a factor.
 14 | #'
 15 | #' Note that data will be subsetted to cells present in both the counts matrix
 16 | #' and cell info, this is handy for loading subsets of cells.
 17 | #' However, if \bold{gene_info_file} is defined, all genes must match exactly.
 18 | #'
 19 | #' The \code{load_se_from_files} form of this function will run the same 
 20 | #' checks, but will read everything from files in one go. The 
 21 | #' \code{load_se_from_tables}
 22 | #' form is perhaps more useful when the annotations need to be modified (e.g. 
 23 | #' programmatically adding a different gene identifier, renaming groups, 
 24 | #' removing unwanted samples). 
 25 | #' 
 26 | #' Note that the SummarizedExperiment object can also be created without using
 27 | #' these functions, it just needs the \emph{cell_sample}, \emph{ID} and
 28 | #' \emph{group} fields as described above. Since sometimes it might be easier
 29 | #' to add these to an existing \emph{SummarizedExperiment} from upstream
 30 | #' analyses.
 31 | #'
 32 | #'
 33 | #' @param counts_matrix A tab-separated matrix of read counts for each gene
 34 | #' (row) and each cell (column). Columns and rows should be named.
 35 | #' 
 36 | #' @param cell_info_table Table of cell information. 
 37 | #' If there is a column labelled
 38 | #' \emph{cell_sample}, that will be used as the unique cell identifiers. 
 39 | #' If not, the first column is assumed to be cell identifiers, and will be 
 40 | #' copied to a new field labelled \emph{cell_sample}.
 41 | #' Similarly - the clusters of these cells should be listed in one column -
 42 | #' which can be called 'group' (case-sensitive) or specified with
 43 | #' \bold{group_col_name}. \emph{Minimal data format: <cell_sample> <group>}
 44 | #' 
 45 | #' @param gene_info_table Optional table of gene information. If there is a
 46 | #' column labelled
 47 | #' \emph{ID}, that will be used as the gene identifiers (they must be unique!).
 48 | #' If not, the first column is assumed to be a gene identifier, and will be 
 49 | #' copied to a
 50 | #' new field labelled \emph{ID}. Must match all rownames in 
 51 | #' \bold{counts_matrix}.
 52 | #' If omitted, ID will be generated from the rownames of counts_matrix. 
 53 | #' Default=NA
 54 | #' 
 55 | #' @param group_col_name Name of the column in \bold{cell_info_table} 
 56 | #' containing
 57 | #' the cluster/group that each cell belongs to. Case-sensitive. Default='group'
 58 | #' 
 59 | #' @param cell_col_name Name of the column in \bold{cell_info_table} containing
 60 | #' a cell id. Ignored if \emph{cell_sample} column is already present. 
 61 | #' If omitted, (and no \emph{cell_sample} column) will use first column.
 62 | #' Case-sensitive. Default=NA
 63 | #'
 64 | #' @return A SummarisedExperiment object containing the count data, cell info
 65 | #' and gene info.
 66 | #'
 67 | #' @examples
 68 | #'
 69 | #' # From data frames (or a matrix for counts) :
 70 | #' demo_se <- load_se_from_tables(counts_matrix=demo_counts_matrix, 
 71 | #'                                cell_info_table=demo_cell_info_table)
 72 | #' demo_se <- load_se_from_tables(counts_matrix=demo_counts_matrix, 
 73 | #'                                cell_info_table=demo_cell_info_table, 
 74 | #'                                gene_info_table=demo_gene_info_table)
 75 | #'
 76 | #' # Or from data files : 
 77 | #' counts_filepath    <- system.file("extdata", "sim_query_counts.tab",    package = "celaref")
 78 | #' cell_info_filepath <- system.file("extdata", "sim_query_cell_info.tab", package = "celaref")
 79 | #' gene_info_filepath <- system.file("extdata", "sim_query_gene_info.tab", package = "celaref")
 80 | #'
 81 | #' demo_se <- load_se_from_files(counts_file=counts_filepath, cell_info_file=cell_info_filepath)
 82 | #' demo_se <- load_se_from_files(counts_file=counts_filepath, cell_info_file=cell_info_filepath, 
 83 | #'                               gene_info_file=gene_info_filepath )
 84 | #'
 85 | #' @seealso \href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} For general doco on the SummarizedExperiment objects.
 86 | #'
 87 | #' @family Data loading functions
 88 | #'
 89 | #' @import SummarizedExperiment
 90 | #' 
 91 | #' @export   
 92 | load_se_from_tables <- function(
 93 |    counts_matrix, cell_info_table, gene_info_table = NA, 
 94 |    group_col_name="group", cell_col_name=NA 
 95 | ) {
 96 |    # Coerce to a plain data.frame so the column handling below is uniform.
 97 |    cell_info_table <- data.frame(cell_info_table, stringsAsFactors = FALSE)
 98 |    
 99 |    # If there's no cell_sample, and no cell_col_name, make the first 
100 |    # one 'cell_sample', else use cell_col_name.
101 |    if (! "cell_sample" %in% colnames(cell_info_table)) {
102 |       if (is.na(cell_col_name)) {
103 |          cell_info_table <- cbind.data.frame(cell_sample=cell_info_table[,1], 
104 |                                              cell_info_table, 
105 |                                              stringsAsFactors = FALSE )
106 |       }
107 |       else {
108 |          stopifnot(cell_col_name %in% colnames(cell_info_table))
109 |          cell_info_table <- cbind.data.frame(
110 |             cell_sample=cell_info_table[,cell_col_name], 
111 |             cell_info_table, 
112 |             stringsAsFactors = FALSE )
113 |       }
114 |    }
115 |    
116 |    # Check for 'group'; anything from group_col_name will be copied into group
117 |    if (! group_col_name %in% colnames(cell_info_table)) {
118 |       # Report the column names that are present, not the table contents.
119 |       stop( "Couldn't find group/cluster column ", group_col_name ,
120 |             " in cell_info_table. Columns present: ", 
121 |             paste(colnames(cell_info_table), collapse=", ") )
122 |    }
123 |    if (group_col_name != "group") {
124 |       cell_info_table$group <- cell_info_table[,group_col_name]
125 |    }
126 |    
127 |    # Only keep common cells, match order
128 |    cells <- intersect(cell_info_table$cell_sample, colnames(counts_matrix))
129 |    if (length(cells) <= 1) { 
130 |       stop("Couldn't find cells in common between counts matrix ",
131 |            "(col names) and cell_info_file (cell_sample column, ",
132 |            "first col or specified as cell_col_name)") 
133 |    }
134 |    if (   length(cells) != nrow(cell_info_table) 
135 |        || length(cells) != ncol(counts_matrix)   ) {
136 |       message("Not all cells were listed in both ",
137 |          "counts matrix and cell_info_file. ",
138 |          "Is this expected? Keeping the ", length(cells), " in common")
139 |    }
140 |    cell_info_table<-cell_info_table[match(cells, cell_info_table$cell_sample),]
141 |    counts_matrix  <-counts_matrix[,cells]
142 |    
143 |    # NB factorising group after removal of unmatched cells
144 |    cell_info_table$group <- factor(cell_info_table$group)
145 |    
146 |    # Make summarised experiment, with or without a gene info table.
147 |    if (all(is.na(gene_info_table))) {
148 |       dataset_se  <- SummarizedExperiment(
149 |                            assays  = S4Vectors::SimpleList(counts=counts_matrix),
150 |                            colData=base::as.data.frame(cell_info_table))
151 |       rowData(dataset_se)$ID <- rownames(assay(dataset_se,'counts'))
152 |    }
153 |    else {
154 |       gene_info_table <- data.frame(gene_info_table, stringsAsFactors = FALSE)
155 |       # If there's no ID col, make the first 'ID'
156 |       if (! "ID" %in% colnames(gene_info_table)) {
157 |          gene_info_table <- cbind.data.frame(
158 |             "ID"=as.character(gene_info_table[,1]), 
159 |             gene_info_table, 
160 |             stringsAsFactors=FALSE)
161 |       }
162 |       
163 |       # Cells might not, but genes should be matching.
164 |       genes     <- intersect(gene_info_table$ID, rownames(counts_matrix))
165 |       num_genes <- length(genes)
166 |       if (    num_genes != nrow(gene_info_table) 
167 |            || num_genes != nrow(counts_matrix)   ) { 
168 |          stop( "Gene IDs did not match between ID feild of ",
169 |                "gene_info_file (or first column), and row names of ",
170 |                "counts matrix")
171 |       }
172 |       
173 |       # The check above only compares the sets of IDs, so put the gene info
174 |       # rows into the order of the counts rows (no change if already aligned).
175 |       gene_order      <- match(rownames(counts_matrix), gene_info_table$ID)
176 |       gene_info_table <- gene_info_table[gene_order, , drop=FALSE]
177 |       
178 |       # Create a summarised experiment object.
179 |       dataset_se  <- SummarizedExperiment(
180 |                         assays = S4Vectors::SimpleList(counts=counts_matrix),
181 |                         colData=S4Vectors::DataFrame(cell_info_table),
182 |                         rowData=S4Vectors::DataFrame(gene_info_table))
183 |    }
184 |    
185 |    return(dataset_se)
186 | }
187 | 
188 | 
189 | 
190 | 
191 | #' load_se_from_files
192 | #'
193 | #' \code{load_se_from_files} is a wrapper for \code{load_se_from_tables} that
194 | #' will read in tables from specified files. 
195 | #' 
196 | #' @param counts_file A tab-separated file of a matrix of read counts. As per 
197 | #' \bold{counts_matrix}. First column should be gene ID, and top row cell ids.
198 | #'
199 | #' @param cell_info_file Tab-separated text file of cell information, as per
200 | #' \bold{cell_info_table}. Columns must have names. 
201 | #'
202 | #' @param gene_info_file Optional tab-separated text file of gene information, 
203 | #' as per \bold{gene_info_table}. Columns must have names. Default=NA
204 | #' 
205 | #' @family Data loading functions
206 | #'
207 | #' @describeIn load_se_from_tables To read from files
208 | #' 
209 | #' @import SummarizedExperiment
210 | #' 
211 | #' @export   
212 | load_se_from_files <- function(
213 |    counts_file, cell_info_file, gene_info_file = NA, group_col_name="group", 
214 |    cell_col_name=NA 
215 | ) {
216 |    # Every input is a tab-separated text file with a header row.
217 |    read_tab <- function(path, ...) {
218 |       utils::read.table(path, header=TRUE, sep = "\t", 
219 |                         stringsAsFactors = FALSE, ...)
220 |    }
221 |    
222 |    # Counts: the first column supplies the row names, and column names are
223 |    # left unchanged (check.names=FALSE).
224 |    counts <- as.matrix(read_tab(counts_file, row.names=1, check.names=FALSE))
225 |    cells  <- read_tab(cell_info_file)
226 |    
227 |    # Gene info is optional; NA is passed on when no file was given.
228 |    genes <- if (is.na(gene_info_file)) NA else read_tab(gene_info_file)
229 |    
230 |    load_se_from_tables(counts_matrix   = counts, 
231 |                        cell_info_table = cells, 
232 |                        gene_info_table = genes, 
233 |                        group_col_name  = group_col_name, 
234 |                        cell_col_name   = cell_col_name)
235 | }
236 | 
237 | 
238 | 
239 | 
240 | 
241 | 
242 | 
243 |    
244 | 
245 | #' load_dataset_10Xdata
246 | #'
247 | #' Convenience function to create a SummarizedExperiment object (dataset_se) 
248 | #' from the output of a 10X cell ranger pipeline run. 
249 | #' 
250 | #' 
251 | #' This function makes a SummarizedExperiment object in a form that
252 | #' should work for celaref functions. Specifically, that means it will have an
253 | #' 'ID' field for genes (view with \code{rowData(dataset_se)}), and both
254 | #' 'cell_sample' and 'group' fields for cells (view with
255 | #' \code{colData(dataset_se)}). See parameters for detail.
256 | #' Additionally, the counts will be an integer matrix (not a
257 | #' sparse matrix), and the \emph{group} field (but not \emph{cell_sample}
258 | #' or \emph{ID}) will be a factor.
259 | #' 
260 | #' The clustering information can be read from whichever cluster is specified,
261 | #' usually there will be several choices. 
262 | #' 
263 | #' This function is designed to work with output of version 2.0.1 of the 
264 | #' cellRanger pipeline, may not work with others (will not work for 1.x).
265 | #'     
266 | #' @param dataset_path Path to the directory of 10X data, as generated by the 
267 | #' cellRanger pipeline (versions 2.1.0 and 2.0.1). The directory should have 
268 | #' subdirectories \emph{analysis}, \emph{filtered_gene_bc_matrices} and
269 | #' \emph{raw_gene_bc_matrices} (only the first 2 are read).
270 | #' @param dataset_genome The genome that the reads were aligned against, 
271 | #' e.g. GRCh38.  Check for this as a directory name under the 
272 | #' \emph{filtered_gene_bc_matrices} subdirectory if unsure.
273 | #' @param clustering_set The 10X cellRanger pipeline produces several 
274 | #' different  cluster definitions per dataset. Specify which one to use e.g. 
275 | #' kmeans_10_clusters Find them as directory names under 
276 | #' \emph{analysis/clustering/}
277 | #' @param gene_id_cols_10X Vector of the names of the columns in the gene 
278 | #' description file (\emph{filtered_gene_bc_matrices/GRCh38/genes.tsv}). The 
279 | #' first element of this will become the ID. 
280 | #' Default = c("ensembl_ID","GeneSymbol")
281 | #' @param id_to_use Column from \bold{gene_id_cols_10X} that defines the gene 
282 | #' identifier to use as 'ID' in the returned SummarisedExperiment object.
283 | #' Many-to-one relationships between the assumed unique first element of 
284 | #' \bold{gene_id_cols_10X} and \bold{id_to_use} will be handled gracefully by 
285 | #' \code{\link{convert_se_gene_ids}}. 
286 | #' Defaults to first element of \bold{gene_id_cols_10X}
287 | #' 
288 | #' @return A SummarisedExperiment object containing the count data, cell info
289 | #' and gene info.
290 | #'
291 | #' @examples
292 | #' example_10X_dir <- system.file("extdata", "sim_cr_dataset", package = "celaref")
293 | #' dataset_se <- load_dataset_10Xdata(example_10X_dir, dataset_genome="GRCh38", 
294 | #'     clustering_set="kmeans_4_clusters", gene_id_cols_10X=c("gene")) 
295 | #' 
296 | #' \dontrun{
297 | #' dataset_se <- load_dataset_10Xdata('~/path/to/data/10X_pbmc4k', 
298 | #'     dataset_genome="GRCh38", 
299 | #'     clustering_set="kmeans_7_clusters") 
300 | #' } 
301 | #'
302 | #' @seealso \href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} 
303 | #' For general doco on the SummarizedExperiment objects.
304 | #' @seealso \code{\link{convert_se_gene_ids}} describes method for 
305 | #' converting IDs.
306 | #' 
307 | #' @family Data loading functions
308 | #'
309 | #' @import SummarizedExperiment
310 | #' 
311 | #' @export
312 | load_dataset_10Xdata <- function(
313 |    dataset_path, dataset_genome, clustering_set, 
314 |    gene_id_cols_10X =c("ensembl_ID","GeneSymbol"), 
315 |    id_to_use = gene_id_cols_10X[1] 
316 | ) {
317 | 
318 |    matrix_file <- file.path(dataset_path,"filtered_gene_bc_matrices",
319 |                             dataset_genome,"matrix.mtx")
320 |    cells_file  <- file.path(dataset_path,"filtered_gene_bc_matrices",
321 |                             dataset_genome,"barcodes.tsv")
322 |    genes_file  <- file.path(dataset_path,"filtered_gene_bc_matrices",
323 |                             dataset_genome,"genes.tsv")
324 |    
325 |    #.../10X_pbmc5pExpr/analysis/clustering/kmeans_5_clusters/clusters.csv
326 |    clustering_file <- file.path(dataset_path,"analysis","clustering",
327 |                                 clustering_set,"clusters.csv")
328 |    clustering_table <- readr::read_csv(clustering_file, col_types=readr::cols())
329 |    colnames(clustering_table) <- c('cell_sample', 'group')
330 |    clustering_table$group <- factor(clustering_table$group)
331 |    
332 |    # gene info
333 |    # Start with the first id (assumed uniq!), but change to specified after.
334 |    genes_table    <- readr::read_tsv(genes_file, 
335 |                                      col_names = gene_id_cols_10X, 
336 |                                      col_types = readr::cols())
337 |    genes_table$ID <- dplyr::pull(genes_table, gene_id_cols_10X[1])
338 |    
339 |    # Note there are no labels here, just <genes> rows and <cells> columns
340 |    filtered_matrix <- as.matrix(Matrix::readMM(matrix_file)) # from Matrix
341 |    storage.mode(filtered_matrix ) <- "integer"
342 |    order_of_cells <- scan(cells_file, what=character())
343 |    colnames(filtered_matrix) <- order_of_cells 
344 |    rownames(filtered_matrix) <- genes_table$ID    
345 |    
346 |    # Nothing below matches clustering rows to matrix columns by name, so line
347 |    # the cluster assignments up with the barcode order here. If the barcodes
348 |    # can't all be found, keep the old positional behaviour but warn about it.
349 |    cell_order <- match(order_of_cells, clustering_table$cell_sample)
350 |    if (anyNA(cell_order)) {
351 |       warning("Not all cell barcodes in ", cells_file, " are listed in ",
352 |               clustering_file, "; assuming both files list cells in the ",
353 |               "same order.")
354 |    } else {
355 |       clustering_table <- clustering_table[cell_order, ]
356 |    }
357 |    
358 |    # Create a summarised experiment object.
359 |    dataset_se  <- SummarizedExperiment(
360 |                         assays = S4Vectors::SimpleList(counts=filtered_matrix),
361 |                         colData=clustering_table,
362 |                         rowData=genes_table)
363 |    
364 |    # Optionally change id (handles m:1)
365 |    rowData(dataset_se)$total_count <- 
366 |       Matrix::rowSums(assay(dataset_se,'counts'))
367 |    if (id_to_use != gene_id_cols_10X[1] ) { 
368 |       dataset_se <- convert_se_gene_ids(dataset_se, 
369 |                                         new_id=id_to_use, 
370 |                                         eval_col='total_count')
371 |    }
372 |    
373 |    return(dataset_se)
374 | } 
375 | 
#' convert_se_gene_ids
#'
#' Change the gene IDs in the supplied dataset_se object to some other id 
#' already present in the gene info (as seen with \code{rowData()})
#'
#' @param dataset_se Summarised experiment object containing count data. 
#' Its feature information (see \code{rowData()}) must contain an 'ID' column,
#' as well as the \bold{new_id} and \bold{eval_col} columns.
#' @param new_id  A column within the feature information (view 
#' \code{rowData(dataset_se)}) of the \bold{dataset_se}, which will become
#' the new ID column. Non-uniqueness of this column is handled gracefully! 
#' Any \emph{NAs} will be dropped.
#' @param eval_col Which column to use to break ties of duplicate 
#' \bold{new_id}. Must be a column within the feature information (view 
#' \code{rowData(dataset_se)}) of the \bold{dataset_se}. Total reads per gene
#' feature is a good choice.
#' @param find_max If false, this will choose the minimal \bold{eval_col} 
#' instead of max. Default = TRUE
#'
#' @return A modified dataset_se - ID will now be \bold{new_id}, and unique. 
#' It will have fewer genes if old ID to new ID was not a 1:1 mapping. 
#' The selected genes will be according to the eval col max (or min). 
#' Ties on \bold{eval_col} are broken by sorting on the old ID, so this
#' \emph{should} pick the alphabetical first on ties, but could change. 
#' Genes are returned sorted by \bold{new_id}.
#'
#' @examples
#' 
#' # The demo dataset doesn't have other names, so make some up 
#' # (don't do this)
#' dataset_se <- demo_ref_se
#' rowData(dataset_se)$dummyname <- toupper(rowData(dataset_se)$ID)
#'
#' # If not already present, define a column to evaluate, 
#' # typically total reads/gene.
#' rowData(dataset_se)$total_count <- rowSums(assay(dataset_se))
#' 
#' dataset_se <- convert_se_gene_ids(dataset_se, new_id='dummyname', eval_col='total_count') 
#'
#' @seealso \href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} 
#' For general doco on the SummarizedExperiment objects.
#' @seealso \code{\link{load_se_from_files}} For reading data from flat 
#' files (not 10X cellRanger output)
#'
#' @import SummarizedExperiment
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
convert_se_gene_ids <- function(dataset_se, new_id, eval_col, find_max=TRUE) {
   
   old_id <- "ID" 
   required_cols <- c(old_id, new_id, eval_col)
   missing_cols  <- setdiff(required_cols, colnames(rowData(dataset_se)))
   if (length(missing_cols) > 0) {
      # Collapse explicitly: stop() would otherwise paste the vector elements
      # together with no separator, giving an unreadable message.
      stop("Can't find all of ", paste(required_cols, collapse=", "), 
           " in rowData(dataset_se) colnames (missing: ", 
           paste(missing_cols, collapse=", "), ")")
   } 
   
   row_data_df <- BiocGenerics::as.data.frame(rowData(dataset_se))[,c(old_id,new_id, eval_col)]
   colnames(row_data_df) <- c("old_lab", "new_lab", "eval_lab")
   # Remember each gene's original row position, so the representative rows 
   # can be selected by position (no reliance on rownames matching 'ID').
   row_data_df$row_idx <- seq_len(nrow(row_data_df))
   
   # Genes with no new id can't be represented - drop them.
   row_data_df <- row_data_df[!is.na(row_data_df$new_lab),] 
   
   # Sort so that the preferred gene for each new id comes first: 
   # best eval_col value (max or min), then old id to break ties.
   if (find_max) {
      row_data_df <- row_data_df %>% 
         dplyr::arrange(.data$new_lab, 
                        dplyr::desc(.data$eval_lab), 
                        .data$old_lab)
   }else { #min
      row_data_df <- row_data_df %>% 
         dplyr::arrange(.data$new_lab, 
                        .data$eval_lab, 
                        .data$old_lab)
   }
   
   # Keep only the first (preferred) gene per new id.
   row_data_unique <- row_data_df %>%
      dplyr::group_by(.data$new_lab) %>% 
      dplyr::slice(1) %>%
      dplyr::ungroup()
   
   # Subset to just those representative genes, and give the unique new id 
   dataset_se <- dataset_se[ row_data_unique$row_idx , ]
   rowData(dataset_se)$ID <- rowData(dataset_se)[[new_id]] # overwrite the ID
   rownames(dataset_se)   <- rowData(dataset_se)[["ID"]]
   
   return(dataset_se)
} 
455 | 
456 | 
457 | 
458 | 
459 | 
460 | 
461 | 
#' trim_small_groups_and_low_expression_genes
#'
#' Filter and return a SummarizedExperiment object (dataset_se) by several
#' metrics:
#' \itemize{
#'   \item Cells with at least \bold{min_lib_size} total reads.
#'   \item Genes expressed in at least \bold{min_detected_by_min_samples} 
#'   cells, at a threshold of \bold{min_reads_in_sample} per cell.
#'   \item Remove entire groups (clusters) of cells where there are fewer than
#'   \bold{min_group_membership} cells in that group.
#' }
#'
#' If it hasn't been done already, it is highly recommended to use this 
#' function to filter out genes with no/low total counts 
#' (especially in single cell data,
#' there can be many) - without expression they are not useful and may reduce
#' statistical power.
#'
#' Likewise, very small groups (<5 cells) are unlikely to give useful
#' results with this method. And cells with abnormally small library sizes may
#' not be desirable.
#'
#' Note that the number of cells each gene is detected in is counted across 
#' all supplied cells, before any cells are removed by the library size or
#' group size filters.
#'
#' Of course 'reasonable' thresholds for filtering cells/genes are subjective.
#' Defaults are moderately sensible starting points.
#'
#' @param dataset_se Summarised experiment object containing count data. Also
#' requires 'group' to be set within the cell information
#' (see \code{colData()})
#' @param min_lib_size Minimum library size. Cells with fewer than this many 
#' reads removed. Default = 1000
#' @param min_reads_in_sample Require this many reads to consider a gene 
#' detected in a sample. Default = 1
#' @param min_detected_by_min_samples Keep genes detected in this many 
#' samples.  May change with experiment size. Default = 5
#' @param min_group_membership Throw out groups/clusters with fewer than this 
#' many cells. May change with experiment size. Default = 5
#'
#' @return A filtered dataset_se, ready for use. Cells with an \emph{NA} 
#' group are removed. If 'group' is a factor, unused levels are dropped.
#'
#' @examples
#' 
#' demo_query_se.trimmed  <- 
#'    trim_small_groups_and_low_expression_genes(demo_query_se)
#' demo_query_se.trimmed2 <- 
#'    trim_small_groups_and_low_expression_genes(demo_ref_se, 
#'                                               min_group_membership = 10)
#'
#' @import SummarizedExperiment
#' 
#' @export
trim_small_groups_and_low_expression_genes <- function(
   dataset_se, min_lib_size=1000, min_group_membership=5,
   min_reads_in_sample=1, min_detected_by_min_samples=5 
) {
   
   # Fail early and clearly, rather than after all the filtering work.
   if (is.null(dataset_se$group)) {
      stop("Can't find 'group' in colData(dataset_se) colnames")
   }
   
   counts_index <- get_counts_index(n_assays=length(assays(dataset_se)), 
                                    assay_names = names(assays(dataset_se)))
   counts <- assay(dataset_se, counts_index)
   
   ## Filter by min lib size, num samples detected in
   # Use different rowSums/colSums for hdf5-backed SCE.
   if (is(counts, "DelayedMatrix")) {
      row_sums <- DelayedArray::rowSums
      col_sums <- DelayedArray::colSums
   } else {
      row_sums <- Matrix::rowSums
      col_sums <- Matrix::colSums
   }
   # Both summaries are taken from the full, unfiltered counts.
   samples_per_gene <- row_sums(counts >= min_reads_in_sample)
   lib_sizes        <- col_sums(counts)
   
   dataset_se <- dataset_se[, lib_sizes >= min_lib_size ]
   dataset_se <- dataset_se[ samples_per_gene >= min_detected_by_min_samples, ]
   

   ## Less than a certain number of cells in a group, 
   # discard the group, and its cells.
   # NB: also removes 'NA' group entries.
   cell_group_sizes <- table(dataset_se$group)
   groups_to_keep   <- names(cell_group_sizes)[cell_group_sizes >= min_group_membership]
   dataset_se       <- dataset_se[,dataset_se$group %in% groups_to_keep]
   # droplevels() has no method for plain character vectors, so only tidy up
   # levels when group really is a factor.
   if (is.factor(dataset_se$group)) {
      dataset_se$group <- droplevels(dataset_se$group)
   }
   
   return(dataset_se)
}
544 | 
545 | 
546 | 
547 | 
548 | 


--------------------------------------------------------------------------------
/R/plotting_functions.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' make_ranking_violin_plot
  3 | #'
  4 | #' Plot a panel of violin plots showing the distribution of the 'top' genes of 
  5 | #' each of query group, across the reference dataset.
  6 | #'
  7 | #' In the plot output, each panel correponsds to a different group/cluster in 
  8 | #' the query experiment. The x-axis has the groups in the reference dataset. 
  9 | #' The y-axis is the rescaled rank of each 'top' gene from the query group, 
 10 | #' within each reference group.
 11 | #' 
 12 | #' Only the 'top' genes for each query group are plotted, forming the violin
 13 | #' plots - each individual gene is shown as a tickmark. Some groups have few 
 14 | #' top genes, and so their uncertanty can be seen on this plot. 
 15 | #' 
 16 | #' The thick black lines reprenset the median gene rescaled ranking for each 
 17 | #' query group / reference group combination. Having this fall above the dotted 
 18 | #' median threshold marker is a quick indication of potential similarity. 
 19 | #' A complete lack of similarity would have a median rank around 0.5. Median 
 20 | #' rankings much less than 0.5 are common though (an 'anti-cell-groupA' 
 21 | #' signature), because genes overrepresented in one group in an experiment, 
 22 | #' are likely to be relatively 'underrepresented' in the other groups. 
 23 | #' Taken to an  
 24 | #' extreme, if there are only two reference groups, they'll be complete 
 25 | #' opposites.
 26 | #' 
 27 | #' Input can be either the precomputed \emph{de_table.marked} object for the 
 28 | #' comparison, OR both \emph{de_table.test} and \emph{de_table.ref} 
 29 | #' differential expression results to compare from 
 30 | #' \code{\link{contrast_each_group_to_the_rest}} 
 31 | #' 
 32 | #' @param de_table.marked The output of 
 33 | #'    \code{\link{get_the_up_genes_for_all_possible_groups}} 
 34 | #'    for the contrast of interest.
 35 | #' @param de_table.test A differential expression table of the 
 36 | #'    query experiment,
 37 | #'    as generated from \code{\link{contrast_each_group_to_the_rest}}
 38 | #' @param de_table.ref A differential expression table of the 
 39 | #'    reference dataset,
 40 | #'    as generated from \code{\link{contrast_each_group_to_the_rest}}
 41 | #' @param log10trans  Plot on a log scale? Useful for distinishing multiple 
 42 | #'    similar, yet distinct cell type that bunch at top of plot. Default=FALSE.
 43 | #' 
 44 | #' @param ... Further options to be passed to 
 45 | #'    \code{\link{get_the_up_genes_for_all_possible_groups}}, 
 46 | #'    e.g. rankmetric
 47 | #' 
 48 | #' @return  A ggplot object.
 49 | #'
 50 | #' @examples
 51 | #'
 52 | #' # Make input
 53 | #' # de_table.demo_query <- contrast_each_group_to_the_rest(demo_query_se, "demo_query")
 54 | #' # de_table.demo_ref   <- contrast_each_group_to_the_rest(demo_ref_se,   "demo_ref")
 55 | #'    
 56 | #' # This:                                                  
 57 | #' make_ranking_violin_plot(de_table.test=de_table.demo_query, 
 58 | #'                          de_table.ref=de_table.demo_ref ) 
 59 | #'                         
 60 | #' # Is equivalent to this:
 61 | #' de_table.marked.query_vs_ref <- 
 62 | #'      get_the_up_genes_for_all_possible_groups( de_table.test=de_table.demo_query, 
 63 | #'                                                de_table.ref=de_table.demo_ref)
 64 | #' make_ranking_violin_plot(de_table.marked.query_vs_ref)
 65 | #'
 66 | #'
 67 | #' @seealso \code{\link{get_the_up_genes_for_all_possible_groups}} To make 
 68 | #' the input data.
 69 | #'
 70 | #' 
 71 | #' @export
 72 | make_ranking_violin_plot <- function(
 73 |    de_table.marked=NA, de_table.test=NA, de_table.ref=NA, log10trans=FALSE , 
 74 |    ... ) {
 75 |    
 76 |    defined_de_table.marked <- any(! is.na(de_table.marked))
 77 |    defined_de_table.test   <- any(! is.na(de_table.test))
 78 |    defined_de_table.ref    <- any(! is.na(de_table.ref) ) 
 79 |    
 80 |    if ( !defined_de_table.marked 
 81 |         & defined_de_table.test 
 82 |         & defined_de_table.ref ) {
 83 |       de_table.marked <- get_the_up_genes_for_all_possible_groups(de_table.test,
 84 |                                                                   de_table.ref,
 85 |                                                                   ... )
 86 |       
 87 |    } else if (!( defined_de_table.marked 
 88 |                  & !defined_de_table.test  
 89 |                  & !defined_de_table.ref )) {
 90 |       stop("Specify either 'de_table.marked' or both de_table.test ",
 91 |            "AND de_table.ref (naming parameters)")
 92 |    } #Else, de_table.marked provided, continue
 93 |    
 94 |    
 95 |    if (log10trans) { 
 96 |       #happily, it'll never be 0
 97 |       de_table.marked$rescaled_rank <- log10(de_table.marked$rescaled_rank) 
 98 |    }
 99 |    
100 |    p <- ggplot2::ggplot(de_table.marked, 
101 |                         ggplot2::aes_string(y='rescaled_rank', 
102 |                                             x='group', 
103 |                                             fill='group')) +
104 |       ggplot2::geom_violin(ggplot2::aes_string(colour='group')) +
105 |       ggplot2::geom_point(alpha=0.5, size=3, pch='-', show.legend = FALSE) +
106 |       ggplot2::scale_y_reverse() +
107 |       ggplot2::ylab("Test geneset rank in reference cluster") + 
108 |       ggplot2::xlab("") +  
109 |       ggplot2::stat_summary(fun.y = stats::median, 
110 |                             fun.ymin = stats::median, 
111 |                             fun.ymax = stats::median, 
112 |                             geom = "crossbar", 
113 |                             col="black", 
114 |                             show.legend = FALSE) +
115 |       ggplot2::theme_bw() +    
116 |       ggplot2::theme(panel.grid.major = ggplot2::element_blank(), 
117 |                      panel.grid.minor = ggplot2::element_blank()) + 
118 |       ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, 
119 |                                                          hjust = 1, 
120 |                                                          vjust=0.5)) +
121 |       ggplot2::facet_wrap(~test_group)
122 | 
123 |    return (p)
124 | }
125 | 
126 | 
127 | 
128 | 
129 | 


--------------------------------------------------------------------------------
/data/de_table.demo_query.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/de_table.demo_query.rda


--------------------------------------------------------------------------------
/data/de_table.demo_ref.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/de_table.demo_ref.rda


--------------------------------------------------------------------------------
/data/demo_cell_info_table.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_cell_info_table.rda


--------------------------------------------------------------------------------
/data/demo_counts_matrix.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_counts_matrix.rda


--------------------------------------------------------------------------------
/data/demo_gene_info_table.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_gene_info_table.rda


--------------------------------------------------------------------------------
/data/demo_microarray_expr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_microarray_expr.rda


--------------------------------------------------------------------------------
/data/demo_microarray_sample_sheet.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_microarray_sample_sheet.rda


--------------------------------------------------------------------------------
/data/demo_query_se.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_query_se.rda


--------------------------------------------------------------------------------
/data/demo_ref_se.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/data/demo_ref_se.rda


--------------------------------------------------------------------------------
/inst/extdata/demo_microarray_info.tab:
--------------------------------------------------------------------------------
 1 | cell_sample	group
 2 | array1	Dunno
 3 | array10	Dunno
 4 | array12	Dunno
 5 | array24	Dunno
 6 | array26	Dunno
 7 | array14	Exciting
 8 | array17	Exciting
 9 | array19	Exciting
10 | array38	Exciting
11 | array41	Exciting
12 | array4	Mystery celltype
13 | array8	Mystery celltype
14 | array13	Mystery celltype
15 | array15	Mystery celltype
16 | array18	Mystery celltype
17 | array9	Weird subtype
18 | array42	Weird subtype
19 | array82	Weird subtype
20 | array95	Weird subtype
21 | array128	Weird subtype
22 | 


--------------------------------------------------------------------------------
/inst/extdata/larger_doco_examples.rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/inst/extdata/larger_doco_examples.rdata


--------------------------------------------------------------------------------
/inst/extdata/sim_cr_dataset/analysis/clustering/kmeans_4_clusters/clusters.csv:
--------------------------------------------------------------------------------
  1 | Barcode,Cluster
  2 | Cell2,c4
  3 | Cell3,c2
  4 | Cell5,c1
  5 | Cell6,c3
  6 | Cell7,c4
  7 | Cell11,c2
  8 | Cell16,c4
  9 | Cell20,c2
 10 | Cell22,c2
 11 | Cell23,c3
 12 | Cell27,c2
 13 | Cell30,c2
 14 | Cell31,c3
 15 | Cell32,c2
 16 | Cell33,c3
 17 | Cell34,c3
 18 | Cell35,c3
 19 | Cell36,c4
 20 | Cell39,c4
 21 | Cell40,c2
 22 | Cell43,c2
 23 | Cell48,c3
 24 | Cell49,c2
 25 | Cell50,c3
 26 | Cell56,c3
 27 | Cell61,c4
 28 | Cell62,c3
 29 | Cell63,c4
 30 | Cell65,c2
 31 | Cell66,c4
 32 | Cell69,c3
 33 | Cell71,c2
 34 | Cell72,c2
 35 | Cell73,c1
 36 | Cell74,c2
 37 | Cell76,c2
 38 | Cell80,c3
 39 | Cell83,c4
 40 | Cell84,c3
 41 | Cell87,c2
 42 | Cell89,c1
 43 | Cell91,c2
 44 | Cell96,c4
 45 | Cell97,c2
 46 | Cell100,c4
 47 | Cell101,c4
 48 | Cell102,c3
 49 | Cell103,c1
 50 | Cell104,c3
 51 | Cell106,c2
 52 | Cell107,c4
 53 | Cell111,c3
 54 | Cell112,c4
 55 | Cell114,c2
 56 | Cell116,c4
 57 | Cell119,c3
 58 | Cell121,c4
 59 | Cell122,c4
 60 | Cell123,c3
 61 | Cell124,c4
 62 | Cell125,c4
 63 | Cell126,c4
 64 | Cell127,c4
 65 | Cell129,c1
 66 | Cell130,c3
 67 | Cell131,c3
 68 | Cell132,c2
 69 | Cell133,c4
 70 | Cell136,c2
 71 | Cell137,c4
 72 | Cell138,c1
 73 | Cell139,c4
 74 | Cell143,c4
 75 | Cell147,c4
 76 | Cell148,c3
 77 | Cell149,c2
 78 | Cell150,c3
 79 | Cell152,c4
 80 | Cell154,c2
 81 | Cell155,c3
 82 | Cell156,c4
 83 | Cell158,c3
 84 | Cell159,c2
 85 | Cell161,c3
 86 | Cell163,c2
 87 | Cell164,c3
 88 | Cell167,c2
 89 | Cell170,c4
 90 | Cell173,c4
 91 | Cell176,c3
 92 | Cell177,c4
 93 | Cell180,c4
 94 | Cell184,c4
 95 | Cell187,c2
 96 | Cell188,c3
 97 | Cell191,c4
 98 | Cell193,c3
 99 | Cell194,c4
100 | Cell197,c2
101 | Cell199,c4
102 | 


--------------------------------------------------------------------------------
/inst/extdata/sim_cr_dataset/filtered_gene_bc_matrices/GRCh38/barcodes.tsv:
--------------------------------------------------------------------------------
  1 | Cell2
  2 | Cell3
  3 | Cell5
  4 | Cell6
  5 | Cell7
  6 | Cell11
  7 | Cell16
  8 | Cell20
  9 | Cell22
 10 | Cell23
 11 | Cell27
 12 | Cell30
 13 | Cell31
 14 | Cell32
 15 | Cell33
 16 | Cell34
 17 | Cell35
 18 | Cell36
 19 | Cell39
 20 | Cell40
 21 | Cell43
 22 | Cell48
 23 | Cell49
 24 | Cell50
 25 | Cell56
 26 | Cell61
 27 | Cell62
 28 | Cell63
 29 | Cell65
 30 | Cell66
 31 | Cell69
 32 | Cell71
 33 | Cell72
 34 | Cell73
 35 | Cell74
 36 | Cell76
 37 | Cell80
 38 | Cell83
 39 | Cell84
 40 | Cell87
 41 | Cell89
 42 | Cell91
 43 | Cell96
 44 | Cell97
 45 | Cell100
 46 | Cell101
 47 | Cell102
 48 | Cell103
 49 | Cell104
 50 | Cell106
 51 | Cell107
 52 | Cell111
 53 | Cell112
 54 | Cell114
 55 | Cell116
 56 | Cell119
 57 | Cell121
 58 | Cell122
 59 | Cell123
 60 | Cell124
 61 | Cell125
 62 | Cell126
 63 | Cell127
 64 | Cell129
 65 | Cell130
 66 | Cell131
 67 | Cell132
 68 | Cell133
 69 | Cell136
 70 | Cell137
 71 | Cell138
 72 | Cell139
 73 | Cell143
 74 | Cell147
 75 | Cell148
 76 | Cell149
 77 | Cell150
 78 | Cell152
 79 | Cell154
 80 | Cell155
 81 | Cell156
 82 | Cell158
 83 | Cell159
 84 | Cell161
 85 | Cell163
 86 | Cell164
 87 | Cell167
 88 | Cell170
 89 | Cell173
 90 | Cell176
 91 | Cell177
 92 | Cell180
 93 | Cell184
 94 | Cell187
 95 | Cell188
 96 | Cell191
 97 | Cell193
 98 | Cell194
 99 | Cell197
100 | Cell199
101 | 


--------------------------------------------------------------------------------
/inst/extdata/sim_cr_dataset/filtered_gene_bc_matrices/GRCh38/genes.tsv:
--------------------------------------------------------------------------------
  1 | Gene1
  2 | Gene2
  3 | Gene3
  4 | Gene4
  5 | Gene5
  6 | Gene6
  7 | Gene7
  8 | Gene8
  9 | Gene9
 10 | Gene10
 11 | Gene11
 12 | Gene12
 13 | Gene13
 14 | Gene14
 15 | Gene15
 16 | Gene16
 17 | Gene17
 18 | Gene18
 19 | Gene19
 20 | Gene20
 21 | Gene21
 22 | Gene22
 23 | Gene23
 24 | Gene24
 25 | Gene25
 26 | Gene26
 27 | Gene27
 28 | Gene28
 29 | Gene29
 30 | Gene30
 31 | Gene31
 32 | Gene32
 33 | Gene33
 34 | Gene34
 35 | Gene35
 36 | Gene36
 37 | Gene37
 38 | Gene38
 39 | Gene39
 40 | Gene40
 41 | Gene41
 42 | Gene42
 43 | Gene43
 44 | Gene44
 45 | Gene45
 46 | Gene46
 47 | Gene47
 48 | Gene48
 49 | Gene49
 50 | Gene50
 51 | Gene51
 52 | Gene52
 53 | Gene53
 54 | Gene54
 55 | Gene55
 56 | Gene56
 57 | Gene57
 58 | Gene58
 59 | Gene59
 60 | Gene60
 61 | Gene61
 62 | Gene62
 63 | Gene63
 64 | Gene64
 65 | Gene65
 66 | Gene66
 67 | Gene67
 68 | Gene68
 69 | Gene69
 70 | Gene70
 71 | Gene71
 72 | Gene72
 73 | Gene73
 74 | Gene74
 75 | Gene75
 76 | Gene76
 77 | Gene77
 78 | Gene78
 79 | Gene79
 80 | Gene80
 81 | Gene81
 82 | Gene82
 83 | Gene83
 84 | Gene84
 85 | Gene85
 86 | Gene86
 87 | Gene87
 88 | Gene88
 89 | Gene89
 90 | Gene90
 91 | Gene91
 92 | Gene92
 93 | Gene93
 94 | Gene94
 95 | Gene95
 96 | Gene96
 97 | Gene97
 98 | Gene98
 99 | Gene99
100 | Gene100
101 | Gene101
102 | Gene102
103 | Gene103
104 | Gene104
105 | Gene105
106 | Gene106
107 | Gene107
108 | Gene108
109 | Gene109
110 | Gene110
111 | Gene111
112 | Gene112
113 | Gene113
114 | Gene114
115 | Gene115
116 | Gene116
117 | Gene117
118 | Gene118
119 | Gene119
120 | Gene120
121 | Gene121
122 | Gene122
123 | Gene123
124 | Gene124
125 | Gene125
126 | Gene126
127 | Gene127
128 | Gene128
129 | Gene129
130 | Gene130
131 | Gene131
132 | Gene132
133 | Gene133
134 | Gene134
135 | Gene135
136 | Gene136
137 | Gene137
138 | Gene138
139 | Gene139
140 | Gene140
141 | Gene141
142 | Gene142
143 | Gene143
144 | Gene144
145 | Gene145
146 | Gene146
147 | Gene147
148 | Gene148
149 | Gene149
150 | Gene150
151 | Gene151
152 | Gene152
153 | Gene153
154 | Gene154
155 | Gene155
156 | Gene156
157 | Gene157
158 | Gene158
159 | Gene159
160 | Gene160
161 | Gene161
162 | Gene162
163 | Gene163
164 | Gene164
165 | Gene165
166 | Gene166
167 | Gene167
168 | Gene168
169 | Gene169
170 | Gene170
171 | Gene171
172 | Gene172
173 | Gene173
174 | Gene174
175 | Gene175
176 | Gene176
177 | Gene177
178 | Gene178
179 | Gene179
180 | Gene180
181 | Gene181
182 | Gene182
183 | Gene183
184 | Gene184
185 | Gene185
186 | Gene186
187 | Gene187
188 | Gene188
189 | Gene189
190 | Gene190
191 | Gene191
192 | Gene192
193 | Gene193
194 | Gene194
195 | Gene195
196 | Gene196
197 | Gene197
198 | Gene198
199 | Gene199
200 | Gene200
201 | 


--------------------------------------------------------------------------------
/inst/extdata/sim_query_cell_info.tab:
--------------------------------------------------------------------------------
  1 | Cell	Batch	group	ExpLibSize
  2 | Cell2	Batch1	Group4	55045.211935979336
  3 | Cell3	Batch1	Group2	46942.962765139586
  4 | Cell5	Batch1	Group1	72692.13311775407
  5 | Cell6	Batch1	Group3	50267.15714478282
  6 | Cell7	Batch1	Group4	71431.82077986447
  7 | Cell11	Batch1	Group2	57801.51179249036
  8 | Cell16	Batch1	Group4	60782.525139232435
  9 | Cell20	Batch1	Group2	32291.376395933472
 10 | Cell22	Batch1	Group2	64176.6589613515
 11 | Cell23	Batch1	Group3	59747.499047330995
 12 | Cell27	Batch1	Group2	60470.09152848558
 13 | Cell30	Batch1	Group2	69802.51829486103
 14 | Cell31	Batch1	Group3	55998.58265270814
 15 | Cell32	Batch1	Group2	64108.98003048688
 16 | Cell33	Batch1	Group3	35474.646469347615
 17 | Cell34	Batch1	Group3	60575.56962875652
 18 | Cell35	Batch1	Group3	51377.9749089028
 19 | Cell36	Batch1	Group4	62404.34849785896
 20 | Cell39	Batch1	Group4	54364.24118877251
 21 | Cell40	Batch1	Group2	60255.227829446245
 22 | Cell43	Batch1	Group2	44600.34473848163
 23 | Cell48	Batch1	Group3	66989.11596906635
 24 | Cell49	Batch1	Group2	49643.92670026899
 25 | Cell50	Batch1	Group3	76592.1406419619
 26 | Cell56	Batch1	Group3	98457.3787529401
 27 | Cell61	Batch1	Group4	73917.23161412794
 28 | Cell62	Batch1	Group3	55466.48636108551
 29 | Cell63	Batch1	Group4	46823.27775549405
 30 | Cell65	Batch1	Group2	51513.85532245833
 31 | Cell66	Batch1	Group4	54505.22979637784
 32 | Cell69	Batch1	Group3	74491.16758845026
 33 | Cell71	Batch1	Group2	51169.56853776723
 34 | Cell72	Batch1	Group2	84853.52779481266
 35 | Cell73	Batch1	Group1	47008.062171698446
 36 | Cell74	Batch1	Group2	57964.54317140159
 37 | Cell76	Batch1	Group2	63575.42053761479
 38 | Cell80	Batch1	Group3	57186.93516037964
 39 | Cell83	Batch1	Group4	44021.57951072861
 40 | Cell84	Batch1	Group3	60670.07180588897
 41 | Cell87	Batch1	Group2	66248.06747901038
 42 | Cell89	Batch1	Group1	79171.18267921747
 43 | Cell91	Batch1	Group2	85936.28365559668
 44 | Cell96	Batch1	Group4	52966.215119169225
 45 | Cell97	Batch1	Group2	63829.30664853533
 46 | Cell100	Batch1	Group4	59095.981328852904
 47 | Cell101	Batch1	Group4	59795.92531779838
 48 | Cell102	Batch1	Group3	90898.4676206882
 49 | Cell103	Batch1	Group1	55420.939959992604
 50 | Cell104	Batch1	Group3	69590.99062192626
 51 | Cell106	Batch1	Group2	55855.15819670576
 52 | Cell107	Batch1	Group4	75738.78220679189
 53 | Cell111	Batch1	Group3	56032.77076099252
 54 | Cell112	Batch1	Group4	60325.32160538211
 55 | Cell114	Batch1	Group2	57698.416843254374
 56 | Cell116	Batch1	Group4	48027.30256714334
 57 | Cell119	Batch1	Group3	59859.33104285597
 58 | Cell121	Batch1	Group4	55903.31429105042
 59 | Cell122	Batch1	Group4	45783.67785044417
 60 | Cell123	Batch1	Group3	48668.03566329688
 61 | Cell124	Batch1	Group4	53512.53003792753
 62 | Cell125	Batch1	Group4	53652.21569609339
 63 | Cell126	Batch1	Group4	57462.08564837251
 64 | Cell127	Batch1	Group4	47253.62682017155
 65 | Cell129	Batch1	Group1	53390.57536562024
 66 | Cell130	Batch1	Group3	69888.33576605862
 67 | Cell131	Batch1	Group3	50933.28208504171
 68 | Cell132	Batch1	Group2	57873.64943427228
 69 | Cell133	Batch1	Group4	59831.67218172945
 70 | Cell136	Batch1	Group2	77171.4079794374
 71 | Cell137	Batch1	Group4	95548.54981559514
 72 | Cell138	Batch1	Group1	56602.141116393555
 73 | Cell139	Batch1	Group4	56601.3159577096
 74 | Cell143	Batch1	Group4	59909.04617733183
 75 | Cell147	Batch1	Group4	78264.66497132627
 76 | Cell148	Batch1	Group3	75516.81549977609
 77 | Cell149	Batch1	Group2	65085.63854658878
 78 | Cell150	Batch1	Group3	50787.36548481734
 79 | Cell152	Batch1	Group4	63987.997020185954
 80 | Cell154	Batch1	Group2	60181.518371716695
 81 | Cell155	Batch1	Group3	55504.249641624956
 82 | Cell156	Batch1	Group4	94403.59117901337
 83 | Cell158	Batch1	Group3	69381.95947033467
 84 | Cell159	Batch1	Group2	56042.544621374
 85 | Cell161	Batch1	Group3	52104.30868933979
 86 | Cell163	Batch1	Group2	43188.91094666741
 87 | Cell164	Batch1	Group3	75463.09414633608
 88 | Cell167	Batch1	Group2	59186.99978757687
 89 | Cell170	Batch1	Group4	71418.01533778882
 90 | Cell173	Batch1	Group4	53250.77141610082
 91 | Cell176	Batch1	Group3	58644.36674368383
 92 | Cell177	Batch1	Group4	48901.83438692313
 93 | Cell180	Batch1	Group4	85635.94617540664
 94 | Cell184	Batch1	Group4	45614.84304096037
 95 | Cell187	Batch1	Group2	53423.054389264944
 96 | Cell188	Batch1	Group3	64087.436031051926
 97 | Cell191	Batch1	Group4	63001.0418395676
 98 | Cell193	Batch1	Group3	59702.21760197293
 99 | Cell194	Batch1	Group4	69362.5647529825
100 | Cell197	Batch1	Group2	63211.8496261272
101 | Cell199	Batch1	Group4	49670.03482735444
102 | Cell201	Batch1	Group4	61760.324055229874
103 | Cell203	Batch1	Group4	55610.85456721159
104 | Cell204	Batch1	Group4	54099.61390827727
105 | Cell207	Batch1	Group4	74498.04712152222
106 | Cell208	Batch1	Group2	45217.554856646595
107 | Cell210	Batch1	Group2	79239.6468347486
108 | Cell212	Batch1	Group3	53610.78804963595
109 | Cell214	Batch1	Group4	63823.4218287253
110 | Cell215	Batch1	Group3	69466.79167030232
111 | Cell216	Batch1	Group4	90569.66327381074
112 | Cell217	Batch1	Group3	77167.35346164237
113 | Cell219	Batch1	Group3	53251.50839422345
114 | Cell220	Batch1	Group2	58481.37700236844
115 | Cell221	Batch1	Group4	55439.75632970351
116 | Cell223	Batch1	Group3	84559.29206558104
117 | Cell225	Batch1	Group4	58217.26279283668
118 | Cell226	Batch1	Group4	67051.11240341056
119 | Cell228	Batch1	Group1	76564.73024682916
120 | Cell232	Batch1	Group4	38691.538758825904
121 | Cell233	Batch1	Group4	53280.2808677595
122 | Cell234	Batch1	Group2	48173.52500200363
123 | Cell236	Batch1	Group4	72524.4729245371
124 | Cell237	Batch1	Group4	64174.92680381625
125 | Cell238	Batch1	Group4	54816.70556623328
126 | Cell239	Batch1	Group2	55990.728826870036
127 | Cell246	Batch1	Group2	54321.82176169253
128 | Cell247	Batch1	Group2	53360.17465025401
129 | Cell251	Batch1	Group3	46695.29400759923
130 | Cell255	Batch1	Group4	55656.354014515775
131 | Cell256	Batch1	Group3	71952.2459361064
132 | Cell257	Batch1	Group4	81893.50918336774
133 | Cell259	Batch1	Group3	78186.42435296281
134 | Cell260	Batch1	Group4	59869.213068357465
135 | Cell261	Batch1	Group2	74398.95104287741
136 | Cell262	Batch1	Group2	62349.11870707226
137 | Cell264	Batch1	Group2	57504.926294828096
138 | Cell265	Batch1	Group4	60832.27057959668
139 | Cell266	Batch1	Group3	45972.20651528205
140 | Cell269	Batch1	Group2	48448.50013740261
141 | Cell271	Batch1	Group1	56007.57369606149
142 | Cell273	Batch1	Group4	61799.83974912702
143 | Cell274	Batch1	Group3	64634.64664309145
144 | Cell275	Batch1	Group2	51232.623421348115
145 | Cell276	Batch1	Group2	78361.07311185921
146 | Cell277	Batch1	Group4	80830.34291836408
147 | Cell280	Batch1	Group4	67289.1328441728
148 | Cell281	Batch1	Group3	69457.79339855688
149 | Cell283	Batch1	Group4	49448.93383046807
150 | Cell286	Batch1	Group1	51534.88742390086
151 | Cell289	Batch1	Group4	47101.43206249451
152 | Cell291	Batch1	Group4	61647.86726580255
153 | Cell293	Batch1	Group3	58056.78573732392
154 | Cell294	Batch1	Group3	55102.676990906766
155 | Cell295	Batch1	Group4	54475.22518722605
156 | Cell296	Batch1	Group4	44364.39562863301
157 | Cell301	Batch1	Group3	46311.135368258416
158 | Cell302	Batch1	Group4	90744.13913059059
159 | Cell303	Batch1	Group4	55630.879403561725
160 | Cell304	Batch1	Group4	74750.16256368313
161 | Cell306	Batch1	Group2	77460.2380963579
162 | Cell308	Batch1	Group3	75377.2923583555
163 | Cell312	Batch1	Group4	39728.35599603908
164 | Cell317	Batch1	Group2	54947.605965535186
165 | Cell318	Batch1	Group3	60146.32204119423
166 | Cell319	Batch1	Group4	83756.79312788113
167 | Cell322	Batch1	Group4	72666.94045254748
168 | Cell327	Batch1	Group4	58871.142675962226
169 | Cell328	Batch1	Group4	53486.849877579974
170 | Cell332	Batch1	Group3	61433.48853632637
171 | Cell333	Batch1	Group1	58100.61612447167
172 | Cell335	Batch1	Group3	66839.12495747086
173 | Cell337	Batch1	Group4	65401.01185146557
174 | Cell340	Batch1	Group4	39425.533670533776
175 | Cell342	Batch1	Group4	68096.42132102429
176 | Cell343	Batch1	Group4	45095.93081737535
177 | Cell344	Batch1	Group4	66612.03042149784
178 | Cell345	Batch1	Group4	66665.70355166619
179 | Cell346	Batch1	Group4	67671.66083623093
180 | Cell348	Batch1	Group4	84599.48862710161
181 | Cell349	Batch1	Group4	61530.03742538303
182 | Cell351	Batch1	Group4	38535.057433454815
183 | Cell353	Batch1	Group2	75237.76359998228
184 | Cell354	Batch1	Group4	67650.68102592663
185 | Cell356	Batch1	Group2	45820.03577410458
186 | Cell357	Batch1	Group2	79261.91611565062
187 | Cell358	Batch1	Group3	64836.975633354654
188 | Cell360	Batch1	Group3	51805.79877815144
189 | Cell363	Batch1	Group4	50772.38991840925
190 | Cell365	Batch1	Group2	43331.86653196263
191 | Cell366	Batch1	Group2	64636.06226633208
192 | Cell367	Batch1	Group4	80303.48134486075
193 | Cell369	Batch1	Group4	76462.0969793205
194 | Cell371	Batch1	Group2	58218.55534893042
195 | Cell372	Batch1	Group4	69895.79492766099
196 | Cell377	Batch1	Group2	75972.72254440194
197 | Cell378	Batch1	Group4	67390.23969831594
198 | Cell380	Batch1	Group4	106641.73501079918
199 | Cell381	Batch1	Group3	53767.87223004134
200 | Cell383	Batch1	Group3	61774.49750877812
201 | Cell385	Batch1	Group4	60777.03251179295
202 | Cell386	Batch1	Group4	64811.5244158795
203 | Cell392	Batch1	Group4	108499.42301691597
204 | Cell393	Batch1	Group4	75905.72036316429
205 | Cell396	Batch1	Group3	77607.99259447245
206 | Cell404	Batch1	Group2	67568.70056143394
207 | Cell406	Batch1	Group3	49161.92808659108
208 | Cell407	Batch1	Group2	49078.77320243258
209 | Cell408	Batch1	Group4	69089.20159516943
210 | Cell412	Batch1	Group4	95465.68748465856
211 | Cell413	Batch1	Group2	73702.00184719308
212 | Cell414	Batch1	Group2	71394.02567102032
213 | Cell416	Batch1	Group4	81893.11190268018
214 | Cell419	Batch1	Group2	110513.15104214237
215 | Cell420	Batch1	Group4	70522.19271647074
216 | Cell423	Batch1	Group2	58937.64447575753
217 | Cell427	Batch1	Group4	52361.39307076591
218 | Cell429	Batch1	Group2	55180.82222002167
219 | Cell430	Batch1	Group4	70021.82788295683
220 | Cell434	Batch1	Group1	50686.938732602095
221 | Cell435	Batch1	Group2	47065.35656147533
222 | Cell437	Batch1	Group2	56480.13497027942
223 | Cell441	Batch1	Group2	76451.06969737713
224 | Cell444	Batch1	Group4	59937.6313876502
225 | Cell446	Batch1	Group4	62068.95576994645
226 | Cell447	Batch1	Group4	83231.64776864702
227 | Cell448	Batch1	Group4	56051.78479698383
228 | Cell449	Batch1	Group2	52595.8645832927
229 | Cell450	Batch1	Group3	54523.07506508324
230 | Cell452	Batch1	Group1	88912.54012495978
231 | Cell453	Batch1	Group4	47975.37568170933
232 | Cell454	Batch1	Group3	67365.12391083316
233 | Cell458	Batch1	Group4	70581.01015564842
234 | Cell459	Batch1	Group4	69205.01571501343
235 | Cell463	Batch1	Group2	64115.49510965184
236 | Cell464	Batch1	Group4	45090.425997240454
237 | Cell467	Batch1	Group3	51791.76436126253
238 | Cell468	Batch1	Group3	65987.1684275486
239 | Cell469	Batch1	Group3	63497.285946210024
240 | Cell472	Batch1	Group4	47497.64494193834
241 | Cell475	Batch1	Group4	58466.27279262591
242 | Cell479	Batch1	Group3	55160.73373239489
243 | Cell480	Batch1	Group4	65683.64657740944
244 | Cell482	Batch1	Group2	51124.56103667991
245 | Cell486	Batch1	Group3	54496.917493266104
246 | Cell489	Batch1	Group3	73877.84763763017
247 | Cell491	Batch1	Group4	57635.529324289324
248 | Cell495	Batch1	Group4	61390.957814298774
249 | Cell497	Batch1	Group2	52490.66031446426
250 | Cell500	Batch1	Group4	64452.66943504912
251 | Cell502	Batch1	Group4	59427.28297858763
252 | Cell503	Batch1	Group2	71988.13416816102
253 | Cell505	Batch1	Group3	70213.06423993556
254 | Cell509	Batch1	Group2	77470.57359426001
255 | Cell511	Batch1	Group4	82077.149146208
256 | Cell512	Batch1	Group3	56735.65896134725
257 | Cell513	Batch1	Group2	56929.9412186641
258 | Cell515	Batch1	Group3	60528.19275150474
259 | Cell516	Batch1	Group2	44130.96417960521
260 | Cell519	Batch1	Group1	65004.64479156216
261 | Cell522	Batch1	Group3	73051.72183489446
262 | Cell525	Batch1	Group3	83381.01151682546
263 | Cell526	Batch1	Group4	78239.89183872836
264 | Cell530	Batch1	Group3	92857.29314682864
265 | Cell531	Batch1	Group4	64524.440590123755
266 | Cell533	Batch1	Group2	52616.81595754013
267 | Cell534	Batch1	Group4	66837.57564223155
268 | Cell535	Batch1	Group3	68305.10656084202
269 | Cell538	Batch1	Group3	52953.32014207346
270 | Cell539	Batch1	Group3	56548.681933774926
271 | Cell540	Batch1	Group4	45324.30694147125
272 | Cell541	Batch1	Group2	57636.35312419474
273 | Cell544	Batch1	Group2	68469.48418404363
274 | Cell546	Batch1	Group1	50850.45859297336
275 | Cell548	Batch1	Group4	81738.98575687404
276 | Cell551	Batch1	Group4	73565.03679525343
277 | Cell553	Batch1	Group4	58857.092554393195
278 | Cell555	Batch1	Group4	72065.33637405386
279 | Cell556	Batch1	Group3	73333.44656516777
280 | Cell557	Batch1	Group2	78669.73070688167
281 | Cell559	Batch1	Group3	80824.8793849364
282 | Cell560	Batch1	Group3	48006.1976665016
283 | Cell564	Batch1	Group3	59489.81729474708
284 | Cell565	Batch1	Group3	47819.619988074584
285 | Cell567	Batch1	Group4	63290.14850777825
286 | Cell568	Batch1	Group3	62052.18322117015
287 | Cell569	Batch1	Group3	88119.50214326676
288 | Cell572	Batch1	Group3	77014.44501942628
289 | Cell573	Batch1	Group4	83428.89361342315
290 | Cell574	Batch1	Group3	45953.19043948141
291 | Cell576	Batch1	Group2	61950.01382429024
292 | Cell577	Batch1	Group4	57322.9614609041
293 | Cell578	Batch1	Group4	48886.57995672078
294 | Cell581	Batch1	Group2	49748.23779418759
295 | Cell587	Batch1	Group3	62210.62459097196
296 | Cell589	Batch1	Group4	72614.55310010164
297 | Cell592	Batch1	Group4	45335.531676595725
298 | Cell597	Batch1	Group2	61611.073947419725
299 | Cell598	Batch1	Group4	59535.945353196126
300 | Cell600	Batch1	Group3	51968.955654015925
301 | Cell601	Batch1	Group4	56073.94340189981
302 | Cell604	Batch1	Group2	57888.138813698395
303 | Cell606	Batch1	Group4	58233.19098839832
304 | Cell607	Batch1	Group2	42447.481661548176
305 | Cell608	Batch1	Group4	65330.72452582818
306 | Cell610	Batch1	Group3	94173.20045108836
307 | Cell613	Batch1	Group3	73212.51102571294
308 | Cell615	Batch1	Group3	63305.74806568626
309 | Cell617	Batch1	Group4	41103.05759277336
310 | Cell623	Batch1	Group1	46872.24452304599
311 | Cell625	Batch1	Group3	59769.01527724249
312 | Cell630	Batch1	Group2	47735.62398141603
313 | Cell635	Batch1	Group4	50328.34791033202
314 | Cell636	Batch1	Group2	53645.0813665348
315 | Cell637	Batch1	Group2	33066.639198324694
316 | Cell640	Batch1	Group2	70787.7539691832
317 | Cell642	Batch1	Group2	67478.32266456228
318 | Cell644	Batch1	Group3	37342.0804662592
319 | Cell645	Batch1	Group4	64732.0231081017
320 | Cell648	Batch1	Group4	70687.07618562752
321 | Cell649	Batch1	Group4	62117.709615804466
322 | Cell651	Batch1	Group4	66762.34875113204
323 | Cell653	Batch1	Group3	51337.98052996924
324 | Cell654	Batch1	Group4	57174.79404452918
325 | Cell655	Batch1	Group4	52219.20595934258
326 | Cell656	Batch1	Group4	65032.75449480318
327 | Cell657	Batch1	Group1	56481.82375922139
328 | Cell658	Batch1	Group2	77642.16706770551
329 | Cell663	Batch1	Group4	44475.65957829542
330 | Cell666	Batch1	Group2	54300.99028714421
331 | Cell667	Batch1	Group4	52202.42552486367
332 | Cell669	Batch1	Group2	63381.59168839421
333 | Cell671	Batch1	Group3	68583.56220842896
334 | Cell672	Batch1	Group2	66072.45716488647
335 | Cell675	Batch1	Group4	43541.45031715768
336 | Cell679	Batch1	Group2	53196.216005959795
337 | Cell680	Batch1	Group4	56839.063347097224
338 | Cell681	Batch1	Group2	49420.4555093929
339 | Cell682	Batch1	Group3	64654.22505009
340 | Cell683	Batch1	Group4	54296.256376663674
341 | Cell684	Batch1	Group3	48325.1824836285
342 | Cell687	Batch1	Group3	73337.32420781028
343 | Cell689	Batch1	Group3	64357.10570963703
344 | Cell691	Batch1	Group4	60182.7172869254
345 | Cell692	Batch1	Group4	66259.09419562017
346 | Cell695	Batch1	Group4	49012.67028671377
347 | Cell696	Batch1	Group4	60763.59646618964
348 | Cell697	Batch1	Group2	57538.088270904795
349 | Cell701	Batch1	Group2	45188.24825655201
350 | Cell704	Batch1	Group3	40370.86441903496
351 | Cell708	Batch1	Group4	67620.48993825342
352 | Cell710	Batch1	Group4	67519.48603742913
353 | Cell713	Batch1	Group4	74080.3374718697
354 | Cell714	Batch1	Group3	55757.673763471845
355 | Cell715	Batch1	Group1	62097.84979431549
356 | Cell717	Batch1	Group4	55436.42114769647
357 | Cell718	Batch1	Group4	44566.189825468195
358 | Cell719	Batch1	Group4	55892.958365087725
359 | Cell721	Batch1	Group2	64303.51464051373
360 | Cell722	Batch1	Group4	53719.070337791214
361 | Cell724	Batch1	Group4	65997.28103442215
362 | Cell734	Batch1	Group4	58931.651174542116
363 | Cell735	Batch1	Group4	44891.869352932314
364 | Cell740	Batch1	Group4	69768.58923555358
365 | Cell743	Batch1	Group3	59375.048972886725
366 | Cell744	Batch1	Group4	72255.56405499107
367 | Cell745	Batch1	Group4	77042.51042511035
368 | Cell753	Batch1	Group2	41460.7757666524
369 | Cell756	Batch1	Group1	46849.941934094
370 | Cell757	Batch1	Group2	66244.40127587176
371 | Cell760	Batch1	Group2	61143.684854501145
372 | Cell761	Batch1	Group2	83473.00264850094
373 | Cell762	Batch1	Group3	49202.39835700463
374 | Cell763	Batch1	Group3	70650.22771710184
375 | Cell765	Batch1	Group2	64722.34589481799
376 | Cell766	Batch1	Group2	65664.0322938361
377 | Cell767	Batch1	Group1	41868.395254806506
378 | Cell768	Batch1	Group2	47643.78854594363
379 | Cell769	Batch1	Group2	90364.64509068437
380 | Cell770	Batch1	Group3	62335.71527106323
381 | Cell772	Batch1	Group4	65766.53076444786
382 | Cell775	Batch1	Group4	47903.74573186333
383 | Cell776	Batch1	Group4	45784.37285802186
384 | Cell777	Batch1	Group2	64256.4262129188
385 | Cell778	Batch1	Group3	54068.79605332756
386 | Cell779	Batch1	Group1	78587.15708901401
387 | Cell780	Batch1	Group4	46984.13480352631
388 | Cell783	Batch1	Group4	78333.46080900206
389 | Cell784	Batch1	Group4	87659.96078369247
390 | Cell786	Batch1	Group2	56566.506938958526
391 | Cell788	Batch1	Group2	43713.58868696966
392 | Cell789	Batch1	Group4	39559.39112929476
393 | Cell794	Batch1	Group4	64575.233010369855
394 | Cell796	Batch1	Group3	52081.273242571486
395 | Cell798	Batch1	Group4	71747.80263367867
396 | Cell799	Batch1	Group3	57641.734168853494
397 | Cell800	Batch1	Group4	54863.9336016859
398 | Cell803	Batch1	Group2	56557.29627510162
399 | Cell804	Batch1	Group3	57492.37701488207
400 | Cell805	Batch1	Group3	46975.80631112637
401 | Cell808	Batch1	Group2	62551.94574169659
402 | Cell809	Batch1	Group1	51342.46987463979
403 | Cell810	Batch1	Group4	53065.80775883584
404 | Cell811	Batch1	Group2	51674.50351739878
405 | Cell818	Batch1	Group2	59266.86315473463
406 | Cell819	Batch1	Group2	72030.03576858308
407 | Cell821	Batch1	Group3	70880.8312828581
408 | Cell823	Batch1	Group2	56911.76592546331
409 | Cell824	Batch1	Group4	40685.66252848761
410 | Cell825	Batch1	Group3	62911.386250443364
411 | Cell826	Batch1	Group4	57101.73904288724
412 | Cell835	Batch1	Group3	65448.91980892367
413 | Cell837	Batch1	Group4	54238.85692070112
414 | Cell841	Batch1	Group3	44869.954906124294
415 | Cell843	Batch1	Group4	55988.887441902145
416 | Cell848	Batch1	Group2	70249.55818106886
417 | Cell851	Batch1	Group4	52649.020712481135
418 | Cell852	Batch1	Group4	61823.67740393481
419 | Cell854	Batch1	Group2	51911.90776104299
420 | Cell855	Batch1	Group2	73448.55949409209
421 | Cell856	Batch1	Group4	66099.24997674157
422 | Cell857	Batch1	Group2	58099.889187982175
423 | Cell858	Batch1	Group2	71596.41862670988
424 | Cell859	Batch1	Group2	58176.605365988544
425 | Cell866	Batch1	Group1	52303.208531428674
426 | Cell868	Batch1	Group1	43561.982005104255
427 | Cell873	Batch1	Group2	59442.417067045724
428 | Cell875	Batch1	Group2	56182.70972715173
429 | Cell878	Batch1	Group2	67139.7507150235
430 | Cell879	Batch1	Group3	55951.18801485375
431 | Cell882	Batch1	Group3	73063.24062862612
432 | Cell887	Batch1	Group1	54868.651642313365
433 | Cell888	Batch1	Group4	66045.06591550316
434 | Cell890	Batch1	Group4	53815.54419620353
435 | Cell891	Batch1	Group3	60458.723744747185
436 | Cell894	Batch1	Group4	51143.52254020912
437 | Cell895	Batch1	Group1	74432.51608438807
438 | Cell897	Batch1	Group3	72014.09703285371
439 | Cell898	Batch1	Group4	63831.86378201816
440 | Cell899	Batch1	Group4	73473.5934320163
441 | Cell901	Batch1	Group1	67425.61946244954
442 | Cell903	Batch1	Group1	58838.40718690198
443 | Cell907	Batch1	Group1	59450.43405054954
444 | Cell909	Batch1	Group4	37619.55516116481
445 | Cell910	Batch1	Group4	52222.36485086904
446 | Cell911	Batch1	Group3	47838.310240377126
447 | Cell914	Batch1	Group4	65577.56871884658
448 | Cell915	Batch1	Group2	43895.9051042549
449 | Cell918	Batch1	Group3	69769.09190087122
450 | Cell919	Batch1	Group4	61216.7945059993
451 | Cell920	Batch1	Group4	70630.25080127409
452 | Cell927	Batch1	Group2	51289.46481187577
453 | Cell931	Batch1	Group3	64130.41694661509
454 | Cell934	Batch1	Group2	78308.23885334641
455 | Cell941	Batch1	Group2	64525.783487762674
456 | Cell942	Batch1	Group4	54755.250906064444
457 | Cell943	Batch1	Group2	60170.130978370165
458 | Cell944	Batch1	Group2	57845.116354270795
459 | Cell945	Batch1	Group2	83507.54364238998
460 | Cell946	Batch1	Group2	101432.36859619302
461 | Cell951	Batch1	Group3	54296.37830393065
462 | Cell952	Batch1	Group3	45202.496889546295
463 | Cell955	Batch1	Group3	64438.533682449255
464 | Cell959	Batch1	Group4	85432.79490783332
465 | Cell962	Batch1	Group4	58692.108713920694
466 | Cell963	Batch1	Group4	73959.54568610046
467 | Cell967	Batch1	Group2	64688.77009092675
468 | Cell969	Batch1	Group3	50729.75002803493
469 | Cell970	Batch1	Group4	66381.80890662316
470 | Cell972	Batch1	Group2	50881.56943644793
471 | Cell973	Batch1	Group2	101075.54404900614
472 | Cell975	Batch1	Group2	70832.73872971303
473 | Cell976	Batch1	Group2	75159.32291042985
474 | Cell978	Batch1	Group2	70867.30161605243
475 | Cell979	Batch1	Group4	57250.88799421547
476 | Cell981	Batch1	Group3	62461.18938637405
477 | Cell982	Batch1	Group2	39859.54803445133
478 | Cell984	Batch1	Group4	66110.07926926417
479 | Cell986	Batch1	Group4	69059.71894564742
480 | Cell988	Batch1	Group4	59239.85517944907
481 | Cell992	Batch1	Group4	50334.1456987133
482 | Cell994	Batch1	Group2	61957.68939191427
483 | Cell996	Batch1	Group2	63270.44124687236
484 | Cell997	Batch1	Group2	51036.5735110997
485 | Cell998	Batch1	Group3	58885.13566264156
486 | Cell999	Batch1	Group2	53571.58520836509
487 | 


--------------------------------------------------------------------------------
/inst/extdata/sim_query_gene_info.tab:
--------------------------------------------------------------------------------
  1 | Gene	BaseGeneMean
  2 | Gene1	15.070166941865256
  3 | Gene2	1.3214264125078057
  4 | Gene3	2.6471940552392117
  5 | Gene4	0.4314126274103263
  6 | Gene5	1.3263294447897767
  7 | Gene6	2.6556251736653076
  8 | Gene7	1.5087404287107207
  9 | Gene8	2.3980012018206116
 10 | Gene9	1.174393945899882
 11 | Gene10	0.16520007149995963
 12 | Gene11	0.6063222399660461
 13 | Gene12	1.1567203151084895
 14 | Gene13	0.5697996434879279
 15 | Gene14	5.910077587592439
 16 | Gene15	1.2584703860405693
 17 | Gene16	3.7312510321195247
 18 | Gene17	3.732506094739015
 19 | Gene18	0.9527541186413107
 20 | Gene19	0.8299824883603495
 21 | Gene20	5.419859401553251
 22 | Gene21	0.12596021483220884
 23 | Gene22	0.2838042849946762
 24 | Gene23	0.19182208419572352
 25 | Gene24	3.1676347186761196
 26 | Gene25	0.5341929920646847
 27 | Gene26	4.869121526664764
 28 | Gene27	2.6688329911337663
 29 | Gene28	0.8680950634977899
 30 | Gene29	1.6676711932219812
 31 | Gene30	0.7773442566244723
 32 | Gene31	0.13776280782261727
 33 | Gene32	1.62467446974602
 34 | Gene33	3.408362066133117
 35 | Gene34	0.1551462319196136
 36 | Gene35	1.3693638794774319
 37 | Gene36	6.539464980895205
 38 | Gene37	0.9702595159605595
 39 | Gene38	4.936128580990508e-4
 40 | Gene39	1.4130366607242324
 41 | Gene40	7.1535748703456825
 42 | Gene41	0.8945476522623658
 43 | Gene42	0.5154778965230534
 44 | Gene43	5.282330848092755
 45 | Gene44	3.53610188530163
 46 | Gene45	5.439591965553024
 47 | Gene46	0.5049124358836893
 48 | Gene47	1.4218930614187446
 49 | Gene48	0.3595905853302735
 50 | Gene49	5.119043134371494
 51 | Gene50	0.2502446473365546
 52 | Gene51	0.006118641284641165
 53 | Gene52	0.012327696958109266
 54 | Gene53	2.270533237587709
 55 | Gene54	0.2277377522940588
 56 | Gene55	0.24543799611272676
 57 | Gene56	1.0140785160835557
 58 | Gene57	3.2974078405543414
 59 | Gene58	3.1500104445427897
 60 | Gene59	0.7890490693371935
 61 | Gene60	6.322817718066329
 62 | Gene61	0.418082717549265
 63 | Gene62	2.034700826504123
 64 | Gene63	2.440291341133348
 65 | Gene64	0.002391602636955899
 66 | Gene65	0.6029098216167582
 67 | Gene66	3.3016014257423305
 68 | Gene67	0.7482611461409928
 69 | Gene68	0.12812044564547306
 70 | Gene69	0.35066175036892044
 71 | Gene70	0.1516936710876119
 72 | Gene71	0.009882651695215254
 73 | Gene72	0.31386242457573105
 74 | Gene73	0.29994401190711634
 75 | Gene74	1.4015772692111752
 76 | Gene75	1.2497823102933978
 77 | Gene76	0.5632321953622131
 78 | Gene77	1.6801835300877066
 79 | Gene78	0.2984888953070142
 80 | Gene79	1.180053288644517
 81 | Gene80	3.496870702720702
 82 | Gene81	3.031213248293015
 83 | Gene82	2.4544117966947323
 84 | Gene83	0.6426568584477319
 85 | Gene84	2.118998746932126
 86 | Gene85	5.998968724366211
 87 | Gene86	0.6370660786673482
 88 | Gene87	4.878501798012513
 89 | Gene88	4.236375736446719
 90 | Gene89	2.284970524462676
 91 | Gene90	0.9048320211852259
 92 | Gene91	3.328864243871868
 93 | Gene92	5.99485292531768
 94 | Gene93	0.5110956753927606
 95 | Gene94	0.667408242586006
 96 | Gene95	0.4217533489766145
 97 | Gene96	13.667521456964762
 98 | Gene97	12.94918139728537
 99 | Gene98	2.7655551566988636
100 | Gene99	0.026034366925850677
101 | Gene100	8.41958370917556
102 | Gene101	0.629002401311236
103 | Gene102	0.3984773552739324
104 | Gene103	6.648385010819624
105 | Gene104	0.52025440038014
106 | Gene105	0.6041210844214472
107 | Gene106	1.2171295700541882
108 | Gene107	2.1725070518596286
109 | Gene108	0.8655766682126647
110 | Gene109	0.20528408017518773
111 | Gene110	0.2871928791336786
112 | Gene111	0.6272521086349571
113 | Gene112	0.20938989155484852
114 | Gene113	1.1982525476674017
115 | Gene114	2.161009771439923
116 | Gene115	0.2108280997361641
117 | Gene116	5.490780408446104
118 | Gene117	7.901244927017148
119 | Gene118	1.6497365669481299
120 | Gene119	4.764385944692947
121 | Gene120	0.23214880715569838
122 | Gene121	1.826709920091376
123 | Gene122	2.222420903422962
124 | Gene123	0.28732215327512833
125 | Gene124	0.3270285059720184
126 | Gene125	0.028345690324059893
127 | Gene126	0.6385121429116922
128 | Gene127	0.45777438372310086
129 | Gene128	3.564006959063369
130 | Gene129	0.22257284891712545
131 | Gene130	3.06866579942385
132 | Gene131	0.010164450289338305
133 | Gene132	2.2159808283796263
134 | Gene133	0.18777347338931308
135 | Gene134	0.15308684636813338
136 | Gene135	5.029457080816111
137 | Gene136	0.19252884382655167
138 | Gene137	4.817288261266247
139 | Gene138	1.3571315484166406
140 | Gene139	0.9551584585099973
141 | Gene140	6.358966828719138
142 | Gene141	0.04758510724490084
143 | Gene142	0.5387642785968534
144 | Gene143	0.40903272802201124
145 | Gene144	0.9046274593654208
146 | Gene145	2.5300499331280255
147 | Gene146	0.007103927806247059
148 | Gene147	0.06617422376829078
149 | Gene148	1.1814835278920117
150 | Gene149	0.00730382422326564
151 | Gene150	4.523440167023822
152 | Gene151	5.0714787992616595
153 | Gene152	0.058381702597344695
154 | Gene153	9.849902477078747e-4
155 | Gene154	2.2714248681125615
156 | Gene155	2.8062016343972034
157 | Gene156	0.005019413599753972
158 | Gene157	1.1414874803343211
159 | Gene158	7.98467666157701
160 | Gene159	4.750130376606671
161 | Gene160	0.41449314009314947
162 | Gene161	0.15629966419234909
163 | Gene162	2.0033323934363065
164 | Gene163	1.9133958051043465
165 | Gene164	1.8693826892142416
166 | Gene165	4.264060971136903
167 | Gene166	0.9041637960610055
168 | Gene167	1.7028366821036134
169 | Gene168	0.11251934733987087
170 | Gene169	1.9744619054281058
171 | Gene170	0.7657225174288028
172 | Gene171	2.9666842911519877
173 | Gene172	2.5384890421122717
174 | Gene173	0.03553526322132183
175 | Gene174	0.17140160127119666
176 | Gene175	0.5375172760119603
177 | Gene176	1.1957388264762312
178 | Gene177	2.233141923375682
179 | Gene178	5.910669733366996
180 | Gene179	19.45900667463009
181 | Gene180	3.4749784094647818
182 | Gene181	1.813842633144611
183 | Gene182	3.875603753958344
184 | Gene183	0.36373824951414807
185 | Gene184	0.15860239608952753
186 | Gene185	1.5602047184804921
187 | Gene186	4.6993917033637205
188 | Gene187	8.789618789385647
189 | Gene188	0.7302572334579468
190 | Gene189	7.049027042557773
191 | Gene190	0.8125960912689993
192 | Gene191	0.43486651275110694
193 | Gene192	0.0038062829718254923
194 | Gene193	0.13273796373459007
195 | Gene194	0.5723144816254031
196 | Gene195	0.46429625203852326
197 | Gene196	4.781896261324305
198 | Gene197	0.0562914344264723
199 | Gene198	3.982487795926734
200 | Gene199	0.05075770320152484
201 | Gene200	1.329913146761545
202 | 


--------------------------------------------------------------------------------
/inst/extdata/sim_ref_cell_info.tab:
--------------------------------------------------------------------------------
  1 | Cell	Batch	group	ExpLibSize
  2 | Cell1	Batch1	Dunno	81807.75476847978
  3 | Cell4	Batch1	Mystery celltype	62730.6390003853
  4 | Cell8	Batch1	Mystery celltype	71526.13637673049
  5 | Cell9	Batch1	Weird subtype	55765.83796711112
  6 | Cell10	Batch1	Dunno	57154.759125106255
  7 | Cell12	Batch1	Dunno	44653.9600760209
  8 | Cell13	Batch1	Mystery celltype	56977.54532670995
  9 | Cell14	Batch1	Exciting	79621.80423861962
 10 | Cell15	Batch1	Mystery celltype	73021.20728154207
 11 | Cell17	Batch1	Exciting	68663.57960914526
 12 | Cell18	Batch1	Mystery celltype	61380.02858108507
 13 | Cell19	Batch1	Exciting	49105.04257428885
 14 | Cell21	Batch1	Mystery celltype	48745.50674600157
 15 | Cell24	Batch1	Dunno	80939.53138566627
 16 | Cell25	Batch1	Mystery celltype	60337.961437687976
 17 | Cell26	Batch1	Dunno	71523.80437229766
 18 | Cell28	Batch1	Dunno	51926.06440250282
 19 | Cell29	Batch1	Dunno	58694.172070632536
 20 | Cell37	Batch1	Mystery celltype	51880.90421612222
 21 | Cell38	Batch1	Exciting	64803.71304274764
 22 | Cell41	Batch1	Exciting	52603.54365645042
 23 | Cell42	Batch1	Weird subtype	44843.82704880153
 24 | Cell44	Batch1	Mystery celltype	68350.05378878301
 25 | Cell45	Batch1	Mystery celltype	63382.24898997591
 26 | Cell46	Batch1	Exciting	60935.07649125598
 27 | Cell47	Batch1	Mystery celltype	56677.477360313824
 28 | Cell51	Batch1	Dunno	47362.250320482985
 29 | Cell52	Batch1	Exciting	57894.476854201625
 30 | Cell53	Batch1	Exciting	70101.1065954678
 31 | Cell54	Batch1	Dunno	59282.29335149211
 32 | Cell55	Batch1	Mystery celltype	55565.21798107001
 33 | Cell57	Batch1	Mystery celltype	91518.44169688094
 34 | Cell58	Batch1	Exciting	56104.558317678595
 35 | Cell59	Batch1	Mystery celltype	56100.782528681935
 36 | Cell60	Batch1	Mystery celltype	74079.76508395211
 37 | Cell64	Batch1	Dunno	51327.57192778397
 38 | Cell67	Batch1	Dunno	56461.29586910634
 39 | Cell68	Batch1	Dunno	54679.35005965752
 40 | Cell70	Batch1	Exciting	70215.6929388313
 41 | Cell75	Batch1	Exciting	57225.43833009415
 42 | Cell77	Batch1	Dunno	29879.48368797497
 43 | Cell78	Batch1	Mystery celltype	58249.49191451221
 44 | Cell79	Batch1	Dunno	62975.96035025468
 45 | Cell81	Batch1	Exciting	66246.82544356218
 46 | Cell82	Batch1	Weird subtype	93246.34528982667
 47 | Cell85	Batch1	Exciting	79589.44691883637
 48 | Cell86	Batch1	Exciting	64036.026674086905
 49 | Cell88	Batch1	Mystery celltype	58718.03624087147
 50 | Cell90	Batch1	Exciting	64365.74649030141
 51 | Cell92	Batch1	Mystery celltype	66218.46267656592
 52 | Cell93	Batch1	Exciting	72973.980033263
 53 | Cell94	Batch1	Dunno	79160.24380591838
 54 | Cell95	Batch1	Weird subtype	64088.031522976635
 55 | Cell98	Batch1	Dunno	53420.482214087046
 56 | Cell99	Batch1	Mystery celltype	55899.847634598016
 57 | Cell105	Batch1	Exciting	82964.58191610064
 58 | Cell108	Batch1	Mystery celltype	64238.852406326834
 59 | Cell109	Batch1	Dunno	56544.30180616188
 60 | Cell110	Batch1	Dunno	58837.71107179407
 61 | Cell113	Batch1	Exciting	39050.78756002819
 62 | Cell115	Batch1	Mystery celltype	42787.25855694832
 63 | Cell117	Batch1	Dunno	64127.35849047081
 64 | Cell118	Batch1	Dunno	53634.30521743906
 65 | Cell120	Batch1	Dunno	62831.76965151383
 66 | Cell128	Batch1	Weird subtype	49168.365335475275
 67 | Cell134	Batch1	Exciting	48870.960893990785
 68 | Cell135	Batch1	Weird subtype	54873.72119222294
 69 | Cell140	Batch1	Mystery celltype	54859.58984385942
 70 | Cell141	Batch1	Exciting	83376.68228099668
 71 | Cell142	Batch1	Mystery celltype	55971.30698920121
 72 | Cell144	Batch1	Exciting	46879.47787048267
 73 | Cell145	Batch1	Exciting	67878.99955034911
 74 | Cell146	Batch1	Dunno	55562.100480375855
 75 | Cell151	Batch1	Exciting	74276.64780383078
 76 | Cell153	Batch1	Exciting	56111.32170147118
 77 | Cell157	Batch1	Mystery celltype	69035.40856601347
 78 | Cell160	Batch1	Mystery celltype	71638.95396712197
 79 | Cell162	Batch1	Mystery celltype	59310.28826991561
 80 | Cell165	Batch1	Mystery celltype	54678.946737595004
 81 | Cell166	Batch1	Mystery celltype	57245.59746935213
 82 | Cell168	Batch1	Exciting	73350.32963614837
 83 | Cell169	Batch1	Exciting	53979.55283679607
 84 | Cell171	Batch1	Mystery celltype	51236.72745269203
 85 | Cell172	Batch1	Dunno	72477.87092243957
 86 | Cell174	Batch1	Weird subtype	48998.17955999089
 87 | Cell175	Batch1	Mystery celltype	67618.9003205934
 88 | Cell178	Batch1	Mystery celltype	64338.60678140598
 89 | Cell179	Batch1	Dunno	82347.14301485407
 90 | Cell181	Batch1	Mystery celltype	50579.42513230821
 91 | Cell182	Batch1	Dunno	70628.91792338541
 92 | Cell183	Batch1	Dunno	73006.42094973422
 93 | Cell185	Batch1	Exciting	70256.1424370556
 94 | Cell186	Batch1	Exciting	44968.786524416486
 95 | Cell189	Batch1	Dunno	69565.64579270386
 96 | Cell190	Batch1	Exciting	70875.24020658476
 97 | Cell192	Batch1	Dunno	60129.941634371746
 98 | Cell195	Batch1	Exciting	49205.88335127697
 99 | Cell196	Batch1	Mystery celltype	39889.40310194166
100 | Cell198	Batch1	Mystery celltype	63159.400769407235
101 | Cell200	Batch1	Mystery celltype	47032.689748694414
102 | Cell202	Batch1	Exciting	65503.045438960275
103 | Cell205	Batch1	Exciting	52580.34831716724
104 | Cell206	Batch1	Mystery celltype	73953.2545036953
105 | Cell209	Batch1	Exciting	86205.33155590425
106 | Cell211	Batch1	Exciting	62282.41608670635
107 | Cell213	Batch1	Mystery celltype	62199.789214546705
108 | Cell218	Batch1	Mystery celltype	59298.18336733352
109 | Cell222	Batch1	Weird subtype	43655.17044370976
110 | Cell224	Batch1	Mystery celltype	48351.48506715231
111 | Cell227	Batch1	Exciting	75763.7955813687
112 | Cell229	Batch1	Mystery celltype	60655.41686891719
113 | Cell230	Batch1	Dunno	58779.80516234371
114 | Cell231	Batch1	Exciting	49546.80040872587
115 | Cell235	Batch1	Exciting	53141.669888693665
116 | Cell240	Batch1	Mystery celltype	58713.082429138136
117 | Cell241	Batch1	Mystery celltype	76112.73294680292
118 | Cell242	Batch1	Mystery celltype	50450.87852683166
119 | Cell243	Batch1	Mystery celltype	75652.89691471095
120 | Cell244	Batch1	Weird subtype	49912.80449902912
121 | Cell245	Batch1	Exciting	51187.39363283295
122 | Cell248	Batch1	Mystery celltype	58215.93764980519
123 | Cell249	Batch1	Exciting	68906.31864078039
124 | Cell250	Batch1	Mystery celltype	68610.31209030513
125 | Cell252	Batch1	Mystery celltype	42757.503340227704
126 | Cell253	Batch1	Mystery celltype	48064.019523638206
127 | Cell254	Batch1	Exciting	70366.73081233972
128 | Cell258	Batch1	Exciting	87038.13312069164
129 | Cell263	Batch1	Dunno	88283.01861665332
130 | Cell267	Batch1	Dunno	62275.42505737917
131 | Cell268	Batch1	Dunno	55921.35946356151
132 | Cell270	Batch1	Dunno	46562.23963203289
133 | Cell272	Batch1	Dunno	58826.122208443354
134 | Cell278	Batch1	Dunno	56665.444451203424
135 | Cell279	Batch1	Mystery celltype	85926.43198037753
136 | Cell282	Batch1	Exciting	42968.7540495921
137 | Cell284	Batch1	Mystery celltype	77742.70394705818
138 | Cell285	Batch1	Mystery celltype	47166.67198543781
139 | Cell287	Batch1	Dunno	50111.2502275346
140 | Cell288	Batch1	Dunno	82493.90597167228
141 | Cell290	Batch1	Dunno	67881.40035008473
142 | Cell292	Batch1	Mystery celltype	59215.0103723406
143 | Cell297	Batch1	Mystery celltype	55774.158236177835
144 | Cell298	Batch1	Dunno	66644.28042367943
145 | Cell299	Batch1	Dunno	78552.69644137434
146 | Cell300	Batch1	Exciting	71322.50268357595
147 | Cell305	Batch1	Mystery celltype	98096.9655645084
148 | Cell307	Batch1	Exciting	73956.1818316257
149 | Cell309	Batch1	Dunno	53629.99440978181
150 | Cell310	Batch1	Mystery celltype	73586.46368562838
151 | Cell311	Batch1	Dunno	69426.46218642656
152 | Cell313	Batch1	Dunno	73736.89330866672
153 | Cell314	Batch1	Exciting	77510.7842623073
154 | Cell315	Batch1	Mystery celltype	62970.71822940966
155 | Cell316	Batch1	Exciting	87257.9578797986
156 | Cell320	Batch1	Dunno	72457.7227430852
157 | Cell321	Batch1	Exciting	57874.94767184463
158 | Cell323	Batch1	Exciting	62349.33386915656
159 | Cell324	Batch1	Dunno	58651.652601725225
160 | Cell325	Batch1	Exciting	56546.17817691532
161 | Cell326	Batch1	Exciting	54768.976058638
162 | Cell329	Batch1	Mystery celltype	87593.28762181934
163 | Cell330	Batch1	Dunno	51036.400212527355
164 | Cell331	Batch1	Dunno	81041.87901135806
165 | Cell334	Batch1	Mystery celltype	61691.92665258174
166 | Cell336	Batch1	Dunno	58992.66535025075
167 | Cell338	Batch1	Dunno	52063.4996265077
168 | Cell339	Batch1	Mystery celltype	49542.437593599825
169 | Cell341	Batch1	Dunno	60508.588401668974
170 | Cell347	Batch1	Mystery celltype	81185.27119543543
171 | Cell350	Batch1	Dunno	65539.07591285396
172 | Cell352	Batch1	Exciting	54313.644396865864
173 | Cell355	Batch1	Exciting	70403.87619190854
174 | Cell359	Batch1	Mystery celltype	58294.253745828355
175 | Cell361	Batch1	Exciting	64073.97559123764
176 | Cell362	Batch1	Exciting	48932.612529710124
177 | Cell364	Batch1	Exciting	50986.72561130079
178 | Cell368	Batch1	Mystery celltype	48451.51059068924
179 | Cell370	Batch1	Exciting	60143.409875790916
180 | Cell373	Batch1	Exciting	101255.23768731704
181 | Cell374	Batch1	Mystery celltype	68037.66852231452
182 | Cell375	Batch1	Exciting	90439.55606548753
183 | Cell376	Batch1	Exciting	68892.6046870921
184 | Cell379	Batch1	Dunno	51410.75193092789
185 | Cell382	Batch1	Mystery celltype	59950.403336530835
186 | Cell384	Batch1	Exciting	85139.82979429308
187 | Cell387	Batch1	Mystery celltype	52360.47569417261
188 | Cell388	Batch1	Dunno	58256.2485453605
189 | Cell389	Batch1	Exciting	42724.60209830365
190 | Cell390	Batch1	Exciting	57007.70297865485
191 | Cell391	Batch1	Dunno	66072.25864104759
192 | Cell394	Batch1	Mystery celltype	74182.07163435398
193 | Cell395	Batch1	Dunno	47631.59757952375
194 | Cell397	Batch1	Exciting	53165.4524317683
195 | Cell398	Batch1	Dunno	83096.6506509114
196 | Cell399	Batch1	Dunno	59888.83557096457
197 | Cell400	Batch1	Mystery celltype	60745.74881671979
198 | Cell401	Batch1	Dunno	59474.08398927108
199 | Cell402	Batch1	Mystery celltype	67329.54672421503
200 | Cell403	Batch1	Mystery celltype	63082.571728269155
201 | Cell405	Batch1	Dunno	42108.029073011436
202 | Cell409	Batch1	Exciting	56078.862237949266
203 | Cell410	Batch1	Exciting	61301.194330446066
204 | Cell411	Batch1	Exciting	93972.76158956725
205 | Cell415	Batch1	Dunno	68818.54370560612
206 | Cell417	Batch1	Mystery celltype	69596.63839047254
207 | Cell418	Batch1	Dunno	54643.34342350498
208 | Cell421	Batch1	Dunno	49502.5247815001
209 | Cell422	Batch1	Mystery celltype	69352.93633465214
210 | Cell424	Batch1	Mystery celltype	55403.50513551321
211 | Cell425	Batch1	Weird subtype	51357.607136771556
212 | Cell426	Batch1	Mystery celltype	37802.08995695506
213 | Cell428	Batch1	Dunno	47395.02408822196
214 | Cell431	Batch1	Mystery celltype	57760.294055991326
215 | Cell432	Batch1	Exciting	55727.42997261764
216 | Cell433	Batch1	Exciting	49075.48660293653
217 | Cell436	Batch1	Dunno	54957.357170455674
218 | Cell438	Batch1	Dunno	73249.70537624441
219 | Cell439	Batch1	Exciting	83176.58833450306
220 | Cell440	Batch1	Dunno	50865.81695062725
221 | Cell442	Batch1	Exciting	64701.1256678348
222 | Cell443	Batch1	Exciting	59014.02721629023
223 | Cell445	Batch1	Exciting	63773.26720757371
224 | Cell451	Batch1	Weird subtype	53764.10206251263
225 | Cell455	Batch1	Mystery celltype	75449.72182186146
226 | Cell456	Batch1	Dunno	53218.680081457984
227 | Cell457	Batch1	Exciting	85026.22745982851
228 | Cell460	Batch1	Dunno	79062.0684719292
229 | Cell461	Batch1	Dunno	58137.92521728268
230 | Cell462	Batch1	Exciting	57678.32329935796
231 | Cell465	Batch1	Dunno	55685.398676793055
232 | Cell466	Batch1	Mystery celltype	83705.23059210832
233 | Cell470	Batch1	Dunno	49351.161985836705
234 | Cell471	Batch1	Mystery celltype	46525.06377298973
235 | Cell473	Batch1	Dunno	69972.26834490715
236 | Cell474	Batch1	Dunno	75577.30087930195
237 | Cell476	Batch1	Weird subtype	69530.43643290483
238 | Cell477	Batch1	Dunno	48477.8585685877
239 | Cell478	Batch1	Mystery celltype	43432.79476529873
240 | Cell481	Batch1	Mystery celltype	80141.0003101273
241 | Cell483	Batch1	Mystery celltype	56050.46718357846
242 | Cell484	Batch1	Mystery celltype	40099.174277372105
243 | Cell485	Batch1	Dunno	46579.832656207414
244 | Cell487	Batch1	Exciting	45331.87064944953
245 | Cell488	Batch1	Exciting	67426.51336242554
246 | Cell490	Batch1	Exciting	56758.486777894155
247 | Cell492	Batch1	Mystery celltype	45025.979091999725
248 | Cell493	Batch1	Exciting	56702.40308039261
249 | Cell494	Batch1	Mystery celltype	60617.35013346793
250 | Cell496	Batch1	Mystery celltype	59412.44501506707
251 | Cell498	Batch1	Dunno	52872.094812063544
252 | Cell499	Batch1	Mystery celltype	80622.24876492086
253 | Cell501	Batch1	Mystery celltype	61024.59422595926
254 | Cell504	Batch1	Mystery celltype	83454.86757289598
255 | Cell506	Batch1	Mystery celltype	56341.76015419253
256 | Cell507	Batch1	Dunno	44974.63914653621
257 | Cell508	Batch1	Exciting	44407.01226516687
258 | Cell510	Batch1	Mystery celltype	54285.0113116402
259 | Cell514	Batch1	Exciting	63768.025745437524
260 | Cell517	Batch1	Mystery celltype	46668.24402505373
261 | Cell518	Batch1	Dunno	65325.70001000344
262 | Cell520	Batch1	Exciting	62736.84248924784
263 | Cell521	Batch1	Exciting	58380.38448777898
264 | Cell523	Batch1	Exciting	67360.46401291544
265 | Cell524	Batch1	Weird subtype	61144.817793029644
266 | Cell527	Batch1	Exciting	57796.696946227545
267 | Cell528	Batch1	Mystery celltype	64425.15603134294
268 | Cell529	Batch1	Exciting	44857.79201776551
269 | Cell532	Batch1	Exciting	75181.2654638172
270 | Cell536	Batch1	Mystery celltype	45800.23400736708
271 | Cell537	Batch1	Dunno	74263.01956322035
272 | Cell542	Batch1	Exciting	60177.77078332433
273 | Cell543	Batch1	Exciting	43778.15982525974
274 | Cell545	Batch1	Mystery celltype	67253.07115123785
275 | Cell547	Batch1	Exciting	77546.31023451254
276 | Cell549	Batch1	Dunno	55221.56146902432
277 | Cell550	Batch1	Exciting	66551.76621601294
278 | Cell552	Batch1	Mystery celltype	64645.7310966531
279 | Cell554	Batch1	Mystery celltype	52864.76383286277
280 | Cell558	Batch1	Dunno	49503.16551763513
281 | Cell561	Batch1	Mystery celltype	49747.827370732746
282 | Cell562	Batch1	Exciting	77796.29361199462
283 | Cell563	Batch1	Exciting	32517.801451913903
284 | Cell566	Batch1	Mystery celltype	49761.78778759733
285 | Cell570	Batch1	Dunno	67672.15191428228
286 | Cell571	Batch1	Mystery celltype	69175.65444702056
287 | Cell575	Batch1	Exciting	56709.75805172777
288 | Cell579	Batch1	Exciting	54052.30800505988
289 | Cell580	Batch1	Exciting	74598.90128966793
290 | Cell582	Batch1	Dunno	41938.583858660175
291 | Cell583	Batch1	Mystery celltype	72111.31916836997
292 | Cell584	Batch1	Mystery celltype	69429.64281585328
293 | Cell585	Batch1	Mystery celltype	72503.45888976833
294 | Cell586	Batch1	Mystery celltype	80708.89694882413
295 | Cell588	Batch1	Exciting	30840.752800798415
296 | Cell590	Batch1	Exciting	69172.59871626447
297 | Cell591	Batch1	Dunno	82647.26830144694
298 | Cell593	Batch1	Mystery celltype	44869.642073155
299 | Cell594	Batch1	Mystery celltype	67373.43624525052
300 | Cell595	Batch1	Dunno	60551.42027935148
301 | Cell596	Batch1	Dunno	56997.77917260365
302 | Cell599	Batch1	Dunno	58071.664457033876
303 | Cell602	Batch1	Mystery celltype	41389.43818547396
304 | Cell603	Batch1	Mystery celltype	64471.42955457428
305 | Cell605	Batch1	Exciting	59349.332968371295
306 | Cell609	Batch1	Mystery celltype	35342.969251708106
307 | Cell611	Batch1	Exciting	78384.32851069582
308 | Cell612	Batch1	Mystery celltype	59339.222517357804
309 | Cell614	Batch1	Mystery celltype	76706.61761351599
310 | Cell616	Batch1	Dunno	56949.44134777487
311 | Cell618	Batch1	Mystery celltype	54261.099929363096
312 | Cell619	Batch1	Exciting	66047.9927719561
313 | Cell620	Batch1	Exciting	73085.20289662106
314 | Cell621	Batch1	Dunno	58793.49531685555
315 | Cell622	Batch1	Mystery celltype	54943.73433204054
316 | Cell624	Batch1	Dunno	54097.59785961883
317 | Cell626	Batch1	Exciting	53855.07469884097
318 | Cell627	Batch1	Weird subtype	44416.32087036199
319 | Cell628	Batch1	Exciting	57903.75618818855
320 | Cell629	Batch1	Dunno	62644.41547684184
321 | Cell631	Batch1	Exciting	60017.41514845921
322 | Cell632	Batch1	Mystery celltype	59910.910029747654
323 | Cell633	Batch1	Mystery celltype	49732.15075467038
324 | Cell634	Batch1	Exciting	56589.77566652936
325 | Cell638	Batch1	Exciting	62447.44259068599
326 | Cell639	Batch1	Exciting	64133.64450491816
327 | Cell641	Batch1	Exciting	44089.20554144063
328 | Cell643	Batch1	Dunno	73522.18182880066
329 | Cell646	Batch1	Mystery celltype	61802.93889261675
330 | Cell647	Batch1	Mystery celltype	69012.0854740952
331 | Cell650	Batch1	Mystery celltype	74714.79586975073
332 | Cell652	Batch1	Weird subtype	56062.62499714292
333 | Cell659	Batch1	Mystery celltype	69894.64218398865
334 | Cell660	Batch1	Dunno	66592.586718327555
335 | Cell661	Batch1	Dunno	85591.02358008553
336 | Cell662	Batch1	Dunno	44516.24819323479
337 | Cell664	Batch1	Dunno	61307.004898365776
338 | Cell665	Batch1	Exciting	64705.31155970481
339 | Cell668	Batch1	Weird subtype	71455.41809041442
340 | Cell670	Batch1	Exciting	42053.06380443934
341 | Cell673	Batch1	Mystery celltype	55095.33915013706
342 | Cell674	Batch1	Dunno	45486.15169293153
343 | Cell676	Batch1	Exciting	63517.160284769445
344 | Cell677	Batch1	Exciting	73489.07959872915
345 | Cell678	Batch1	Mystery celltype	71814.41470489615
346 | Cell685	Batch1	Dunno	47441.372998551655
347 | Cell686	Batch1	Dunno	49938.59656240519
348 | Cell688	Batch1	Dunno	49738.568822691675
349 | Cell690	Batch1	Mystery celltype	52789.31899776843
350 | Cell693	Batch1	Dunno	52209.58603909123
351 | Cell694	Batch1	Exciting	65974.73334813816
352 | Cell698	Batch1	Mystery celltype	64468.974478183445
353 | Cell699	Batch1	Exciting	64012.37086466773
354 | Cell700	Batch1	Exciting	55742.29025149838
355 | Cell702	Batch1	Exciting	58774.06532284232
356 | Cell703	Batch1	Exciting	52129.503219550716
357 | Cell705	Batch1	Mystery celltype	60585.22086924787
358 | Cell706	Batch1	Exciting	68973.56697136941
359 | Cell707	Batch1	Dunno	70457.08797883583
360 | Cell709	Batch1	Mystery celltype	74559.04863400896
361 | Cell711	Batch1	Exciting	54858.45915215607
362 | Cell712	Batch1	Exciting	50166.67818033528
363 | Cell716	Batch1	Exciting	73099.67341725297
364 | Cell720	Batch1	Exciting	67445.83981177946
365 | Cell723	Batch1	Exciting	53446.52691235099
366 | Cell725	Batch1	Dunno	51813.53140782089
367 | Cell726	Batch1	Exciting	73544.824521771
368 | Cell727	Batch1	Mystery celltype	77108.1982912516
369 | Cell728	Batch1	Mystery celltype	64581.15229087629
370 | Cell729	Batch1	Dunno	61697.683973040716
371 | Cell730	Batch1	Dunno	66889.12767705396
372 | Cell731	Batch1	Mystery celltype	48456.99291942505
373 | Cell732	Batch1	Mystery celltype	40238.11346060553
374 | Cell733	Batch1	Mystery celltype	54992.66697274335
375 | Cell736	Batch1	Exciting	42050.56317254396
376 | Cell737	Batch1	Weird subtype	39975.41657072051
377 | Cell738	Batch1	Mystery celltype	76628.4179457589
378 | Cell739	Batch1	Mystery celltype	66319.98992634375
379 | Cell741	Batch1	Dunno	69603.29382237795
380 | Cell742	Batch1	Weird subtype	56063.76398353835
381 | Cell746	Batch1	Dunno	76474.29292901258
382 | Cell747	Batch1	Exciting	66562.59488683096
383 | Cell748	Batch1	Mystery celltype	110327.72657035479
384 | Cell749	Batch1	Mystery celltype	51073.62800414176
385 | Cell750	Batch1	Exciting	59000.37259510889
386 | Cell751	Batch1	Dunno	59410.0011177861
387 | Cell752	Batch1	Mystery celltype	65667.35453780026
388 | Cell754	Batch1	Exciting	75800.52544906427
389 | Cell755	Batch1	Exciting	43302.72149825256
390 | Cell758	Batch1	Exciting	80637.93996155521
391 | Cell759	Batch1	Dunno	58290.90065131635
392 | Cell764	Batch1	Mystery celltype	56589.4473843959
393 | Cell771	Batch1	Mystery celltype	74817.87862836638
394 | Cell773	Batch1	Dunno	45359.98044989292
395 | Cell774	Batch1	Mystery celltype	60468.7985422407
396 | Cell781	Batch1	Mystery celltype	56729.539384588774
397 | Cell782	Batch1	Exciting	65464.334406033464
398 | Cell785	Batch1	Dunno	84827.91766206028
399 | Cell787	Batch1	Mystery celltype	69995.89537319269
400 | Cell790	Batch1	Dunno	75428.60638060226
401 | Cell791	Batch1	Mystery celltype	51940.56168878204
402 | Cell792	Batch1	Exciting	59360.403835508274
403 | Cell793	Batch1	Exciting	49305.59270006465
404 | Cell795	Batch1	Mystery celltype	67961.80619423035
405 | Cell797	Batch1	Exciting	66215.25871847503
406 | Cell801	Batch1	Mystery celltype	74773.00831045811
407 | Cell802	Batch1	Mystery celltype	69503.48690314239
408 | Cell806	Batch1	Exciting	75483.32950416845
409 | Cell807	Batch1	Exciting	63791.25301962973
410 | Cell812	Batch1	Mystery celltype	70710.59238823927
411 | Cell813	Batch1	Dunno	66386.01610004039
412 | Cell814	Batch1	Mystery celltype	57003.8002986488
413 | Cell815	Batch1	Dunno	49434.401234209894
414 | Cell816	Batch1	Mystery celltype	81019.48316453358
415 | Cell817	Batch1	Dunno	57660.14672159395
416 | Cell820	Batch1	Mystery celltype	81139.42740797685
417 | Cell822	Batch1	Exciting	51758.33356656971
418 | Cell827	Batch1	Exciting	82163.52899908184
419 | Cell828	Batch1	Mystery celltype	44510.20628733321
420 | Cell829	Batch1	Exciting	49830.26339342397
421 | Cell830	Batch1	Weird subtype	79645.9067006031
422 | Cell831	Batch1	Mystery celltype	66678.27291821793
423 | Cell832	Batch1	Dunno	70804.76796286086
424 | Cell833	Batch1	Mystery celltype	88773.24491561459
425 | Cell834	Batch1	Mystery celltype	76984.23136078312
426 | Cell836	Batch1	Exciting	79299.10284013295
427 | Cell838	Batch1	Dunno	50232.62520234292
428 | Cell839	Batch1	Exciting	67677.05755288526
429 | Cell840	Batch1	Mystery celltype	59751.913327324786
430 | Cell842	Batch1	Dunno	69948.02665545353
431 | Cell844	Batch1	Dunno	56626.2838206254
432 | Cell845	Batch1	Exciting	54436.92059598208
433 | Cell846	Batch1	Mystery celltype	72918.98140644566
434 | Cell847	Batch1	Exciting	57252.83703691692
435 | Cell849	Batch1	Dunno	61261.36391609695
436 | Cell850	Batch1	Exciting	61237.867728430676
437 | Cell853	Batch1	Dunno	50055.0538727858
438 | Cell860	Batch1	Exciting	62464.39017823284
439 | Cell861	Batch1	Mystery celltype	43952.10003838061
440 | Cell862	Batch1	Mystery celltype	49429.93452585513
441 | Cell863	Batch1	Exciting	63089.664479451036
442 | Cell864	Batch1	Exciting	50012.16143696786
443 | Cell865	Batch1	Mystery celltype	39810.15418311501
444 | Cell867	Batch1	Mystery celltype	57648.67434413288
445 | Cell869	Batch1	Mystery celltype	52830.146137773714
446 | Cell870	Batch1	Exciting	64698.36491185619
447 | Cell871	Batch1	Mystery celltype	62351.319631986924
448 | Cell872	Batch1	Dunno	66782.4195643286
449 | Cell874	Batch1	Exciting	55992.78928064643
450 | Cell876	Batch1	Exciting	57838.32227614392
451 | Cell877	Batch1	Mystery celltype	84778.63929336573
452 | Cell880	Batch1	Exciting	39790.36493991507
453 | Cell881	Batch1	Exciting	67698.2318412466
454 | Cell883	Batch1	Dunno	75401.70206051634
455 | Cell884	Batch1	Dunno	42776.43027168227
456 | Cell885	Batch1	Exciting	66251.97025539962
457 | Cell886	Batch1	Exciting	59728.89109650733
458 | Cell889	Batch1	Dunno	65862.16280115274
459 | Cell892	Batch1	Dunno	64886.72759060191
460 | Cell893	Batch1	Exciting	62741.3812670592
461 | Cell896	Batch1	Exciting	66700.44446821426
462 | Cell900	Batch1	Mystery celltype	89838.24189988746
463 | Cell902	Batch1	Exciting	49720.91127972059
464 | Cell904	Batch1	Dunno	61698.31414189178
465 | Cell905	Batch1	Mystery celltype	75890.78025812042
466 | Cell906	Batch1	Dunno	49332.671183011495
467 | Cell908	Batch1	Mystery celltype	60911.50170090127
468 | Cell912	Batch1	Dunno	48371.35697778317
469 | Cell913	Batch1	Dunno	43300.84285321089
470 | Cell916	Batch1	Mystery celltype	41294.94179371393
471 | Cell917	Batch1	Exciting	78215.83123964709
472 | Cell921	Batch1	Dunno	68510.89876505475
473 | Cell922	Batch1	Weird subtype	47675.61036223717
474 | Cell923	Batch1	Mystery celltype	50285.32470667179
475 | Cell924	Batch1	Exciting	50810.458266410016
476 | Cell925	Batch1	Dunno	54502.55586418292
477 | Cell926	Batch1	Dunno	66428.82045412977
478 | Cell928	Batch1	Mystery celltype	50810.16890843113
479 | Cell929	Batch1	Dunno	52505.54964244125
480 | Cell930	Batch1	Dunno	48660.587675347415
481 | Cell932	Batch1	Mystery celltype	68166.19623600527
482 | Cell933	Batch1	Mystery celltype	61781.75459550566
483 | Cell935	Batch1	Exciting	61965.20553453278
484 | Cell936	Batch1	Exciting	59552.082663036985
485 | Cell937	Batch1	Weird subtype	44121.63202588367
486 | Cell938	Batch1	Dunno	86090.04391328803
487 | Cell939	Batch1	Weird subtype	51965.66952959045
488 | Cell940	Batch1	Mystery celltype	55573.07455502759
489 | Cell947	Batch1	Exciting	78966.42110849377
490 | Cell948	Batch1	Dunno	57893.58454573163
491 | Cell949	Batch1	Dunno	108399.51473060224
492 | Cell950	Batch1	Exciting	63941.65092858417
493 | Cell953	Batch1	Dunno	52494.101352971746
494 | Cell954	Batch1	Exciting	60801.11137383105
495 | Cell956	Batch1	Mystery celltype	53317.811588200435
496 | Cell957	Batch1	Mystery celltype	66956.63625350388
497 | Cell958	Batch1	Dunno	61169.891441098334
498 | Cell960	Batch1	Mystery celltype	70297.72026231191
499 | Cell961	Batch1	Mystery celltype	73180.26381577374
500 | Cell964	Batch1	Exciting	51129.50161236589
501 | Cell965	Batch1	Dunno	57332.79065619364
502 | Cell966	Batch1	Weird subtype	100920.05054716372
503 | Cell968	Batch1	Mystery celltype	56613.22063144928
504 | Cell971	Batch1	Exciting	48814.80998613285
505 | Cell974	Batch1	Mystery celltype	42222.48434667154
506 | Cell977	Batch1	Weird subtype	56934.87403202956
507 | Cell980	Batch1	Mystery celltype	63527.13835053482
508 | Cell983	Batch1	Mystery celltype	60243.19147127942
509 | Cell985	Batch1	Weird subtype	48036.537458381434
510 | Cell987	Batch1	Weird subtype	76719.92184793137
511 | Cell989	Batch1	Dunno	67576.9088681226
512 | Cell990	Batch1	Mystery celltype	76899.69013529253
513 | Cell991	Batch1	Mystery celltype	54174.509374250156
514 | Cell993	Batch1	Exciting	58906.90399288473
515 | Cell995	Batch1	Dunno	81652.84927522646
516 | Cell1000	Batch1	Mystery celltype	45436.644641530926
517 | 


--------------------------------------------------------------------------------
/inst/extdata/sim_ref_gene_info.tab:
--------------------------------------------------------------------------------
  1 | Gene	BaseGeneMean
  2 | Gene1	15.070166941865256
  3 | Gene2	1.3214264125078057
  4 | Gene3	2.6471940552392117
  5 | Gene4	0.4314126274103263
  6 | Gene5	1.3263294447897767
  7 | Gene6	2.6556251736653076
  8 | Gene7	1.5087404287107207
  9 | Gene8	2.3980012018206116
 10 | Gene9	1.174393945899882
 11 | Gene10	0.16520007149995963
 12 | Gene11	0.6063222399660461
 13 | Gene12	1.1567203151084895
 14 | Gene13	0.5697996434879279
 15 | Gene14	5.910077587592439
 16 | Gene15	1.2584703860405693
 17 | Gene16	3.7312510321195247
 18 | Gene17	3.732506094739015
 19 | Gene18	0.9527541186413107
 20 | Gene19	0.8299824883603495
 21 | Gene20	5.419859401553251
 22 | Gene21	0.12596021483220884
 23 | Gene22	0.2838042849946762
 24 | Gene23	0.19182208419572352
 25 | Gene24	3.1676347186761196
 26 | Gene25	0.5341929920646847
 27 | Gene26	4.869121526664764
 28 | Gene27	2.6688329911337663
 29 | Gene28	0.8680950634977899
 30 | Gene29	1.6676711932219812
 31 | Gene30	0.7773442566244723
 32 | Gene31	0.13776280782261727
 33 | Gene32	1.62467446974602
 34 | Gene33	3.408362066133117
 35 | Gene34	0.1551462319196136
 36 | Gene35	1.3693638794774319
 37 | Gene36	6.539464980895205
 38 | Gene37	0.9702595159605595
 39 | Gene38	4.936128580990508e-4
 40 | Gene39	1.4130366607242324
 41 | Gene40	7.1535748703456825
 42 | Gene41	0.8945476522623658
 43 | Gene42	0.5154778965230534
 44 | Gene43	5.282330848092755
 45 | Gene44	3.53610188530163
 46 | Gene45	5.439591965553024
 47 | Gene46	0.5049124358836893
 48 | Gene47	1.4218930614187446
 49 | Gene48	0.3595905853302735
 50 | Gene49	5.119043134371494
 51 | Gene50	0.2502446473365546
 52 | Gene51	0.006118641284641165
 53 | Gene52	0.012327696958109266
 54 | Gene53	2.270533237587709
 55 | Gene54	0.2277377522940588
 56 | Gene55	0.24543799611272676
 57 | Gene56	1.0140785160835557
 58 | Gene57	3.2974078405543414
 59 | Gene58	3.1500104445427897
 60 | Gene59	0.7890490693371935
 61 | Gene60	6.322817718066329
 62 | Gene61	0.418082717549265
 63 | Gene62	2.034700826504123
 64 | Gene63	2.440291341133348
 65 | Gene64	0.002391602636955899
 66 | Gene65	0.6029098216167582
 67 | Gene66	3.3016014257423305
 68 | Gene67	0.7482611461409928
 69 | Gene68	0.12812044564547306
 70 | Gene69	0.35066175036892044
 71 | Gene70	0.1516936710876119
 72 | Gene71	0.009882651695215254
 73 | Gene72	0.31386242457573105
 74 | Gene73	0.29994401190711634
 75 | Gene74	1.4015772692111752
 76 | Gene75	1.2497823102933978
 77 | Gene76	0.5632321953622131
 78 | Gene77	1.6801835300877066
 79 | Gene78	0.2984888953070142
 80 | Gene79	1.180053288644517
 81 | Gene80	3.496870702720702
 82 | Gene81	3.031213248293015
 83 | Gene82	2.4544117966947323
 84 | Gene83	0.6426568584477319
 85 | Gene84	2.118998746932126
 86 | Gene85	5.998968724366211
 87 | Gene86	0.6370660786673482
 88 | Gene87	4.878501798012513
 89 | Gene88	4.236375736446719
 90 | Gene89	2.284970524462676
 91 | Gene90	0.9048320211852259
 92 | Gene91	3.328864243871868
 93 | Gene92	5.99485292531768
 94 | Gene93	0.5110956753927606
 95 | Gene94	0.667408242586006
 96 | Gene95	0.4217533489766145
 97 | Gene96	13.667521456964762
 98 | Gene97	12.94918139728537
 99 | Gene98	2.7655551566988636
100 | Gene99	0.026034366925850677
101 | Gene100	8.41958370917556
102 | Gene101	0.629002401311236
103 | Gene102	0.3984773552739324
104 | Gene103	6.648385010819624
105 | Gene104	0.52025440038014
106 | Gene105	0.6041210844214472
107 | Gene106	1.2171295700541882
108 | Gene107	2.1725070518596286
109 | Gene108	0.8655766682126647
110 | Gene109	0.20528408017518773
111 | Gene110	0.2871928791336786
112 | Gene111	0.6272521086349571
113 | Gene112	0.20938989155484852
114 | Gene113	1.1982525476674017
115 | Gene114	2.161009771439923
116 | Gene115	0.2108280997361641
117 | Gene116	5.490780408446104
118 | Gene117	7.901244927017148
119 | Gene118	1.6497365669481299
120 | Gene119	4.764385944692947
121 | Gene120	0.23214880715569838
122 | Gene121	1.826709920091376
123 | Gene122	2.222420903422962
124 | Gene123	0.28732215327512833
125 | Gene124	0.3270285059720184
126 | Gene125	0.028345690324059893
127 | Gene126	0.6385121429116922
128 | Gene127	0.45777438372310086
129 | Gene128	3.564006959063369
130 | Gene129	0.22257284891712545
131 | Gene130	3.06866579942385
132 | Gene131	0.010164450289338305
133 | Gene132	2.2159808283796263
134 | Gene133	0.18777347338931308
135 | Gene134	0.15308684636813338
136 | Gene135	5.029457080816111
137 | Gene136	0.19252884382655167
138 | Gene137	4.817288261266247
139 | Gene138	1.3571315484166406
140 | Gene139	0.9551584585099973
141 | Gene140	6.358966828719138
142 | Gene141	0.04758510724490084
143 | Gene142	0.5387642785968534
144 | Gene143	0.40903272802201124
145 | Gene144	0.9046274593654208
146 | Gene145	2.5300499331280255
147 | Gene146	0.007103927806247059
148 | Gene147	0.06617422376829078
149 | Gene148	1.1814835278920117
150 | Gene149	0.00730382422326564
151 | Gene150	4.523440167023822
152 | Gene151	5.0714787992616595
153 | Gene152	0.058381702597344695
154 | Gene153	9.849902477078747e-4
155 | Gene154	2.2714248681125615
156 | Gene155	2.8062016343972034
157 | Gene156	0.005019413599753972
158 | Gene157	1.1414874803343211
159 | Gene158	7.98467666157701
160 | Gene159	4.750130376606671
161 | Gene160	0.41449314009314947
162 | Gene161	0.15629966419234909
163 | Gene162	2.0033323934363065
164 | Gene163	1.9133958051043465
165 | Gene164	1.8693826892142416
166 | Gene165	4.264060971136903
167 | Gene166	0.9041637960610055
168 | Gene167	1.7028366821036134
169 | Gene168	0.11251934733987087
170 | Gene169	1.9744619054281058
171 | Gene170	0.7657225174288028
172 | Gene171	2.9666842911519877
173 | Gene172	2.5384890421122717
174 | Gene173	0.03553526322132183
175 | Gene174	0.17140160127119666
176 | Gene175	0.5375172760119603
177 | Gene176	1.1957388264762312
178 | Gene177	2.233141923375682
179 | Gene178	5.910669733366996
180 | Gene179	19.45900667463009
181 | Gene180	3.4749784094647818
182 | Gene181	1.813842633144611
183 | Gene182	3.875603753958344
184 | Gene183	0.36373824951414807
185 | Gene184	0.15860239608952753
186 | Gene185	1.5602047184804921
187 | Gene186	4.6993917033637205
188 | Gene187	8.789618789385647
189 | Gene188	0.7302572334579468
190 | Gene189	7.049027042557773
191 | Gene190	0.8125960912689993
192 | Gene191	0.43486651275110694
193 | Gene192	0.0038062829718254923
194 | Gene193	0.13273796373459007
195 | Gene194	0.5723144816254031
196 | Gene195	0.46429625203852326
197 | Gene196	4.781896261324305
198 | Gene197	0.0562914344264723
199 | Gene198	3.982487795926734
200 | Gene199	0.05075770320152484
201 | Gene200	1.329913146761545
202 | 


--------------------------------------------------------------------------------
/man/contrast_each_group_to_the_rest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{contrast_each_group_to_the_rest}
 4 | \alias{contrast_each_group_to_the_rest}
 5 | \title{contrast_each_group_to_the_rest}
 6 | \usage{
 7 | contrast_each_group_to_the_rest(dataset_se, dataset_name,
 8 |   groups2test = NA, num_cores = 1, n.group = Inf, n.other = n.group
 9 |   * 5, factors_to_rm = c())
10 | }
11 | \arguments{
12 | \item{dataset_se}{Summarised experiment object containing count data. Also
13 | requires 'ID' and 'group' to be set within the cell information
14 | (see \code{colData()})}
15 | 
16 | \item{dataset_name}{Short, meaningful name for this dataset/experiment.}
17 | 
 18 | \item{groups2test}{An optional character vector specifying specific groups 
19 | to check. By default (set to NA), all groups will be tested.}
20 | 
21 | \item{num_cores}{Number of cores to use to run MAST jobs in parallel.
22 | Ignored if parallel package not available. Set to 1 to avoid
23 | parallelisation. Default = 1}
24 | 
25 | \item{n.group}{How many cells to keep for each group in groupwise 
26 | comparisons. Default = Inf}
27 | 
28 | \item{n.other}{How many cells to keep from everything not in the group.
29 | Default = \bold{n.group} * 5}
30 | 
31 | \item{factors_to_rm}{If there are extra confounding factors that should be
32 | removed from MAST's zlm model (e.g individual, run), specify the column name(s)  
33 | from colData in a vector here. Default=c().}
34 | }
35 | \value{
 36 | A tibble, the within-experiment de_table (differential expression 
37 | table). This is a core summary of the individual experiment/dataset, 
38 | which is used for the cross-dataset comparisons.
39 | 
 40 | The table fields won't necessarily match across datasets, as they include
41 | cell annotations information. Important columns 
42 | (used in downstream analysis) are:
43 | 
44 | \describe{
45 | \item{ID}{Gene identifier}
46 | \item{ci_inner}{ Inner (conservative) 95\% confidence interval of 
47 |     log2 fold-change.}
48 | \item{fdr}{Multiple hypothesis corrected p-value (using BH/FDR method)}
49 | \item{group}{Cells from this group were compared to everything else}
 50 | \item{sig_up}{Significantly differentially expressed (fdr < 0.01), with a 
51 |      positive fold change?}
52 | \item{rank}{Rank position (within group), ranked by CI inner, highest to 
53 |     lowest. }
54 | \item{rescaled_rank}{Rank scaled 0(top most overrepresented genes in group) -
55 |     1(top most not-present genes)}
56 | \item{dataset}{Name of dataset/experiment}
57 | }
58 | }
59 | \description{
60 | Produces a table of within-experiment differential expression results (for
61 | either query or reference experiment), where each group (cluster) is
62 | compared to the rest of the cells.
63 | }
64 | \details{
65 | Note that this function is \emph{slow}, because it runs the differential
66 | expression. It only needs to be run once per dataset though (unless group 
67 | labels change). 
 68 | Having package \pkg{parallel} installed is highly recommended.
69 | 
70 | If this function runs out of memory, consider specifying \emph{n.group} and
71 | \emph{n.other} to run on a subset of cells (taken from each group, 
72 | and proportionally from the rest for each test). 
73 | Alternatively use \emph{subset_cells_by_group} to subset \bold{dataset_se}
 74 | for each group independently.
75 | 
76 | Both reference and query datasets should be processed with this
77 | function.
78 | 
79 | The tables produced by this function (usually named something like
 80 | \emph{de_table.datasetname}) contain a summary of the MAST results.
 81 | For each group, cells in the group are compared with cells not in the group
 82 | (i.e. always a 2-group contrast; other groups' information is ignored). 
 83 | As per MAST recommendations, the proportion of genes seen in each cell is 
84 | included in the model.
85 | }
86 | \examples{
87 | 
88 | de_table.demo_query  <- contrast_each_group_to_the_rest(
89 |      demo_query_se, "a_demo_query")
90 |      
91 | de_table.demo_ref    <- contrast_each_group_to_the_rest(
92 |      demo_ref_se, "a_demo_ref", num_cores=2)
93 | 
94 | 
95 | }
96 | 


--------------------------------------------------------------------------------
/man/contrast_each_group_to_the_rest_for_norm_ma_with_limma.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/contrasting_functions.r
  3 | \name{contrast_each_group_to_the_rest_for_norm_ma_with_limma}
  4 | \alias{contrast_each_group_to_the_rest_for_norm_ma_with_limma}
  5 | \title{contrast_each_group_to_the_rest_for_norm_ma_with_limma}
  6 | \usage{
  7 | 
  8 |   contrast_each_group_to_the_rest_for_norm_ma_with_limma(norm_expression_table,
  9 |   sample_sheet_table, dataset_name, sample_name, group_name = "group",
 10 |   groups2test = NA, extra_factor_name = NA, pval_threshold = 0.01)
 11 | }
 12 | \arguments{
  13 | \item{norm_expression_table}{A logged, normalised expression table. Any 
  14 | filtering (removal of low-expression probes/genes) should be done beforehand.}
 15 | 
 16 | \item{sample_sheet_table}{Tab-separated text file of sample information.
 17 | Columns must have names. Sample/microarray ids should be listed under 
 18 | \bold{sample_name} column. The cell-type (or 'group') of each sample should 
 19 | be listed under a column \bold{group_name}.}
 20 | 
 21 | \item{dataset_name}{Short, meaningful name for this dataset/experiment.}
 22 | 
  23 | \item{sample_name}{Name of the column in \bold{sample_sheet_table} with sample ID}
 24 | 
  25 | \item{group_name}{Name of the column in \bold{sample_sheet_table} with group/cell-type. 
 26 | Default = "group"}
 27 | 
  28 | \item{groups2test}{An optional character vector specifying specific groups 
 29 | to check. By default (set to NA), all groups will be tested.}
 30 | 
 31 | \item{extra_factor_name}{Optionally, an extra cross-group factor (as column 
 32 | name in \bold{sample_sheet_table}) to include in the model used by limma. 
 33 | E.g. An individual/mouse id. Refer limma docs. Default = NA}
 34 | 
 35 | \item{pval_threshold}{For reporting only, a p-value threshold. 
 36 | Default = 0.01}
 37 | }
 38 | \value{
 39 | A tibble, the within-experiment de_table (differential expression
 40 | table)
 41 | }
 42 | \description{
 43 | This function loads and processes microarray data (from purified cell 
 44 | populations) that can be used as a reference.
 45 | }
 46 | \details{
  47 | Sometimes there are microarray studies measuring purified cell populations 
  48 | that would be measured together in a single-cell sequencing experiment. 
 49 | E.g. comparing PBMC scRNA to FACs-sorted blood cell populations. 
 50 | This function 
  51 | will process microarray data with limma and format it for comparisons.
 52 | 
 53 | The microarray data used should consist of purified cell types 
  54 | from \emph{one single study/experiment} (due to batch effects). 
 55 | Ideally just those cell-types expected in the 
 56 | scRNAseq, but the method appears relatively robust to a few extra cell 
 57 | types.
 58 | 
 59 | Note that unlike the single-cell workflow there are no summarisedExperiment 
 60 | objects (they're not really comparable) - this function reads data and 
  61 | generates a table of within-dataset differential expression contrasts in 
 62 | one step. Ie. equivalent to the output of 
 63 | \code{\link{contrast_each_group_to_the_rest}}.
 64 | 
 65 | Also, note that while downstream functions can accept 
 66 | the microarray-derived data as query datasets, 
  67 | it's not really intended and assumptions might not
  68 | hold (Generally, it's known what got loaded onto a microarray!)
 69 | 
 70 | The (otherwise optional) 'limma' package must be installed to use this 
 71 | function.
 72 | }
 73 | \examples{
 74 | 
 75 | contrast_each_group_to_the_rest_for_norm_ma_with_limma(
 76 |     norm_expression_table=demo_microarray_expr, 
 77 |     sample_sheet_table=demo_microarray_sample_sheet,
 78 |     dataset_name="DemoSimMicroarrayRef", 
 79 |     sample_name="cell_sample", group_name="group") 
 80 |     
 81 | \dontrun{ 
 82 | contrast_each_group_to_the_rest_for_norm_ma_with_limma(
 83 |    norm_expression_table, sample_sheet_table=samples_table, 
 84 |    dataset_name="Watkins2009PBMCs", extra_factor_name='description')
 85 | }
 86 | 
 87 | 
 88 | }
 89 | \seealso{
 90 | \code{\link{contrast_each_group_to_the_rest}} is the 
  91 | function that makes comparable output on the scRNAseq data (dataset_se 
 92 | objects).
 93 | 
 94 | \href{https://bioconductor.org/packages/release/bioc/html/limma.html}{Limma} 
 95 | Limma package for differential expression.
 96 | 
 97 | Other Data loading functions: \code{\link{load_dataset_10Xdata}},
 98 |   \code{\link{load_se_from_tables}}
 99 | }
100 | \concept{Data loading functions}
101 | 


--------------------------------------------------------------------------------
/man/contrast_the_group_to_the_rest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{contrast_the_group_to_the_rest}
 4 | \alias{contrast_the_group_to_the_rest}
 5 | \title{contrast_the_group_to_the_rest}
 6 | \usage{
 7 | contrast_the_group_to_the_rest(dataset_se, the_group,
 8 |   pvalue_threshold = 0.01, n.group = Inf, n.other = n.group * 5,
 9 |   factors_to_rm = c())
10 | }
11 | \arguments{
12 | \item{dataset_se}{Dataset summarisedExperiment object.}
13 | 
14 | \item{the_group}{group to test}
15 | 
16 | \item{pvalue_threshold}{Default = 0.01}
17 | 
18 | \item{n.group}{How many cells to keep for each group in groupwise 
19 | comparisons. Default = Inf}
20 | 
21 | \item{n.other}{How many cells to keep from everything not in the group.
22 | Default = \bold{n.group} * 5}
23 | 
24 | \item{factors_to_rm}{If there are extra confounding factors that should be
25 | removed from MAST's zlm model (e.g individual, run), specify the column name(s)  
26 | from colData in a vector here. Default=c().}
27 | }
28 | \value{
29 | A tibble, the within-experiment de_table (differential expression
30 | table), for the group specified.
31 | }
32 | \description{
33 | Internal function to calculate differential expression within an experiment
34 | between a specified group and cells not in that group.
35 | }
36 | \details{
37 | This function should only be called by 
38 | \code{contrast_each_group_to_the_rest}
39 | (which can be passed a single group name if desired). Else 'pofgenes' will
40 | not be defined.
41 | 
42 | MAST is supplied with log2(counts + 1.1), and zlm called with model
43 | '~ TvsR + pofgenes' . The p-values reported are from the hurdle model. FDR 
44 | is with default fdr/BH method.
45 | }
46 | \seealso{
47 | \code{\link{contrast_each_group_to_the_rest}}
48 | }
49 | 


--------------------------------------------------------------------------------
/man/contrast_the_group_to_the_rest_with_limma_for_microarray.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{contrast_the_group_to_the_rest_with_limma_for_microarray}
 4 | \alias{contrast_the_group_to_the_rest_with_limma_for_microarray}
 5 | \title{contrast_the_group_to_the_rest_with_limma_for_microarray}
 6 | \usage{
 7 | 
 8 |   contrast_the_group_to_the_rest_with_limma_for_microarray(norm_expression_table,
 9 |   sample_sheet_table, the_group, sample_name, extra_factor_name = NA,
10 |   pval_threshold = 0.01)
11 | }
12 | \arguments{
13 | \item{norm_expression_table}{A logged, normalised expression table. Any 
14 | filtering (removal of low-expression probes/genes) must be done in advance.}
15 | 
16 | \item{sample_sheet_table}{Tab-separated text file of sample information.
17 | Columns must have names. Sample/microarray ids should be listed under 
18 | \bold{sample_name} column. The cell-type (or 'group') of each sample should 
19 | be listed under a column \bold{group_name}.}
20 | 
21 | \item{the_group}{Which query group is being tested.}
22 | 
23 | \item{sample_name}{Name of the column in \bold{sample_sheet_table} with sample ID}
24 | 
25 | \item{extra_factor_name}{Optionally, an extra cross-group factor (as column 
26 | name in \bold{sample_sheet_table}) to include in the model used by limma. 
27 | E.g. An  individual/mouse id. Refer limma docs. Default = NA}
28 | 
29 | \item{pval_threshold}{For reporting only, a p-value threshold. Default = 0.01}
30 | }
31 | \value{
32 | A tibble, the within-experiment de_table (differential expression
33 | table), for the group specified.
34 | }
35 | \description{
36 | Private function used by 
37 | contrast_each_group_to_the_rest_for_norm_ma_with_limma
38 | }
39 | \seealso{
40 | \code{\link{contrast_each_group_to_the_rest_for_norm_ma_with_limma}}
41 | public calling function
42 | 
43 | \href{https://bioconductor.org/packages/release/bioc/html/limma.html}{Limma}
44 | Limma package for differential expression.
45 | }
46 | 


--------------------------------------------------------------------------------
/man/convert_se_gene_ids.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading_helper_functions.r
 3 | \name{convert_se_gene_ids}
 4 | \alias{convert_se_gene_ids}
 5 | \title{convert_se_gene_ids}
 6 | \usage{
 7 | convert_se_gene_ids(dataset_se, new_id, eval_col, find_max = TRUE)
 8 | }
 9 | \arguments{
10 | \item{dataset_se}{Summarised experiment object containing count data. Also
11 | requires 'ID' and 'group' to be set within the cell information
12 | (see \code{colData()})}
13 | 
14 | \item{new_id}{A column within the feature information (view 
15 | \code{rowData(dataset_se)}) of the \bold{dataset_se}, which will become
16 | the new ID column. Non-uniqueness of this column is handled gracefully! 
17 | Any \emph{NAs} will be dropped.}
18 | 
19 | \item{eval_col}{Which column to use to break ties of duplicate 
20 | \bold{new_id}. Must be a column within the feature information (view 
21 | \code{rowData(dataset_se)}) of the \bold{dataset_se}. Total reads per gene
22 | feature is a good choice.}
23 | 
24 | \item{find_max}{If false, this will choose the minimal \bold{eval_col} 
25 | instead of max. Default = TRUE}
26 | }
27 | \value{
28 | A modified dataset_se - ID will now be \bold{new_id}, and unique. 
29 | It will have fewer genes if old ID to new ID was not a 1:1 mapping. 
30 | The selected genes will be according to the eval col max (or min). 
31 | \emph{should} pick the alphabetical first on ties, but could change.
32 | }
33 | \description{
34 | Change the gene IDs in the supplied dataset_se object to some other id 
35 | already present in the gene info (as seen with \code{rowData()})
36 | }
37 | \examples{
38 | 
39 | # The demo dataset doesn't have other names, so make some up 
40 | # (don't do this)
41 | dataset_se <- demo_ref_se
42 | rowData(dataset_se)$dummyname <- toupper(rowData(dataset_se)$ID)
43 | 
44 | # If not already present, define a column to evaluate, 
45 | # typically total reads/gene.
46 | rowData(dataset_se)$total_count <- rowSums(assay(dataset_se))
47 | 
48 | dataset_se <- convert_se_gene_ids(dataset_se, new_id='dummyname', eval_col='total_count') 
49 | 
50 | }
51 | \seealso{
52 | \href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} 
53 | For general doco on the SummarizedExperiment objects.
54 | 
55 | \code{\link{load_se_from_files}} For reading data from flat 
56 | files (not 10X cellRanger output)
57 | }
58 | 


--------------------------------------------------------------------------------
/man/de_table.demo_query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{de_table.demo_query}
 5 | \alias{de_table.demo_query}
 6 | \title{Demo query de table}
 7 | \format{An object of class \code{data.frame} with 800 rows and 13 columns.}
 8 | \usage{
 9 | de_table.demo_query
10 | }
11 | \value{
12 | An example de_table from 
13 | \link{contrast_each_group_to_the_rest} (for demo query dataset)
14 | }
15 | \description{
16 | Small example dataset that is the output of
17 | \link{contrast_each_group_to_the_rest}. It contains the results
18 | of each group compared to the rest of the sample (ie within sample 
19 | differential expression)
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/de_table.demo_ref.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{de_table.demo_ref}
 5 | \alias{de_table.demo_ref}
 6 | \title{Demo ref de table}
 7 | \format{An object of class \code{data.frame} with 800 rows and 13 columns.}
 8 | \usage{
 9 | de_table.demo_ref
10 | }
11 | \value{
12 | An example de_table from 
13 | \link{contrast_each_group_to_the_rest} (for demo ref dataset)
14 | }
15 | \description{
16 | Small example dataset that is the output of
17 | \link{contrast_each_group_to_the_rest}. It contains the results
18 | of each group compared to the rest of the sample (ie within sample 
19 | differential expression)
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/demo_cell_info_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_cell_info_table}
 5 | \alias{demo_cell_info_table}
 6 | \title{Demo cell info table}
 7 | \format{An object of class \code{data.frame} with 515 rows and 4 columns.}
 8 | \usage{
 9 | demo_cell_info_table
10 | }
11 | \value{
12 | An example cell info table
13 | }
14 | \description{
15 | Sample sheet table listing each cell, its assigned cluster/group, and 
16 | any other information that might be interesting (replicate, individual etc.)
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/demo_counts_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_counts_matrix}
 5 | \alias{demo_counts_matrix}
 6 | \title{Demo count matrix}
 7 | \format{An object of class \code{matrix} with 200 rows and 514 columns.}
 8 | \usage{
 9 | demo_counts_matrix
10 | }
11 | \value{
12 | An example counts matrix.
13 | }
14 | \description{
15 | Counts matrix for a small, demo example dataset. Raw counts of 
16 | reads per gene (row) per cell (column).
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/demo_gene_info_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_gene_info_table}
 5 | \alias{demo_gene_info_table}
 6 | \title{Demo gene info table}
 7 | \format{An object of class \code{data.frame} with 200 rows and 2 columns.}
 8 | \usage{
 9 | demo_gene_info_table
10 | }
11 | \value{
12 | An example table of genes.
13 | }
14 | \description{
15 | Extra table of gene-level information for the demo example dataset.
16 | Can contain anything as long as there's a unique gene id.
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/demo_microarray_expr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_microarray_expr}
 5 | \alias{demo_microarray_expr}
 6 | \title{Demo microarray expression table}
 7 | \format{An object of class \code{matrix} with 200 rows and 20 columns.}
 8 | \usage{
 9 | demo_microarray_expr
10 | }
11 | \value{
12 | An example table of (fake) microarray data.
13 | }
14 | \description{
15 | Microarray-style expression table for the demo example dataset. 
16 | Rows are genes, columns are samples, as per counts matrix.
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/demo_microarray_sample_sheet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_microarray_sample_sheet}
 5 | \alias{demo_microarray_sample_sheet}
 6 | \title{Demo microarray sample sheet table}
 7 | \format{An object of class \code{grouped_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 20 rows and 2 columns.}
 8 | \usage{
 9 | demo_microarray_sample_sheet
10 | }
11 | \value{
12 | An example microarray sample sheet
13 | }
14 | \description{
15 | Microarray sample sheet table for the demo example dataset. 
16 | Contains array identifiers, their group and any other information that could
17 | be useful.
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/demo_query_se.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_query_se}
 5 | \alias{demo_query_se}
 6 | \title{Demo query se (summarizedExperiment)}
 7 | \format{An object of class \code{SummarizedExperiment} with 200 rows and 485 columns.}
 8 | \usage{
 9 | demo_query_se
10 | }
11 | \value{
12 | An example summarised experiment (for demo query dataset)
13 | }
14 | \description{
15 | A summarisedExperiment object loaded from demo info tables, for a query set.
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/demo_ref_se.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.r
 3 | \docType{data}
 4 | \name{demo_ref_se}
 5 | \alias{demo_ref_se}
 6 | \title{Demo reference se (summarizedExperiment)}
 7 | \format{An object of class \code{SummarizedExperiment} with 200 rows and 515 columns.}
 8 | \usage{
 9 | demo_ref_se
10 | }
11 | \value{
12 | An example summarised experiment (for demo reference dataset)
13 | }
14 | \description{
15 | A summarisedExperiment object loaded from demo info tables, for a reference
16 | set.
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/find_within_match_differences.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{find_within_match_differences}
 4 | \alias{find_within_match_differences}
 5 | \title{find_within_match_differences}
 6 | \usage{
 7 | find_within_match_differences(de_table.ref.marked, matches, the_test_group,
 8 |   the_test_dataset, the_ref_dataset, the_pval)
 9 | }
10 | \arguments{
11 | \item{de_table.ref.marked}{see make_ref_similarity_names_for_group}
12 | 
13 | \item{matches}{see make_ref_similarity_names_for_group}
14 | 
15 | \item{the_test_group}{see make_ref_similarity_names_for_group}
16 | 
17 | \item{the_test_dataset}{see make_ref_similarity_names_for_group}
18 | 
19 | \item{the_ref_dataset}{see make_ref_similarity_names_for_group}
20 | 
21 | \item{the_pval}{see make_ref_similarity_names_for_group}
22 | }
23 | \value{
24 | String of within match differences
25 | }
26 | \description{
27 | Internal function to find if there are significant differences between the
28 | distributions, when there are multiple match groups.
29 | }
30 | \details{
31 | For use by make_ref_similarity_names_for_group
32 | }
33 | \seealso{
34 | \code{\link{make_ref_similarity_names_for_group}}
35 | }
36 | 


--------------------------------------------------------------------------------
/man/get_counts_index.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{get_counts_index}
 4 | \alias{get_counts_index}
 5 | \title{get_counts_index}
 6 | \usage{
 7 | get_counts_index(n_assays, assay_names)
 8 | }
 9 | \arguments{
10 | \item{n_assays}{How many assays are there? ie: length(assays(dataset_se))}
11 | 
12 | \item{assay_names}{What are the assays called? ie: names(assays(dataset_se))}
13 | }
14 | \value{
15 | The index of an assay in assays called 'counts', or, if there's just
16 |      the one unnamed assay - happily assume that that is counts.
17 | }
18 | \description{
19 | \code{get_counts_index} is an internal utility function to find out where 
20 | the counts are (if anywhere.). Stops if there's no assay called 'counts',
21 | (unless there is only a single unnamed assay).
22 | }
23 | 


--------------------------------------------------------------------------------
/man/get_inner_or_outer_ci.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{get_inner_or_outer_ci}
 4 | \alias{get_inner_or_outer_ci}
 5 | \title{get_inner_or_outer_ci}
 6 | \usage{
 7 | get_inner_or_outer_ci(fc, ci.hi, ci.lo, get_inner = TRUE)
 8 | }
 9 | \arguments{
10 | \item{fc}{Fold-change}
11 | 
12 | \item{ci.hi}{Higher fold-change CI (numerically)}
13 | 
14 | \item{ci.lo}{smaller fold-change CI (numerically)}
15 | 
16 | \item{get_inner}{If TRUE, get the more conservative inner CI, else the 
17 | bigger outside one.}
18 | }
19 | \value{
20 | inner or outer CI from \bold{ci.hi} or \bold{ci.lo}
21 | }
22 | \description{
23 | Given a fold-change, and high and low confidence interval (where lower <
24 | higher), pick the innermost/most conservative one.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/get_limma_top_table_with_ci.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{get_limma_top_table_with_ci}
 4 | \alias{get_limma_top_table_with_ci}
 5 | \title{get_limma_top_table_with_ci}
 6 | \usage{
 7 | get_limma_top_table_with_ci(fit2, the_coef, ci = 0.95)
 8 | }
 9 | \arguments{
10 | \item{fit2}{The fit2 object after calling eBayes as per standard limma 
11 | workflow. Ie object that topTable gets called on.}
12 | 
13 | \item{the_coef}{Coefficient. As passed to topTable.}
14 | 
15 | \item{ci}{Confidence interval. Number between 0 and 1, default 0.95 (95\%)}
16 | }
17 | \value{
18 | Output of topTable, but with the (95\%) confidence interval reported 
19 | for the logFC.
20 | }
21 | \description{
22 | Internal function that wraps limma topTable output but also adds upper and 
23 | lower confidence intervals to the logFC. Calculated according to 
24 | \url{https://support.bioconductor.org/p/36108/}
25 | }
26 | \seealso{
27 | \code{\link{contrast_the_group_to_the_rest_with_limma_for_microarray}} 
28 | Calling function.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/get_matched_stepped_mwtest_res_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{get_matched_stepped_mwtest_res_table}
 4 | \alias{get_matched_stepped_mwtest_res_table}
 5 | \title{get_matched_stepped_mwtest_res_table}
 6 | \usage{
 7 | get_matched_stepped_mwtest_res_table(mwtest_res_table.this, the_pval)
 8 | }
 9 | \arguments{
10 | \item{mwtest_res_table.this}{Combined output of 
11 | \code{\link{get_ranking_and_test_results}}}
12 | 
13 | \item{the_pval}{Pvalue threshold}
14 | }
15 | \value{
16 | Stepped pvalues string
17 | }
18 | \description{
19 | Internal function to grab a table of the matched group(s).
20 | }
21 | \details{
22 | For use by make_ref_similarity_names_for_group
23 | }
24 | \seealso{
25 | \code{\link{make_ref_similarity_names_for_group}}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/get_ranking_and_test_results.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{get_ranking_and_test_results}
 4 | \alias{get_ranking_and_test_results}
 5 | \title{get_ranking_and_test_results}
 6 | \usage{
 7 | get_ranking_and_test_results(de_table.ref.marked, the_test_group,
 8 |   the_test_dataset, the_ref_dataset, num_steps, pval = 0.01)
 9 | }
10 | \arguments{
11 | \item{de_table.ref.marked}{see 
12 | \link{make_ref_similarity_names_using_marked}}
13 | 
14 | \item{the_test_group}{The group to calculate the stats on.}
15 | 
16 | \item{the_test_dataset}{see 
17 | \link{make_ref_similarity_names_using_marked}}
18 | 
19 | \item{the_ref_dataset}{see 
20 | \link{make_ref_similarity_names_using_marked}}
21 | 
22 | \item{num_steps}{see 
23 | \link{make_ref_similarity_names_using_marked}}
24 | 
25 | \item{pval}{see 
26 | \link{make_ref_similarity_names_using_marked}}
27 | }
28 | \value{
29 | Table of similarity contrast results/assigned names 
30 | e.t.c for a single group. 
31 | Used internally for populating mwtest_res_table tables.
32 | }
33 | \description{
34 | Internal function to get reference group similarity contrasts for an 
35 | individual query qroup.
36 | }
37 | \details{
38 | For use by \bold{make_ref_similarity_names_using_marked}, see that function 
39 | for parameter details.
40 | This function just runs this for a single query group \bold{the_test_group}
41 | }
42 | \seealso{
43 | \code{\link{make_ref_similarity_names_using_marked}} 
44 | which calls this.
45 | }
46 | 


--------------------------------------------------------------------------------
/man/get_rankstat_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{get_rankstat_table}
 4 | \alias{get_rankstat_table}
 5 | \title{get_rankstat_table}
 6 | \usage{
 7 | get_rankstat_table(de_table.ref.marked, the_test_group)
 8 | }
 9 | \arguments{
10 | \item{de_table.ref.marked}{The output of 
11 | \code{\link{get_the_up_genes_for_all_possible_groups}} for the contrast 
12 | of interest.}
13 | 
14 | \item{the_test_group}{Name of query group to test}
15 | }
16 | \value{
17 | A tibble of query group name (test_group), 
18 | number of 'top' genes (n), 
19 | reference dataset group (group) with its ranking (grouprank) and the median 
20 | (rescaled 0..1) ranking of 'top' genes (median_rank).
21 | }
22 | \description{
23 | Summarise the comparison of the specified query group against in the 
24 | comparison in \bold{de_table.ref.marked} - number of 'top' genes and their 
25 | median rank in each of the reference groups, with reference group rankings.
26 | }
27 | \examples{
28 | 
29 | # Make input
30 | # de_table.demo_query <- contrast_each_group_to_the_rest(demo_query_se, "demo_query")
31 | # de_table.demo_ref   <- contrast_each_group_to_the_rest(demo_ref_se,   "demo_ref")
32 | 
33 | de_table.marked.query_vs_ref <- get_the_up_genes_for_all_possible_groups(
34 |     de_table.demo_query, 
35 |     de_table.demo_ref)
36 | 
37 | get_rankstat_table(de_table.marked.query_vs_ref, "Group3")
38 | 
39 | }
40 | \seealso{
41 | \code{\link{get_the_up_genes_for_all_possible_groups}} To 
42 | prepare the \bold{de_table.ref.marked} input.
43 | }
44 | 


--------------------------------------------------------------------------------
/man/get_reciprocal_matches.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{get_reciprocal_matches}
 4 | \alias{get_reciprocal_matches}
 5 | \title{get_reciprocal_matches}
 6 | \usage{
 7 | get_reciprocal_matches(mwtest_res_table.recip, de_table.recip.marked,
 8 |   the_pval)
 9 | }
10 | \arguments{
11 | \item{mwtest_res_table.recip}{Combined output of 
12 | \code{\link{get_ranking_and_test_results}} for reciprocal test - 
13 | ref vs query.}
14 | 
15 | \item{de_table.recip.marked}{Reciprocal ref vs query de_table.ref.marked}
16 | 
17 | \item{the_pval}{See make_ref_similarity_names_using_marked}
18 | }
19 | \value{
20 | List of table of reciprocal matches tested from reference to query.
21 | }
22 | \description{
23 | Internal function to run a binomial test of 
24 | median test rank > 0.5 (random).
25 | }
26 | \details{
27 | For use by make_ref_similarity_names_using_marked
28 | }
29 | \seealso{
30 | \code{\link{make_ref_similarity_names_using_marked}}
31 | }
32 | 


--------------------------------------------------------------------------------
/man/get_stepped_pvals_str.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{get_stepped_pvals_str}
 4 | \alias{get_stepped_pvals_str}
 5 | \title{get_stepped_pvals_str}
 6 | \usage{
 7 | get_stepped_pvals_str(mwtest_res_table.this)
 8 | }
 9 | \arguments{
10 | \item{mwtest_res_table.this}{Combined output of 
11 | \code{\link{get_ranking_and_test_results}}}
12 | }
13 | \value{
14 | Stepped pvalues string
15 | }
16 | \description{
17 | Internal function to construct the string of stepped pvalues reported by 
18 | make_ref_similarity_names_using_marked
19 | }
20 | \details{
21 | For use by make_ref_similarity_names_for_group
22 | }
23 | \seealso{
24 | \code{\link{make_ref_similarity_names_for_group}}
25 | }
26 | 


--------------------------------------------------------------------------------
/man/get_the_up_genes_for_all_possible_groups.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{get_the_up_genes_for_all_possible_groups}
 4 | \alias{get_the_up_genes_for_all_possible_groups}
 5 | \title{get_the_up_genes_for_all_possible_groups}
 6 | \usage{
 7 | get_the_up_genes_for_all_possible_groups(de_table.test, de_table.ref,
 8 |   rankmetric = "TOP100_LOWER_CI_GTE1", n = 100)
 9 | }
10 | \arguments{
11 | \item{de_table.test}{A differential expression table of the query 
12 | experiment, as generated from 
13 | \code{\link{contrast_each_group_to_the_rest}}}
14 | 
15 | \item{de_table.ref}{A differential expression table of the reference 
16 | dataset, as generated from 
17 | \code{\link{contrast_each_group_to_the_rest}}}
18 | 
19 | \item{rankmetric}{Specify ranking method used to pick the
20 | 'top' genes. The default 'TOP100_LOWER_CI_GTE1' picks genes from the top 100
21 | overrepresented genes (ranked by inner 95\% confidence interval) - appears to 
22 | work best for distinct cell types (e.g. tissue sample.). 'TOP100_SIG' again 
23 | picks from the top 100 ranked genes, but requires only statistical 
24 | significance, 95\% CI threshold - may perform better on more similar cell 
25 | clusters (e.g. PBMCs).}
26 | 
27 | \item{n}{For tweaking maximum returned genes from different ranking methods.
28 | Will change the p-values! Suggest leaving as default unless you're keen.}
29 | }
30 | \value{
31 | \emph{de_table.marked} This will almost be a subset of 
32 | \bold{de_table.ref}, 
33 | with an added column \emph{test_group} set to the query groups, and 
34 | \emph{test_dataset} set to \bold{test_dataset_name}.
35 | 
36 | If nothing passes the rankmetric criteria, a warning is thrown and NA is 
37 | returned. (This can be a genuine inability to pick out the 
38 | representative 'up' genes, or due to some problem in the analysis)
39 | }
40 | \description{
41 | For the most overrepresented genes of each group in the test 
42 | dataset, get their rankings in all the groups of the reference dataset.
43 | }
44 | \details{
45 | This is effectively a subset of the reference data, 'marked' with the 'top'
46 | genes that represent the groups in the query data. The 
47 | distribution of the \emph{rescaled ranks} of these marked genes in each 
48 | reference data group indicate how similar they are to the query group.
49 | 
50 | This function is simply a convenient wrapper for 
51 | \code{\link{get_the_up_genes_for_group}} that merges output for 
52 | each group in the query into one table.
53 | }
54 | \examples{
55 | de_table.marked.query_vs_ref <- get_the_up_genes_for_all_possible_groups(
56 |    de_table.test=de_table.demo_query ,
57 |    de_table.ref=de_table.demo_ref )
58 | 
59 | }
60 | \seealso{
61 | \code{\link{get_the_up_genes_for_group}} Function for 
62 | testing a single group.
63 | }
64 | 


--------------------------------------------------------------------------------
/man/get_the_up_genes_for_group.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{get_the_up_genes_for_group}
 4 | \alias{get_the_up_genes_for_group}
 5 | \title{get_the_up_genes_for_group}
 6 | \usage{
 7 | get_the_up_genes_for_group(the_group, de_table.test, de_table.ref,
 8 |   rankmetric = "TOP100_LOWER_CI_GTE1", n = 100)
 9 | }
10 | \arguments{
11 | \item{the_group}{The group (from the test/query experiment) to examine.}
12 | 
13 | \item{de_table.test}{A differential expression table of the query 
14 | experiment, as generated from 
15 | \code{\link{contrast_each_group_to_the_rest}}}
16 | 
17 | \item{de_table.ref}{A differential expression table of the reference 
18 | dataset, as generated from 
19 | \code{\link{contrast_each_group_to_the_rest}}}
20 | 
21 | \item{rankmetric}{Specify ranking method used to pick the
22 | 'top' genes. The default 'TOP100_LOWER_CI_GTE1' picks genes from the top 100
23 | overrepresented genes (ranked by inner 95\% confidence interval) - appears to 
24 | work best for distinct cell types (e.g. tissue sample.). 'TOP100_SIG' again 
25 | picks from the top 100 ranked genes, but requires only statistical 
26 | significance, 95\% CI threshold - may perform better on more similar cell 
27 | clusters (e.g. PBMCs).}
28 | 
29 | \item{n}{For tweaking maximum returned genes from different ranking methods.
30 | Will change the p-values! Suggest leaving as default unless you're keen.}
31 | }
32 | \value{
33 | \emph{de_table.marked} This will be a subset of 
34 | \bold{de_table.ref}, with an added column \emph{test_group} set to 
35 | \bold{the_group}. If nothing passes the rankmetric criteria, NA.
36 | }
37 | \description{
38 | For the most overrepresented genes of the specified group in the test 
39 | dataset, get their rankings in all the groups of the reference dataset.
40 | }
41 | \details{
42 | This is effectively a subset of the reference data, 'marked' with the 'top'
43 | genes that represent the group of interest in the query data. The 
44 | distribution of the \emph{rescaled ranks} of these marked genes in each 
45 | reference data group indicate how similar they are to the query group.
46 | }
47 | \examples{
48 | de_table.marked.Group3vsRef <- get_the_up_genes_for_group(
49 |                                   the_group="Group3",
50 |                                   de_table.test=de_table.demo_query, 
51 |                                   de_table.ref=de_table.demo_ref)
52 | 
53 | }
54 | \seealso{
55 | \code{\link{contrast_each_group_to_the_rest}} For preparing the 
56 | de_table.* tables.
57 | \code{\link{get_the_up_genes_for_all_possible_groups}} For running 
58 | all query groups at once.
59 | }
60 | 


--------------------------------------------------------------------------------
/man/get_vs_random_pval.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{get_vs_random_pval}
 4 | \alias{get_vs_random_pval}
 5 | \title{get_vs_random_pval}
 6 | \usage{
 7 | get_vs_random_pval(de_table.ref.marked, the_group, the_test_group)
 8 | }
 9 | \arguments{
10 | \item{de_table.ref.marked}{see make_ref_similarity_names_for_group}
11 | 
12 | \item{the_group}{Reference group name}
13 | 
14 | \item{the_test_group}{Test group name
15 | }
16 | }
17 | \value{
18 | Pvalue result of a binomial test of each 'top gene' being greater 
19 | than the theoretical random median rank of 0.5 (halfway).
20 | }
21 | \description{
22 | Internal function to run a binomial test of 
23 | median test rank > 0.5 (random).
24 | }
25 | \details{
26 | For use by make_ref_similarity_names_for_group
27 | }
28 | \seealso{
29 | \code{\link{make_ref_similarity_names_for_group}}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/load_dataset_10Xdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading_helper_functions.r
 3 | \name{load_dataset_10Xdata}
 4 | \alias{load_dataset_10Xdata}
 5 | \title{load_dataset_10Xdata}
 6 | \usage{
 7 | load_dataset_10Xdata(dataset_path, dataset_genome, clustering_set,
 8 |   gene_id_cols_10X = c("ensembl_ID", "GeneSymbol"),
 9 |   id_to_use = gene_id_cols_10X[1])
10 | }
11 | \arguments{
12 | \item{dataset_path}{Path to the directory of 10X data, as generated by the 
13 | cellRanger pipeline (versions 2.1.0 and 2.0.1). The directory should have 
14 | subdirectories \emph{analysis}, \emph{filtered_gene_bc_matrices} and
15 | \emph{raw_gene_bc_matrices} (only the first 2 are read).}
16 | 
17 | \item{dataset_genome}{The genome that the reads were aligned against, 
18 | e.g. GRCh38.  Check for this as a directory name under the 
19 | \emph{filtered_gene_bc_matrices} subdirectory if unsure.}
20 | 
21 | \item{clustering_set}{The 10X cellRanger pipeline produces several 
22 | different  cluster definitions per dataset. Specify which one to use e.g. 
23 | kmeans_10_clusters Find them as directory names under 
24 | \emph{analysis/clustering/}}
25 | 
26 | \item{gene_id_cols_10X}{Vector of the names of the columns in the gene 
27 | description file (\emph{filtered_gene_bc_matrices/GRCh38/genes.tsv}). The 
28 | first element of this will become the ID. 
29 | Default = c("ensembl_ID","GeneSymbol")}
30 | 
31 | \item{id_to_use}{Column from \bold{gene_id_cols_10X} that defines the gene 
32 | identifier to use as 'ID' in the returned SummarizedExperiment object.
33 | Many-to-one relationships between the assumed unique first element of 
34 | \bold{gene_id_cols_10X} and \bold{id_to_use} will be handled gracefully by 
35 | \code{\link{convert_se_gene_ids}}. 
36 | Defaults to first element of \bold{gene_id_cols_10X}}
37 | }
38 | \value{
39 | A SummarizedExperiment object containing the count data, cell info
40 | and gene info.
41 | }
42 | \description{
43 | Convenience function to create a SummarizedExperiment object (dataset_se) 
44 | from the output of a 10X cellRanger pipeline run.
45 | }
46 | \details{
47 | This function makes a SummarizedExperiment object in a form that
48 | should work for celaref functions. Specifically, that means it will have an
49 | 'ID' field for genes (view with \code{rowData(dataset_se)}), and both
50 | 'cell_sample' and 'group' fields for cells (view with
51 | \code{colData(dataset_se)}). See parameters for detail.
52 | Additionally, the counts will be an integer matrix (not a
53 | sparse matrix), and the \emph{group} field (but not \emph{cell_sample}
54 | or \emph{ID}) will be a factor.
55 | 
56 | The clustering information can be read from whichever cluster is specified,
57 | usually there will be several choices.
58 | 
59 | This function is designed to work with output of version 2.0.1 of the 
60 | cellRanger pipeline, may not work with others (will not work for 1.x).
61 | }
62 | \examples{
63 | example_10X_dir <- system.file("extdata", "sim_cr_dataset", package = "celaref")
64 | dataset_se <- load_dataset_10Xdata(example_10X_dir, dataset_genome="GRCh38", 
65 |     clustering_set="kmeans_4_clusters", gene_id_cols_10X=c("gene")) 
66 | 
67 | \dontrun{
68 | dataset_se <- load_dataset_10Xdata('~/path/to/data/10X_pbmc4k', 
69 |     dataset_genome="GRCh38", 
70 |     clustering_set="kmeans_7_clusters") 
71 | } 
72 | 
73 | }
74 | \seealso{
75 | \href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} 
76 | For general doco on the SummarizedExperiment objects.
77 | 
78 | \code{\link{convert_se_gene_ids}} describes method for 
79 | converting IDs.
80 | 
81 | Other Data loading functions: \code{\link{contrast_each_group_to_the_rest_for_norm_ma_with_limma}},
82 |   \code{\link{load_se_from_tables}}
83 | }
84 | \concept{Data loading functions}
85 | 


--------------------------------------------------------------------------------
/man/load_se_from_tables.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/loading_helper_functions.r
  3 | \name{load_se_from_tables}
  4 | \alias{load_se_from_tables}
  5 | \alias{load_se_from_files}
  6 | \title{load_se_from_tables}
  7 | \usage{
  8 | load_se_from_tables(counts_matrix, cell_info_table, gene_info_table = NA,
  9 |   group_col_name = "group", cell_col_name = NA)
 10 | 
 11 | load_se_from_files(counts_file, cell_info_file, gene_info_file = NA,
 12 |   group_col_name = "group", cell_col_name = NA)
 13 | }
 14 | \arguments{
 15 | \item{counts_matrix}{A tab-separated matrix of read counts for each gene
 16 | (row) and each cell (column). Columns and rows should be named.}
 17 | 
 18 | \item{cell_info_table}{Table of cell information. 
 19 | If there is a column labelled
 20 | \emph{cell_sample}, that will be used as the unique cell identifiers. 
 21 | If not, the first column is assumed to be cell identifiers, and will be 
  22 | copied to a new field labelled \emph{cell_sample}.
 23 | Similarly - the clusters of these cells should be listed in one column -
 24 | which can be called 'group' (case-sensitive) or specified with
 25 | \bold{group_col_name}. \emph{Minimal data format: <cell_sample> <group>}}
 26 | 
 27 | \item{gene_info_table}{Optional table of gene information. If there is a
 28 | column labelled
 29 | \emph{ID}, that will be used as the gene identifiers (they must be unique!).
 30 | If not, the first column is assumed to be a gene identifier, and will be 
 31 | copied to a
  32 | new field labelled \emph{ID}. Must match all rownames in 
  33 | \bold{counts_matrix}.
  34 | If omitted, ID will be generated from the rownames of counts_matrix. 
 35 | Default=NA}
 36 | 
 37 | \item{group_col_name}{Name of the column in \bold{cell_info_table} 
 38 | containing
 39 | the cluster/group that each cell belongs to. Case-sensitive. Default='group'}
 40 | 
 41 | \item{cell_col_name}{Name of the column in \bold{cell_info_table} containing
 42 | a cell id. Ignored if \emph{cell_sample} column is already present. 
 43 | If omitted, (and no \emph{cell_sample} column) will use first column.
 44 | Case-sensitive. Default=NA}
 45 | 
 46 | \item{counts_file}{A tab-separated file of a matrix of read counts. As per 
 47 | \bold{counts_matrix}. First column should be gene ID, and top row cell ids.}
 48 | 
 49 | \item{cell_info_file}{Tab-separated text file of cell information, as per
 50 | \bold{cell_info_table}. Columns must have names.}
 51 | 
 52 | \item{gene_info_file}{Optional tab-separated text file of gene information, 
  53 | as per \bold{gene_info_table}. Columns must have names. Default=NA}
 54 | }
 55 | \value{
  56 | A SummarizedExperiment object containing the count data, cell info
 57 | and gene info.
 58 | }
 59 | \description{
 60 | Create a SummarizedExperiment object (dataset_se) from a count matrix, cell 
 61 | information and optionally gene information.
 62 | 
 63 | \code{load_se_from_files} is a wrapper for \code{load_se_from_tables} that
 64 | will read in tables from specified files.
 65 | }
 66 | \details{
 67 | This function makes a SummarizedExperiment object in a form that
 68 | should work for celaref functions. Specifically, that means it will have an
  69 | 'ID' field for genes (view with \code{rowData(dataset_se)}), and both
  70 | 'cell_sample' and 'group' fields for cells (view with
  71 | \code{colData(dataset_se)}). See parameters for detail.
  72 | Additionally, the counts will be an integer matrix (not a
  73 | sparse matrix), and the \emph{group} field (but not \emph{cell_sample}
 74 | or \emph{ID}) will be a factor.
 75 | 
 76 | Note that data will be subsetted to cells present in both the counts matrix
 77 | and cell info, this is handy for loading subsets of cells.
 78 | However, if \bold{gene_info_file} is defined, all genes must match exactly.
 79 | 
 80 | The \code{load_se_from_files} form of this function will run the same 
 81 | checks, but will read everything from files in one go. The 
 82 | \code{load_se_from_tables}
 83 | form is perhaps more useful when the annotations need to be modified (e.g. 
 84 | programmatically adding a different gene identifier, renaming groups, 
 85 | removing unwanted samples).
 86 | 
 87 | Note that the SummarizedExperiment object can also be created without using
 88 | these functions, it just needs the \emph{cell_sample}, \emph{ID} and
  89 | \emph{group} fields as described above, since sometimes it might be easier
 90 | to add these to an existing \emph{SummarizedExperiment} from upstream
 91 | analyses.
 92 | }
 93 | \section{Functions}{
 94 | \itemize{
 95 | \item \code{load_se_from_files}: To read from files
 96 | }}
 97 | 
 98 | \examples{
 99 | 
100 | # From data frames (or a matrix for counts) :
101 | demo_se <- load_se_from_tables(counts_matrix=demo_counts_matrix, 
102 |                                cell_info_table=demo_cell_info_table)
103 | demo_se <- load_se_from_tables(counts_matrix=demo_counts_matrix, 
104 |                                cell_info_table=demo_cell_info_table, 
105 |                                gene_info_table=demo_gene_info_table)
106 | 
107 | # Or from data files : 
108 | counts_filepath    <- system.file("extdata", "sim_query_counts.tab",    package = "celaref")
109 | cell_info_filepath <- system.file("extdata", "sim_query_cell_info.tab", package = "celaref")
110 | gene_info_filepath <- system.file("extdata", "sim_query_gene_info.tab", package = "celaref")
111 | 
112 | demo_se <- load_se_from_files(counts_file=counts_filepath, cell_info_file=cell_info_filepath)
113 | demo_se <- load_se_from_files(counts_file=counts_filepath, cell_info_file=cell_info_filepath, 
114 |                               gene_info_file=gene_info_filepath )
115 | 
116 | }
117 | \seealso{
118 | \href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} For general doco on the SummarizedExperiment objects.
119 | 
120 | Other Data loading functions: \code{\link{contrast_each_group_to_the_rest_for_norm_ma_with_limma}},
121 |   \code{\link{load_dataset_10Xdata}}
122 | }
123 | \concept{Data loading functions}
124 | \concept{Data-loading functions}
125 | 


--------------------------------------------------------------------------------
/man/make_ranking_violin_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{make_ranking_violin_plot}
 4 | \alias{make_ranking_violin_plot}
 5 | \title{make_ranking_violin_plot}
 6 | \usage{
 7 | make_ranking_violin_plot(de_table.marked = NA, de_table.test = NA,
 8 |   de_table.ref = NA, log10trans = FALSE, ...)
 9 | }
10 | \arguments{
11 | \item{de_table.marked}{The output of 
12 | \code{\link{get_the_up_genes_for_all_possible_groups}} 
13 | for the contrast of interest.}
14 | 
15 | \item{de_table.test}{A differential expression table of the 
16 | query experiment,
17 | as generated from \code{\link{contrast_each_group_to_the_rest}}}
18 | 
19 | \item{de_table.ref}{A differential expression table of the 
20 | reference dataset,
21 | as generated from \code{\link{contrast_each_group_to_the_rest}}}
22 | 
23 | \item{log10trans}{Plot on a log scale? Useful for distinguishing multiple 
24 | similar, yet distinct cell types that bunch at top of plot. Default=FALSE.}
25 | 
26 | \item{...}{Further options to be passed to 
27 | \code{\link{get_the_up_genes_for_all_possible_groups}}, 
28 | e.g. rankmetric}
29 | }
30 | \value{
31 | A ggplot object.
32 | }
33 | \description{
34 | Plot a panel of violin plots showing the distribution of the 'top' genes of 
35 | each query group, across the reference dataset.
36 | }
37 | \details{
38 | In the plot output, each panel corresponds to a different group/cluster in 
39 | the query experiment. The x-axis has the groups in the reference dataset. 
40 | The y-axis is the rescaled rank of each 'top' gene from the query group, 
41 | within each reference group.
42 | 
43 | Only the 'top' genes for each query group are plotted, forming the violin
44 | plots - each individual gene is shown as a tickmark. Some groups have few 
45 | top genes, and so their uncertainty can be seen on this plot.
46 | 
47 | The thick black lines represent the median gene rescaled ranking for each 
48 | query group / reference group combination. Having this fall above the dotted 
49 | median threshold marker is a quick indication of potential similarity. 
50 | A complete lack of similarity would have a median rank around 0.5. Median 
51 | rankings much less than 0.5 are common though (an 'anti-cell-groupA' 
52 | signature), because genes overrepresented in one group in an experiment, 
53 | are likely to be relatively 'underrepresented' in the other groups. 
54 | Taken to an  
55 | extreme, if there are only two reference groups, they'll be complete 
56 | opposites.
57 | 
58 | Input can be either the precomputed \emph{de_table.marked} object for the 
59 | comparison, OR both \emph{de_table.test} and \emph{de_table.ref} 
60 | differential expression results to compare from 
61 | \code{\link{contrast_each_group_to_the_rest}}
62 | }
63 | \examples{
64 | 
65 | # Make input
66 | # de_table.demo_query <- contrast_each_group_to_the_rest(demo_query_se, "demo_query")
67 | # de_table.demo_ref   <- contrast_each_group_to_the_rest(demo_ref_se,   "demo_ref")
68 |    
69 | # This:                                                  
70 | make_ranking_violin_plot(de_table.test=de_table.demo_query, 
71 |                          de_table.ref=de_table.demo_ref ) 
72 |                         
73 | # Is equivalent to this:
74 | de_table.marked.query_vs_ref <- 
75 |      get_the_up_genes_for_all_possible_groups( de_table.test=de_table.demo_query, 
76 |                                                de_table.ref=de_table.demo_ref)
77 | make_ranking_violin_plot(de_table.marked.query_vs_ref)
78 | 
79 | 
80 | }
81 | \seealso{
82 | \code{\link{get_the_up_genes_for_all_possible_groups}} To make 
83 | the input data.
84 | }
85 | 


--------------------------------------------------------------------------------
/man/make_ref_similarity_names.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/group_labelling_functions.r
  3 | \name{make_ref_similarity_names}
  4 | \alias{make_ref_similarity_names}
  5 | \alias{make_ref_similarity_names_using_marked}
  6 | \title{make_ref_similarity_names}
  7 | \usage{
  8 | make_ref_similarity_names(de_table.test, de_table.ref, pval = 0.01,
  9 |   num_steps = 5, rankmetric = "TOP100_LOWER_CI_GTE1", n = 100)
 10 | 
 11 | make_ref_similarity_names_using_marked(de_table.ref.marked,
 12 |   de_table.recip.marked = NA, the_test_dataset = NA,
 13 |   the_ref_dataset = NA, pval = 0.01, num_steps = 5)
 14 | }
 15 | \arguments{
 16 | \item{de_table.test}{A differential expression table of the query 
 17 | experiment, as generated from 
 18 | \code{\link{contrast_each_group_to_the_rest}}}
 19 | 
 20 | \item{de_table.ref}{A differential expression table of the reference 
 21 | dataset, as generated from 
 22 | \code{\link{contrast_each_group_to_the_rest}}}
 23 | 
 24 | \item{pval}{Differences between the rescaled ranking distribution of 'top'
 25 | genes on different reference groups are tested with a Mann-Whitney U test. 
 26 | If they are \emph{significantly different}, 
 27 | only the top group(s) are reported. 
 28 | It isn't a simple cutoff threshold as it can change the number of similar 
 29 | groups reported.  ie. A more stringent \bold{pval} is more likely to decide 
 30 | that groups are similar -
 31 | which would result in multiple group reporting, or no similarity at all.
 32 | Unlikely that this parameter will ever need to change. Default = 0.01.}
 33 | 
 34 | \item{num_steps}{After ranking reference groups according to median 'top' 
 35 | gene ranking, how many adjacent pairs to test for differences. 
 36 | Set to 1 to only compare each group to the next, or NA to perform an 
 37 | all-vs-all comparison. 
 38 | Setting too low may mean it is possible to miss groups with some similarity 
 39 | to the reported matches (\emph{similar_non_match} column).
 40 | Too high (or NA) with a large number of reference groups could be slow. 
 41 | Default = 5.}
 42 | 
 43 | \item{rankmetric}{Specify ranking method used to pick the
 44 | 'top' genes. The default 'TOP100_LOWER_CI_GTE1' picks genes from the top 100
 45 | overrepresented genes (ranked by inner 95% confidence interval) - appears to 
 46 | work best for distinct cell types (e.g. tissue sample.). 'TOP100_SIG' again 
 47 | picks from the top 100 ranked genes, but requires only statistical 
 48 | significance, 95% CI threshold - may perform better on more similar cell 
 49 | clusters (e.g. PBMCs).}
 50 | 
 51 | \item{n}{For tweaking maximum returned genes from different ranking methods.}
 52 | 
 53 | \item{de_table.ref.marked}{The output of 
 54 | \code{\link{get_the_up_genes_for_all_possible_groups}} for the contrast 
 55 | of interest.}
 56 | 
 57 | \item{de_table.recip.marked}{Optional. The (reciprocal) output of 
 58 | \code{\link{get_the_up_genes_for_all_possible_groups}} with the test and 
 59 | reference datasets swapped. 
 60 | If omitted a reciprocal test will not be done. Default = NA.}
 61 | 
 62 | \item{the_test_dataset}{Optional. A short meaningful name for the 
 63 | experiment. 
 64 | (Should match \emph{test_dataset} column in \bold{de_table.marked}). 
 65 | Only needed in a table of more than one dataset. Default = NA.}
 66 | 
 67 | \item{the_ref_dataset}{Optional. A short meaningful name for the 
 68 | experiment. 
 69 | (Should match \emph{dataset} column in \bold{de_table.marked}). 
 70 | Only needed in a table of more than one dataset. Default = NA.}
 71 | }
 72 | \value{
 73 | A table of automagically-generated labels for each query group, 
 74 | given their similarity to reference groups. 
 75 | 
 76 | The columns in this table:
 77 | \itemize{
 78 |   \item \bold{test_group} : Query group e.g. "c1"
 79 |   \item \bold{shortlab} : The cluster label described above e.g. 
 80 |   "c1:macrophage"
 81 |   \item \bold{pval} : If there is a similarity flagged, this is the P-value 
 82 |   from a Mann-Whitney U test from the last 'matched' group to the adjacent  
 83 |   'non-matched' group. Ie. If only one label in shortlab, this will be the 
 84 |   first of the stepped_pvals, if there are 2, it will be the second. 
 85 |   If there is 'no_similarity' this will be NA 
 86 |   (Because there is no confidence in what 
 87 |   is the most appropriate of the all non-significant stepped pvalues.).
 88 |   \item \bold{stepped_pvals} : P-values from Mann-Whitney U tests across 
 89 |   adjacent pairs of reference groups ordered from most to least similar 
 90 |   (ascending median rank).
 91 |   ie. 1st-2nd most similar first, 2nd-3rd, 3rd-4th e.t.c. The last value 
 92 |   will always be NA (no more reference group).
 93 |   e.g. 
 94 |   refA:8.44e-10,refB:2.37e-06,refC:0.000818,refD:0.435,refE:0.245,refF:NA
 95 |   \item \bold{pval_to_random} : P-value of test of median rank (of last 
 96 |   matched reference group) < random, from binomial test on top gene 
 97 |   ranks (being < 0.5). 
 98 |   \item \bold{matches} : List of all reference groups that 'match', 
 99 |   as described, except it also includes (rare) examples where 
100 |   pval_to_random is not significant. "|" delimited.
101 |   \item \bold{reciprocal_matches} : List of all reference groups that  
102 |   flagged test group as a match when direction of comparison is reversed.
103 |   (significant pval and pval_to_random). "|" delimited.
104 |   \item \bold{similar_non_match}: This column lists any reference groups 
105 |   outside of shortlab that are not significantly different to a reported 
106 |   match group. Limited by \emph{num_steps}, and will never find anything 
107 |   if num_steps==1. "|" delimited. Usually NA.
108 |   \item \bold{similar_non_match_detail} : P-values for any details about 
109 |   similar_non_match results. These p-values will always be non-significant.
110 |   E.g. "A > C (p=0.0214,n.s)". "|" delimited. Usually NA.
111 |   \item \bold{differences_within} :  This field lists any pairs of 
112 |   reference groups in shortlab that are significantly different. 
113 |   "|" delimited. Usually NA.
114 | }
115 | }
116 | \description{
117 | Construct some sensible labels for the groups/clusters in the query dataset, 
118 | based on similarity to the reference dataset.
119 | 
120 | This is a more low level/customisable version of 
121 | \code{\link{make_ref_similarity_names}}, (would usually use that instead).  
122 | Suitable for rare cases to reuse an existing \bold{de_table.ref.marked} 
123 | object. Or use a \bold{de_table.ref.marked} table with more than one dataset
124 | present (discouraged). Or to skip the reciprocal comparison step.
125 | }
126 | \details{
127 | This function aims to report a) the top most similar reference group, if 
128 | there's a clear frontrunner, b) A list of multiple similar groups if they 
129 | have similar similarity, or c) 'No similarity', if there is none.
130 | 
131 | Each group is named according to the following rules. 
132 | Testing for significant 
133 | (smaller) differences with a one-directional Mann-Whitney U test on their 
134 | rescaled ranks:
135 | \enumerate{
136 |   \item The first (as ranked by median rescaled rank) reference group is 
137 |   significantly more similar than the next: Report \emph{first only}.
138 |   \item When comparing differences between groups stepwise ranked by 
139 |   median rescaled rank - no group is significantly different to its 
140 |   neighbour: Report \emph{no similarity}
141 |   \item There's no significant differences in the stepwise comparisons 
142 |   of the first N reference groups - but there is a significant 
143 |   difference later on : Report \emph{multiple group similarity}
144 | }
145 | 
146 | There are some further heuristic caveats:
147 | \enumerate{
148 |   \item The distribution of top genes in the last (or only) match group is 
149 |   tested versus a theoretical random distribution around 0.5 (as reported 
150 |   in \emph{pval_to_random} column). If the distribution is not 
151 |   significantly above random  
152 |   (It is possible in edge cases where there is a skewed dataset
153 |   and no/few matches),
154 |   \emph{no similarity} is reported. The significant \emph{pval} column is 
155 |   left intact.
156 |   \item The comparison is repeated reciprocally - reference groups vs the 
157 |   query groups. This helps sensitivity of heterogenous query groups - 
158 |   and investigating the reciprocal matches can be informative in these 
159 |   cases.
160 |   If a query group doesn't 'match' a reference group, but the reference 
161 |   group does match that query group - it is reported in the group label in 
162 |   brackets.
163 |   e.g. \emph{c1:th_lymphocytes(tc_lymphocytes)}. 
164 |   It's even possible if there was no match (and pval = NA) 
165 |   e.g. \emph{c2:(tc_lymphocytes)}
166 | }
167 | 
168 | 
169 | 
170 | The similarity is formatted into a group label. Where there are 
171 | multiple similar groups, they're listed from most to least similar by their 
172 | median ranks.
173 |  
174 | For instance, a query dataset of clusters c1, c2, c3 and c4 against a 
175 | cell-type labelled reference dataset might get names like:
176 | E.g.
177 | \itemize{
178 |   \item c1:macrophage
179 |   \item c2:endothelial|mesodermal
180 |   \item c3:no_similarity
181 |   \item c4:mesodermal(endothelial)
182 | }
183 | 
184 | Function \code{make_ref_similarity_names} is a convenience wrapper function 
185 | for \code{make_ref_similarity_names_using_marked}. It accepts two 'de_table' 
186 | outputs of function \code{contrast_each_group_to_the_rest} to compare
187 | and handles running
188 | \code{\link{get_the_up_genes_for_all_possible_groups}}. 
189 | Sister function \code{make_ref_similarity_names_using_marked} may (rarely) be 
190 | of use if the \bold{de_table.marked} object has already been created, 
191 | or if reciprocal tests are not wanted.
192 | }
193 | \section{Functions}{
194 | \itemize{
195 | \item \code{make_ref_similarity_names_using_marked}: Construct some sensible cluster 
196 | labels, but using a premade marked table.
197 | }}
198 | 
199 | \examples{
200 | 
201 | # Make input
202 | # de_table.demo_query <- contrast_each_group_to_the_rest(demo_query_se, "demo_query")
203 | # de_table.demo_ref   <- contrast_each_group_to_the_rest(demo_ref_se,   "demo_ref")
204 | 
205 | make_ref_similarity_names(de_table.demo_query, de_table.demo_ref)
206 | make_ref_similarity_names(de_table.demo_query, de_table.demo_ref, num_steps=3)
207 | make_ref_similarity_names(de_table.demo_query, de_table.demo_ref, num_steps=NA)
208 | 
209 | 
210 | # Make input
211 | # de_table.demo_query <- contrast_each_group_to_the_rest(demo_query_se, "demo_query")
212 | # de_table.demo_ref   <- contrast_each_group_to_the_rest(demo_ref_se,   "demo_ref")
213 | 
214 | de_table.marked.query_vs_ref <- get_the_up_genes_for_all_possible_groups(
215 |      de_table.demo_query, de_table.demo_ref) 
216 | de_table.marked.reiprocal <- get_the_up_genes_for_all_possible_groups(
217 |      de_table.demo_ref, de_table.demo_query)
218 |      
219 | 
220 | make_ref_similarity_names_using_marked(de_table.marked.query_vs_ref, 
221 |                                        de_table.marked.reiprocal)
222 |                                        
223 | make_ref_similarity_names_using_marked(de_table.marked.query_vs_ref)
224 | 
225 | 
226 | }
227 | \seealso{
228 | \code{\link{contrast_each_group_to_the_rest}} For 
229 | preparing de_table input
230 | 
231 | \code{\link{get_the_up_genes_for_all_possible_groups}} 
232 | To prepare the \bold{de_table.ref.marked} input.
233 | }
234 | 


--------------------------------------------------------------------------------
/man/make_ref_similarity_names_for_group.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{make_ref_similarity_names_for_group}
 4 | \alias{make_ref_similarity_names_for_group}
 5 | \title{make_ref_similarity_names_for_group}
 6 | \usage{
 7 | make_ref_similarity_names_for_group(the_test_group, mwtest_res_table,
 8 |   de_table.ref.marked, reciprocal_matches = NA, the_test_dataset,
 9 |   the_ref_dataset, the_pval)
10 | }
11 | \arguments{
12 | \item{the_test_group}{Query group to make name for}
13 | 
14 | \item{mwtest_res_table}{Mann-whitney test results as constructed 
15 | in \code{\link{make_ref_similarity_names_using_marked}}}
16 | 
17 | \item{de_table.ref.marked}{The output of 
18 | \code{\link{get_the_up_genes_for_all_possible_groups}} for the contrast of 
19 | interest.}
20 | 
21 | \item{reciprocal_matches}{Simplified table of reciprocal matches prepared 
22 | within \code{\link{make_ref_similarity_names_using_marked}}. 
23 | If omitted no reciprocal matching done. Default = NA.}
24 | 
25 | \item{the_test_dataset}{A short meaningful name for the experiment. 
26 | (Should match \emph{test_dataset} column in \bold{de_table.marked})}
27 | 
28 | \item{the_ref_dataset}{A short meaningful name for the experiment. 
29 | (Should match \emph{dataset} column in \bold{de_table.marked})}
30 | 
31 | \item{the_pval}{pval as per 
32 | \code{\link{make_ref_similarity_names_using_marked}}}
33 | }
34 | \value{
35 | A tibble with just one group's labelling information, as per 
36 | \code{\link{make_ref_similarity_names_using_marked}}
37 | }
38 | \description{
39 | Internal function, called by make_ref_similarity_names_using_marked 
40 | for each group.
41 | }
42 | \seealso{
43 | \code{\link{make_ref_similarity_names_using_marked}} 
44 | Only place that uses this function, details there.
45 | }
46 | 


--------------------------------------------------------------------------------
/man/run_pair_test_stats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/group_labelling_functions.r
 3 | \name{run_pair_test_stats}
 4 | \alias{run_pair_test_stats}
 5 | \title{run_pair_test_stats}
 6 | \usage{
 7 | run_pair_test_stats(de_table.ref.marked, the_test_group, groupA, groupB,
 8 |   enforceAgtB = TRUE)
 9 | }
10 | \arguments{
11 | \item{de_table.ref.marked}{The output of 
12 | \code{\link{get_the_up_genes_for_all_possible_groups}} for the contrast 
13 | of interest.}
14 | 
15 | \item{the_test_group}{Name of the test group in query dataset.}
16 | 
17 | \item{groupA}{One of the reference group names}
18 | 
19 | \item{groupB}{Another of the reference group names}
20 | 
21 | \item{enforceAgtB}{Do a one tailed test of A 'less' B (more similar)? 
22 | Or two-tailed. Default = TRUE.}
23 | }
24 | \value{
25 | A tibble of Wilcoxon / Mann-Whitney U test results for this contrast.
26 | }
27 | \description{
28 | Internal function to compare the distribution of a query dataset's 'top' 
29 | genes between two different reference dataset groups with a 
30 | Mann–Whitney U test. One directional test if groupA median < group B.
31 | }
32 | \details{
33 | For use by make_ref_similarity_names_using_marked
34 | }
35 | \seealso{
36 | \code{\link{make_ref_similarity_names_using_marked}}
37 | }
38 | 


--------------------------------------------------------------------------------
/man/subset_cells_by_group.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{subset_cells_by_group}
 4 | \alias{subset_cells_by_group}
 5 | \title{subset_cells_by_group}
 6 | \usage{
 7 | subset_cells_by_group(dataset_se, n.group = 1000)
 8 | }
 9 | \arguments{
10 | \item{dataset_se}{Summarised experiment object containing count data. Also
11 | requires 'ID' and 'group' to be set within the cell information.}
12 | 
13 | \item{n.group}{How many cells to keep for each group. Default = 1000}
14 | }
15 | \value{
16 | \emph{dataset_se} A hopefully more manageably subsetted version of
17 | the inputted \bold{dataset_se}.
18 | }
19 | \description{
20 | Utility function to randomly subset very large datasets (that use too much
21 | memory). Specify a maximum number of cells to keep per group and use the 
22 | subsetted version for analysis.
23 | }
24 | \details{
25 | The resulting 
26 | differential expression table \emph{de_table} will have reduced statistical 
27 | power.
28 | But as long as enough cells are left to reasonably accurately
29 | calculate differential expression between groups this should be enough for
30 | celaref to work with.
31 | 
32 | Also, this function will lose proportionality of groups
33 |  (there'll be \emph{n.group} or fewer of each). 
34 | Consider using the n.group/n.other parameters in 
35 | \emph{contrast_each_group_to_the_rest} or 
36 | \emph{contrast_the_group_to_the_rest}  - 
37 | which subsets non-group cells independently for each group. 
38 | That may be more appropriate for tissue type samples which would have similar 
39 | compositions of cells. 
40 |  
41 | So this function is intended for use when either the 
42 | proportionality isn't relevant (e.g. FACS-purified cell populations),
43 | or the data is just too big to work with otherwise.
44 | 
45 | Cells are randomly sampled, so set the random seed (with \emph{set.seed()})
46 | for consistent results across runs.
47 | }
48 | \examples{
49 | 
50 | dataset_se.30pergroup <- subset_cells_by_group(demo_query_se, n.group=30)
51 | 
52 | }
53 | \seealso{
54 | \code{\link{contrast_each_group_to_the_rest}} For alternative method 
55 | of subsetting cells proportionally.
56 | }
57 | 


--------------------------------------------------------------------------------
/man/subset_se_cells_for_group_test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contrasting_functions.r
 3 | \name{subset_se_cells_for_group_test}
 4 | \alias{subset_se_cells_for_group_test}
 5 | \title{subset_se_cells_for_group_test}
 6 | \usage{
 7 | subset_se_cells_for_group_test(dataset_se, the_group, n.group = Inf,
 8 |   n.other = n.group * 5)
 9 | }
10 | \arguments{
11 | \item{dataset_se}{Summarised experiment object containing count data. Also
12 | requires 'ID' and 'group' to be set within the cell information.}
13 | 
14 | \item{the_group}{The group being subsetted for}
15 | 
16 | \item{n.group}{How many cells to keep for each group. Default = Inf}
17 | 
18 | \item{n.other}{How many cells to keep from everything not in the group.
19 | Default = \bold{n.group} * 5}
20 | }
21 | \value{
22 | \emph{dataset_se} A hopefully more manageably subsetted version of
23 | the inputted \bold{dataset_se}
24 | }
25 | \description{
26 | This function, for use by \code{\link{contrast_each_group_to_the_rest}}, 
27 | downsamples cells from a SummarizedExperiment 
28 | (\emph{dataset_se}) - keeping \bold{n.group} (or all if fewer) 
29 | cells from the specified group, and \bold{n.other} from the rest. 
30 | This maintains the proportions of cells in the 'other' part of the 
31 | differential expression comparisons.
32 | }
33 | \details{
34 | Cells are randomly sampled, so set the random seed (with \emph{set.seed()})
35 | for consistent results across runs.
36 | }
37 | \seealso{
38 | Calling function \code{\link{contrast_each_group_to_the_rest}}
39 | 
40 | \code{\link{subset_cells_by_group}} Exported function for 
41 | subsetting each group independently upfront. 
42 | (For when this approach is still unmanageable)
43 | }
44 | 


--------------------------------------------------------------------------------
/man/trim_small_groups_and_low_expression_genes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/loading_helper_functions.r
 3 | \name{trim_small_groups_and_low_expression_genes}
 4 | \alias{trim_small_groups_and_low_expression_genes}
 5 | \title{trim_small_groups_and_low_expression_genes}
 6 | \usage{
 7 | trim_small_groups_and_low_expression_genes(dataset_se,
 8 |   min_lib_size = 1000, min_group_membership = 5,
 9 |   min_reads_in_sample = 1, min_detected_by_min_samples = 5)
10 | }
11 | \arguments{
12 | \item{dataset_se}{Summarised experiment object containing count data. Also
13 | requires 'ID' and 'group' to be set within the cell information
14 | (see \code{colData()})}
15 | 
16 | \item{min_lib_size}{Minimum library size. Cells with fewer than this many 
17 | reads removed. Default = 1000}
18 | 
19 | \item{min_group_membership}{Throw out groups/clusters with fewer than this 
20 | many cells. May change with experiment size. Default = 5}
21 | 
22 | \item{min_reads_in_sample}{Require this many reads to consider a gene 
23 | detected in a sample. Default = 1}
24 | 
25 | \item{min_detected_by_min_samples}{Keep genes detected in this many 
26 | samples.  May change with experiment size. Default = 5}
27 | }
28 | \value{
29 | A filtered dataset_se, ready for use.
30 | }
31 | \description{
32 | Filter and return a SummarizedExperiment object (dataset_se) by several
33 | metrics:
34 | \itemize{
35 |   \item Cells with at least \bold{min_lib_size} total reads.
36 |   \item Genes expressed in at least \bold{min_detected_by_min_samples} 
37 |   cells, at a threshold of \bold{min_reads_in_sample} per cell.
38 |   \item Remove entire groups (clusters) of cells where there are fewer than
39 |   \bold{min_group_membership} cells in that group.
40 | }
41 | }
42 | \details{
43 | If it hasn't been done already, it is highly recommended to use this 
44 | function to filter out genes with no/low total counts 
45 | (especially in single cell data,
46 | there can be many) - without expression they are not useful and may reduce
47 | statistical power.
48 | 
49 | Likewise, very small groups (<5 cells) are unlikely to give useful
50 | results with this method. And cells with abnormally small library sizes may
51 | not be desirable.
52 | 
53 | Of course 'reasonable' thresholds for filtering cells/genes are subjective.
54 | Defaults are moderately sensible starting points.
55 | }
56 | \examples{
57 | 
58 | demo_query_se.trimmed  <- 
59 |    trim_small_groups_and_low_expression_genes(demo_query_se)
60 | demo_query_se.trimmed2 <- 
61 |    trim_small_groups_and_low_expression_genes(demo_ref_se, 
62 |                                               min_group_membership = 10)
63 | 
64 | }
65 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(celaref)
3 | # Run the celaref package's testthat test suite.
4 | test_check("celaref")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-contrasting_functions.R:
--------------------------------------------------------------------------------
  1 | context("test-contrasting_functions")
  2 | 
  3 | 
  4 | test_that("MAST contrasts - dense, sparse and empty", {
  5 |    # Runs the same 30x30 demo subset as dense, sparse and partly-zeroed input.
  6 |    # Check the first 5 genes are the same. 
  7 |    asubset   <- seq_len(30)
  8 |    top_check <- seq_len(5)
  9 |    de_genes   <- c("Gene3",  "Gene23", "Gene10", "Gene25", "Gene30") 
 10 |    de_genes.3 <- c("Gene1",  "Gene4",  "Gene3",  "Gene30", "Gene10") #altered data.
 11 |    
 12 |    # Dense
 13 |    demo_query_se.1 <- demo_query_se[asubset,asubset]
 14 |    de_table1.demo_query  <- contrast_each_group_to_the_rest(
 15 |       demo_query_se.1, "a_demo_query", num_cores=1)
 16 |    
 17 |    expect_equal(de_table1.demo_query$ID[top_check], de_genes )
 18 |    
 19 |    
 20 |    # Now sparse: same counts in a sparse Matrix, so the same top genes are expected.
 21 |    demo_query_se.2 <- demo_query_se.1
 22 |    assays(demo_query_se.2)[[1]] <-  Matrix::Matrix(assay(demo_query_se.1), sparse=TRUE)
 23 |    de_table2.demo_query  <- contrast_each_group_to_the_rest(
 24 |       demo_query_se.2, "a_demo_query", num_cores=1)
 25 |    expect_equal(de_table2.demo_query$ID[top_check], de_genes ) # check the sparse result itself
 26 |    
 27 |    
 28 |    # What if one gene's expression is totally empty for an entire group?
 29 |    # (previously caused errors and there's a workaround now.)
 30 |    demo_query_se.3 <- demo_query_se.1
 31 |    assays(demo_query_se.3)[[1]][1,demo_query_se.1$group =="Group2"] <- 0
 32 |    de_table3.demo_query  <- contrast_each_group_to_the_rest(
 33 |       demo_query_se.3, "a_demo_query", num_cores=1)
 34 |    expect_equal(de_table3.demo_query$ID[top_check], de_genes.3 )
 35 |    #ID       pval      log2FC ci_inner  ci_outer       fdr  group   sig sig_up gene_count rank rescaled_rank      dataset
 36 |    #1  Gene1 0.06701189 -1.98643911 1.734379 -5.707257 0.4505910 Group1 FALSE  FALSE         30    1    0.03333333 a_demo_query
 37 |    #2  Gene4 0.99903639 -0.04353777 1.726446 -1.813522 0.9990364 Group1 FALSE  FALSE         30    2    0.06666667 a_demo_query
 38 |    #3  Gene3 0.50595300 -2.71865608 1.394369 -6.831681 0.5837919 Group1 FALSE  FALSE         30    3    0.10000000 a_demo_query
 39 |    #4 Gene30 0.67979358 -0.94039185 1.281941 -3.162725 0.7279509 Group1 FALSE  FALSE         30    4    0.13333333 a_demo_query
 40 |    #5 Gene10 0.68075516 -0.46524523 1.147719 -2.078209 0.7279509 Group1 FALSE  FALSE         30    5    0.16666667 a_demo_query
 41 | })
 42 | 
 43 | 
 44 | 
 45 | 
 46 | test_that("MAST contrasts - hdf5-backed assays and SCE objects", {
 47 |    # Just test that it runs - 
 48 |    # these things are susceptible to format / object changes.
 49 | 
 50 |    # dense sce 
 51 |    d.sce.den <- as(demo_query_se, "SingleCellExperiment")
 52 |    expect_equal( 10, nrow(
 53 |       contrast_each_group_to_the_rest(d.sce.den[1:10,],'test', 
 54 |       groups2test = "Group2", n.group = 20, num_cores = 1)))
 55 |    
 56 |    # sparse SCE   
 57 |    d.sce.sp        <- d.sce.den 
 58 |    assays(d.sce.sp)[[1]] <- Matrix::Matrix(assays(d.sce.sp)[[1]], sparse=TRUE)
 59 |    expect_equal( 10, nrow(
 60 |       contrast_each_group_to_the_rest(d.sce.sp[1:10,],'test',     
 61 |       groups2test = "Group2", n.group = 20, num_cores = 1)))
 62 | 
 63 |    # hdf5 SCE: written to a fresh path under tempdir() instead of the default
 64 |    # "my_h5_se" directory, so the test leaves nothing in the working directory.
 65 |    d.sce.hdf <- HDF5Array::saveHDF5SummarizedExperiment(
 66 |       d.sce.sp, dir=tempfile("celaref_h5_se"), replace=TRUE)
 67 |    expect_equal( 10, nrow(
 68 |       contrast_each_group_to_the_rest(d.sce.hdf[1:10,],'test',     
 69 |       groups2test = "Group2", n.group = 20, num_cores = 1)))
 70 | })
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | test_that("Microarray reference", {
 77 |    # The limma-based microarray contrast is expected to list these five IDs first.
 78 |    expected_leading_ids <- c("Gene100", "Gene150", "Gene57",  "Gene80",  "Gene21" )
 79 |    
 80 |    limma_de_table <- contrast_each_group_to_the_rest_for_norm_ma_with_limma(
 81 |       norm_expression_table = demo_microarray_expr, 
 82 |       sample_sheet_table    = demo_microarray_sample_sheet,
 83 |       dataset_name          = "DemoSimMicroarrayRef", 
 84 |       sample_name           = "cell_sample",
 85 |       group_name            = "group") 
 86 |    
 87 |    expect_equal(limma_de_table$ID[seq_along(expected_leading_ids)], expected_leading_ids)
 88 | })
 89 | 
 90 | 
 91 | 
 92 | 
 93 | test_that("Rankmetrics", {
 94 |    
 95 |    # Ask for just 10 genes and check them.
 96 |    # The expected genes are actually the same for both rank metrics.
 97 |    expected_top10 <- c(
 98 |       "Gene100", "Gene150", "Gene57",  "Gene80",  "Gene21",  
 99 |       "Gene30",  "Gene23",  "Gene65",  "Gene101", "Gene10")
100 |    
101 |    # Helper: IDs of the rows whose group is "Dunno".
102 |    ids_in_dunno <- function(marked_table) {
103 |       marked_table$ID[marked_table$group == "Dunno"]
104 |    }
105 |    
106 |    # Rank metric: TOP100_LOWER_CI_GTE1
107 |    marked.lower_ci <- get_the_up_genes_for_group(
108 |       the_group     = "Group3",
109 |       de_table.test = de_table.demo_query, 
110 |       de_table.ref  = de_table.demo_ref,
111 |       rankmetric    = "TOP100_LOWER_CI_GTE1",
112 |       n             = 10)
113 |    expect_equal(ids_in_dunno(marked.lower_ci), expected_top10)
114 |    
115 |    
116 |    # Rank metric: TOP100_SIG
117 |    marked.sig <- get_the_up_genes_for_group(
118 |       the_group     = "Group3",
119 |       de_table.test = de_table.demo_query, 
120 |       de_table.ref  = de_table.demo_ref,
121 |       rankmetric    = 'TOP100_SIG',
122 |       n             = 10)
123 |    expect_equal(ids_in_dunno(marked.sig), expected_top10)
124 |    
125 |    
126 |    
127 | })
128 | 
129 | 
130 | 
131 | 
132 | test_that("Subsetting ses", {
133 |    # Cap every group at 30 cells; expect 30 left in Group3 and 28 in Group1.
134 |    capped_se    <- subset_cells_by_group(demo_query_se, n.group=30)
135 |    capped_group <- capped_se$group
136 |    expect_equal(sum(capped_group == "Group3"), 30) 
137 |    expect_equal(sum(capped_group == "Group1"), 28) 
138 |    
139 |    # Keep 20 cells of Group3 and 30 cells from everything else.
140 |    focused_se <- subset_se_cells_for_group_test(
141 |       demo_query_se, the_group="Group3", n.group=20, n.other=30)
142 |    is_group3  <- focused_se$group == "Group3"
143 |    expect_equal(sum(is_group3),  20) 
144 |    expect_equal(sum(!is_group3), 30) 
145 | })
146 | 
147 | 
148 | 
149 | #test_that("Finding counts", {
150 | #   
151 | #})
152 | 
153 | 


--------------------------------------------------------------------------------
/tests/testthat/test-loading_helper_functions.R:
--------------------------------------------------------------------------------
 1 | context("Loading functions")
 2 | library(celaref)
 3 | 
 4 | 
 5 | test_that("Load se from files, tables, 10X", {
 6 |   
 7 |    # Shared sanity check for every loader below: any 0-length (or 1-length)
 8 |    # dimension is the usual failure case. What's actually there is not
 9 |    # checked, because it could change and the datasets differ in size anyway.
10 |    expect_populated_se <- function(se) {
11 |       
12 |       expect_gt(base::ncol(se), 1)        # cells
13 |       expect_gt(nrow(se),  1)             # genes # 1 gene would be wrong too.
14 |       expect_gt(sum(assays(se)[[1]]), 1)  # total counts aren't all 0
15 |       
16 |       cell_info <- colData(se)
17 |       expect_gt(nrow(cell_info) , 1 ) 
18 |       expect_gt(ncol(cell_info) , 1 ) 
19 |       
20 |       gene_info <- rowData(se)
21 |       expect_gt(nrow(gene_info) , 1 ) 
22 |       expect_gt(ncol(gene_info) , 1 ) 
23 |    }
24 |    
25 |    # Resolve a path inside the installed package's "extdata" directory.
26 |    demo_path <- function(...) system.file("extdata", ..., package = "celaref")
27 |    
28 |    # 1. Loading from files
29 |    se_from_files <- load_se_from_files(
30 |       demo_path("sim_query_counts.tab"), 
31 |       cell_info_file = demo_path("sim_query_cell_info.tab"), 
32 |       gene_info_file = demo_path("sim_query_gene_info.tab"))
33 |    expect_populated_se(se_from_files)
34 |    
35 | 
36 |    # 2. Loading from the demo table objects
37 |    se_from_tables <- load_se_from_tables(
38 |       counts_matrix   = demo_counts_matrix, 
39 |       cell_info_table = demo_cell_info_table, 
40 |       gene_info_table = demo_gene_info_table)
41 |    expect_populated_se(se_from_tables)
42 | 
43 |    # 3. Loading the demo 10X dataset directory
44 |    se_from_10X <- load_dataset_10Xdata(
45 |       demo_path("sim_cr_dataset"), dataset_genome="GRCh38", 
46 |       clustering_set="kmeans_4_clusters", gene_id_cols_10X=c("gene")) 
47 |    
48 |    expect_populated_se(se_from_10X)
49 |    
50 | })
51 | 
52 | 
53 | 
54 | test_that("Filtering low expression genes and groups", {
55 |    # Trim the demo reference, then check how many groups, genes and cells remain.
56 |    trimmed_se <- trim_small_groups_and_low_expression_genes(
57 |       dataset_se                  = demo_ref_se, 
58 |       min_lib_size                = 1000,
59 |       min_group_membership        = 50,
60 |       min_reads_in_sample         = 1,
61 |       min_detected_by_min_samples = 20)
62 |    
63 |    expect_equal(nlevels(colData(trimmed_se)$group), 3)   # group levels
64 |    expect_equal(nrow(trimmed_se), 199)                   # rows
65 |    expect_equal(ncol(trimmed_se), 489)                   # columns
66 | })
67 | 
68 | 
69 | 
70 | 
71 | test_that("Converting gene ids",{
72 |    # Ten rows given two candidate ids ("A" x5 then "B" x5) and values 1..10.
73 |    small_se <- demo_ref_se[seq_len(10), seq_len(10)]
74 |    rowData(small_se)$dummyname       <- rep(c("A", "B"), each = 5)
75 |    rowData(small_se)$total_not_count <- seq_len(10)
76 |    
77 |    converted_se <- convert_se_gene_ids(small_se, new_id='dummyname',
78 |                                        eval_col='total_not_count')
79 |    # The row now named "A" is expected to carry total_not_count 5.
80 |    expect_equal(rowData(converted_se)["A","total_not_count"], 5)
81 | })


--------------------------------------------------------------------------------
/vignettes/celaref.bib:
--------------------------------------------------------------------------------
  1 | @article{Farmer2017,
  2 | abstract = {The tear producing lacrimal gland is a tubular organ that protects and lubricates the ocular surface. While the lacrimal gland possesses many features that make it an excellent model to understand tubulogenesis, the cell types and lineage relationships that drive lacrimal gland formation are unclear. Using single cell sequencing and other molecular tools, we reveal novel cell identities and epithelial lineage dynamics that underlie lacrimal gland development. We show that the lacrimal gland from its earliest developmental stages is composed of multiple subpopulations of immune, epithelial, and mesenchymal cell lineages. The epithelial lineage exhibits the most substantiative cellular changes, transitioning through a series of unique transcriptional states to become terminally differentiated acinar, ductal and myoepithelial cells. Furthermore, lineage tracing in postnatal and adult glands provides the first direct evidence of unipotent KRT5+ epithelial cells in the lacrimal gland. Finally, we show conservation of developmental markers between the developing mouse and human lacrimal gland, supporting the use of mice to understand human development. Together, our data reveal critical features of lacrimal gland development that have broad implications for understanding epithelial organogenesis.},
  3 | author = {Farmer, D'Juan T. and Nathan, Sara and Finley, Jennifer K. and {Shengyang Yu}, Kevin and Emmerson, Elaine and Byrnes, Lauren E. and Sneddon, Julie B. and McManus, Michael T. and Tward, Aaron D. and Knox, Sarah M.},
  4 | doi = {10.1242/dev.150789},
  5 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Farmer et al. - 2017 - Defining epithelial cell dynamics and lineage relationships in the developing lacrimal gland.pdf:pdf},
  6 | issn = {0950-1991},
  7 | journal = {Development},
  8 | keywords = {development,epithelia,lacrimal gland,single cell sequencing,tubulogenesis},
  9 | number = {13},
 10 | pages = {2517--2528},
 11 | pmid = {28576768},
 12 | title = {{Defining epithelial cell dynamics and lineage relationships in the developing lacrimal gland}},
 13 | url = {http://dev.biologists.org/lookup/doi/10.1242/dev.150789},
 14 | volume = {144},
 15 | year = {2017}
 16 | }
 17 | @article{Finak2015,
 18 | abstract = {Single-cell transcriptomics reveals gene expression heterogeneity but suffers from stochastic dropout and characteristic bimodal expression distributions in which expression is either strongly non-zero or non-detectable. We propose a two-part, generalized linear model for such bimodal data that parameterizes both of these features. We argue that the cellular detection rate, the fraction of genes expressed in a cell, should be adjusted for as a source of nuisance variation. Our model provides gene set enrichment analysis tailored to single-cell data. It provides insights into how networks of co-expressed genes evolve across an experimental treatment. MAST is available at https://github.com/RGLab/MAST .},
 19 | author = {Finak, Greg and McDavid, Andrew and Yajima, Masanao and Deng, Jingyuan and Gersuk, Vivian and Shalek, Alex K. and Slichter, Chloe K. and Miller, Hannah W. and McElrath, M. Juliana and Prlic, Martin and Linsley, Peter S. and Gottardo, Raphael},
 20 | doi = {10.1186/s13059-015-0844-5},
 21 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Finak et al. - 2015 - MAST A flexible statistical framework for assessing transcriptional changes and characterizing heterogeneity in si.pdf:pdf},
 22 | isbn = {10.1186/s13059-015-0844-5},
 23 | issn = {1474760X},
 24 | journal = {Genome Biology},
 25 | keywords = {Bimodality,Cellular detection rate,Co-expression,Empirical Bayes,Gene set enrichment analysis,Generalized linear model},
 26 | number = {1},
 27 | pages = {1--13},
 28 | pmid = {26653891},
 29 | publisher = {Genome Biology},
 30 | title = {{MAST: A flexible statistical framework for assessing transcriptional changes and characterizing heterogeneity in single-cell RNA sequencing data}},
 31 | url = {http://dx.doi.org/10.1186/s13059-015-0844-5},
 32 | volume = {16},
 33 | year = {2015}
 34 | }
 35 | @article{Freytag2017,
 36 | abstract = {The commercially available 10X Genomics protocol to generate droplet-based single cell RNA-seq (scRNA-seq) data is enjoying growing popularity among researchers. Fundamental to the analysis of such scRNA-seq data is the ability to cluster similar or same cells into non-overlapping groups. Many competing methods have been proposed for this task, but there is currently little guidance with regards to which method offers most accuracy. Answering this question is complicated by the fact that 10X Genomics data lack cell labels that would allow a direct performance evaluation. Thus in this review, we focused on comparing clustering solutions of a dozen methods for three datasets on human peripheral mononuclear cells generated with the 10X Genomics technology. While clustering solutions appeared robust, we found that solutions produced by different methods have little in common with each other. They also failed to replicate cell type assignment generated with supervised labeling approaches. Furthermore, we demonstrate that all clustering methods tested clustered cells to a large degree according to the amount of ribosomal RNA in each cell.},
 37 | author = {Freytag, Saskia and Lonnstedt, Ingrid and Ng, Milica and Bahlo, Melanie},
 38 | doi = {10.1101/203752},
 39 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Freytag et al. - 2017 - Cluster Headache Comparing Clustering Tools for 10X Single Cell Sequencing Data.pdf:pdf},
 40 | number = {4},
 41 | title = {{Cluster Headache: Comparing Clustering Tools for 10X Single Cell Sequencing Data}},
 42 | year = {2017}
 43 | }
 44 | @article{DeGraaf2016,
 45 | abstract = {Hematopoiesis is a multistage process involving the differentiation of stem and progenitor cells into distinct mature cell lineages. Here we present Haemopedia, an atlas of murine gene-expression data containing 54 hematopoietic cell types, covering all the mature lineages in hematopoiesis. We include rare cell populations such as eosinophils, mast cells, basophils, and megakaryocytes, and a broad collection of progenitor and stem cells. We show that lineage branching and maturation during hematopoiesis can be reconstructed using the expression patterns of small sets of genes. We also have identified genes with enriched expression in each of the mature blood cell lineages, many of which show conserved lineage-enriched expression in human hematopoiesis. We have created an online web portal called Haemosphere to make analyses of Haemopedia and other blood cell transcriptional datasets easier. This resource provides simple tools to interrogate gene-expression-based relationships between hematopoietic cell types and genes of interest.},
 46 | author = {de Graaf, Carolyn A. and Choi, Jarny and Baldwin, Tracey M. and Bolden, Jessica E. and Fairfax, Kirsten A. and Robinson, Aaron J. and Biben, Christine and Morgan, Clare and Ramsay, Kerry and Ng, Ashley P. and Kauppi, Maria and Kruse, Elizabeth A. and Sargeant, Tobias J. and Seidenman, Nick and D'Amico, Angela and D'Ombrain, Marthe C. and Lucas, Erin C. and Koernig, Sandra and {Baz Morelli}, Adriana and Wilson, Michael J. and Dower, Steven K. and Williams, Brenda and Heazlewood, Shen Y. and Hu, Yifang and Nilsson, Susan K. and Wu, Li and Smyth, Gordon K. and Alexander, Warren S. and Hilton, Douglas J.},
 47 | doi = {10.1016/j.stemcr.2016.07.007},
 48 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/de Graaf et al. - 2016 - Haemopedia An Expression Atlas of Murine Hematopoietic Cells.pdf:pdf},
 49 | isbn = {2213-6711 (Electronic), 2213-6711 (Linking)},
 50 | issn = {22136711},
 51 | journal = {Stem Cell Reports},
 52 | number = {3},
 53 | pages = {571--582},
 54 | pmid = {27499199},
 55 | title = {{Haemopedia: An Expression Atlas of Murine Hematopoietic Cells}},
 56 | volume = {7},
 57 | year = {2016}
 58 | }
 59 | @article{Harrison2018,
 60 | abstract = {},
 61 | author = {Harrison, Paul and Pattison, Andrew and Powell, David and Beilharz, Traude},
 62 | doi = {10.1101/343145},
 63 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Harrison et al. - 2018 - Topconfects a package for confident effect sizes in differential expression analysis provides improved usabilit.pdf:pdf},
 64 | title = {{Topconfects: a package for confident effect sizes in differential expression analysis provides improved usability ranking genes of interest}},
 65 | year = {2018}
 66 | }
 67 | @article{Kiselev2018,
 68 | abstract = {Single-cell RNA-seq (scRNA-seq) allows researchers to define cell types on the basis of unsupervised clustering of the transcriptome. However, differences in experimental methods and computational analyses make it challenging to compare data across experiments. Here we present scmap (http://bioconductor.org/packages/scmap; web version at http://www.sanger.ac.uk/science/tools/scmap), a method for projecting cells from an scRNA-seq data set onto cell types or individual cells from other experiments.},
 69 | author = {Kiselev, Vladimir Yu and Yiu, Andrew and Hemberg, Martin},
 70 | doi = {10.1038/nmeth.4644},
 71 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Kiselev, Yiu, Hemberg - 2018 - scmap projection of single-cell RNA-seq data across data sets.pdf:pdf},
 72 | issn = {1548-7091},
 73 | journal = {Nature Methods},
 74 | month = {apr},
 75 | number = {5},
 76 | pages = {359--362},
 77 | pmid = {29608555},
 78 | publisher = {Nature Publishing Group},
 79 | title = {{scmap: projection of single-cell RNA-seq data across data sets}},
 80 | url = {http://dx.doi.org/10.1038/nmeth.4644 http://www.ncbi.nlm.nih.gov/pubmed/29608555 http://www.nature.com/doifinder/10.1038/nmeth.4644},
 81 | volume = {15},
 82 | year = {2018}
 83 | }
 84 | @article{Kiselev2017,
 85 | abstract = {Single-cell RNA-seq enables the quantitative characterization of cell types based on global transcriptome profiles. We present single-cell consensus clustering (SC3), a user-friendly tool for unsupervised clustering, which achieves high accuracy and robustness by combining multiple clustering solutions through a consensus approach (http://bioconductor.org/packages/SC3). We demonstrate that SC3 is capable of identifying subclones from the transcriptomes of neoplastic cells collected from patients.},
 86 | author = {Kiselev, Vladimir Yu and Kirschner, Kristina and Schaub, Michael T. and Andrews, Tallulah and Yiu, Andrew and Chandra, Tamir and Natarajan, Kedar N. and Reik, Wolf and Barahona, Mauricio and Green, Anthony R. and Hemberg, Martin},
 87 | doi = {10.1038/nmeth.4236},
 88 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Kiselev et al. - 2017 - SC3 Consensus clustering of single-cell RNA-seq data.pdf:pdf},
 89 | issn = {15487105},
 90 | journal = {Nature Methods},
 91 | number = {5},
 92 | pages = {483--486},
 93 | pmid = {28346451},
 94 | title = {{SC3: Consensus clustering of single-cell RNA-seq data}},
 95 | volume = {14},
 96 | year = {2017}
 97 | }
 98 | @article{Satija2015,
 99 | abstract = {Spatial localization is a key determinant of cellular fate and behavior, but methods for spatially resolved, transcriptome-wide gene expression profiling across complex tissues are lacking. RNA staining methods assay only a small number of transcripts, whereas single-cell RNA-seq, which measures global gene expression, separates cells from their native spatial context. Here we present Seurat, a computational strategy to infer cellular localization by integrating single-cell RNA-seq data with in situ RNA patterns. We applied Seurat to spatially map 851 single cells from dissociated zebrafish (Danio rerio) embryos and generated a transcriptome-wide map of spatial patterning. We confirmed Seurat's accuracy using several experimental approaches, then used the strategy to identify a set of archetypal expression patterns and spatial markers. Seurat correctly localizes rare subpopulations, accurately mapping both spatially restricted and scattered groups. Seurat will be applicable to mapping cellular localization within complex patterned tissues in diverse systems.},
100 | author = {Satija, Rahul and Farrell, Jeffrey A and Gennert, David and Schier, Alexander F and Regev, Aviv},
101 | doi = {10.1038/nbt.3192},
102 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Satija et al. - 2015 - Spatial reconstruction of single-cell gene expression data.pdf:pdf},
103 | isbn = {1546-1696 (Electronic), 1087-0156 (Linking)},
104 | issn = {1087-0156},
105 | journal = {Nature Biotechnology},
106 | number = {5},
107 | pages = {495--502},
108 | pmid = {25867923},
109 | title = {{Spatial reconstruction of single-cell gene expression data}},
110 | url = {http://dx.doi.org/10.1038/nbt.3192},
111 | volume = {33},
112 | year = {2015}
113 | }
114 | @article{Watkins2009,
115 | abstract = {Hematopoiesis is a carefully controlled process that is regulated by complex networks of transcription factors that are, in part, controlled by signals resulting from ligand binding to cell-surface receptors. To further understand hematopoiesis, we have compared gene expression profiles of human erythroblasts, megakaryocytes, B cells, cytotoxic and helper T cells, natural killer cells, granulocytes, and monocytes using whole genome microarrays. A bioinformatics analysis of these data was performed focusing on transcription factors, immunoglobulin superfamily members, and lineage-specific transcripts. We observed that the numbers of lineage-specific genes varies by 2 orders of magnitude, ranging from 5 for cytotoxic T cells to 878 for granulocytes. In addition, we have identified novel coexpression patterns for key transcription factors involved in hematopoiesis (eg, GATA3-GFI1 and GATA2-KLF1). This study represents the most comprehensive analysis of gene expression in hematopoietic cells to date and has identified genes that play key roles in lineage commitment and cell function. The data, which are freely accessible, will be invaluable for future studies on hematopoiesis and the role of specific genes and will also aid the understanding of the recent genome-wide association studies.},
116 | author = {Watkins, Nicholas a and Gusnanto, Arief and de Bono, Bernard and De, Subhajyoti and Miranda-Saavedra, Diego and Hardie, Debbie L and Angenent, Will G J and Attwood, Antony P and Ellis, Peter D and Erber, Wendy and Foad, Nicola S and Garner, Stephen F and Isacke, Clare M and Jolley, Jennifer and Koch, Kerstin and Macaulay, Iain C and Morley, Sarah L and Rendon, Augusto and Rice, Kate M and Taylor, Niall and Thijssen-Timmer, Daphne C and Tijssen, Marloes R and van der Schoot, C Ellen and Wernisch, Lorenz and Winzer, Thilo and Dudbridge, Frank and Buckley, Christopher D and Langford, Cordelia F and Teichmann, Sarah and G{\"{o}}ttgens, Berthold and Ouwehand, Willem H and {Bloodomics Consortium}},
117 | doi = {10.1182/blood-2008-06-162958},
118 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Watkins et al. - 2009 - A HaemAtlas characterizing gene expression in differentiated human blood cells.pdf:pdf},
119 | issn = {1528-0020},
120 | journal = {Blood},
121 | month = {may},
122 | number = {19},
123 | pages = {e1--9},
124 | pmid = {19228925},
125 | title = {{A HaemAtlas: characterizing gene expression in differentiated human blood cells.}},
126 | url = {http://www.ncbi.nlm.nih.gov/pubmed/19228925 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2680378},
127 | volume = {113},
128 | year = {2009}
129 | }
130 | @article{Zappia2017,
131 | abstract = {As single-cell RNA sequencing (scRNA-seq) technologies have rapidly developed, so have analysis methods. Many methods have been tested, developed, and validated using simulated datasets. Unfortunately, current simulations are often poorly documented, their similarity to real data is not demonstrated, or reproducible code is not available. Here, we present the Splatter Bioconductor package for simple, reproducible, and well-documented simulation of scRNA-seq data. Splatter provides an interface to multiple simulation methods including Splat, our own simulation, based on a gamma-Poisson distribution. Splat can simulate single populations of cells, populations with multiple cell types, or differentiation paths.},
132 | author = {Zappia, Luke and Phipson, Belinda and Oshlack, Alicia},
133 | doi = {10.1186/s13059-017-1305-0},
134 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Zappia, Phipson, Oshlack - 2017 - Splatter Simulation of single-cell RNA sequencing data.pdf:pdf},
135 | issn = {1474760X},
136 | journal = {Genome Biology},
137 | keywords = {RNA-seq,Simulation,Single-cell,Software},
138 | number = {1},
139 | pages = {1--15},
140 | pmid = {28899397},
141 | publisher = {Genome Biology},
142 | title = {{Splatter: Simulation of single-cell RNA sequencing data}},
143 | volume = {18},
144 | year = {2017}
145 | }
146 | @article{Zeisel2015,
147 | abstract = {The mammalian cerebral cortex supports cognitive functions such as sensorimotor integration, memory, and social behaviors. Normal brain function relies on a diverse set of differentiated cell types, including neurons, glia, and vasculature. Here, we have used large-scale single-cell RNA sequencing (RNA-seq) to classify cells in the mouse somatosensory cortex and hippocampal CA1 region. We found 47 molecularly distinct subclasses, comprising all known major cell types in the cortex. We identified numerous marker genes, which allowed alignment with known cell types, morphology, and location. We found a layer I interneuron expressing Pax6 and a distinct postmitotic oligodendrocyte subclass marked by Itpr2. Across the diversity of cortical cell types, transcription factors formed a complex, layered regulatory code, suggesting a mechanism for the maintenance of adult cell type identity.},
148 | archivePrefix = {arXiv},
149 | arxivId = {arXiv:gr-qc/9809069v1},
150 | author = {Zeisel, A. and Manchado, A. B. M. and Codeluppi, S. and Lonnerberg, P. and {La Manno}, G. and Jureus, A. and Marques, S. and Munguba, H. and He, L. and Betsholtz, C. and Rolny, C. and Castelo-Branco, G. and Hjerling-Leffler, J. and Linnarsson, S.},
151 | doi = {10.1126/science.aaa1934},
152 | eprint = {9809069v1},
153 | file = {:Users/swil0005/mendeley/mendeley{\_}pdfs/Zeisel et al. - 2015 - Cell types in the mouse cortex and hippocampus revealed by single-cell RNA-seq.pdf:pdf;:Users/swil0005/mendeley/mendeley{\_}pdfs/Zeisel et al. - 2015 - Cell types in the mouse cortex and hippocampus revealed by single-cell RNA-seq(2).pdf:pdf},
154 | isbn = {1095-9203 (Electronic); 0036-8075 (Linking)},
155 | issn = {0036-8075},
156 | journal = {Science},
157 | keywords = {mgstuff},
158 | mendeley-tags = {mgstuff},
159 | number = {6226},
160 | pages = {1138--42},
161 | pmid = {25700174},
162 | primaryClass = {arXiv:gr-qc},
163 | title = {{Cell types in the mouse cortex and hippocampus revealed by single-cell RNA-seq}},
164 | url = {http://science.sciencemag.org/content/347/6226/1138.abstract},
165 | volume = {347},
166 | year = {2015}
167 | }
168 | 
169 | 
170 | 
171 | 
172 | 


--------------------------------------------------------------------------------
/vignettes/images/pbmc4k_cloupe_kmeans7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/vignettes/images/pbmc4k_cloupe_kmeans7.png


--------------------------------------------------------------------------------
/vignettes/images/violin_plot_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/vignettes/images/violin_plot_example.png


--------------------------------------------------------------------------------
/vignettes/images/workflow_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MonashBioinformaticsPlatform/celaref/585f2fb96f8d382803cebea3c6fc7adefa8d2054/vignettes/images/workflow_diagram.png


--------------------------------------------------------------------------------