├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── check-standard.yaml ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── activityScores.R ├── aggregate.R ├── cicero.R ├── data.R ├── fromGviz.R ├── plotting.R ├── runCicero.R └── utils.R ├── README.md ├── code-of-conduct.md ├── codecov.yml ├── data ├── cell_data.rda ├── cicero_data.rda ├── gene_annotation_sample.rda └── human.hg19.genome.rda ├── inst └── CITATION ├── man ├── aggregate_by_cell_bin.Rd ├── aggregate_nearby_peaks.Rd ├── annotate_cds_by_site.Rd ├── assemble_connections.Rd ├── build_gene_activity_matrix.Rd ├── cell_data.Rd ├── cicero-package.Rd ├── cicero_data.Rd ├── compare_connections.Rd ├── df_for_coords.Rd ├── estimate_distance_parameter.Rd ├── find_overlapping_ccans.Rd ├── find_overlapping_coordinates.Rd ├── gene_annotation_sample.Rd ├── generate_ccans.Rd ├── generate_cicero_models.Rd ├── human.hg19.genome.Rd ├── make_atac_cds.Rd ├── make_cicero_cds.Rd ├── make_sparse_matrix.Rd ├── normalize_gene_activities.Rd ├── plot_accessibility_in_pseudotime.Rd ├── plot_connections.Rd ├── ranges_for_coords.Rd └── run_cicero.Rd ├── tests ├── cicero_data_sub.txt ├── feat.txt ├── feat_head.txt ├── figs │ ├── deps.txt │ └── plotting │ │ ├── basic-bar-high-breaks.svg │ │ ├── basic-bar-one.svg │ │ ├── basic-bar.svg │ │ ├── basic-connections-all-bp.svg │ │ ├── basic-connections-chr-bp1.svg │ │ ├── basic-connections-chr.svg │ │ ├── basic-connections-comparison-plot.svg │ │ ├── basic-connections-high-comparison-cutoff.svg │ │ ├── basic-connections-high-cutoff.svg │ │ ├── basic-connections-include-axis-track.svg │ │ ├── basic-connections-plot-bad-chr.svg │ │ ├── basic-connections-plot-comparison-cutoff.svg │ │ ├── basic-connections-plot-cutoff.svg │ │ ├── basic-connections-plot-dt.svg │ │ ├── basic-connections-plot-with-viewpoint-change-colors.svg │ │ ├── basic-connections-plot-with-viewpoint-no-comp.svg │ │ ├── basic-connections-plot-with-viewpoint.svg │ │ ├── 
basic-connections-plot.svg │ │ ├── comparison-connection-color-color-column.svg │ │ ├── comparison-connection-color-comparison-connection-width.svg │ │ ├── comparison-connection-color-type-column-coaccess-no-legend.svg │ │ ├── comparison-connection-color-type-column-coaccess.svg │ │ ├── comparison-connection-color-type-column.svg │ │ ├── comparison-connection-color.svg │ │ ├── comparison-peak-color-color-column.svg │ │ ├── comparison-peak-color-logical-column.svg │ │ ├── comparison-peak-color-type-column.svg │ │ ├── comparison-peak-color.svg │ │ ├── comparison-ymax-plus-cutoff.svg │ │ ├── comparison-ymax.svg │ │ ├── connection-color-color-column.svg │ │ ├── connection-color-connection-width.svg │ │ ├── connection-color-type-column-coaccess-no-legend.svg │ │ ├── connection-color-type-column-coaccess.svg │ │ ├── connection-color-type-column.svg │ │ ├── connection-color.svg │ │ ├── connection-ymax-plus-cutoff.svg │ │ ├── connection-ymax.svg │ │ ├── connections-plot-with-collapsetranscripts-gene.svg │ │ ├── connections-plot-with-collapsetranscripts-longest.svg │ │ ├── connections-plot-with-collapsetranscripts-meta.svg │ │ ├── connections-plot-with-collapsetranscripts-shortest.svg │ │ ├── connections-plot-with-collapsetranscripts-true.svg │ │ ├── connections-plot-with-comparison-color.svg │ │ ├── connections-plot-with-comparison-peak-color-hex.svg │ │ ├── connections-plot-with-comparison-peak-color.svg │ │ ├── connections-plot-with-comparison.svg │ │ ├── connections-plot-with-gene-model-color.svg │ │ ├── connections-plot-with-gene-model-no-genes.svg │ │ ├── connections-plot-with-gene-model-with-comparison.svg │ │ ├── connections-plot-with-gene-model.svg │ │ ├── peak-color-color-column.svg │ │ ├── peak-color-logical-column.svg │ │ ├── peak-color-type-column.svg │ │ └── peak-color.svg ├── human.hg19.genome_sub.txt ├── testthat.R ├── testthat │ ├── _snaps │ │ └── plotting │ │ │ ├── basic-bar-high-breaks.svg │ │ │ ├── basic-bar-one.svg │ │ │ ├── basic-bar.svg │ │ │ ├── 
basic-connections-all-bp.svg │ │ │ ├── basic-connections-chr-bp1.svg │ │ │ ├── basic-connections-chr.svg │ │ │ ├── basic-connections-comparison-plot.svg │ │ │ ├── basic-connections-high-comparison-cutoff.svg │ │ │ ├── basic-connections-high-cutoff.svg │ │ │ ├── basic-connections-include-axis-track.svg │ │ │ ├── basic-connections-plot-bad-chr.svg │ │ │ ├── basic-connections-plot-comparison-cutoff.svg │ │ │ ├── basic-connections-plot-cutoff.svg │ │ │ ├── basic-connections-plot-dt.svg │ │ │ ├── basic-connections-plot-with-viewpoint-change-colors.svg │ │ │ ├── basic-connections-plot-with-viewpoint-no-comp.svg │ │ │ ├── basic-connections-plot-with-viewpoint.svg │ │ │ ├── basic-connections-plot.svg │ │ │ ├── comparison-connection-color-color-column.svg │ │ │ ├── comparison-connection-color-comparison-connection-width.svg │ │ │ ├── comparison-connection-color-type-column-coaccess-no-legend.svg │ │ │ ├── comparison-connection-color-type-column-coaccess.svg │ │ │ ├── comparison-connection-color-type-column.svg │ │ │ ├── comparison-connection-color.svg │ │ │ ├── comparison-peak-color-color-column.svg │ │ │ ├── comparison-peak-color-logical-column.svg │ │ │ ├── comparison-peak-color-type-column.svg │ │ │ ├── comparison-peak-color.svg │ │ │ ├── comparison-ymax-plus-cutoff.svg │ │ │ ├── comparison-ymax.svg │ │ │ ├── connection-color-color-column.svg │ │ │ ├── connection-color-connection-width.svg │ │ │ ├── connection-color-type-column-coaccess-no-legend.svg │ │ │ ├── connection-color-type-column-coaccess.svg │ │ │ ├── connection-color-type-column.svg │ │ │ ├── connection-color.svg │ │ │ ├── connection-ymax-plus-cutoff.svg │ │ │ ├── connection-ymax.svg │ │ │ ├── connections-plot-with-collapsetranscripts-gene.svg │ │ │ ├── connections-plot-with-collapsetranscripts-longest.svg │ │ │ ├── connections-plot-with-collapsetranscripts-meta.svg │ │ │ ├── connections-plot-with-collapsetranscripts-shortest.svg │ │ │ ├── connections-plot-with-collapsetranscripts-true.svg │ │ │ ├── 
connections-plot-with-comparison-color.svg │ │ │ ├── connections-plot-with-comparison-peak-color-hex.svg │ │ │ ├── connections-plot-with-comparison-peak-color.svg │ │ │ ├── connections-plot-with-comparison.svg │ │ │ ├── connections-plot-with-gene-model-color.svg │ │ │ ├── connections-plot-with-gene-model-no-genes.svg │ │ │ ├── connections-plot-with-gene-model-with-comparison.svg │ │ │ ├── connections-plot-with-gene-model.svg │ │ │ ├── peak-color-color-column.svg │ │ │ ├── peak-color-logical-column.svg │ │ │ ├── peak-color-type-column.svg │ │ │ └── peak-color.svg │ ├── test-aggregate.R │ ├── test-plotting.R │ ├── test-runCicero.R │ └── test-utils.R └── tsne_coord.Rdata └── vignettes └── website.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^codecov\.yml$ 5 | ^\.github$ 6 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | schedule: 5 | - cron: '0 0 1 * *' 6 | push: 7 | branches: [main, master, monocle3] 8 | pull_request: 9 | branches: [main, master, monocle3] 10 | 11 | name: R-CMD-check 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | 28 | env: 29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 30 | R_KEEP_PKG_SOURCE: yes 31 | 32 | steps: 33 | - uses: actions/checkout@v3 34 | 35 | - uses: r-lib/actions/setup-pandoc@v2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | http-user-agent: ${{ matrix.config.http-user-agent }} 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | extra-packages: any::rcmdcheck 46 | needs: check 47 | 48 | - uses: r-lib/actions/check-r-package@v2 49 | with: 50 | upload-snapshots: true 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | cicero.Rproj 6 | .DS_Store 7 | cicero-release.Rproj 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | cache: packages 2 | sudo: required 3 | warnings_are_errors: true 4 | 5 | before_install: [ 6 | "sudo add-apt-repository \"deb http://archive.ubuntu.com/ubuntu/ xenial main\" -y", 7 | "sudo apt-get update -q", 8 | "sudo apt-get install libfreetype6" 9 | ] 10 | 11 | language: r 12 | r: 13 | - bioc-release 14 | 
15 | r_binary_packages: 16 | - Matrix 17 | - boot 18 | - class 19 | - cluster 20 | - codetools 21 | - colorspace 22 | - foreign 23 | - gtable 24 | - kernsmooth 25 | - lattice 26 | - littler 27 | - mass 28 | - mgcv 29 | - nlme 30 | - nnet 31 | - rpart 32 | - spatial 33 | - survival 34 | 35 | after_success: 36 | - Rscript -e 'covr::codecov()' 37 | 38 | notifications: 39 | slack: 40 | secure: sFOlT9BYAUZLqt+RDtLdbGbG411FU56IEALGIoPiHHhmCoWEbyaVJfhasRh5SZShhAjUvxiTMPkA05prjrQBHkzVozkPhNPUZSwezp5zaKpsQerBVHtc5jN5xaZinvqLK0TNKk1zq5pnBMwcRg54RVQe/petTsiuttxkmyJSAgztiterifMnhUefaJURNvd2AYvi57agUhu41j3xQnarnXdJR8YZtAaXDoYdC9kckJwrf6ZsBBWCe/P0kg1YZt/86SiCGidcjmNOqbAud7/tKJKzjFaU2R1v8ecyZiGSXWFVfbUt/27Ur+8IzJiV0Qn08Ru6TwJSqS/fbc+2vWYo6MWKkfZtK4HhoWsYXyk7jnOP2DTynFMidthGR+qxLxpBuvQbGtPRgcm1DVjDl2Z3MqXz1RbAmnbCDOWNF21go1yIAKCxgHd2a/2TtLsbJsZwyGwSmrxhoF+CH4kJ2mgKarwyKGTLs5L6/eQfXR31hQc0h0eKMLlBkweTkxOguCrO30cuQvZ+bm/pFmR9doyZLP23/cYbnu0AsxgOa/SJ57MjDG2gKYmuzlBXPhNl30M9OIc0P1WRNHmk6aW4wZn6VKYBOAq1gGMM2k8g5x/OPM01R2c5ZJ7Fwizh4kaocXydfxGoJOSO1JPE+r/G339GgTuXyEekgA0dcF0j3recLyQ= 41 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: cicero 2 | Type: Package 3 | Title: Predict cis-co-accessibility from single-cell chromatin accessibility data 4 | Version: 1.17.2 5 | Authors@R: c( 6 | person("Hannah", "Pliner", email = "hpliner@uw.edu", role = c("aut", "cre")), 7 | person("Cole", "Trapnell", email = "coletrap@uw.edu", role = c("aut"))) 8 | Description: Cicero computes putative cis-regulatory maps from single-cell chromatin 9 | accessibility data. It also extends monocle 2 for use in chromatin accessibility 10 | data. 
11 | Depends: 12 | R (>= 3.5.0), 13 | monocle, 14 | Gviz (>= 1.22.3) 15 | License: MIT + file LICENSE 16 | Encoding: UTF-8 17 | Imports: 18 | assertthat (>= 0.2.0), 19 | Biobase (>= 2.37.2), 20 | BiocGenerics (>= 0.23.0), 21 | data.table (>= 1.10.4), 22 | dplyr (>= 0.7.4), 23 | FNN (>= 1.1), 24 | GenomicRanges (>= 1.30.3), 25 | ggplot2 (>= 2.2.1), 26 | glasso (>= 1.8), 27 | grDevices, 28 | igraph (>= 1.1.0), 29 | IRanges (>= 2.10.5), 30 | Matrix (>= 1.2-12), 31 | methods, 32 | parallel, 33 | plyr (>= 1.8.4), 34 | reshape2 (>= 1.4.3), 35 | S4Vectors (>= 0.14.7), 36 | stats, 37 | stringi, 38 | stringr (>= 1.2.0), 39 | tibble (>= 1.4.2), 40 | tidyr, 41 | VGAM (>= 1.0-5), 42 | utils 43 | RoxygenNote: 7.2.3 44 | Suggests: 45 | AnnotationDbi (>= 1.38.2), 46 | knitr, 47 | markdown, 48 | rmarkdown, 49 | rtracklayer (>= 1.36.6), 50 | testthat, 51 | vdiffr (>= 0.2.3), 52 | covr 53 | VignetteBuilder: knitr 54 | biocViews: Sequencing, Clustering, CellBasedAssays, ImmunoOncology, 55 | GeneRegulation, GeneTarget, Epigenetics, ATACSeq, SingleCell 56 | LazyData: true 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hannah Pliner and Cole Trapnell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(aggregate_by_cell_bin) 4 | export(aggregate_nearby_peaks) 5 | export(annotate_cds_by_site) 6 | export(assemble_connections) 7 | export(build_gene_activity_matrix) 8 | export(compare_connections) 9 | export(df_for_coords) 10 | export(estimate_distance_parameter) 11 | export(find_overlapping_ccans) 12 | export(find_overlapping_coordinates) 13 | export(generate_ccans) 14 | export(generate_cicero_models) 15 | export(make_atac_cds) 16 | export(make_cicero_cds) 17 | export(normalize_gene_activities) 18 | export(plot_accessibility_in_pseudotime) 19 | export(plot_connections) 20 | export(ranges_for_coords) 21 | export(run_cicero) 22 | import(Gviz) 23 | import(VGAM) 24 | import(data.table) 25 | import(ggplot2) 26 | import(monocle) 27 | importFrom(Biobase,"fData<-") 28 | importFrom(Biobase,"pData<-") 29 | importFrom(Biobase,ExpressionSet) 30 | importFrom(Biobase,annotatedDataFrameFrom) 31 | importFrom(Biobase,assayDataNew) 32 | importFrom(Biobase,exprs) 33 | importFrom(Biobase,fData) 34 | importFrom(Biobase,multiassign) 35 | importFrom(Biobase,pData) 36 | importFrom(BiocGenerics,estimateDispersions) 37 | importFrom(BiocGenerics,estimateSizeFactors) 38 | importFrom(IRanges,findOverlaps) 39 | importFrom(data.table,melt.data.table) 40 | 
importFrom(dplyr,"%>%") 41 | importFrom(grDevices,col2rgb) 42 | importFrom(grDevices,dev.cur) 43 | importFrom(grDevices,dev.off) 44 | importFrom(grDevices,palette) 45 | importFrom(grDevices,rainbow) 46 | importFrom(methods,as) 47 | importFrom(methods,callNextMethod) 48 | importFrom(methods,is) 49 | importFrom(methods,new) 50 | importFrom(plyr,.) 51 | importFrom(stats,as.formula) 52 | importFrom(stats,cov) 53 | importFrom(stats,dist) 54 | importFrom(stats,filter) 55 | importFrom(stats,median) 56 | importFrom(utils,combn) 57 | importFrom(utils,read.table) 58 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/NEWS -------------------------------------------------------------------------------- /R/activityScores.R: -------------------------------------------------------------------------------- 1 | #' Calculate initial Cicero gene activity matrix 2 | #' 3 | #' This function calculates the initial Cicero gene activity matrix. After this 4 | #' function, the activity matrix should be normalized with any comparison 5 | #' matrices using the function \code{\link{normalize_gene_activities}}. 6 | #' 7 | #' @param input_cds Binary sci-ATAC-seq input CDS. The input CDS must have a 8 | #' column in the fData table called "gene" which is the gene name if the 9 | #' site is a promoter, and \code{NA} if the site is distal. 10 | #' @param cicero_cons_info Cicero connections table, generally the output of 11 | #' \code{\link{run_cicero}}. This table is a data frame with three required 12 | #' columns named "Peak1", "Peak2", and "coaccess". Peak1 and Peak2 contain 13 | #' coordinates for the two compared elements, and coaccess contains their 14 | #' Cicero co-accessibility score. 
#' @param site_weights NULL or an individual weight for each site in input_cds.
#'   If NULL, every site receives a weight of 1. A named vector is matched to
#'   sites by name; an unnamed vector must contain exactly one weight per site,
#'   in the row order of \code{input_cds}.
#' @param dist_thresh The maximum distance in base pairs between pairs of sites
#'   to include in the gene activity calculation.
#' @param coaccess_cutoff The minimum Cicero co-accessibility score that should
#'   be considered connected.
#'
#' @return Unnormalized gene activity matrix.
#' @export
#'
#' @examples
#'   data("cicero_data")
#'   data("human.hg19.genome")
#'   sample_genome <- subset(human.hg19.genome, V1 == "chr18")
#'   sample_genome$V2[1] <- 100000
#'   input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
#'   input_cds <- detectGenes(input_cds)
#'   input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
#'                                reduction_method = 'tSNE',
#'                                norm_method = "none")
#'   tsne_coords <- t(reducedDimA(input_cds))
#'   row.names(tsne_coords) <- row.names(pData(input_cds))
#'   cicero_cds <- make_cicero_cds(input_cds,
#'                                 reduced_coordinates = tsne_coords)
#'   cons <- run_cicero(cicero_cds, sample_genome, sample_num=2)
#'
#'   data(gene_annotation_sample)
#'   gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
#'   names(gene_annotation_sub)[4] <- "gene"
#'   input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
#'   num_genes <- pData(input_cds)$num_genes_expressed
#'   names(num_genes) <- row.names(pData(input_cds))
#'   unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
#'
#'
build_gene_activity_matrix <- function(input_cds,
                                       cicero_cons_info,
                                       site_weights=NULL,
                                       dist_thresh=250000,
                                       coaccess_cutoff=0.25){
  assertthat::assert_that(is(input_cds, "CellDataSet"))
  assertthat::assert_that(is.data.frame(cicero_cons_info))
  assertthat::assert_that(assertthat::has_name(cicero_cons_info, "Peak1"),
                          assertthat::has_name(cicero_cons_info, "Peak2"),
                          assertthat::has_name(cicero_cons_info, "coaccess"))

  assertthat::assert_that(assertthat::has_name(fData(input_cds), "gene"),
                          msg = paste("The fData table of the input CDS must",
                                      "have a column called 'gene'. See",
                                      "documentation for details.",
                                      collapse=" "))

  accessibility_mat <- exprs(input_cds)
  all_site_names <- row.names(accessibility_mat)

  if (is.null(site_weights)) {
    # Default: every site contributes equally.
    site_weights <- rep(1, nrow(accessibility_mat))
    names(site_weights) <- all_site_names
  } else if (is.null(names(site_weights))) {
    # The downstream code looks weights up by site name, so an unnamed vector
    # can only be used if it lines up one-to-one with the rows of the CDS.
    assertthat::assert_that(
      length(site_weights) == nrow(accessibility_mat),
      msg = paste("site_weights must either be named by site or contain",
                  "exactly one weight per site in input_cds."))
    names(site_weights) <- all_site_names
  }

  gene_promoter_activity <-
    build_composite_gene_activity_matrix(input_cds,
                                         site_weights,
                                         cicero_cons_info,
                                         dist_thresh=dist_thresh,
                                         coaccess_cutoff=coaccess_cutoff)

  gene_activity_scores <- gene_promoter_activity

  return(gene_activity_scores)
}

# Internal worker for build_gene_activity_matrix.
#
# For every promoter peak (a site whose fData "gene" entry is not NA) the
# score is the promoter's own accessibility plus a weighted sum of the
# accessibility of the distal sites linked to it in cicero_cons_info
# (coaccess >= coaccess_cutoff and dist < dist_thresh). Promoter scores are
# then summed per gene. site_weights must be a vector named by site.
build_composite_gene_activity_matrix <- function(input_cds,
                                                 site_weights,
                                                 cicero_cons_info,
                                                 dist_thresh=250000,
                                                 coaccess_cutoff=0.25) {
  accessibility_mat <- exprs(input_cds)

  # Table of promoter peaks and the gene each one belongs to
  promoter_peak_table <- fData(input_cds)
  promoter_peak_table$peak <- as.character(row.names(promoter_peak_table))
  promoter_peak_table <-
    promoter_peak_table[!is.na(promoter_peak_table$gene),]
  promoter_peak_table <- promoter_peak_table[,c("peak", "gene")]
  promoter_peak_table$gene <- as.character(promoter_peak_table$gene)
  promoter_peaks <- unique(promoter_peak_table$peak)

  # Make site_weight matrix
  site_names <- names(site_weights)
  site_weights <- as(Matrix::Diagonal(x=as.numeric(site_weights)),
                     "sparseMatrix")
  row.names(site_weights) <- site_names
  colnames(site_weights) <- site_names

  # Find distance between cicero peaks. If distance already calculated, skip
  if ("dist" %in% colnames(cicero_cons_info) == FALSE) {
    Peak1_cols <- split_peak_names(cicero_cons_info$Peak1)
    Peak2_cols <- split_peak_names(cicero_cons_info$Peak2)
    Peak1_bp <- round((as.integer(Peak1_cols[,3]) +
                         as.integer(Peak1_cols[,2])) / 2)
    Peak2_bp <- round((as.integer(Peak2_cols[,3]) +
                         as.integer(Peak2_cols[,2])) / 2)
    cicero_cons_info$dist <- abs(Peak2_bp - Peak1_bp)
  }

  # Get connections between promoters and distal sites above coaccess
  # threshold
  nonneg_cons <-
    cicero_cons_info[(cicero_cons_info$Peak1 %in% promoter_peaks |
                        cicero_cons_info$Peak2 %in% promoter_peaks) &
                       cicero_cons_info$coaccess >= coaccess_cutoff &
                       cicero_cons_info$dist < dist_thresh,]
  nonneg_cons <- nonneg_cons[,c("Peak1", "Peak2", "coaccess")]
  nonneg_cons <- nonneg_cons[!duplicated(nonneg_cons),]

  nonneg_cons$Peak1 <- as.character(nonneg_cons$Peak1)
  nonneg_cons$Peak2 <- as.character(nonneg_cons$Peak2)

  # Add a zero-weight self connection for every promoter so that each
  # promoter is present in the connectivity matrix even without links.
  nonneg_cons <- rbind(nonneg_cons,
                       data.frame(Peak1=promoter_peaks,
                                  Peak2=promoter_peaks,
                                  coaccess=0))

  # Make square matrix of connections from distal to proximal
  distal_connectivity_matrix <- make_sparse_matrix(nonneg_cons,
                                                   x.name="coaccess")

  # Make connectivity matrix of promoters versus all.
  # drop=FALSE keeps a matrix even when there is a single promoter peak.
  promoter_conn_matrix <-
    distal_connectivity_matrix[promoter_peaks, , drop=FALSE]

  # Get list of promoter and distal sites in accessibility mat
  promoter_safe_sites <- intersect(rownames(promoter_conn_matrix),
                                   row.names(accessibility_mat))
  distal_safe_sites <- intersect(colnames(promoter_conn_matrix),
                                 row.names(accessibility_mat))
  distal_safe_sites <- setdiff(distal_safe_sites, promoter_safe_sites)

  # Get accessibility info for promoters
  promoter_access_mat_in_cicero_map <-
    accessibility_mat[promoter_safe_sites,, drop=FALSE]

  # Get accessibility for distal sites
  distal_activity_scores <- accessibility_mat[distal_safe_sites,, drop=FALSE]

  # Scale connectivity matrix by site_weights
  promoter_to_distal <- promoter_conn_matrix[,distal_safe_sites, drop=FALSE]
  scaled_site_weights <-
    site_weights[distal_safe_sites, distal_safe_sites, drop=FALSE]
  total_linked_site_weights <- promoter_to_distal %*% scaled_site_weights
  total_linked_site_weights <- 1/Matrix::rowSums(total_linked_site_weights,
                                                 na.rm=TRUE)
  # Promoters without any linked weight give 1/0 = Inf (or NA/NaN); their
  # distal contribution is defined to be zero.
  total_linked_site_weights[!is.finite(total_linked_site_weights)] <- 0
  site_names <- names(total_linked_site_weights)
  total_linked_site_weights <- Matrix::Diagonal(x=total_linked_site_weights)
  row.names(total_linked_site_weights) <- site_names
  colnames(total_linked_site_weights) <- site_names
  scaled_site_weights <- total_linked_site_weights %*%
    promoter_to_distal %*%
    scaled_site_weights
  # Cap each individual link weight at 1
  scaled_site_weights@x[scaled_site_weights@x > 1] <- 1

  # Multiply distal accessibility by site weights
  distal_activity_scores <- scaled_site_weights %*% distal_activity_scores

  distal_activity_scores <-
    distal_activity_scores[row.names(promoter_access_mat_in_cicero_map),,
                           drop=FALSE]

  # Sum distal and promoter scores
  promoter_activity_scores <- distal_activity_scores +
    promoter_access_mat_in_cicero_map

  # Make and populate final matrix (genes x promoter peaks indicator)
  promoter_gene_mat <-
    Matrix::sparseMatrix(j=as.numeric(factor(promoter_peak_table$peak)),
                         i=as.numeric(factor(promoter_peak_table$gene)),
                         x=1)
  colnames(promoter_gene_mat) = levels(factor(promoter_peak_table$peak))
  row.names(promoter_gene_mat) = levels(factor(promoter_peak_table$gene))
  # drop=FALSE keeps a matrix even when only one promoter peak is scored.
  promoter_gene_mat <-
    promoter_gene_mat[, row.names(promoter_activity_scores), drop=FALSE]
  gene_activity_scores <- promoter_gene_mat %*% promoter_activity_scores

  return(gene_activity_scores)
}

#' Normalize gene activities
#'
#' Normalize the output of \code{\link{build_gene_activity_matrix}}. Input is
#' either one or multiple gene activity matrices. Any gene activities to be
#' compared amongst each other should be normalized together.
#'
#'
#' @param activity_matrices A gene activity matrix, output from
#'   \code{\link{build_gene_activity_matrix}}, or a list of gene activity
#'   matrices to be normalized together.
#' @param cell_num_genes A named vector of the total number of accessible sites
#'   per cell. Names should correspond to the cell names in the activity
#'   matrices. These values can be found in the "num_genes_expressed" column
#'   of the pData table of the CDS used to calculate the gene activity matrix.
#'
#' @return Normalized activity matrix or matrices.
#' @export
#'
#' @examples
#'   data("cicero_data")
#'   data("human.hg19.genome")
#'   sample_genome <- subset(human.hg19.genome, V1 == "chr18")
#'   sample_genome$V2[1] <- 100000
#'   input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
#'   input_cds <- detectGenes(input_cds)
#'   input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
#'                                reduction_method = 'tSNE',
#'                                norm_method = "none")
#'   tsne_coords <- t(reducedDimA(input_cds))
#'   row.names(tsne_coords) <- row.names(pData(input_cds))
#'   cicero_cds <- make_cicero_cds(input_cds,
#'                                 reduced_coordinates = tsne_coords)
#'   cons <- run_cicero(cicero_cds, sample_genome, sample_num=2)
#'
#'   data(gene_annotation_sample)
#'   gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
#'   names(gene_annotation_sub)[4] <- "gene"
#'   input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
#'   num_genes <- pData(input_cds)$num_genes_expressed
#'   names(num_genes) <- row.names(pData(input_cds))
#'   unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
#'   cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes)
#'
#'
normalize_gene_activities <- function(activity_matrices,
                                      cell_num_genes){
  single_matrix <- !is.list(activity_matrices)

  # Combine all matrices column-wise and remember which input each cell
  # (column) came from.
  if (single_matrix) {
    scores <- activity_matrices
    normalization_df <- data.frame(cell = colnames(activity_matrices),
                                   cell_group=1)
  } else {
    scores <- do.call(cbind, activity_matrices)

    normalization_df <-
      do.call(rbind,
              lapply(seq_along(activity_matrices),
                     function(x) {
                       data.frame(cell = colnames(activity_matrices[[x]]),
                                  cell_group=rep(x, ncol(activity_matrices[[x]])))
                     }))
  }

  # Genes and cells without any activity carry no information and would give
  # log(0) in the model below, so they are removed. The per-cell table must be
  # filtered the same way to stay aligned with the columns of `scores`.
  keep_genes <- Matrix::rowSums(scores) != 0
  keep_cells <- Matrix::colSums(scores) != 0
  scores <- scores[keep_genes, keep_cells, drop=FALSE]
  normalization_df <- normalization_df[keep_cells, , drop=FALSE]

  cell_names <- as.character(normalization_df$cell)
  if (is.null(names(cell_num_genes)) ||
      !all(cell_names %in% names(cell_num_genes))) {
    stop("cell_num_genes must be a named vector with an entry for every ",
         "cell in the activity matrices.")
  }

  normalization_df$cell_group <- factor(normalization_df$cell_group)
  normalization_df$total_activity <- Matrix::colSums(scores)
  normalization_df$total_sites <- cell_num_genes[cell_names]

  # Model total activity as a function of the number of accessible sites.
  # A group-specific fit is only possible with at least two groups.
  if (nlevels(normalization_df$cell_group) > 1) {
    activity_model <- stats::lm(log(total_activity) ~ log(total_sites) *
                                  cell_group, data=normalization_df)
  } else {
    activity_model <- stats::lm(log(total_activity) ~ log(total_sites),
                                data=normalization_df)
  }

  normalization_df$fitted_curve <-
    exp(as.vector(stats::predict(activity_model, type="response")))

  size_factors <- log(normalization_df$fitted_curve) /
    mean(log(normalization_df$fitted_curve))

  size_factors <- Matrix::Diagonal(x=1/size_factors)
  row.names(size_factors) <- cell_names
  colnames(size_factors) <- row.names(size_factors)

  # Adjust the scores by the size factors
  scores <- Matrix::t(size_factors %*% Matrix::t(scores))

  # Exponentiate the stored values, capped to avoid overflow
  scores@x <- pmin(1e9, exp(scores@x) - 1)

  # Rescale so that every cell's activities sum to 1
  sum_activity_scores <- Matrix::colSums(scores)

  scale_factors <- Matrix::Diagonal(x=1/sum_activity_scores)
  row.names(scale_factors) <- cell_names
  colnames(scale_factors) <- row.names(scale_factors)

  scores <- Matrix::t(scale_factors %*% Matrix::t(scores))

  # Return the scores in the layout of each input matrix. Genes and cells
  # that were removed above (all-zero activity) are omitted from the output
  # instead of triggering a "subscript out of bounds" error.
  extract_like <- function(x) {
    scores[intersect(row.names(x), row.names(scores)),
           intersect(colnames(x), colnames(scores)),
           drop=FALSE]
  }

  if (single_matrix) {
    ret <- extract_like(activity_matrices)
  } else {
    ret <- lapply(activity_matrices, extract_like)
  }
  return(ret)
}

# --------------------------------------------------------------------------------
# /R/aggregate.R:
# --------------------------------------------------------------------------------
#' Make an aggregate count cds by collapsing nearby peaks
#'
#' @param cds A CellDataSet (CDS) object.
For example, output of 4 | #' \code{\link{make_atac_cds}} 5 | #' @param distance The distance within which peaks should be collapsed 6 | #' 7 | #' @return A CDS object with aggregated peaks. 8 | #' 9 | #' @details This function takes an input CDS object and collapses features 10 | #' within a given distance by summing the values for the collapsed features. 11 | #' Ranges of features are determined by their feature name, so the feature 12 | #' names must be in the form "chr1:1039013-2309023". 13 | #' 14 | #' @export 15 | #' 16 | #' @examples 17 | #' data("cicero_data") 18 | #' input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 19 | #' agg_cds <- aggregate_nearby_peaks(input_cds, distance = 10000) 20 | #' 21 | aggregate_nearby_peaks <- function(cds, 22 | distance = 1000) { 23 | assertthat::assert_that(assertthat::is.number(distance)) 24 | assertthat::assert_that(is(cds, "CellDataSet")) 25 | 26 | fData(cds)$bin <- make_bin_col(cds, distance) 27 | cds <- cds[!is.na(fData(cds)$bin),] 28 | 29 | exprs_dt <- sparse_to_datatable(Matrix::Matrix(exprs(cds), sparse = TRUE)) 30 | bin_info <- data.table::data.table(site = row.names(fData(cds)), 31 | bin = fData(cds)$bin) 32 | data.table::setkey(bin_info, "site") 33 | data.table::setkey(exprs_dt, "site") 34 | exprs_dt <- merge(exprs_dt, bin_info) 35 | 36 | data.table::setkey(exprs_dt, "cell", "bin") 37 | genomic_bins <- exprs_dt[,sum(val), by="cell,bin"] 38 | out <- Matrix::sparseMatrix(j=as.numeric(factor(genomic_bins$cell)), 39 | i=as.numeric(factor(genomic_bins$bin)), 40 | x=genomic_bins$V1) 41 | 42 | match_table <- 43 | data.table::data.table(num = as.numeric(factor(genomic_bins$bin)), 44 | name = genomic_bins$bin) 45 | match_table <- unique(match_table) 46 | 47 | match_table2 <- 48 | data.table::data.table(num = as.numeric(factor(genomic_bins$cell)), 49 | name = genomic_bins$cell) 50 | match_table2 <- unique(match_table2) 51 | 52 | fdf <- data.frame(site_name = levels(factor(genomic_bins$bin)), 53 | row.names = 
levels(factor(genomic_bins$bin))) 54 | pdf <- data.frame(cells = levels(factor(genomic_bins$cell)), 55 | row.names = levels(factor(genomic_bins$cell))) 56 | fdf$bin <- NULL 57 | pdf <- pdf[row.names(pData(cds)),] 58 | pdf <- cbind(pdf, pData(cds)) 59 | pdf$pdf <- NULL 60 | 61 | data.table::setorder(match_table, "num") 62 | row.names(out) <- match_table$name 63 | 64 | data.table::setorder(match_table2, "num") 65 | colnames(out) <- match_table2$name 66 | 67 | out <- out[row.names(fdf), row.names(pdf)] 68 | 69 | fd <- new("AnnotatedDataFrame", data = fdf) 70 | pd <- new("AnnotatedDataFrame", data = pdf) 71 | 72 | if (is(exprs(cds), "dgCMatrix")) { 73 | compart_cds <- suppressWarnings(newCellDataSet(as(out, "sparseMatrix"), 74 | phenoData = pd, 75 | featureData = fd, 76 | expressionFamily=negbinomial.size(), 77 | lowerDetectionLimit=0)) 78 | } else { 79 | compart_cds <- suppressWarnings(newCellDataSet(as.matrix(out), 80 | phenoData = pd, 81 | featureData = fd, 82 | expressionFamily=negbinomial.size(), 83 | lowerDetectionLimit=0)) 84 | } 85 | 86 | return(compart_cds) 87 | } 88 | 89 | 90 | make_bin_col <- function(cds, distance) { 91 | coords_string_df <- df_for_coords(row.names(exprs(cds))) 92 | names(coords_string_df)[2:3] <- c("start", "stop") 93 | coords_ranges <- GenomicRanges::makeGRangesFromDataFrame(coords_string_df) 94 | coords_range_merge <- GenomicRanges::reduce(coords_ranges, 95 | min.gapwidth = distance) 96 | 97 | merge_df <- data.frame(seqnames=GenomicRanges::seqnames(coords_range_merge), 98 | starts=GenomicRanges::start(coords_range_merge), 99 | ends=GenomicRanges::end(coords_range_merge)) 100 | merge_df$name <- paste(merge_df$seqnames, 101 | merge_df$starts, 102 | merge_df$ends, sep="_") 103 | 104 | overlaps <- GenomicRanges::findOverlaps(coords_ranges, 105 | coords_range_merge, 106 | select="first") 107 | overlaps <- as.data.frame(overlaps) 108 | 109 | merge_df <- merge_df[overlaps$overlaps,] 110 | merge_df$name 111 | } 112 | 113 | sparse_to_datatable <- 
function(sparse) {
  # Work on the transpose in triplet (i, j, x) form: rows of `dgt_mat` are the
  # columns of `sparse` (reported as `cell`) and its columns are the rows of
  # `sparse` (reported as `site`).
  dgt_mat <- as(Matrix::t(sparse), "dgTMatrix")
  # @i and @j are zero-based, hence the +1 when indexing the dimnames
  dt <- data.table(cell = dgt_mat@Dimnames[[1]][dgt_mat@i+1],
                   site=dgt_mat@Dimnames[[2]][dgt_mat@j+1],
                   val = dgt_mat@x)
  setkey(dt, "site", "cell")
  dt
}

#' Aggregate count CDS by groups of cells
#'
#' Aggregates a CDS based on an indicator column in the \code{pData} table
#'
#' @importFrom dplyr %>%
#' @importFrom plyr .
#' @param cds A CDS object to be aggregated
#' @param group_col The name of the column in the \code{pData} table that
#'   indicates the cells assignment to its aggregate bin.
#'
#' @details This function takes an input CDS object and collapses cells based
#'   on a column in the \code{pData} table by summing the values within the
#'   cell group.
#'
#' @return A count cds aggregated by group_col
#' @export
#'
#' @examples
#'   data("cicero_data")
#'   #input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
#'   #pData(input_cds)$cell_subtype <- rep(1:10, times=20)
#'   #binned_input_lin <-aggregate_by_cell_bin(input_cds, "cell_subtype")
#'
aggregate_by_cell_bin <- function(cds, group_col) {
  assertthat::assert_that(is(cds, "CellDataSet"))
  assertthat::assert_that(is.character(group_col))
  assertthat::assert_that(group_col %in% names(pData(cds)),
                          msg = "group_col is missing from your pData table")

  # Cell names move into a `rowname` column so each group can look up its
  # own columns of the expression matrix.
  pData_grouping <- pData(cds) %>%
    tibble::rownames_to_column() %>%
    dplyr::group_by_at(group_col)

  # Sum the counts per site within every group. `drop = FALSE` keeps the
  # subset two-dimensional when a group contains exactly one cell; without it
  # the subset collapses to a plain vector and the rowSums()/row.names() calls
  # in agg_cells() cannot be applied.
  cell_bins <- pData_grouping %>%
    dplyr::do(agg_cells(exprs(cds)[, .$rowname, drop = FALSE]))
  var_cols <- setdiff(colnames(cell_bins), c("site", "compartment_count"))

  # Long (group, site, count) table -> wide table: one row per site, one
  # column per group.
  agg_counts <- reshape2::dcast(cell_bins,
                                as.formula(paste("site", "~",
                                                 paste(var_cols, collapse="+"))),
                                value.var="compartment_count")

  # Per-group phenotype table: the mean of every numeric pData column, plus
  # the tally column added by add_tally().
  pData_cols <- as.data.frame(pData_grouping %>%
                                dplyr::group_by_at(group_col) %>%
                                dplyr::add_tally() %>%
                                dplyr::summarise_if(is.numeric,
                                                    mean,
                                                    na.rm = TRUE))

  # NOTE(review): assumes the group columns produced by dcast() are in the
  # same order as the rows of the grouped summary -- confirm.
  rownames(pData_cols) <- colnames(agg_counts)[-1]

  fdf <- data.frame(site_name = agg_counts$site, row.names = agg_counts$site)

  bin_names <- colnames(agg_counts)[-1]

  pdf <- pData_cols

  fd <- new("AnnotatedDataFrame", data = fdf)
  pd <- new("AnnotatedDataFrame", data = pdf)
  # `drop = FALSE` keeps the bin name as a column name when there is only a
  # single group.
  out <- agg_counts[, bin_names, drop = FALSE]

  compart_cds <- suppressWarnings(newCellDataSet(as.matrix(out),
                                                 phenoData = pd,
                                                 featureData = fd,
                                                 expressionFamily=negbinomial.size(),
                                                 lowerDetectionLimit=0))

  compart_cds <- detectGenes(compart_cds, min_expr=0.1)
  compart_cds <- estimateSizeFactorsSimp(compart_cds)
  compart_cds <- estimateDispersionsSimp(compart_cds)

  fData(compart_cds)$use_for_ordering <- FALSE

  compart_cds
}

# Row sums of one group's expression sub-matrix, returned as a data frame with
# a `compartment_count` column and a `site` column holding the row names.
# `exprs_mat` must be two-dimensional (callers subset with `drop = FALSE`).
agg_cells <- function(exprs_mat){
  cell_bins <- data.frame(compartment_count=Matrix::rowSums(exprs_mat))
  cell_bins$site <- row.names(exprs_mat)
  return (cell_bins)
}

-------------------------------------------------------------------------------- /R/cicero.R: --------------------------------------------------------------------------------
#' cicero
#'
#' @import monocle
#' @import VGAM
#' @import data.table
#' @import ggplot2
#' @importFrom Biobase exprs pData fData ExpressionSet annotatedDataFrameFrom
#'             multiassign assayDataNew "fData<-" "pData<-"
#' @importFrom grDevices col2rgb dev.cur dev.off palette rainbow
#' @importFrom methods as callNextMethod is new
#' @importFrom stats as.formula cov dist filter median
#' @importFrom utils combn read.table
#' @importFrom BiocGenerics estimateDispersions estimateSizeFactors
"_PACKAGE"

## quiets concerns of R CMD check re:
the .'s that appear in pipelines 17 | #utils::globalVariables(c(".")) 18 | 19 | ## temporary until i figure out a fix 20 | #utils::globalVariables(c("val", "value", "CCAN", "V1", "f_id")) 21 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Example single-cell chromatin accessibility data 2 | #' 3 | #' A dataset containing a subset of a single-cell ATAC-seq 4 | #' dataset collected on Human Skeletal Muscle Myoblasts. 5 | #' Only includes data from chromosome 18. 6 | #' 7 | #' @format A data frame with 35137 rows and 3 variables: 8 | #' \describe{ 9 | #' \item{Peak}{Peak information} 10 | #' \item{Cell}{Cell ID} 11 | #' \item{Count}{Reads per cell per peak} 12 | #' } 13 | #' @usage data(cicero_data) 14 | "cicero_data" 15 | 16 | #' Chromosome lengths from human genome hg19 17 | #' 18 | #' A list of the chromosomes in hg19 and their lengths 19 | #' in base pairs. 20 | #' 21 | #' @format A data frame with 93 rows and 2 variables: 22 | #' \describe{ 23 | #' \item{V1}{Chromosome} 24 | #' \item{V2}{Chromosome length, base pairs} 25 | #' } 26 | #' @usage data(human.hg19.genome) 27 | "human.hg19.genome" 28 | 29 | #' Example gene annotation information 30 | #' 31 | #' Gencode gene annotation data from chromosome 18 of the 32 | #' human genome (hg19). 
#'
#' @format A data frame with 15129 rows and 8 variables:
#' \describe{
#'   \item{chromosome}{Chromosome}
#'   \item{start}{Exon starting base}
#'   \item{end}{Exon ending base}
#'   \item{strand}{Exon mapping direction}
#'   \item{feature}{Feature type}
#'   \item{gene}{Gene ID}
#'   \item{transcript}{Transcript ID}
#'   \item{symbol}{Gene symbol}
#' }
#' @usage data(gene_annotation_sample)
"gene_annotation_sample"

#' Metadata for example cells in cicero_data
#'
#' Metadata information for cicero_data
#'
#' @format A data frame with 200 rows and 2 variables:
#' \describe{
#'   \item{timepoint}{Time at cell collection}
#'   \item{cell}{Cell barcode}
#' }
#' @usage data(cell_data)
"cell_data"

-------------------------------------------------------------------------------- /R/fromGviz.R: --------------------------------------------------------------------------------
# NOTE(review): judging by the file name and the Gviz::: internals used below,
# this code is presumably adapted from the Gviz package -- confirm.

# S4 generic for drawing a track's axis; dispatches on the class of GdObject.
setGeneric("drawAxis", function(GdObject, ...) standardGeneric("drawAxis"))
# Default method: a generic GdObject draws no axis and returns NULL.
setMethod("drawAxis", signature(GdObject="GdObject"), function(GdObject, ...)
  return(NULL))

# drawAxis method for CustomTrack: draws a y axis whose scale is the track's
# `ylim` display parameter. `from` and `to` are accepted but not used in this
# body.
setMethod("drawAxis", signature(GdObject="CustomTrack"), function(GdObject,
                                                                  from,
                                                                  to, ...) {
  ylim <- displayPars(GdObject)$ylim
  # one sixth of the current viewport width is reserved for the axis labels
  hSpaceAvail <- Gviz:::vpLocation()$isize["width"]/6
  #yscale <- extendrange(r=ylim, f=0.05) #extends axis by 5%
  yscale <- ylim
  # NOTE(review): `col` is assigned but not referenced again in this method
  col <- Gviz:::.dpOrDefault(GdObject, "col.axis", "white")
  acex <- Gviz:::.dpOrDefault(GdObject, "cex.axis")
  acol <- Gviz:::.dpOrDefault(GdObject, "col.axis", "white")
  at <- pretty(yscale) #finds breakpoints
  #at <- at[at>=sort(ylim)[1] & at<=sort(ylim)[2]]
  # No explicit "cex.axis" display parameter: derive a label size from the
  # space the tick labels need versus the space available, never below 0.6.
  if(is.null(acex))
  {
    vSpaceNeeded <- max(as.numeric(grid::convertWidth(grid::stringHeight(at),
                                                      "inches")))*length(at)*1.5
    hSpaceNeeded <- max(as.numeric(grid::convertWidth(grid::stringWidth(at),
                                                      "inches")))
    vSpaceAvail <- abs(diff(range(at)))/
      abs(diff(yscale))*Gviz:::vpLocation()$isize["height"]

    acex <- max(0.6, min(vSpaceAvail/vSpaceNeeded, hSpaceAvail/hSpaceNeeded))
  }
  # NOTE(review): `nlevs` is assigned but not referenced again in this method
  nlevs <- max(1, nlevels(factor(Gviz:::.dpOrDefault(GdObject, "groups"))))
  # Draw the axis inside a temporary viewport that carries the y scale, then
  # pop that viewport again.
  vpTitleAxis <- grid::viewport(x=0.95, width=0.2, yscale= yscale, just=0)
  grid::pushViewport(vpTitleAxis)
  suppressWarnings(grid::grid.yaxis(gp=grid::gpar(col=acol, cex=acex), at=at))
  grid::popViewport(1)
})



##-----------------------------------------------------------------------------
## CustomTrack:
##
## A track class to allow for user-defined plotting functions
##-----------------------------------------------------------------------------
# Extends GdObject with two slots: `plottingFunction` (a function) and
# `variables` (a list).
setClass("CustomTrack",
         contains=c("GdObject"),
         representation=representation(plottingFunction="function",
                                       variables="list"),
         prototype=prototype(dp=DisplayPars()))

# Initializer: updates the object's parameters via Gviz:::.updatePars, stores
# the plotting function and the variables list in their slots, then hands the
# remaining arguments to the next initialize method.
setMethod("initialize", "CustomTrack", function(.Object, plottingFunction,
                                                variables, ...) {
  .Object <- Gviz:::.updatePars(.Object, "CustomTrack")
  .Object@plottingFunction <- plottingFunction
  .Object@variables <- variables
  .Object <- callNextMethod(.Object, ...)
  return(.Object)
})


# Constructor for CustomTrack objects. By default the plotting function has an
# empty body, `variables` is an empty list and the track is named
# "CustomTrack"; additional arguments are forwarded to new().
CustomTrack <- function(plottingFunction=function(GdObject,
                                                  prepare=FALSE, ...){},
                        variables=list(), name="CustomTrack", ...){
  return(new("CustomTrack", plottingFunction=plottingFunction,
             variables=variables, name=name, ...))
}

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
![example workflow](https://github.com/cole-trapnell-lab/cicero-release/actions/workflows/check-standard.yaml/badge.svg)
[![Coverage status](https://codecov.io/gh/cole-trapnell-lab/cicero-release/branch/master/graph/badge.svg)](https://codecov.io/github/cole-trapnell-lab/cicero-release?branch=master)
# Cicero
### Predicting the cis-regulatory landscape

Please see our [website](http://cole-trapnell-lab.github.io/cicero-release/) for information on installing and using Cicero

-------------------------------------------------------------------------------- /code-of-conduct.md: --------------------------------------------------------------------------------
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. 
Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at coletrap [at] uw.edu. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | 78 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | patch: 10 | default: 11 | target: auto 12 | threshold: 1% 13 | -------------------------------------------------------------------------------- /data/cell_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/cell_data.rda -------------------------------------------------------------------------------- /data/cicero_data.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/cicero_data.rda -------------------------------------------------------------------------------- /data/gene_annotation_sample.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/gene_annotation_sample.rda -------------------------------------------------------------------------------- /data/human.hg19.genome.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/human.hg19.genome.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citEntry(entry="article", 2 | title = "Cicero Predicts cis-Regulatory DNA Interactions from Single-Cell Chromatin Accessibility Data", 3 | journal = "Molecular Cell", 4 | volume = "71", 5 | number = "5", 6 | pages = "858 - 871.e8", 7 | year = 2018, 8 | issn = "1097-2765", 9 | doi = "https://doi.org/10.1016/j.molcel.2018.06.044", 10 | author = personList( as.person("Hannah A. Pliner"), 11 | as.person("Jonathan S. Packer"), 12 | as.person("José L. McFaline-Figueroa"), 13 | as.person("Darren A. Cusanovich"), 14 | as.person("Riza M. Daza"), 15 | as.person("Delasa Aghamirzaie"), 16 | as.person("Sanjay Srivatsan"), 17 | as.person("Xiaojie Qiu"), 18 | as.person("Dana Jackson"), 19 | as.person("Anna Minkina"), 20 | as.person("Andrew C. Adey"), 21 | as.person("Frank J. Steemers"), 22 | as.person("Jay Shendure"), 23 | as.person("Cole Trapnell")), 24 | textVersion = 25 | paste("Hannah A. Pliner, Jay Shendure & Cole Trapnell et. al. 
(2018).", 26 | "Cicero Predicts cis-Regulatory DNA Interactions from Single-Cell Chromatin Accessibility Data.", 27 | "Molecular Cell,", "71,", "858-871.e8." ) ) -------------------------------------------------------------------------------- /man/aggregate_by_cell_bin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aggregate.R 3 | \name{aggregate_by_cell_bin} 4 | \alias{aggregate_by_cell_bin} 5 | \title{Aggregate count CDS by groups of cells} 6 | \usage{ 7 | aggregate_by_cell_bin(cds, group_col) 8 | } 9 | \arguments{ 10 | \item{cds}{A CDS object to be aggregated} 11 | 12 | \item{group_col}{The name of the column in the \code{pData} table that 13 | indicates the cells assignment to its aggregate bin.} 14 | } 15 | \value{ 16 | A count cds aggregated by group_col 17 | } 18 | \description{ 19 | Aggregates a CDS based on an indicator column in the \code{pData} table 20 | } 21 | \details{ 22 | This function takes an input CDS object and collapses cells based 23 | on a column in the \code{pData} table by summing the values within the 24 | cell group. 25 | } 26 | \examples{ 27 | data("cicero_data") 28 | #input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 29 | #pData(input_cds)$cell_subtype <- rep(1:10, times=20) 30 | #binned_input_lin <-aggregate_by_cell_bin(input_cds, "cell_subtype") 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/aggregate_nearby_peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aggregate.R 3 | \name{aggregate_nearby_peaks} 4 | \alias{aggregate_nearby_peaks} 5 | \title{Make an aggregate count cds by collapsing nearby peaks} 6 | \usage{ 7 | aggregate_nearby_peaks(cds, distance = 1000) 8 | } 9 | \arguments{ 10 | \item{cds}{A CellDataSet (CDS) object. 
For example, output of 11 | \code{\link{make_atac_cds}}} 12 | 13 | \item{distance}{The distance within which peaks should be collapsed} 14 | } 15 | \value{ 16 | A CDS object with aggregated peaks. 17 | } 18 | \description{ 19 | Make an aggregate count cds by collapsing nearby peaks 20 | } 21 | \details{ 22 | This function takes an input CDS object and collapses features 23 | within a given distance by summing the values for the collapsed features. 24 | Ranges of features are determined by their feature name, so the feature 25 | names must be in the form "chr1:1039013-2309023". 26 | } 27 | \examples{ 28 | data("cicero_data") 29 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 30 | agg_cds <- aggregate_nearby_peaks(input_cds, distance = 10000) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/annotate_cds_by_site.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{annotate_cds_by_site} 4 | \alias{annotate_cds_by_site} 5 | \title{Add feature data columns to fData} 6 | \usage{ 7 | annotate_cds_by_site( 8 | cds, 9 | feature_data, 10 | verbose = FALSE, 11 | maxgap = 0, 12 | all = FALSE, 13 | header = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{cds}{A CDS object.} 18 | 19 | \item{feature_data}{Data frame, or a character path to a file of 20 | feature data. If a path, the file should be tab separated. Default assumes 21 | no header; if your file has a header, set \code{header = TRUE}. For 22 | either a data frame or a path, the file should be in bed-like format, with 23 | the first 3 columns containing chromosome, start and stop respectively. 
24 | The remaining columns will be added to the \code{fData} table as feature 25 | data.} 26 | 27 | \item{verbose}{Logical, should progress messages be printed?} 28 | 29 | \item{maxgap}{The maximum number of base pairs allowed between the peak and 30 | the feature for the feature and peak to be considered overlapping. 31 | Default = 0 (overlapping). Details in 32 | \code{\link[IRanges]{findOverlaps-methods}}. If \code{maxgap} 33 | is set to "nearest" then the nearest feature will be assigned regardless 34 | of distance.} 35 | 36 | \item{all}{Logical, should all overlapping intervals be reported? If all is 37 | FALSE, the largest overlap is reported.} 38 | 39 | \item{header}{Logical, if reading a file, is there a header?} 40 | } 41 | \value{ 42 | A CDS object with updated \code{fData} table. 43 | } 44 | \description{ 45 | Annotate the sites of your CDS with feature data based on coordinate overlap. 46 | } 47 | \details{ 48 | \code{annotate_cds_by_site} will add columns to the \code{fData} 49 | table of a CDS object based on the overlap of peaks with features in a 50 | data frame or file. An "overlap" column will be added, along with any 51 | columns beyond the three required columns in the feature data. The 52 | "overlap" column is the number of base pairs overlapping the \code{fData} 53 | site. When maxgap is used, the true overlap is still calculated (overlap 54 | will be 0 if the two features only overlap because of maxgap). \code{NA} 55 | means that there was no overlapping feature. If a peak overlaps multiple 56 | data intervals and all is FALSE, the largest overlapping interval will be 57 | chosen (in a tie, the first entry is taken); otherwise all intervals will 58 | be chosen and annotations will be collapsed using a comma as a separator. 
59 | } 60 | \examples{ 61 | data("cicero_data") 62 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 63 | feat <- data.frame(chr = c("chr18", "chr18", "chr18", "chr18"), 64 | bp1 = c(10000, 10800, 50000, 100000), 65 | bp2 = c(10700, 11000, 60000, 110000), 66 | type = c("Acetylated", "Methylated", "Acetylated", 67 | "Methylated")) 68 | input_cds <- annotate_cds_by_site(input_cds, feat) 69 | 70 | } 71 | -------------------------------------------------------------------------------- /man/assemble_connections.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{assemble_connections} 4 | \alias{assemble_connections} 5 | \title{Combine and reconcile cicero models} 6 | \usage{ 7 | assemble_connections(cicero_model_list, silent = FALSE) 8 | } 9 | \arguments{ 10 | \item{cicero_model_list}{A list of cicero output objects, generally, the 11 | output of \code{\link{generate_cicero_models}}.} 12 | 13 | \item{silent}{Logical, should the function run silently?} 14 | } 15 | \value{ 16 | A data frame of connections with their cicero co-accessibility 17 | scores. 18 | } 19 | \description{ 20 | Function which takes the output of \code{\link{generate_cicero_models}} and 21 | assembles the connections into a data frame with cicero co-accessibility 22 | scores. 23 | } 24 | \details{ 25 | This function combines glasso models computed on overlapping windows of the 26 | genome. Pairs of sites whose regularized correlation was calculated twice 27 | are first checked for qualitative concordance (both zero, positive or 28 | negative). If they are not concordant, NA is returned. If they are concordant, 29 | the mean is returned. 
30 | } 31 | \examples{ 32 | data("cicero_data") 33 | data("human.hg19.genome") 34 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 35 | sample_genome$V2[1] <- 100000 36 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 37 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 38 | reduction_method = 'tSNE', 39 | norm_method = "none") 40 | tsne_coords <- t(reducedDimA(input_cds)) 41 | row.names(tsne_coords) <- row.names(pData(input_cds)) 42 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) 43 | model_output <- generate_cicero_models(cicero_cds, 44 | distance_parameter = 0.3, 45 | genomic_coords = sample_genome) 46 | cicero_cons <- assemble_connections(model_output) 47 | 48 | } 49 | \seealso{ 50 | \code{\link{generate_cicero_models}} 51 | } 52 | -------------------------------------------------------------------------------- /man/build_gene_activity_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/activityScores.R 3 | \name{build_gene_activity_matrix} 4 | \alias{build_gene_activity_matrix} 5 | \title{Calculate initial Cicero gene activity matrix} 6 | \usage{ 7 | build_gene_activity_matrix( 8 | input_cds, 9 | cicero_cons_info, 10 | site_weights = NULL, 11 | dist_thresh = 250000, 12 | coaccess_cutoff = 0.25 13 | ) 14 | } 15 | \arguments{ 16 | \item{input_cds}{Binary sci-ATAC-seq input CDS. The input CDS must have a 17 | column in the fData table called "gene" which is the gene name if the 18 | site is a promoter, and \code{NA} if the site is distal.} 19 | 20 | \item{cicero_cons_info}{Cicero connections table, generally the output of 21 | \code{\link{run_cicero}}. This table is a data frame with three required 22 | columns named "Peak1", "Peak2", and "coaccess". 
Peak1 and Peak2 contain 23 | coordinates for the two compared elements, and coaccess contains their 24 | Cicero co-accessibility score.} 25 | 26 | \item{site_weights}{NULL or an individual weight for each site in input_cds.} 27 | 28 | \item{dist_thresh}{The maximum distance in base pairs between pairs of sites 29 | to include in the gene activity calculation.} 30 | 31 | \item{coaccess_cutoff}{The minimum Cicero co-accessibility score that should 32 | be considered connected.} 33 | } 34 | \value{ 35 | Unnormalized gene activity matrix. 36 | } 37 | \description{ 38 | This function calculates the initial Cicero gene activity matrix. After this 39 | function, the activity matrix should be normalized with any comparison 40 | matrices using the function \code{\link{normalize_gene_activities}}. 41 | } 42 | \examples{ 43 | data("cicero_data") 44 | data("human.hg19.genome") 45 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 46 | sample_genome$V2[1] <- 100000 47 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 48 | input_cds <- detectGenes(input_cds) 49 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 50 | reduction_method = 'tSNE', 51 | norm_method = "none") 52 | tsne_coords <- t(reducedDimA(input_cds)) 53 | row.names(tsne_coords) <- row.names(pData(input_cds)) 54 | cicero_cds <- make_cicero_cds(input_cds, 55 | reduced_coordinates = tsne_coords) 56 | cons <- run_cicero(cicero_cds, sample_genome, sample_num=2) 57 | 58 | data(gene_annotation_sample) 59 | gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)] 60 | names(gene_annotation_sub)[4] <- "gene" 61 | input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub) 62 | num_genes <- pData(input_cds)$num_genes_expressed 63 | names(num_genes) <- row.names(pData(input_cds)) 64 | unnorm_ga <- build_gene_activity_matrix(input_cds, cons) 65 | 66 | 67 | } 68 | -------------------------------------------------------------------------------- /man/cell_data.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{cell_data} 5 | \alias{cell_data} 6 | \title{Metadata for example cells in cicero_data} 7 | \format{ 8 | A data frame with 200 rows and 2 variables: 9 | \describe{ 10 | \item{timepoint}{Time at cell collection} 11 | \item{cell}{Cell barcode} 12 | } 13 | } 14 | \usage{ 15 | data(cell_data) 16 | } 17 | \description{ 18 | Metadata information for cicero_data 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/cicero-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cicero.R 3 | \docType{package} 4 | \name{cicero-package} 5 | \alias{cicero} 6 | \alias{cicero-package} 7 | \title{cicero} 8 | \description{ 9 | Cicero computes putative cis-regulatory maps from single-cell chromatin accessibility data. It also extends monocle 2 for use in chromatin accessibility data. 
10 | } 11 | \author{ 12 | \strong{Maintainer}: Hannah Pliner \email{hpliner@uw.edu} 13 | 14 | Authors: 15 | \itemize{ 16 | \item Cole Trapnell \email{coletrap@uw.edu} 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /man/cicero_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{cicero_data} 5 | \alias{cicero_data} 6 | \title{Example single-cell chromatin accessibility data} 7 | \format{ 8 | A data frame with 35137 rows and 3 variables: 9 | \describe{ 10 | \item{Peak}{Peak information} 11 | \item{Cell}{Cell ID} 12 | \item{Count}{Reads per cell per peak} 13 | } 14 | } 15 | \usage{ 16 | data(cicero_data) 17 | } 18 | \description{ 19 | A dataset containing a subset of a single-cell ATAC-seq 20 | dataset collected on Human Skeletal Muscle Myoblasts. 21 | Only includes data from chromosome 18. 22 | } 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /man/compare_connections.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{compare_connections} 4 | \alias{compare_connections} 5 | \title{Compare Cicero connections to other datasets} 6 | \usage{ 7 | compare_connections(conns1, conns2, maxgap = 0) 8 | } 9 | \arguments{ 10 | \item{conns1}{A data frame of Cicero connections, like those output from 11 | \code{assemble_connections}. The first two columns must be the coordinates 12 | of peaks that are connected.} 13 | 14 | \item{conns2}{A data frame of connections to be searched for overlap. 
The 15 | first two columns must be coordinates of genomic sites that are connected.} 16 | 17 | \item{maxgap}{The number of base pairs between peaks allowed to be called 18 | overlapping. See \code{\link[IRanges]{findOverlaps-methods}} in the IRanges 19 | package for further description.} 20 | } 21 | \value{ 22 | A vector of logicals of whether the Cicero pair is present in the 23 | alternate dataset. 24 | } 25 | \description{ 26 | Compare two sets of connections and return a vector of logicals for whether 27 | connections in one are present in the other. 28 | } 29 | \examples{ 30 | \dontrun{ 31 | cons$in_dataset <- compare_connections(conns, alt_data) 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/df_for_coords.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{df_for_coords} 4 | \alias{df_for_coords} 5 | \title{Construct a data frame of coordinate info from coordinate strings} 6 | \usage{ 7 | df_for_coords(coord_strings) 8 | } 9 | \arguments{ 10 | \item{coord_strings}{A list of coordinate strings (each like 11 | "chr1:500000-1000000")} 12 | } 13 | \value{ 14 | data.frame with three columns, chromosome, starting base pair and 15 | ending base pair 16 | } 17 | \description{ 18 | Construct a data frame of coordinate info from coordinate strings 19 | } 20 | \details{ 21 | Coordinate strings consist of three pieces of information: 22 | chromosome, start, and stop. These pieces of information can be separated 23 | by the characters ":", "_", or "-". Commas will be removed, not used as 24 | separators (ex: "chr18:8,575,097-8,839,855" is ok). 
25 | } 26 | \examples{ 27 | df_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293")) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /man/estimate_distance_parameter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{estimate_distance_parameter} 4 | \alias{estimate_distance_parameter} 5 | \title{Calculate distance penalty parameter} 6 | \usage{ 7 | estimate_distance_parameter( 8 | cds, 9 | window = 5e+05, 10 | maxit = 100, 11 | s = 0.75, 12 | sample_num = 100, 13 | distance_constraint = 250000, 14 | distance_parameter_convergence = 1e-22, 15 | max_elements = 200, 16 | genomic_coords = cicero::human.hg19.genome, 17 | max_sample_windows = 500 18 | ) 19 | } 20 | \arguments{ 21 | \item{cds}{A cicero CDS object generated using \code{\link{make_cicero_cds}}.} 22 | 23 | \item{window}{Size of the genomic window to query, in base pairs.} 24 | 25 | \item{maxit}{Maximum number of iterations for distance_parameter estimation.} 26 | 27 | \item{s}{Power law value. See details for more information.} 28 | 29 | \item{sample_num}{Number of random windows to calculate 30 | \code{distance_parameter} for.} 31 | 32 | \item{distance_constraint}{Maximum distance of expected connections. Must be 33 | smaller than \code{window}.} 34 | 35 | \item{distance_parameter_convergence}{Convergence step size for 36 | \code{distance_parameter} calculation.} 37 | 38 | \item{max_elements}{Maximum number of elements per window allowed. Prevents 39 | very large models from slowing performance.} 40 | 41 | \item{genomic_coords}{Either a data frame or a path (character) to a file 42 | with chromosome lengths. The file should have two columns, the first is 43 | the chromosome name (ex. "chr1") and the second is the chromosome length 44 | in base pairs. See \code{data(human.hg19.genome)} for an example. 
If a 45 | file, should be tab-separated and without header.} 46 | 47 | \item{max_sample_windows}{Maximum number of random windows to screen to find 48 | sample_num windows for distance calculation. Default 500.} 49 | } 50 | \value{ 51 | A list of results of length \code{sample_num}. List members are 52 | numeric \code{distance_parameter} values. 53 | } 54 | \description{ 55 | Function to calculate distance penalty parameter (\code{distance_parameter}) 56 | for random genomic windows. Used to choose \code{distance_parameter} to pass 57 | to \code{\link{generate_cicero_models}}. 58 | } 59 | \details{ 60 | The purpose of this function is to calculate the distance scaling 61 | parameter used to adjust the distance-based penalty function used in 62 | Cicero's model calculation. The scaling parameter, in combination with the 63 | power law value \code{s} determines the distance-based penalty. 64 | 65 | This function chooses random windows of the genome and calculates a 66 | \code{distance_parameter}. The function returns a vector of values 67 | calculated on these random windows. We recommend using the mean value of 68 | this vector moving forward with Cicero analysis. 69 | 70 | The function works by finding the minimum distance scaling parameter such 71 | that no more than 5\% of pairs of sites at a distance greater than 72 | \code{distance_constraint} have non-zero entries after graphical lasso 73 | regularization and such that fewer than 80\% of all output entries are 74 | nonzero. 75 | 76 | If the chosen random window has fewer than 2 or greater than 77 | \code{max_elements} sites, the window is skipped. In addition, the random 78 | window will be skipped if there are insufficient long-range comparisons 79 | (see below) to be made. The \code{max_elements} parameter exists to prevent 80 | very dense windows from slowing the calculation. If you expect that your 81 | data may regularly have this many sites in a window, you will need to 82 | raise this parameter.
83 | 84 | Calculating the \code{distance_parameter} in a sample window requires 85 | peaks in that window that are at a distance greater than the 86 | \code{distance_constraint} parameter. If not enough examples at 87 | high distance have been found, the function will return the warning 88 | \code{"Warning: could not calculate sample_num distance_parameters - see 89 | documentation details"}. When looking for \code{sample_num} example 90 | windows, the function will search \code{max_sample_windows} windows. By 91 | default this is set at 500, which should be well beyond the 100 windows 92 | that need to be found. However, in very sparse datasets, increasing 93 | \code{max_sample_windows} may help avoid the above warning. Increasing 94 | \code{max_sample_windows} may slow performance in sparse datasets. If you 95 | are still not able to get enough example windows, even with a large 96 | \code{max_sample_windows} parameter, this may mean your \code{window} 97 | parameter needs to be larger or your \code{distance_constraint} parameter 98 | needs to be smaller. A less likely possibility is that your 99 | \code{max_elements} parameter needs to be larger. This would occur if your 100 | data is particularly dense. 101 | 102 | The parameter \code{s} is a constant that captures the power-law 103 | distribution of contact frequencies between different locations in the 104 | genome as a function of their linear distance. For a complete discussion 105 | of the various polymer models of DNA packed into the nucleus and of 106 | justifiable values for s, we refer readers to (Dekker et al., 2013). 107 | We use a value of 0.75 by default 108 | in Cicero, which corresponds to the “tension globule” polymer model of DNA 109 | (Sanborn et al., 2015). This parameter must be the same as the s parameter 110 | for generate_cicero_models. 111 | 112 | Further details are available in the publication that accompanies this 113 | package.
Run \code{citation("cicero")} for publication details. 114 | } 115 | \examples{ 116 | data("cicero_data") 117 | data("human.hg19.genome") 118 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 119 | sample_genome$V2[1] <- 100000 120 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 121 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 122 | reduction_method = 'tSNE', 123 | norm_method = "none") 124 | tsne_coords <- t(reducedDimA(input_cds)) 125 | row.names(tsne_coords) <- row.names(pData(input_cds)) 126 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) 127 | distance_parameters <- estimate_distance_parameter(cicero_cds, 128 | sample_num=5, 129 | genomic_coords = sample_genome) 130 | 131 | } 132 | \references{ 133 | \itemize{ 134 | \item Dekker, J., Marti-Renom, M.A., and Mirny, L.A. (2013). Exploring 135 | the three-dimensional organization of genomes: interpreting chromatin 136 | interaction data. Nat. Rev. Genet. 14, 390–403. 137 | \item Sanborn, A.L., Rao, S.S.P., Huang, S.-C., Durand, N.C., Huntley, 138 | M.H., Jewett, A.I., Bochkov, I.D., Chinnappan, D., Cutkosky, A., Li, J., 139 | et al. (2015). Chromatin extrusion explains key features of loop and 140 | domain formation in wild-type and engineered genomes. Proc. Natl. Acad. 141 | Sci. U. S. A. 112, E6456–E6465. 
142 | } 143 | } 144 | \seealso{ 145 | \code{\link{generate_cicero_models}} 146 | } 147 | -------------------------------------------------------------------------------- /man/find_overlapping_ccans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{find_overlapping_ccans} 4 | \alias{find_overlapping_ccans} 5 | \title{Find CCANs that overlap each other in genomic coordinates} 6 | \usage{ 7 | find_overlapping_ccans(ccan_assignments, min_overlap = 1) 8 | } 9 | \arguments{ 10 | \item{ccan_assignments}{A data frame where the first column is the peak and 11 | the second is the CCAN assignment. For example, output of 12 | \code{generate_ccans}.} 13 | 14 | \item{min_overlap}{The minimum base pair overlap to count as overlapping.} 15 | } 16 | \value{ 17 | A data frame with two columns, CCAN1 and CCAN2. CCANs in this list 18 | are overlapping. The data frame is reciprocal (if CCAN 2 overlaps CCAN 1, 19 | there will be two rows, 1,2 and 2,1). 
20 | } 21 | \description{ 22 | Find CCANs that overlap each other in genomic coordinates 23 | } 24 | \examples{ 25 | ccan_df <- data.frame(peak = c("chr18_1408345_1408845", "chr18_1779830_1780330", 26 | "chr18_1929095_1929595", "chr18_1954501_1954727", 27 | "chr18_2049865_2050884", "chr18_2083726_2084102", 28 | "chr18_2087935_2088622", "chr18_2104705_2105551", 29 | "chr18_2108641_2108907"), 30 | CCAN = c(1,2,2,2,3,3,3,3,2)) 31 | olap_ccans <- find_overlapping_ccans(ccan_df) 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/find_overlapping_coordinates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{find_overlapping_coordinates} 4 | \alias{find_overlapping_coordinates} 5 | \title{Find peaks that overlap a specific genomic location} 6 | \usage{ 7 | find_overlapping_coordinates(coord_list, coord, maxgap = 0) 8 | } 9 | \arguments{ 10 | \item{coord_list}{A list of coordinates to be searched for overlap in the 11 | form chr_100_2000.} 12 | 13 | \item{coord}{The coordinates that you want to find in the form chr1_100_2000.} 14 | 15 | \item{maxgap}{The maximum distance in base pairs between coord and the 16 | coord_list that should count as overlapping. Default is 0.} 17 | } 18 | \value{ 19 | A character vector of the peaks that overlap coord. 
20 | } 21 | \description{ 22 | Find peaks that overlap a specific genomic location 23 | } 24 | \examples{ 25 | test_coords <- c("chr18_10025_10225", "chr18_10603_11103", 26 | "chr18_11604_13986", 27 | "chr18_157883_158536", "chr18_217477_218555", 28 | "chr18_245734_246234") 29 | find_overlapping_coordinates(test_coords, "chr18:10,100-1246234") 30 | 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/gene_annotation_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{gene_annotation_sample} 5 | \alias{gene_annotation_sample} 6 | \title{Example gene annotation information} 7 | \format{ 8 | A data frame with 15129 rows and 8 variables: 9 | \describe{ 10 | \item{chromosome}{Chromosome} 11 | \item{start}{Exon starting base} 12 | \item{end}{Exon ending base} 13 | \item{strand}{Exon mapping direction} 14 | \item{feature}{Feature type} 15 | \item{gene}{Gene ID} 16 | \item{transcript}{Transcript ID} 17 | \item{symbol}{Gene symbol} 18 | } 19 | } 20 | \usage{ 21 | data(gene_annotation_sample) 22 | } 23 | \description{ 24 | Gencode gene annotation data from chromosome 18 of the 25 | human genome (hg19). 26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/generate_ccans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{generate_ccans} 4 | \alias{generate_ccans} 5 | \title{Generate cis-co-accessibility networks (CCANs)} 6 | \usage{ 7 | generate_ccans( 8 | connections_df, 9 | coaccess_cutoff_override = NULL, 10 | tolerance_digits = 2 11 | ) 12 | } 13 | \arguments{ 14 | \item{connections_df}{Data frame of connections with columns: Peak1, Peak2, 15 | coaccess. 
Generally, the output of \code{\link{run_cicero}} or 16 | \code{\link{assemble_connections}}} 17 | 18 | \item{coaccess_cutoff_override}{Numeric, co-accessibility score threshold to 19 | impose. Overrides automatic calculation.} 20 | 21 | \item{tolerance_digits}{The number of digits to calculate cutoff to. Default 22 | is 2 (0.01 tolerance)} 23 | } 24 | \value{ 25 | Data frame with two columns - Peak and CCAN. CCAN column indicates 26 | CCAN assignment. Peaks not included in a CCAN are not returned. 27 | } 28 | \description{ 29 | Post process cicero co-accessibility scores to extract modules of sites that 30 | are co-accessible. 31 | } 32 | \details{ 33 | CCANs are calculated by first specifying a minimum co-accessibility 34 | score and then using the Louvain community detection algorithm on the 35 | subgraph induced by excluding edges below this score. For this function, 36 | either the user can specify the minimum co-accessibility using 37 | \code{coaccess_cutoff_override}, or the cutoff can be calculated 38 | automatically by optimizing for CCAN number. The cutoff calculation can be 39 | slow, so users may wish to use the \code{coaccess_cutoff_override} after 40 | initially calculating the cutoff to speed future runs. 
41 | } 42 | \examples{ 43 | \dontrun{ 44 | data("cicero_data") 45 | set.seed(18) 46 | data("human.hg19.genome") 47 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 48 | sample_genome$V2[1] <- 100000 49 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 50 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 51 | reduction_method = 'tSNE', 52 | norm_method = "none") 53 | tsne_coords <- t(reducedDimA(input_cds)) 54 | row.names(tsne_coords) <- row.names(pData(input_cds)) 55 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) 56 | cicero_cons <- run_cicero(cicero_cds, sample_genome, sample_num = 2) 57 | ccan_assigns <- generate_ccans(cicero_cons) 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /man/generate_cicero_models.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{generate_cicero_models} 4 | \alias{generate_cicero_models} 5 | \title{Generate cicero models} 6 | \usage{ 7 | generate_cicero_models( 8 | cds, 9 | distance_parameter, 10 | s = 0.75, 11 | window = 5e+05, 12 | max_elements = 200, 13 | genomic_coords = cicero::human.hg19.genome 14 | ) 15 | } 16 | \arguments{ 17 | \item{cds}{A cicero CDS object generated using \code{\link{make_cicero_cds}}.} 18 | 19 | \item{distance_parameter}{Distance based penalty parameter value. Generally, 20 | the mean of the calculated \code{distance_parameter} values from 21 | \code{\link{estimate_distance_parameter}}.} 22 | 23 | \item{s}{Power law value. See details.} 24 | 25 | \item{window}{Size of the genomic window to query, in base pairs.} 26 | 27 | \item{max_elements}{Maximum number of elements per window allowed. 
Prevents 28 | very large models from slowing performance.} 29 | 30 | \item{genomic_coords}{Either a data frame or a path (character) to a file 31 | with chromosome lengths. The file should have two columns, the first is 32 | the chromosome name (ex. "chr1") and the second is the chromosome length 33 | in base pairs. See \code{data(human.hg19.genome)} for an example. If a 34 | file, should be tab-separated and without header.} 35 | } 36 | \value{ 37 | A list of results for each window. Either a \code{glasso} object, or 38 | a character description of why the window was skipped. This list can be 39 | directly input into \code{\link{assemble_connections}} to create a 40 | reconciled list of cicero co-accessibility scores. 41 | } 42 | \description{ 43 | Function to generate graphical lasso models on all sites in a CDS object 44 | within overlapping genomic windows. 45 | } 46 | \details{ 47 | The purpose of this function is to compute the raw covariances 48 | between each pair of sites within overlapping windows of the genome. 49 | Within each window, the function then estimates a regularized correlation 50 | matrix using the graphical LASSO (Friedman et al., 2008), penalizing pairs 51 | of distant sites more than proximal sites. The scaling parameter, 52 | \code{distance_parameter}, in combination with the power law value \code{s} 53 | determines the distance-based penalty. 54 | 55 | The parameter \code{s} is a constant that captures the power-law 56 | distribution of contact frequencies between different locations in the 57 | genome as a function of their linear distance. For a complete discussion 58 | of the various polymer models of DNA packed into the nucleus and of 59 | justifiable values for s, we refer readers to (Dekker et al., 2013). 60 | We use a value of 0.75 by default 61 | in Cicero, which corresponds to the “tension globule” polymer model of DNA 62 | (Sanborn et al., 2015).
This parameter must be the same as the s parameter 63 | for \code{\link{estimate_distance_parameter}}. 64 | 65 | Further details are available in the publication that accompanies this 66 | package. Run \code{citation("cicero")} for publication details. 67 | } 68 | \examples{ 69 | data("cicero_data") 70 | data("human.hg19.genome") 71 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 72 | sample_genome$V2[1] <- 100000 73 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 74 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 75 | reduction_method = 'tSNE', 76 | norm_method = "none") 77 | tsne_coords <- t(reducedDimA(input_cds)) 78 | row.names(tsne_coords) <- row.names(pData(input_cds)) 79 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) 80 | model_output <- generate_cicero_models(cicero_cds, 81 | distance_parameter = 0.3, 82 | genomic_coords = sample_genome) 83 | 84 | } 85 | \references{ 86 | \itemize{ 87 | \item Dekker, J., Marti-Renom, M.A., and Mirny, L.A. (2013). Exploring 88 | the three-dimensional organization of genomes: interpreting chromatin 89 | interaction data. Nat. Rev. Genet. 14, 390–403. 90 | \item Friedman, J., Hastie, T., and Tibshirani, R. (2008). Sparse 91 | inverse covariance estimation with the graphical lasso. Biostatistics 9, 92 | 432–441. 93 | \item Sanborn, A.L., Rao, S.S.P., Huang, S.-C., Durand, N.C., Huntley, 94 | M.H., Jewett, A.I., Bochkov, I.D., Chinnappan, D., Cutkosky, A., Li, J., 95 | et al. (2015). Chromatin extrusion explains key features of loop and 96 | domain formation in wild-type and engineered genomes. Proc. Natl. Acad. 97 | Sci. U. S. A. 112, E6456–E6465. 
98 | } 99 | } 100 | \seealso{ 101 | \code{\link{estimate_distance_parameter}} 102 | } 103 | -------------------------------------------------------------------------------- /man/human.hg19.genome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{human.hg19.genome} 5 | \alias{human.hg19.genome} 6 | \title{Chromosome lengths from human genome hg19} 7 | \format{ 8 | A data frame with 93 rows and 2 variables: 9 | \describe{ 10 | \item{V1}{Chromosome} 11 | \item{V2}{Chromosome length, base pairs} 12 | } 13 | } 14 | \usage{ 15 | data(human.hg19.genome) 16 | } 17 | \description{ 18 | A list of the chromosomes in hg19 and their lengths 19 | in base pairs. 20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /man/make_atac_cds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{make_atac_cds} 4 | \alias{make_atac_cds} 5 | \title{Make ATAC CDS object} 6 | \usage{ 7 | make_atac_cds(input, binarize = FALSE) 8 | } 9 | \arguments{ 10 | \item{input}{Either a data frame or a path to input data. If a file, it 11 | should be a tab-delimited text file with three columns and no header. For 12 | either a file or a data frame, the first column is the peak coordinates in 13 | the form "chr10_100013372_100013596", the second column is the cell name, 14 | and the third column is an integer that represents the number of reads 15 | from that cell overlapping that peak. Zero values do not need to be 16 | included (sparse matrix format).} 17 | 18 | \item{binarize}{Logical. Should the count matrix be converted to binary?} 19 | } 20 | \value{ 21 | A CDS object containing your ATAC data in proper format. 
22 | } 23 | \description{ 24 | This function takes as input a data frame or a path to a file in a sparse 25 | matrix format and returns a properly formatted \code{CellDataSet} (CDS) 26 | object. 27 | } 28 | \examples{ 29 | data("cicero_data") 30 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/make_cicero_cds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{make_cicero_cds} 4 | \alias{make_cicero_cds} 5 | \title{Create cicero input CDS} 6 | \usage{ 7 | make_cicero_cds( 8 | cds, 9 | reduced_coordinates, 10 | k = 50, 11 | summary_stats = NULL, 12 | size_factor_normalize = TRUE, 13 | silent = FALSE, 14 | return_agg_info = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{cds}{Input CDS object.} 19 | 20 | \item{reduced_coordinates}{A data frame with columns representing the 21 | coordinates of each cell in reduced dimension space (generally 2-3 22 | dimensions). \code{row.names(reduced_coordinates)} should match the cell 23 | names in the CDS object. If dimension reduction was done using monocle, 24 | tSNE coordinates can be accessed by \code{t(reducedDimA(cds))}, and 25 | DDRTree coordinates can be accessed by \code{t(reducedDimS(cds))}.} 26 | 27 | \item{k}{Number of cells to aggregate per bin.} 28 | 29 | \item{summary_stats}{Which numeric \code{pData(cds)} columns you would like 30 | summarized (mean) by bin in the resulting CDS object.} 31 | 32 | \item{size_factor_normalize}{Logical, should accessibility values be 33 | normalized by size factor?} 34 | 35 | \item{silent}{Logical, should warning and info messages be suppressed?} 36 | 37 | \item{return_agg_info}{Logical, should a list of the assignments of cells to 38 | aggregated bins be output?
When \code{TRUE}, this function returns a list 39 | of two items, first, the aggregated CDS object and second, a data.frame 40 | with the binning information.} 41 | } 42 | \value{ 43 | Aggregated CDS object. If return_agg_info is \code{TRUE}, a list 44 | of the aggregated CDS object and a data.frame of aggregation info. 45 | } 46 | \description{ 47 | Function to generate an aggregated input CDS for cicero. \code{run_cicero} 48 | takes as input an aggregated cicero CDS object. This function will generate 49 | the CDS given an input CDS (perhaps generated by \code{make_atac_cds}) and 50 | a value for k, which is the number of cells to be aggregated per bin. The 51 | default value for k is 50. 52 | } 53 | \details{ 54 | Aggregation of similar cells is done using a k-nearest-neighbors 55 | graph and a randomized "bagging" procedure. Details are available in the 56 | publication that accompanies this package. Run \code{citation("cicero")} 57 | for publication details. KNN is calculated using 58 | \code{\link[FNN]{knn.index}} 59 | } 60 | \examples{ 61 | \dontrun{ 62 | data("cicero_data") 63 | 64 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 65 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 66 | reduction_method = 'tSNE', 67 | norm_method = "none") 68 | tsne_coords <- t(reducedDimA(input_cds)) 69 | row.names(tsne_coords) <- row.names(pData(input_cds)) 70 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /man/make_sparse_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{make_sparse_matrix} 4 | \alias{make_sparse_matrix} 5 | \title{Make a symmetric square sparse matrix from data frame} 6 | \usage{ 7 | make_sparse_matrix(data, i.name = "Peak1", j.name = "Peak2", 
x.name = "value") 8 | } 9 | \arguments{ 10 | \item{data}{data frame} 11 | 12 | \item{i.name}{name of i column} 13 | 14 | \item{j.name}{name of j column} 15 | 16 | \item{x.name}{name of value column} 17 | } 18 | \value{ 19 | sparse matrix 20 | } 21 | \description{ 22 | Convert a data frame into a square sparse matrix (all versus all) 23 | } 24 | -------------------------------------------------------------------------------- /man/normalize_gene_activities.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/activityScores.R 3 | \name{normalize_gene_activities} 4 | \alias{normalize_gene_activities} 5 | \title{Normalize gene activities} 6 | \usage{ 7 | normalize_gene_activities(activity_matrices, cell_num_genes) 8 | } 9 | \arguments{ 10 | \item{activity_matrices}{A gene activity matrix, output from 11 | \code{\link{build_gene_activity_matrix}}, or a list of gene activity 12 | matrices to be normalized together.} 13 | 14 | \item{cell_num_genes}{A named vector of the total number of accessible sites 15 | per cell. Names should correspond to the cell names in the activity 16 | matrices. These values can be found in the "num_genes_expressed" column 17 | of the pData table of the CDS used to calculate the gene activity matrix.} 18 | } 19 | \value{ 20 | Normalized activity matrix or matrices. 21 | } 22 | \description{ 23 | Normalize the output of \code{\link{build_gene_activity_matrix}}. Input is 24 | either one or multiple gene activity matrices. Any gene activities to be 25 | compared amongst each other should be normalized together. 
26 | } 27 | \examples{ 28 | data("cicero_data") 29 | data("human.hg19.genome") 30 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 31 | sample_genome$V2[1] <- 100000 32 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 33 | input_cds <- detectGenes(input_cds) 34 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 35 | reduction_method = 'tSNE', 36 | norm_method = "none") 37 | tsne_coords <- t(reducedDimA(input_cds)) 38 | row.names(tsne_coords) <- row.names(pData(input_cds)) 39 | cicero_cds <- make_cicero_cds(input_cds, 40 | reduced_coordinates = tsne_coords) 41 | cons <- run_cicero(cicero_cds, sample_genome, sample_num=2) 42 | 43 | data(gene_annotation_sample) 44 | gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)] 45 | names(gene_annotation_sub)[4] <- "gene" 46 | input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub) 47 | num_genes <- pData(input_cds)$num_genes_expressed 48 | names(num_genes) <- row.names(pData(input_cds)) 49 | unnorm_ga <- build_gene_activity_matrix(input_cds, cons) 50 | cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes) 51 | 52 | 53 | } 54 | -------------------------------------------------------------------------------- /man/plot_accessibility_in_pseudotime.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting.R 3 | \name{plot_accessibility_in_pseudotime} 4 | \alias{plot_accessibility_in_pseudotime} 5 | \title{Plot accessibility by pseudotime} 6 | \usage{ 7 | plot_accessibility_in_pseudotime(cds_subset, breaks = 10) 8 | } 9 | \arguments{ 10 | \item{cds_subset}{Subset of the CDS object you want to plot. The CDS must 11 | have a column in the pData table called "Pseudotime".} 12 | 13 | \item{breaks}{Number of breaks along pseudotime. 
Controls the coarseness of 14 | the plot.} 15 | } 16 | \value{ 17 | ggplot object 18 | } 19 | \description{ 20 | Make a barplot of chromatin accessibility across pseudotime 21 | } 22 | \details{ 23 | This function plots each site in the CDS subset by cell pseudotime 24 | as a barplot. Cells are divided into bins by pseudotime (number determined 25 | by \code{breaks}) and the percent of cells in each bin that are accessible 26 | is represented by bar height. In addition, the black line represents the 27 | pseudotime-dependent average accessibility from a smoothed binomial 28 | regression. 29 | } 30 | \examples{ 31 | \dontrun{ 32 | plot_accessibility_in_pseudotime(input_cds_lin[c("chr18_38156577_38158261", 33 | "chr18_48373358_48374180", 34 | "chr18_60457956_60459080")]) 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/plot_connections.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting.R 3 | \name{plot_connections} 4 | \alias{plot_connections} 5 | \title{Plot connections} 6 | \usage{ 7 | plot_connections( 8 | connection_df, 9 | chr, 10 | minbp, 11 | maxbp, 12 | coaccess_cutoff = 0, 13 | peak_color = "#B4656F", 14 | connection_color = "#7F7CAF", 15 | connection_color_legend = TRUE, 16 | alpha_by_coaccess = FALSE, 17 | connection_width = 2, 18 | connection_ymax = NULL, 19 | gene_model = NULL, 20 | gene_model_color = "#81D2C7", 21 | gene_model_shape = c("smallArrow", "box"), 22 | comparison_track = NULL, 23 | comparison_coaccess_cutoff = 0, 24 | comparison_peak_color = "#B4656F", 25 | comparison_connection_color = "#7F7CAF", 26 | comparison_connection_color_legend = TRUE, 27 | comparison_connection_width = 2, 28 | comparison_ymax = NULL, 29 | collapseTranscripts = FALSE, 30 | include_axis_track = TRUE, 31 | return_as_list = FALSE, 32 | viewpoint = NULL, 33 | comparison_viewpoint = 
TRUE, 34 | viewpoint_color = "#F0544F", 35 | viewpoint_fill = "#EFD8D7", 36 | viewpoint_alpha = 0.5 37 | ) 38 | } 39 | \arguments{ 40 | \item{connection_df}{Data frame of connections, which must include the 41 | columns 'Peak1', 'Peak2', and 'coaccess'. Generally, the output of 42 | run_cicero or assemble_connections.} 43 | 44 | \item{chr}{The chromosome of the region you would like to plot in the form 45 | 'chr10'.} 46 | 47 | \item{minbp}{The base pair coordinate of the start of the region to be 48 | plotted.} 49 | 50 | \item{maxbp}{The base pair coordinate of the end of the region to be plotted.} 51 | 52 | \item{coaccess_cutoff}{The minimum cicero co-accessibility score you would 53 | like to be plotted. Default is 0.} 54 | 55 | \item{peak_color}{Color for peak annotations - a single color, the name of a 56 | column containing color values that correspond to Peak1, or the name of a 57 | column containing a character or factor to base peak colors on.} 58 | 59 | \item{connection_color}{Color for connection lines. A single color, the name 60 | of a column containing color values, or the name of a column containing a 61 | character or factor to base connection colors on.} 62 | 63 | \item{connection_color_legend}{Logical, should connection color legend be 64 | shown?} 65 | 66 | \item{alpha_by_coaccess}{Logical, should the transparency of connection 67 | lines be scaled based on co-accessibility score?} 68 | 69 | \item{connection_width}{Width of connection lines.} 70 | 71 | \item{connection_ymax}{Connection y-axis height. If \code{NULL}, chosen 72 | automatically.} 73 | 74 | \item{gene_model}{Either \code{NULL} or a data.frame. The data.frame should 75 | be in a form compatible with the Gviz function 76 | \code{\link[Gviz]{GeneRegionTrack-class}} (cannot have NA as column names).} 77 | 78 | \item{gene_model_color}{Color for gene annotations.} 79 | 80 | \item{gene_model_shape}{Shape for gene models, passed to 81 | \code{\link[Gviz]{GeneRegionTrack-class}}.
Options described at 82 | \code{\link[Gviz]{GeneRegionTrack-class}}.} 83 | 84 | \item{comparison_track}{Either \code{NULL} or a data frame. If a data frame, 85 | a second track of connections will be plotted based on this data. This 86 | data frame has the same requirements as connection_df (Peak1, Peak2 and 87 | coaccess columns).} 88 | 89 | \item{comparison_coaccess_cutoff}{The minimum cicero co-accessibility score 90 | you would like to be plotted for the comparison dataset. Default = 0.} 91 | 92 | \item{comparison_peak_color}{Color for comparison peak annotations - a 93 | single color, the name of a column containing color values that correspond 94 | to Peak1, or the name of a column containing a character or factor to base 95 | peak colors on.} 96 | 97 | \item{comparison_connection_color}{Color for comparison connection lines. A 98 | single color, the name of a column containing color values, or the name of 99 | a column containing a character or factor to base connection colors on.} 100 | 101 | \item{comparison_connection_color_legend}{Logical, should comparison 102 | connection color legend be shown?} 103 | 104 | \item{comparison_connection_width}{Width of comparison connection lines.} 105 | 106 | \item{comparison_ymax}{Connection y-axis height for comparison track. If 107 | \code{NULL}, chosen automatically.} 108 | 109 | \item{collapseTranscripts}{Logical or character scalar. Can be one of 110 | \code{gene}, \code{longest}, \code{shortest} or \code{meta}. Variable is 111 | passed to the \code{\link[Gviz]{GeneRegionTrack-class}} function of Gviz. 112 | Determines whether and how to collapse related transcripts. See Gviz 113 | documentation for details.} 114 | 115 | \item{include_axis_track}{Logical, should a genomic axis be plotted?} 116 | 117 | \item{return_as_list}{Logical, if TRUE, the function will not plot, but will 118 | return the plot components as a list.
Allows user to add/customize Gviz 119 | components and plot them separately using \code{\link[Gviz]{plotTracks}}.} 120 | 121 | \item{viewpoint}{\code{NULL} or Coordinates in form "chr1_10000_10020". Use 122 | viewpoint if you would like to plot cicero connections "4C-seq style". 123 | Only connections originating in the viewpoint will be shown. Ideal for 124 | comparisons with 4C-seq data. If comparison_viewpoint is \code{TRUE}, any 125 | comparison track will be subsetted as well.} 126 | 127 | \item{comparison_viewpoint}{Logical, should viewpoint apply to comparison 128 | track as well?} 129 | 130 | \item{viewpoint_color}{Color for the highlight border.} 131 | 132 | \item{viewpoint_fill}{Color for the highlight fill.} 133 | 134 | \item{viewpoint_alpha}{Alpha value for the highlight fill.} 135 | } 136 | \value{ 137 | A gene region plot, or list of components if return_as_list is 138 | \code{TRUE}. 139 | } 140 | \description{ 141 | Plotting function for Cicero connections. Uses \code{\link[Gviz]{plotTracks}} 142 | as its basis 143 | } 144 | \examples{ 145 | cicero_cons <- data.frame( 146 | Peak1 = c("chr18_10034652_10034983", "chr18_10034652_10034983", 147 | "chr18_10034652_10034983", "chr18_10034652_10034983", 148 | "chr18_10087586_10087901", "chr18_10120685_10127115", 149 | "chr18_10097718_10097934", "chr18_10087586_10087901", 150 | "chr18_10154818_10155215", "chr18_10238762_10238983", 151 | "chr18_10198959_10199183", "chr18_10250985_10251585"), 152 | Peak2 = c("chr18_10097718_10097934", "chr18_10087586_10087901", 153 | "chr18_10154818_10155215", "chr18_10238762_10238983", 154 | "chr18_10198959_10199183", "chr18_10250985_10251585", 155 | "chr18_10034652_10034983", "chr18_10034652_10034983", 156 | "chr18_10034652_10034983", "chr18_10034652_10034983", 157 | "chr18_10087586_10087901", "chr18_10120685_10127115"), 158 | coaccess = c(0.0051121787, 0.0016698617, 0.0006570246, 159 | 0.0013466927, 0.0737935011, 0.3264019452, 160 | 0.0051121787, 0.0016698617, 0.0006570246, 
161 | 0.0013466927, 0.0737935011, 0.3264019452)) 162 | plot_connections(cicero_cons, chr = "chr18", 163 | minbp = 10034652, 164 | maxbp = 10251585, 165 | peak_color = "purple") 166 | 167 | } 168 | -------------------------------------------------------------------------------- /man/ranges_for_coords.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{ranges_for_coords} 4 | \alias{ranges_for_coords} 5 | \title{Construct GRanges objects from coordinate strings} 6 | \usage{ 7 | ranges_for_coords(coord_strings, meta_data_df = NULL, with_names = FALSE) 8 | } 9 | \arguments{ 10 | \item{coord_strings}{A list of coordinate strings (in the form 11 | "chr1:500000-1000000")} 12 | 13 | \item{meta_data_df}{A data frame with any meta data columns you want 14 | included with the ranges. Must be in the same order as coord_strings.} 15 | 16 | \item{with_names}{logical - should meta data include coordinate string 17 | (field coord_string)?} 18 | } 19 | \value{ 20 | GRanges object of the input strings 21 | } 22 | \description{ 23 | Construct GRanges objects from coordinate strings 24 | } 25 | \details{ 26 | Coordinate strings consist of three pieces of information: 27 | chromosome, start, and stop. These pieces of information can be separated 28 | by the characters ":", "_", or "-". Commas will be removed, not used as 29 | separators (ex: "chr18:8,575,097-8,839,855" is ok). 
30 | } 31 | \examples{ 32 | ran1 <- ranges_for_coords("chr1:2039-30239", with_names = TRUE) 33 | ran2 <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"), 34 | meta_data_df = data.frame(dat = c("1", "X"))) 35 | ran3 <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"), 36 | with_names = TRUE, 37 | meta_data_df = data.frame(dat = c("1", "X"), 38 | stringsAsFactors = FALSE)) 39 | 40 | } 41 | \seealso{ 42 | \code{\link[GenomicRanges]{GRanges-class}} 43 | } 44 | -------------------------------------------------------------------------------- /man/run_cicero.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCicero.R 3 | \name{run_cicero} 4 | \alias{run_cicero} 5 | \title{Run Cicero} 6 | \usage{ 7 | run_cicero( 8 | cds, 9 | genomic_coords, 10 | window = 5e+05, 11 | silent = FALSE, 12 | sample_num = 100 13 | ) 14 | } 15 | \arguments{ 16 | \item{cds}{Cicero CDS object, created using \code{\link{make_cicero_cds}}} 17 | 18 | \item{genomic_coords}{Either a data frame or a path (character) to a file 19 | with chromosome lengths. The file should have two columns, the first is 20 | the chromosome name (ex. "chr1") and the second is the chromosome length 21 | in base pairs. See \code{data(human.hg19.genome)} for an example. If a 22 | file, should be tab-separated and without header.} 23 | 24 | \item{window}{Size of the genomic window to query, in base pairs.} 25 | 26 | \item{silent}{Logical, should progress messages be suppressed? Default is FALSE.} 27 | 28 | \item{sample_num}{How many sample genomic windows to use to generate 29 | \code{distance_parameter} parameter. Default: 100.} 30 | } 31 | \value{ 32 | A table of co-accessibility scores 33 | } 34 | \description{ 35 | A wrapper function that runs the primary functions of the Cicero pipeline 36 | with default parameters.
Runs \code{\link{estimate_distance_parameter}}, 37 | \code{\link{generate_cicero_models}} and \code{\link{assemble_connections}}. 38 | See the manual pages of these functions for details about their function and 39 | parameter options. Defaults in this function are designed for mammalian data, 40 | those with non-mammalian data should read about parameters in the above 41 | functions. 42 | } 43 | \examples{ 44 | data("cicero_data") 45 | data("human.hg19.genome") 46 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 47 | sample_genome$V2[1] <- 100000 48 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE) 49 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, 50 | reduction_method = 'tSNE', 51 | norm_method = "none") 52 | tsne_coords <- t(reducedDimA(input_cds)) 53 | row.names(tsne_coords) <- row.names(pData(input_cds)) 54 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) 55 | cons <- run_cicero(cicero_cds, sample_genome, sample_num = 2) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /tests/feat.txt: -------------------------------------------------------------------------------- 1 | chr18 10000 10700 Acetylated 2 | chr18 10800 11000 Methylated 3 | chr18 50000 60000 Acetylated 4 | chr18 1e+05 110000 Methylated 5 | -------------------------------------------------------------------------------- /tests/feat_head.txt: -------------------------------------------------------------------------------- 1 | chr bp1 bp2 type 2 | chr18 10000 10700 Acetylated 3 | chr18 10800 11000 Methylated 4 | chr18 50000 60000 Acetylated 5 | chr18 1e+05 110000 Methylated 6 | -------------------------------------------------------------------------------- /tests/figs/deps.txt: -------------------------------------------------------------------------------- 1 | - vdiffr-svg-engine: 1.0 2 | - vdiffr: 0.3.1 3 | - freetypeharfbuzz: 0.2.5 4 | 
-------------------------------------------------------------------------------- /tests/figs/plotting/basic-bar-one.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | chr18_38156577_38158261 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 0 52 | 1 53 | 2 54 | 3 55 | 4 56 | 5 57 | 58 | 0 59 | 5 60 | 10 61 | 15 62 | 20 63 | 25 64 | 65 | 66 | 67 | 68 | 69 | 70 | Pseudotime 71 | Percent of cells accessible 72 | basic bar one 73 | 74 | -------------------------------------------------------------------------------- /tests/figs/plotting/connection-ymax-plus-cutoff.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 0 84 | 0.5 85 | 1 86 | 1.5 87 | 2 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | Peaks 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 
213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | Axis 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 10.05 mb 417 | 10.1 mb 418 | 10.15 mb 419 | 10.2 mb 420 | 421 | 422 | 423 | 424 | 425 | 426 | -------------------------------------------------------------------------------- /tests/human.hg19.genome_sub.txt: -------------------------------------------------------------------------------- 1 | chr18 78077248 2 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(cicero) 3 | 4 | test_check("cicero") 5 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/plotting/basic-bar-one.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | chr18_38156577_38158261 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 0 56 | 1 57 | 2 58 | 3 59 | 4 60 | 5 61 | 62 | 0 63 | 5 64 | 10 65 | 15 66 | 20 67 | 25 68 | 69 | 70 | 71 | 72 | 73 | 74 | Pseudotime 75 | Percent of cells accessible 76 | basic bar one 77 | 78 | 79 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/plotting/basic-connections-high-cutoff.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 0 65 | 0.2 66 | 0.4 67 | 0.6 68 | 0.8 69 | 1 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | Peaks 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | Axis 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 
225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 10.05 mb 241 | 10.1 mb 242 | 10.15 mb 243 | 10.2 mb 244 | 245 | 246 | 247 | 248 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/plotting/basic-connections-plot-cutoff.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 0 64 | 0.1 65 | 0.2 66 | 0.3 67 | 0.4 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | Peaks 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | Axis 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 10.05 mb 242 | 10.1 mb 243 | 10.15 mb 244 | 10.2 mb 245 | 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/plotting/connection-ymax-plus-cutoff.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 0 64 | 0.5 65 | 1 66 | 1.5 67 | 2 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | Peaks 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | Axis 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 10.05 mb 239 | 10.1 mb 240 | 10.15 mb 241 | 10.2 mb 242 | 243 | 244 | 245 | 246 | -------------------------------------------------------------------------------- /tests/testthat/test-aggregate.R: -------------------------------------------------------------------------------- 1 | context("test-aggregate.R") 2 | 3 | data("cicero_data") 4 | test_cds <- suppressWarnings(make_atac_cds(cicero_data)) 5 | 6 | test_cds2 <- suppressWarnings(make_atac_cds(cicero_data)) 7 | exprs(test_cds2) <- as.matrix(exprs(test_cds2)) 8 | 9 | test_that("aggregate_nearby_peaks makes a valid cds object", { 10 | #skip_on_bioc() 
11 | agg_cds <- aggregate_nearby_peaks(test_cds, 10000) 12 | expect_is(agg_cds, "CellDataSet") 13 | expect_equal(nrow(exprs(agg_cds)), 1690) 14 | expect_equal(ncol(exprs(agg_cds)), 200) 15 | expect_equal(exprs(agg_cds)[1,4], 4) 16 | expect_match(row.names(agg_cds)[1], "chr18_10006196_10017274") 17 | expect_match(colnames(agg_cds)[1], "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT") 18 | expect_is(exprs(agg_cds), "dgCMatrix") 19 | }) 20 | 21 | test_that("aggregate_nearby_peaks makes a valid cds object not sparse", { 22 | #skip_on_bioc() 23 | agg_cds <- aggregate_nearby_peaks(test_cds2, 10000) 24 | expect_is(agg_cds, "CellDataSet") 25 | expect_equal(nrow(exprs(agg_cds)), 1690) 26 | expect_equal(ncol(exprs(agg_cds)), 200) 27 | expect_equal(exprs(agg_cds)[1,4], 4) 28 | expect_match(row.names(agg_cds)[1], "chr18_10006196_10017274") 29 | expect_match(colnames(agg_cds)[1], "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT") 30 | expect_is(exprs(agg_cds), "matrix") 31 | }) 32 | 33 | test_that("aggregate_by_cell_bin makes a valid cds object", { 34 | #skip_on_bioc() 35 | pData(test_cds)$cell_subtype <- rep(1:10, times= 20) 36 | agg_cds2 <- suppressMessages(aggregate_by_cell_bin(test_cds, "cell_subtype")) 37 | expect_is(agg_cds2, "CellDataSet") 38 | expect_equal(nrow(exprs(agg_cds2)), 6146) 39 | expect_equal(ncol(exprs(agg_cds2)), 10) 40 | expect_equal(exprs(agg_cds2)[1,4], 2) 41 | expect_match(row.names(agg_cds2)[1], "chr18_10006196_10006822") 42 | expect_match(colnames(agg_cds2)[1], "1") 43 | expect_is(exprs(agg_cds2), "matrix") 44 | }) 45 | 46 | test_that("aggregate_by_cell_bin makes a valid cds object not sparse", { 47 | #skip_on_bioc() 48 | pData(test_cds2)$cell_subtype <- rep(1:10, times= 20) 49 | agg_cds2 <- suppressMessages(aggregate_by_cell_bin(test_cds2, "cell_subtype")) 50 | expect_is(agg_cds2, "CellDataSet") 51 | expect_equal(nrow(exprs(agg_cds2)), 6146) 52 | expect_equal(ncol(exprs(agg_cds2)), 10) 53 | expect_equal(exprs(agg_cds2)[1,4], 2) 54 | expect_match(row.names(agg_cds2)[1], 
"chr18_10006196_10006822") 55 | expect_match(colnames(agg_cds2)[1], "1") 56 | expect_is(exprs(agg_cds2), "matrix") 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-runCicero.R: -------------------------------------------------------------------------------- 1 | context("runCicero") 2 | 3 | #### make_cicero_cds #### 4 | 5 | data(cicero_data) 6 | load("../tsne_coord.Rdata") 7 | data("human.hg19.genome") 8 | 9 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 10 | input_cds <- make_atac_cds(cicero_data) 11 | 12 | set.seed(2017) 13 | input_cds <- detectGenes(input_cds, min_expr = .1) 14 | input_cds <- estimateSizeFactors(input_cds) 15 | input_cds <- suppressWarnings(suppressMessages(estimateDispersions(input_cds))) 16 | 17 | set.seed(2018) 18 | cicero_cds <- make_cicero_cds(input_cds, 19 | reduced_coordinates = tsne_coords, 20 | silent = TRUE, 21 | summary_stats = c("num_genes_expressed")) 22 | 23 | test_that("make_cicero_cds aggregates correctly", { 24 | #skip_on_bioc() 25 | expect_is(cicero_cds, "CellDataSet") 26 | expect_equal(nrow(fData(cicero_cds)), nrow(fData(input_cds))) 27 | expect_named(pData(cicero_cds),c("agg_cell", "mean_num_genes_expressed", 28 | "Size_Factor", "num_genes_expressed")) 29 | expect_equal(nrow(exprs(cicero_cds)), 6146) 30 | expect_equal(ncol(exprs(cicero_cds)), 34) 31 | 32 | set.seed(2018) 33 | expect_warning(cicero_cds <- make_cicero_cds(input_cds, 34 | reduced_coordinates = tsne_coords, 35 | silent = FALSE, 36 | summary_stats = c("num_genes_expressed"))) 37 | input_cds2 <- input_cds 38 | fData(input_cds2)$bp1 <- NULL 39 | set.seed(2018) 40 | cicero_cds <- make_cicero_cds(input_cds2, 41 | reduced_coordinates = tsne_coords, 42 | silent = TRUE, 43 | size_factor_normalize = FALSE, 44 | summary_stats = c("num_genes_expressed")) 45 | expect_is(cicero_cds, "CellDataSet") 46 | expect_equal(nrow(fData(cicero_cds)), nrow(fData(input_cds))) 47 | 
expect_named(pData(cicero_cds),c("agg_cell", "mean_num_genes_expressed", 48 | "Size_Factor", "num_genes_expressed")) 49 | expect_equal(nrow(exprs(cicero_cds)), 6146) 50 | expect_equal(ncol(exprs(cicero_cds)), 34) 51 | }) 52 | 53 | set.seed(2018) 54 | cicero_cds_temp <- make_cicero_cds(input_cds, 55 | reduced_coordinates = tsne_coords, 56 | silent = TRUE, 57 | summary_stats = c("num_genes_expressed"), 58 | return_agg_info = TRUE, 59 | size_factor_normalize = FALSE) 60 | cicero_cds2 <- cicero_cds_temp[[1]] 61 | agg_info <- cicero_cds_temp[[2]] 62 | 63 | test_that("make_cicero_cds returns agg_info", { 64 | expect_is(cicero_cds2, "CellDataSet") 65 | expect_equal(nrow(fData(cicero_cds2)), nrow(fData(input_cds))) 66 | expect_named(pData(cicero_cds2),c("agg_cell", "mean_num_genes_expressed", 67 | "Size_Factor", "num_genes_expressed")) 68 | expect_equal(nrow(exprs(cicero_cds2)), 6146) 69 | expect_equal(ncol(exprs(cicero_cds2)), 34) 70 | 71 | expect_is(agg_info, "data.frame") 72 | 73 | agg_test_cell <- agg_info$agg_cell[[1]] 74 | test_agg <- as.character(agg_info[agg_info$agg_cell == agg_test_cell,]$cell) 75 | temp_exprs <- exprs(input_cds)[,test_agg] 76 | test_agg <- Matrix::rowSums(temp_exprs) 77 | expect_equal(sum(exprs(cicero_cds2)[,agg_test_cell] != test_agg), 0) 78 | 79 | }) 80 | 81 | #### estimate_distance_parameter #### 82 | 83 | test_that("estimate_distance_parameter gives correct mean", { 84 | #skip_on_bioc() 85 | set.seed(200) 86 | alphas <- estimate_distance_parameter(cicero_cds, window=500000, 87 | maxit=100, sample_num = 2, 88 | distance_constraint = 250000, 89 | distance_parameter_convergence = 1e-22, 90 | genomic_coords = sample_genome, 91 | max_sample_windows = 6) 92 | mean_alpha <- mean(alphas) 93 | expect_equal(length(alphas), 2) 94 | expect_equal(mean_alpha, 2.25, tolerance = 1e-2) 95 | set.seed(200) 96 | alphas <- estimate_distance_parameter(cicero_cds, window=500000, 97 | maxit=100, sample_num = 2, 98 | distance_constraint = 250000, 99 | 
distance_parameter_convergence = 1e-22, 100 | genomic_coords = "../human.hg19.genome_sub.txt", 101 | max_sample_windows = 6) 102 | mean_alpha <- mean(alphas) 103 | expect_equal(length(alphas), 2) 104 | expect_equal(mean_alpha, 2.25, tolerance = 1e-2) 105 | set.seed(200) 106 | expect_error(expect_warning(alphas <- estimate_distance_parameter(cicero_cds, 107 | window=500000, 108 | maxit=100, sample_num = 2, 109 | max_elements = 2, 110 | distance_constraint = 250000, 111 | distance_parameter_convergence = 1e-22, 112 | genomic_coords = sample_genome, 113 | max_sample_windows = 6))) 114 | testthat::expect_error(alphas <- estimate_distance_parameter(cicero_cds, 115 | window=10000, 116 | maxit=100, sample_num = 2, 117 | distance_constraint = 250000, 118 | distance_parameter_convergence = 1e-22, 119 | genomic_coords = sample_genome, 120 | max_sample_windows = 6), 121 | "distance_constraint not less than window") 122 | set.seed(205) 123 | testthat::expect_warning(alphas <- estimate_distance_parameter(cicero_cds, 124 | window=10000, 125 | maxit=100, sample_num = 2, 126 | distance_constraint = 5000, 127 | distance_parameter_convergence = 1e-22, 128 | genomic_coords = sample_genome, 129 | max_sample_windows = 6)) 130 | }) 131 | 132 | #### generate_cicero_models #### 133 | set.seed(203) 134 | mean_alpha <- 2.030655 135 | con_list <- generate_cicero_models(cicero_cds, 136 | mean_alpha, 137 | s=.75, 138 | genomic_coords = sample_genome) 139 | 140 | test_that("generate_cicero_models gives output", { #slow 141 | skip_on_bioc() 142 | expect_is(con_list, "list") 143 | expect_equal(length(con_list), 313) 144 | expect_equal(con_list[[1]]$w[1,2], 0.866, tolerance = 1e-3) 145 | set.seed(203) 146 | con_list <- generate_cicero_models(cicero_cds, 147 | mean_alpha, 148 | s=.75, 149 | genomic_coords = "../human.hg19.genome_sub.txt") 150 | expect_is(con_list, "list") 151 | expect_equal(length(con_list), 313) 152 | expect_equal(con_list[[1]]$w[1,2], 0.866, tolerance = 1e-3) 153 | set.seed(203) 
154 | con_list <- generate_cicero_models(cicero_cds, 155 | mean_alpha, 156 | window = 5000000, 157 | s=0.75, 158 | genomic_coords = sample_genome) 159 | expect_equal(length(con_list), 32) 160 | expect_equal(con_list[[1]], "Too many elements in range") 161 | 162 | set.seed(203) 163 | expect_error(con_list <- generate_cicero_models(cicero_cds, 164 | mean_alpha, 165 | window = 5000000, 166 | s=1, 167 | genomic_coords = sample_genome), 168 | "s not less than 1") 169 | 170 | set.seed(203) 171 | con_list <- generate_cicero_models(cicero_cds, 172 | mean_alpha, 173 | window = 500000, 174 | s=0.1, 175 | genomic_coords = sample_genome) 176 | expect_equal(con_list[[1]]$w[1,3], -3.7, tolerance = 1e-2) 177 | }) 178 | 179 | #### assemble_connections #### 180 | 181 | test_that("assemble_connections gives output", { 182 | #skip_on_bioc() 183 | expect_is(con_list, "list") 184 | expect_equal(length(con_list), 313) 185 | expect_equal(con_list[[1]]$w[1,2], 0.866, tolerance = 1e-3) 186 | set.seed(203) 187 | con_list <- generate_cicero_models(cicero_cds, 188 | mean_alpha, 189 | s=.75, 190 | genomic_coords = "../human.hg19.genome_sub.txt") 191 | 192 | cons <- assemble_connections(con_list, silent = FALSE) 193 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" & 194 | cons$Peak2 == "chr18_10603_11103",]$coaccess, 0.877, 195 | tolerance = 1e-3) 196 | expect_equal(ncol(cons), 3) 197 | expect_equal(nrow(cons), 543286) 198 | }) 199 | 200 | #### run_cicero #### 201 | set.seed(2000) 202 | cons <- run_cicero(cicero_cds, sample_genome, window = 500000, silent=TRUE, 203 | sample_num = 2) 204 | 205 | test_that("run_cicero gives output", { 206 | #skip_on_bioc() 207 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" & 208 | cons$Peak2 == "chr18_10603_11103",]$coaccess, 0.877, 209 | tolerance = 1e-3) 210 | expect_equal(ncol(cons), 3) 211 | expect_equal(nrow(cons), 543286) 212 | cons <- run_cicero(cicero_cds, window = 500000, silent=TRUE, sample_num = 2, 213 | genomic_coords = 
"../human.hg19.genome_sub.txt") 214 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" & 215 | cons$Peak2 == "chr18_10603_11103",]$coaccess, 0.877, 216 | tolerance = 1e-3) 217 | expect_equal(ncol(cons), 3) 218 | expect_equal(nrow(cons), 543286) 219 | }) 220 | 221 | test_that("run_cicero gives output bad chromosomes", { 222 | sample_genome <- subset(human.hg19.genome, V1 == "chr18") 223 | input_cds <- make_atac_cds(cicero_data) 224 | 225 | fdata <- fData(input_cds) 226 | mtx <- exprs(input_cds) 227 | pdata <- pData(input_cds) 228 | row.names(fdata) <- gsub("chr", "A0", row.names(fdata)) 229 | fdata$site_name <- row.names(fdata) 230 | row.names(mtx) <- row.names(fdata) 231 | pdata <- new("AnnotatedDataFrame", data = pdata) 232 | fdata <- new("AnnotatedDataFrame", data = fdata) 233 | new_inp <- suppressWarnings(newCellDataSet(mtx, pdata, fdata)) 234 | 235 | set.seed(2017) 236 | new_inp <- detectGenes(new_inp, min_expr = .1) 237 | new_inp <- estimateSizeFactors(new_inp) 238 | 239 | set.seed(2018) 240 | cicero_cds <- make_cicero_cds(new_inp, 241 | reduced_coordinates = tsne_coords, 242 | silent = TRUE, 243 | summary_stats = c("num_genes_expressed")) 244 | 245 | cons <- run_cicero(cicero_cds, window = 500000, silent=TRUE, sample_num = 2, 246 | genomic_coords = "../human.hg19.genome_sub.txt") 247 | 248 | #skip_on_bioc() 249 | expect_equal(cons[cons$Peak1 == "A018_10025_10225" & 250 | cons$Peak2 == "A018_10603_11103",]$coaccess, 0.877, 251 | tolerance = 1e-3) 252 | expect_equal(ncol(cons), 3) 253 | expect_equal(nrow(cons), 543286) 254 | }) 255 | 256 | #### generate_ccans #### 257 | 258 | test_that("generate_ccans gives output", { #slow 259 | skip_on_bioc() 260 | expect_output(CCAN_assigns <- generate_ccans(cons), 261 | "Coaccessibility cutoff used: 0.47") 262 | #expect_equal(CCAN_assigns["chr18_217477_218555",]$CCAN, 3, tolerance = 1e-7) 263 | expect_equal(ncol(CCAN_assigns), 2) 264 | expect_equal(nrow(CCAN_assigns), 1905) 265 | 
expect_equal(length(unique(CCAN_assigns$CCAN)), 116) 266 | 267 | expect_output(CCAN_assigns <- generate_ccans(cons, 268 | coaccess_cutoff_override = 0.25), 269 | "Coaccessibility cutoff used: 0.25") 270 | expect_output(CCAN_assigns <- generate_ccans(cons, tolerance_digits = 1), 271 | "Coaccessibility cutoff used: 0.5") 272 | expect_output(CCAN_assigns <- generate_ccans(cons, tolerance_digits = 1, 273 | coaccess_cutoff_override = .25), 274 | "Coaccessibility cutoff used: 0.25") 275 | }) 276 | 277 | #### compare_connections #### 278 | 279 | test_that("compare_connections works", { 280 | #skip_on_bioc() 281 | chia_conns <- data.frame(Peak1 = c("chr18_10000_10200", "chr18_10000_10200", 282 | "chr18_49500_49600"), 283 | Peak2 = c("chr18_10600_10700", "chr18_111700_111800", 284 | "chr18_10600_10700")) 285 | cons$in_dataset <- compare_connections(cons, chia_conns) 286 | cons$in_dataset2 <- compare_connections(cons, chia_conns, maxgap=1000) 287 | 288 | expect_is(cons, "data.frame") 289 | expect_equal(sum(cons$in_dataset), 4) 290 | expect_equal(sum(cons$in_dataset2), 22) 291 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" & 292 | cons$Peak2 == "chr18_10603_11103",]$in_dataset[1], TRUE) 293 | }) 294 | 295 | #### find_overlapping_ccans #### 296 | 297 | test_that("find_overlapping_ccans works", { 298 | CCAN_assigns <- generate_ccans(cons, coaccess_cutoff_override = 0.25) 299 | over <- find_overlapping_ccans(CCAN_assigns) 300 | expect_is(over, "data.frame") 301 | expect_equal(ncol(over), 2) 302 | skip_on_bioc() 303 | expect_equal(nrow(over), 98) 304 | over <- find_overlapping_ccans(CCAN_assigns, min_overlap = 3000000) 305 | expect_equal(nrow(over), 2) 306 | }) 307 | 308 | #### activity scores #### 309 | 310 | input_cds <- make_atac_cds(cicero_data, binarize=TRUE) 311 | input_cds <- detectGenes(input_cds, min_expr = .1) 312 | 313 | data(gene_annotation_sample) 314 | gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)] 315 | names(gene_annotation_sub)[4] <- "gene" 316 
input_cds <- suppressWarnings(annotate_cds_by_site(input_cds,
                                                   gene_annotation_sub))

# Gene activity matrix built before exprs(input_cds) is converted with
# as.matrix() further down.
unnorm_ga <- build_gene_activity_matrix(input_cds, cons)

# Kept inside test_that() so that a failure is reported as a named test
# failure instead of a file-level error that stops the remaining tests.
test_that("build_gene_activity_matrix works before exprs is converted", {
  expect_equal(nrow(unnorm_ga), 626)
  expect_equal(ncol(unnorm_ga), 200)
  expect_equal(unnorm_ga[1,1], 1.19, tolerance = 1e-2)
})

# Repeat with exprs() stored as a base matrix; the tests below use this result.
exprs(input_cds) <- as.matrix(exprs(input_cds))
unnorm_ga <- build_gene_activity_matrix(input_cds, cons)

test_that("build_gene_activity_matrix works", {
  #skip_on_bioc()
  expect_equal(nrow(unnorm_ga), 626)
  expect_equal(ncol(unnorm_ga), 200)
  expect_equal(unnorm_ga[1,1], 1.19, tolerance = 1e-2)
})

test_that("normalize_gene_activities works", {
  #skip_on_bioc()

  num_genes <- pData(input_cds)$num_genes_expressed
  names(num_genes) <- row.names(pData(input_cds))

  # Dimension and first-entry checks shared by every normalized matrix.
  expect_normalized <- function(activities) {
    expect_equal(nrow(activities), 626)
    expect_equal(ncol(activities), 200)
    expect_equal(activities[1,1], 0.0086, tolerance = 1e-4)
  }

  # Normalizes `unnorm` on its own and as a two-element list and checks
  # every result.
  check_normalization <- function(unnorm) {
    expect_normalized(normalize_gene_activities(unnorm, num_genes))

    normalized_list <- normalize_gene_activities(list(unnorm, unnorm),
                                                 num_genes)
    expect_is(normalized_list, "list")
    expect_equal(length(normalized_list), 2)
    expect_normalized(normalized_list[[1]])
    expect_normalized(normalized_list[[2]])
  }

  # Once with the matrix as returned by build_gene_activity_matrix(), once
  # after as.matrix().
  check_normalization(unnorm_ga)
  check_normalization(as.matrix(unnorm_ga))
})

--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
context("test-utils.R")


#### make_atac_cds ####

test_that("make_atac_cds makes a valid cds object", {
  #skip_on_bioc()
  data("cicero_data")

  # Class, dimension and first row/column name checks shared by every input.
  expect_atac_cds <- function(cds, n_sites, n_cells, first_site, first_cell) {
    expect_is(cds, "CellDataSet")
    expect_equal(nrow(exprs(cds)), n_sites)
    expect_equal(ncol(exprs(cds)), n_cells)
    expect_match(row.names(cds)[1], first_site)
    expect_match(colnames(cds)[1], first_cell)
    expect_is(exprs(cds), "dgCMatrix")
  }

  # In-memory input, with and without binarization.
  test_cds <- make_atac_cds(cicero_data)
  expect_atac_cds(test_cds, 6146, 200, "chr18_10025_10225",
                  "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT")

  test_cds <- make_atac_cds(cicero_data, binarize=TRUE)
  expect_atac_cds(test_cds, 6146, 200, "chr18_10025_10225",
                  "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT")

  # Unsupported input type.
  expect_error(test_cds <- make_atac_cds(3),
               "Input must be file path, matrix, or data.frame")

  # File-path input.
  test_cds <- make_atac_cds("../cicero_data_sub.txt", binarize=TRUE)
  expect_atac_cds(test_cds, 2149, 7, "chr18_10025_10225",
                  "AGCGATAGGCGCTATGGTGGAATTCAGTCAGGACGT")
})

#### ranges_for_coords ####

test_that("ranges_for_coords works", {
  #skip_on_bioc()

  wn <- ranges_for_coords("chr1:2039-30239", with_names = TRUE)
  wmd <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"),
                           meta_data_df = data.frame(dat = c("1", "X")))
  wmdn <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"),
                            with_names = TRUE,
                            meta_data_df = data.frame(dat = c("1", "X"),
                                                      stringsAsFactors = FALSE))

  # Each of these coordinate spellings must yield a GRanges object.
  expect_is(ranges_for_coords("chr1_2039_30239"), "GRanges")
  expect_is(ranges_for_coords("chr1_random_2039_30239"), "GRanges")
  expect_is(ranges_for_coords("chr1:2039:30239"), "GRanges")
  expect_is(ranges_for_coords("chr1-2039-30239"), "GRanges")
  expect_is(ranges_for_coords("chr1:2,039-30,239"), "GRanges")
  expect_is(ranges_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293")),
            "GRanges")
  expect_is(ranges_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293"),
                              with_names = TRUE), "GRanges")
  expect_is(wn, "GRanges")
  expect_is(wmd, "GRanges")

  # coord_string (with_names = TRUE) and the metadata column are checked
  # on the results built above.
  expect_match(wn$coord_string, "chr1:2039-30239")
  expect_match(as.character(wmd$dat[2]), "X")
  expect_match(wmdn$coord_string[1], "chr1:2049-203902")
  expect_match(as.character(wmdn$dat[2]), "X")
})

#### df_for_coords ####

test_that("df_for_coords works", {
  #skip_on_bioc()
  expect_is(df_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293")),
            "data.frame")
  expect_equal(df_for_coords(c("chr1:2,039-30,239",
                               "chrX:28884:101293"))$bp2[1], 30239)

  expect_is(df_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293",
                            "chr1_random_2039_30239")),
            "data.frame")
  expect_equal(df_for_coords(c("chr1:2,039-30,238", "chrX:28884:101293",
                               "chr1_random_2039_30239"))$bp2[3], 30239)
})

#### annotate_cds_by_site ####

test_that("annotate_cds_by_site works", {
  #skip_on_bioc()
  data("cicero_data")
  # Un-annotated CDS that every annotation below starts from.
  test_cds <- make_atac_cds(cicero_data)

  feat <- data.frame(chr = c("chr18", "chr18", "chr18", "chr18"),
                     bp1 = c(10000, 10800, 50000, 100000),
                     bp2 = c(10700, 11000, 60000, 110000),
                     type = c("Acetylated", "Methylated",
                              "Acetylated", "Methylated"),
                     stringsAsFactors = FALSE)

  # Shared checks for one pair of annotated CDS objects:
  #   single_hit - result of the default call (all not set)
  #   all_hits   - result of the all=TRUE call
  #   type_col   - name of the annotation column added next to `overlap`
  expect_site_annotation <- function(single_hit, all_hits, type_col = "type") {
    expect_is(single_hit, "CellDataSet")
    expect_is(all_hits, "CellDataSet")

    # Same rows as the input, exactly two added feature-data columns.
    expect_equal(nrow(fData(single_hit)), nrow(fData(test_cds)))
    expect_equal(nrow(fData(all_hits)), nrow(fData(test_cds)))
    expect_equal(ncol(fData(single_hit)), ncol(fData(test_cds)) + 2)
    expect_equal(ncol(fData(all_hits)), ncol(fData(test_cds)) + 2)

    # Second site: one value by default, comma-joined values with all=TRUE.
    expect_equal(fData(single_hit)$overlap[2], 201)
    expect_equal(fData(all_hits)$overlap[2], "98,201")
    expect_equal(fData(single_hit)[[type_col]][2], "Methylated")
    expect_equal(fData(all_hits)[[type_col]][2], "Acetylated,Methylated")

    # Third site: NA in both added columns.
    expect_true(is.na(fData(single_hit)$overlap[3]))
    expect_true(is.na(fData(all_hits)$overlap[3]))
    expect_true(is.na(fData(single_hit)[[type_col]][3]))
    expect_true(is.na(fData(all_hits)[[type_col]][3]))
  }

  # data.frame input, verbose.
  test_cds2 <- annotate_cds_by_site(test_cds, feat, verbose = TRUE)
  test_cds3 <- annotate_cds_by_site(test_cds, feat, all=TRUE, verbose = TRUE)
  expect_site_annotation(test_cds2, test_cds3)

  # data.frame input, default verbosity.
  test_cds2 <- annotate_cds_by_site(test_cds, feat)
  test_cds3 <- annotate_cds_by_site(test_cds, feat, all=TRUE)
  expect_site_annotation(test_cds2, test_cds3)

  # File input read without header = TRUE: the annotation column is named V4.
  test_cds2 <- annotate_cds_by_site(test_cds, "../feat.txt", verbose =TRUE)
  test_cds3 <- annotate_cds_by_site(test_cds, "../feat.txt", all=TRUE)
  expect_site_annotation(test_cds2, test_cds3, type_col = "V4")

  # File input read with header = TRUE: the annotation column is named type.
  test_cds2 <- annotate_cds_by_site(test_cds, "../feat_head.txt",
                                    header = TRUE)
  test_cds3 <- annotate_cds_by_site(test_cds, "../feat_head.txt",
                                    header = TRUE, all=TRUE)
  expect_site_annotation(test_cds2, test_cds3)

  # check tie
  feat2 <- data.frame(chr = c("chr18", "chr18", "chr18", "chr18", "chr18_GL456216_random"),
                      bp1 = c(10125, 10125, 50000, 100000, 32820116),
                      bp2 = c(10703, 10703, 60000, 110000, 32820118),
                      type = c("Acetylated", "Methylated",
                               "Acetylated", "Methylated", "Other"),
                      stringsAsFactors = FALSE)
  test_cds2 <- annotate_cds_by_site(test_cds, feat2, all=FALSE)
  expect_equal(fData(test_cds2)$type[2], "Acetylated")
  test_cds2 <- annotate_cds_by_site(test_cds, feat2, all=FALSE, maxgap = 901)
  expect_equal(fData(test_cds2)$type[3], "Acetylated")

  # check maxgap = "nearest"
  test_cds2 <- annotate_cds_by_site(test_cds, feat2, all=FALSE, maxgap = "nearest")
  expect_equal(sum(is.na(fData(test_cds2)$type)), 0)

})


#### make_sparse_matrix ####

test_that("make_sparse_matrix works", {
  #skip_on_bioc()
  df <- data.frame(icol = c("chr18_30209631_30210783",
                            "chr18_45820294_45821666",
                            "chr18_32820116_32820994"),
                   jcol = c("chr18_41888433_41890138",
                            "chr18_33038287_33039444",
                            "chr18_random_25533921_25534483"),
                   xcol = c(1,2,3))
  sm <- make_sparse_matrix(df, "icol", "jcol", "xcol")
  expect_equal(sm["chr18_30209631_30210783", "chr18_41888433_41890138"], 1)
  expect_equal(sm["chr18_45820294_45821666", "chr18_33038287_33039444"], 2)
  # Looked up with the row and column names swapped relative to df.
  expect_equal(sm["chr18_random_25533921_25534483", "chr18_32820116_32820994"], 3)
  # A pair that does not appear in df.
  expect_equal(sm["chr18_random_25533921_25534483", "chr18_30209631_30210783"], 0)
  expect_error(make_sparse_matrix(df, "icol", "xcol", "jcol"),
               "x.name column must be numeric")
  expect_error(make_sparse_matrix(df, "icol", "hannah", "jcol"),
               "i.name, j.name, and x.name must be columns in data")
})

#### compare_connections ####
# IN test-runCicero.R

#### find_overlapping_coordinates ####

test_that("find_overlapping_coordinates works", {
  #skip_on_bioc()
  test_coords <- c("chr18_10025_10225", "chr18_10603_11103", "chr18_11604_13986",
                   "chr18_157883_158536", "chr18_217477_218555",
                   "chr18_245734_246234", "chr18_random_245734_246234")
  expect_equal(length(find_overlapping_coordinates(test_coords,
                                                   "chr18:10,100-1246234")), 6)
  expect_equal(length(find_overlapping_coordinates(test_coords,
                                                   "chr18_10227_10601")), 0)
  expect_equal(length(find_overlapping_coordinates(test_coords,
                                                   "chr18_10227_10601",
                                                   maxgap = 1)), 2)
  expect_equal(length(find_overlapping_coordinates(test_coords,
                                                   "chr18_random_10227_245736",
                                                   maxgap = 1)), 1)
  expect_equal(length(find_overlapping_coordinates(test_coords,
                                                   c("chr18_10227_10602",
                                                     "chr18:11604-246234"))), 5)
  expect_equal(length(find_overlapping_coordinates(test_coords,
                                                   c("chr18_10226_10602",
                                                     "chr18:11604-246234"),
                                                   maxgap = 1)), 6)
  # Queries for which the result is expected to be all NA.
  expect_true(all(is.na(find_overlapping_coordinates(test_coords,
                                                     c("chr19_10226_10602",
                                                       "chr19:11604-246234"),
                                                     maxgap = 1))))
  expect_true(all(is.na(find_overlapping_coordinates(test_coords,
                                                     c("chr18_1022600_1060200",
                                                       "chr18:1160400-24623400"),
                                                     maxgap = 1))))
})

--------------------------------------------------------------------------------
/tests/tsne_coord.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/tests/tsne_coord.Rdata
--------------------------------------------------------------------------------