├── .Rbuildignore
├── .github
├── .gitignore
└── workflows
│ └── check-standard.yaml
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS
├── R
├── activityScores.R
├── aggregate.R
├── cicero.R
├── data.R
├── fromGviz.R
├── plotting.R
├── runCicero.R
└── utils.R
├── README.md
├── code-of-conduct.md
├── codecov.yml
├── data
├── cell_data.rda
├── cicero_data.rda
├── gene_annotation_sample.rda
└── human.hg19.genome.rda
├── inst
└── CITATION
├── man
├── aggregate_by_cell_bin.Rd
├── aggregate_nearby_peaks.Rd
├── annotate_cds_by_site.Rd
├── assemble_connections.Rd
├── build_gene_activity_matrix.Rd
├── cell_data.Rd
├── cicero-package.Rd
├── cicero_data.Rd
├── compare_connections.Rd
├── df_for_coords.Rd
├── estimate_distance_parameter.Rd
├── find_overlapping_ccans.Rd
├── find_overlapping_coordinates.Rd
├── gene_annotation_sample.Rd
├── generate_ccans.Rd
├── generate_cicero_models.Rd
├── human.hg19.genome.Rd
├── make_atac_cds.Rd
├── make_cicero_cds.Rd
├── make_sparse_matrix.Rd
├── normalize_gene_activities.Rd
├── plot_accessibility_in_pseudotime.Rd
├── plot_connections.Rd
├── ranges_for_coords.Rd
└── run_cicero.Rd
├── tests
├── cicero_data_sub.txt
├── feat.txt
├── feat_head.txt
├── figs
│ ├── deps.txt
│ └── plotting
│ │ ├── basic-bar-high-breaks.svg
│ │ ├── basic-bar-one.svg
│ │ ├── basic-bar.svg
│ │ ├── basic-connections-all-bp.svg
│ │ ├── basic-connections-chr-bp1.svg
│ │ ├── basic-connections-chr.svg
│ │ ├── basic-connections-comparison-plot.svg
│ │ ├── basic-connections-high-comparison-cutoff.svg
│ │ ├── basic-connections-high-cutoff.svg
│ │ ├── basic-connections-include-axis-track.svg
│ │ ├── basic-connections-plot-bad-chr.svg
│ │ ├── basic-connections-plot-comparison-cutoff.svg
│ │ ├── basic-connections-plot-cutoff.svg
│ │ ├── basic-connections-plot-dt.svg
│ │ ├── basic-connections-plot-with-viewpoint-change-colors.svg
│ │ ├── basic-connections-plot-with-viewpoint-no-comp.svg
│ │ ├── basic-connections-plot-with-viewpoint.svg
│ │ ├── basic-connections-plot.svg
│ │ ├── comparison-connection-color-color-column.svg
│ │ ├── comparison-connection-color-comparison-connection-width.svg
│ │ ├── comparison-connection-color-type-column-coaccess-no-legend.svg
│ │ ├── comparison-connection-color-type-column-coaccess.svg
│ │ ├── comparison-connection-color-type-column.svg
│ │ ├── comparison-connection-color.svg
│ │ ├── comparison-peak-color-color-column.svg
│ │ ├── comparison-peak-color-logical-column.svg
│ │ ├── comparison-peak-color-type-column.svg
│ │ ├── comparison-peak-color.svg
│ │ ├── comparison-ymax-plus-cutoff.svg
│ │ ├── comparison-ymax.svg
│ │ ├── connection-color-color-column.svg
│ │ ├── connection-color-connection-width.svg
│ │ ├── connection-color-type-column-coaccess-no-legend.svg
│ │ ├── connection-color-type-column-coaccess.svg
│ │ ├── connection-color-type-column.svg
│ │ ├── connection-color.svg
│ │ ├── connection-ymax-plus-cutoff.svg
│ │ ├── connection-ymax.svg
│ │ ├── connections-plot-with-collapsetranscripts-gene.svg
│ │ ├── connections-plot-with-collapsetranscripts-longest.svg
│ │ ├── connections-plot-with-collapsetranscripts-meta.svg
│ │ ├── connections-plot-with-collapsetranscripts-shortest.svg
│ │ ├── connections-plot-with-collapsetranscripts-true.svg
│ │ ├── connections-plot-with-comparison-color.svg
│ │ ├── connections-plot-with-comparison-peak-color-hex.svg
│ │ ├── connections-plot-with-comparison-peak-color.svg
│ │ ├── connections-plot-with-comparison.svg
│ │ ├── connections-plot-with-gene-model-color.svg
│ │ ├── connections-plot-with-gene-model-no-genes.svg
│ │ ├── connections-plot-with-gene-model-with-comparison.svg
│ │ ├── connections-plot-with-gene-model.svg
│ │ ├── peak-color-color-column.svg
│ │ ├── peak-color-logical-column.svg
│ │ ├── peak-color-type-column.svg
│ │ └── peak-color.svg
├── human.hg19.genome_sub.txt
├── testthat.R
├── testthat
│ ├── _snaps
│ │ └── plotting
│ │ │ ├── basic-bar-high-breaks.svg
│ │ │ ├── basic-bar-one.svg
│ │ │ ├── basic-bar.svg
│ │ │ ├── basic-connections-all-bp.svg
│ │ │ ├── basic-connections-chr-bp1.svg
│ │ │ ├── basic-connections-chr.svg
│ │ │ ├── basic-connections-comparison-plot.svg
│ │ │ ├── basic-connections-high-comparison-cutoff.svg
│ │ │ ├── basic-connections-high-cutoff.svg
│ │ │ ├── basic-connections-include-axis-track.svg
│ │ │ ├── basic-connections-plot-bad-chr.svg
│ │ │ ├── basic-connections-plot-comparison-cutoff.svg
│ │ │ ├── basic-connections-plot-cutoff.svg
│ │ │ ├── basic-connections-plot-dt.svg
│ │ │ ├── basic-connections-plot-with-viewpoint-change-colors.svg
│ │ │ ├── basic-connections-plot-with-viewpoint-no-comp.svg
│ │ │ ├── basic-connections-plot-with-viewpoint.svg
│ │ │ ├── basic-connections-plot.svg
│ │ │ ├── comparison-connection-color-color-column.svg
│ │ │ ├── comparison-connection-color-comparison-connection-width.svg
│ │ │ ├── comparison-connection-color-type-column-coaccess-no-legend.svg
│ │ │ ├── comparison-connection-color-type-column-coaccess.svg
│ │ │ ├── comparison-connection-color-type-column.svg
│ │ │ ├── comparison-connection-color.svg
│ │ │ ├── comparison-peak-color-color-column.svg
│ │ │ ├── comparison-peak-color-logical-column.svg
│ │ │ ├── comparison-peak-color-type-column.svg
│ │ │ ├── comparison-peak-color.svg
│ │ │ ├── comparison-ymax-plus-cutoff.svg
│ │ │ ├── comparison-ymax.svg
│ │ │ ├── connection-color-color-column.svg
│ │ │ ├── connection-color-connection-width.svg
│ │ │ ├── connection-color-type-column-coaccess-no-legend.svg
│ │ │ ├── connection-color-type-column-coaccess.svg
│ │ │ ├── connection-color-type-column.svg
│ │ │ ├── connection-color.svg
│ │ │ ├── connection-ymax-plus-cutoff.svg
│ │ │ ├── connection-ymax.svg
│ │ │ ├── connections-plot-with-collapsetranscripts-gene.svg
│ │ │ ├── connections-plot-with-collapsetranscripts-longest.svg
│ │ │ ├── connections-plot-with-collapsetranscripts-meta.svg
│ │ │ ├── connections-plot-with-collapsetranscripts-shortest.svg
│ │ │ ├── connections-plot-with-collapsetranscripts-true.svg
│ │ │ ├── connections-plot-with-comparison-color.svg
│ │ │ ├── connections-plot-with-comparison-peak-color-hex.svg
│ │ │ ├── connections-plot-with-comparison-peak-color.svg
│ │ │ ├── connections-plot-with-comparison.svg
│ │ │ ├── connections-plot-with-gene-model-color.svg
│ │ │ ├── connections-plot-with-gene-model-no-genes.svg
│ │ │ ├── connections-plot-with-gene-model-with-comparison.svg
│ │ │ ├── connections-plot-with-gene-model.svg
│ │ │ ├── peak-color-color-column.svg
│ │ │ ├── peak-color-logical-column.svg
│ │ │ ├── peak-color-type-column.svg
│ │ │ └── peak-color.svg
│ ├── test-aggregate.R
│ ├── test-plotting.R
│ ├── test-runCicero.R
│ └── test-utils.R
└── tsne_coord.Rdata
└── vignettes
└── website.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.travis\.yml$
4 | ^codecov\.yml$
5 | ^\.github$
6 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/check-standard.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | schedule:
5 | - cron: '0 0 1 * *'
6 | push:
7 | branches: [main, master, monocle3]
8 | pull_request:
9 | branches: [main, master, monocle3]
10 |
11 | name: R-CMD-check
12 |
13 | jobs:
14 | R-CMD-check:
15 | runs-on: ${{ matrix.config.os }}
16 |
17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
18 |
19 | strategy:
20 | fail-fast: false
21 | matrix:
22 | config:
23 | - {os: macos-latest, r: 'release'}
24 | - {os: windows-latest, r: 'release'}
25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
26 | - {os: ubuntu-latest, r: 'release'}
27 |
28 | env:
29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
30 | R_KEEP_PKG_SOURCE: yes
31 |
32 | steps:
33 | - uses: actions/checkout@v3
34 |
35 | - uses: r-lib/actions/setup-pandoc@v2
36 |
37 | - uses: r-lib/actions/setup-r@v2
38 | with:
39 | r-version: ${{ matrix.config.r }}
40 | http-user-agent: ${{ matrix.config.http-user-agent }}
41 | use-public-rspm: true
42 |
43 | - uses: r-lib/actions/setup-r-dependencies@v2
44 | with:
45 | extra-packages: any::rcmdcheck
46 | needs: check
47 |
48 | - uses: r-lib/actions/check-r-package@v2
49 | with:
50 | upload-snapshots: true
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | cicero.Rproj
6 | .DS_Store
7 | cicero-release.Rproj
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | cache: packages
2 | sudo: required
3 | warnings_are_errors: true
4 |
5 | before_install: [
6 | "sudo add-apt-repository \"deb http://archive.ubuntu.com/ubuntu/ xenial main\" -y",
7 | "sudo apt-get update -q",
8 | "sudo apt-get install libfreetype6"
9 | ]
10 |
11 | language: r
12 | r:
13 | - bioc-release
14 |
15 | r_binary_packages:
16 | - Matrix
17 | - boot
18 | - class
19 | - cluster
20 | - codetools
21 | - colorspace
22 | - foreign
23 | - gtable
24 | - kernsmooth
25 | - lattice
26 | - littler
27 | - mass
28 | - mgcv
29 | - nlme
30 | - nnet
31 | - rpart
32 | - spatial
33 | - survival
34 |
35 | after_success:
36 | - Rscript -e 'covr::codecov()'
37 |
38 | notifications:
39 | slack:
40 | secure: sFOlT9BYAUZLqt+RDtLdbGbG411FU56IEALGIoPiHHhmCoWEbyaVJfhasRh5SZShhAjUvxiTMPkA05prjrQBHkzVozkPhNPUZSwezp5zaKpsQerBVHtc5jN5xaZinvqLK0TNKk1zq5pnBMwcRg54RVQe/petTsiuttxkmyJSAgztiterifMnhUefaJURNvd2AYvi57agUhu41j3xQnarnXdJR8YZtAaXDoYdC9kckJwrf6ZsBBWCe/P0kg1YZt/86SiCGidcjmNOqbAud7/tKJKzjFaU2R1v8ecyZiGSXWFVfbUt/27Ur+8IzJiV0Qn08Ru6TwJSqS/fbc+2vWYo6MWKkfZtK4HhoWsYXyk7jnOP2DTynFMidthGR+qxLxpBuvQbGtPRgcm1DVjDl2Z3MqXz1RbAmnbCDOWNF21go1yIAKCxgHd2a/2TtLsbJsZwyGwSmrxhoF+CH4kJ2mgKarwyKGTLs5L6/eQfXR31hQc0h0eKMLlBkweTkxOguCrO30cuQvZ+bm/pFmR9doyZLP23/cYbnu0AsxgOa/SJ57MjDG2gKYmuzlBXPhNl30M9OIc0P1WRNHmk6aW4wZn6VKYBOAq1gGMM2k8g5x/OPM01R2c5ZJ7Fwizh4kaocXydfxGoJOSO1JPE+r/G339GgTuXyEekgA0dcF0j3recLyQ=
41 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: cicero
2 | Type: Package
3 | Title: Predict cis-co-accessibility from single-cell chromatin accessibility data
4 | Version: 1.17.2
5 | Authors@R: c(
6 | person("Hannah", "Pliner", email = "hpliner@uw.edu", role = c("aut", "cre")),
7 | person("Cole", "Trapnell", email = "coletrap@uw.edu", role = c("aut")))
8 | Description: Cicero computes putative cis-regulatory maps from single-cell chromatin
9 | accessibility data. It also extends monocle 2 for use in chromatin accessibility
10 | data.
11 | Depends:
12 | R (>= 3.5.0),
13 | monocle,
14 | Gviz (>= 1.22.3)
15 | License: MIT + file LICENSE
16 | Encoding: UTF-8
17 | Imports:
18 | assertthat (>= 0.2.0),
19 | Biobase (>= 2.37.2),
20 | BiocGenerics (>= 0.23.0),
21 | data.table (>= 1.10.4),
22 | dplyr (>= 0.7.4),
23 | FNN (>= 1.1),
24 | GenomicRanges (>= 1.30.3),
25 | ggplot2 (>= 2.2.1),
26 | glasso (>= 1.8),
27 | grDevices,
28 | igraph (>= 1.1.0),
29 | IRanges (>= 2.10.5),
30 | Matrix (>= 1.2-12),
31 | methods,
32 | parallel,
33 | plyr (>= 1.8.4),
34 | reshape2 (>= 1.4.3),
35 | S4Vectors (>= 0.14.7),
36 | stats,
37 | stringi,
38 | stringr (>= 1.2.0),
39 | tibble (>= 1.4.2),
40 | tidyr,
41 | VGAM (>= 1.0-5),
42 | utils
43 | RoxygenNote: 7.2.3
44 | Suggests:
45 | AnnotationDbi (>= 1.38.2),
46 | knitr,
47 | markdown,
48 | rmarkdown,
49 | rtracklayer (>= 1.36.6),
50 | testthat,
51 | vdiffr (>= 0.2.3),
52 | covr
53 | VignetteBuilder: knitr
54 | biocViews: Sequencing, Clustering, CellBasedAssays, ImmunoOncology,
55 | GeneRegulation, GeneTarget, Epigenetics, ATACSeq, SingleCell
56 | LazyData: true
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Hannah Pliner and Cole Trapnell
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(aggregate_by_cell_bin)
4 | export(aggregate_nearby_peaks)
5 | export(annotate_cds_by_site)
6 | export(assemble_connections)
7 | export(build_gene_activity_matrix)
8 | export(compare_connections)
9 | export(df_for_coords)
10 | export(estimate_distance_parameter)
11 | export(find_overlapping_ccans)
12 | export(find_overlapping_coordinates)
13 | export(generate_ccans)
14 | export(generate_cicero_models)
15 | export(make_atac_cds)
16 | export(make_cicero_cds)
17 | export(normalize_gene_activities)
18 | export(plot_accessibility_in_pseudotime)
19 | export(plot_connections)
20 | export(ranges_for_coords)
21 | export(run_cicero)
22 | import(Gviz)
23 | import(VGAM)
24 | import(data.table)
25 | import(ggplot2)
26 | import(monocle)
27 | importFrom(Biobase,"fData<-")
28 | importFrom(Biobase,"pData<-")
29 | importFrom(Biobase,ExpressionSet)
30 | importFrom(Biobase,annotatedDataFrameFrom)
31 | importFrom(Biobase,assayDataNew)
32 | importFrom(Biobase,exprs)
33 | importFrom(Biobase,fData)
34 | importFrom(Biobase,multiassign)
35 | importFrom(Biobase,pData)
36 | importFrom(BiocGenerics,estimateDispersions)
37 | importFrom(BiocGenerics,estimateSizeFactors)
38 | importFrom(IRanges,findOverlaps)
39 | importFrom(data.table,melt.data.table)
40 | importFrom(dplyr,"%>%")
41 | importFrom(grDevices,col2rgb)
42 | importFrom(grDevices,dev.cur)
43 | importFrom(grDevices,dev.off)
44 | importFrom(grDevices,palette)
45 | importFrom(grDevices,rainbow)
46 | importFrom(methods,as)
47 | importFrom(methods,callNextMethod)
48 | importFrom(methods,is)
49 | importFrom(methods,new)
50 | importFrom(plyr,.)
51 | importFrom(stats,as.formula)
52 | importFrom(stats,cov)
53 | importFrom(stats,dist)
54 | importFrom(stats,filter)
55 | importFrom(stats,median)
56 | importFrom(utils,combn)
57 | importFrom(utils,read.table)
58 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/NEWS
--------------------------------------------------------------------------------
/R/activityScores.R:
--------------------------------------------------------------------------------
#' Calculate initial Cicero gene activity matrix
#'
#' Builds the unnormalized Cicero gene activity matrix from a binary
#' accessibility CDS and a table of Cicero connections. After this function,
#' the activity matrix should be normalized, together with any comparison
#' matrices, using \code{\link{normalize_gene_activities}}.
#'
#' @param input_cds Binary sci-ATAC-seq input CDS. The input CDS must have a
#'   column in the fData table called "gene" which is the gene name if the
#'   site is a promoter, and \code{NA} if the site is distal.
#' @param cicero_cons_info Cicero connections table, generally the output of
#'   \code{\link{run_cicero}}. This table is a data frame with three required
#'   columns named "Peak1", "Peak2", and "coaccess". Peak1 and Peak2 contain
#'   coordinates for the two compared elements, and coaccess contains their
#'   Cicero co-accessibility score.
#' @param site_weights NULL or a named vector with an individual weight for
#'   each site in input_cds; the names must be the site names. When NULL,
#'   every site is given a weight of 1.
#' @param dist_thresh The maximum distance in base pairs between pairs of sites
#'   to include in the gene activity calculation.
#' @param coaccess_cutoff The minimum Cicero co-accessibility score that should
#'   be considered connected.
#'
#' @return Unnormalized gene activity matrix.
#' @export
#'
#' @examples
#'   data("cicero_data")
#'   data("human.hg19.genome")
#'   sample_genome <- subset(human.hg19.genome, V1 == "chr18")
#'   sample_genome$V2[1] <- 100000
#'   input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
#'   input_cds <- detectGenes(input_cds)
#'   input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
#'                                reduction_method = 'tSNE',
#'                                norm_method = "none")
#'   tsne_coords <- t(reducedDimA(input_cds))
#'   row.names(tsne_coords) <- row.names(pData(input_cds))
#'   cicero_cds <- make_cicero_cds(input_cds,
#'                                 reduced_coordinates = tsne_coords)
#'   cons <- run_cicero(cicero_cds, sample_genome, sample_num=2)
#'
#'   data(gene_annotation_sample)
#'   gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
#'   names(gene_annotation_sub)[4] <- "gene"
#'   input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
#'   num_genes <- pData(input_cds)$num_genes_expressed
#'   names(num_genes) <- row.names(pData(input_cds))
#'   unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
#'
#'
build_gene_activity_matrix <- function(input_cds,
                                       cicero_cons_info,
                                       site_weights=NULL,
                                       dist_thresh=250000,
                                       coaccess_cutoff=0.25){
  assertthat::assert_that(is(input_cds, "CellDataSet"))
  assertthat::assert_that(is.data.frame(cicero_cons_info))
  assertthat::assert_that(assertthat::has_name(cicero_cons_info, "Peak1"),
                          assertthat::has_name(cicero_cons_info, "Peak2"),
                          assertthat::has_name(cicero_cons_info, "coaccess"))

  assertthat::assert_that(assertthat::has_name(fData(input_cds), "gene"),
                          msg = paste("The fData table of the input CDS must",
                                      "have a column called 'gene'. See",
                                      "documentation for details.",
                                      collapse=" "))

  if (is.null(site_weights)) {
    # Default: every site contributes equally. Built directly rather than via
    # a mean/mean division, which yields NaN for sites with no accessibility.
    site_ids <- row.names(exprs(input_cds))
    site_weights <- rep(1, length(site_ids))
    names(site_weights) <- site_ids
  } else {
    # The weights are later looked up by site name, so names are mandatory.
    assertthat::assert_that(!is.null(names(site_weights)),
                            msg = paste("site_weights must be a named vector",
                                        "whose names are the site names of",
                                        "input_cds."))
  }

  gene_activity_scores <-
    build_composite_gene_activity_matrix(input_cds,
                                         site_weights,
                                         cicero_cons_info,
                                         dist_thresh=dist_thresh,
                                         coaccess_cutoff=coaccess_cutoff)

  return(gene_activity_scores)
}
85 |
# Internal helper for build_gene_activity_matrix().
#
# Combines the accessibility of each promoter peak with the weighted
# accessibility of the distal sites connected to it, then sums the promoter
# scores belonging to the same gene.
#
#   input_cds         CDS whose fData has a "gene" column (NA for non-promoter
#                     sites).
#   site_weights      Named vector of per-site weights (names are site names).
#   cicero_cons_info  Data frame with columns Peak1, Peak2 and coaccess; a
#                     "dist" column is used if present, otherwise computed.
#   dist_thresh       Connections are kept only if dist < dist_thresh.
#   coaccess_cutoff   Connections are kept only if coaccess >= coaccess_cutoff.
#
# Returns a matrix whose rows are genes and whose columns are the columns of
# exprs(input_cds).
build_composite_gene_activity_matrix <- function(input_cds,
                                                 site_weights,
                                                 cicero_cons_info,
                                                 dist_thresh=250000,
                                                 coaccess_cutoff=0.25) {
  access_mat <- exprs(input_cds)

  # Promoter peak -> gene lookup table (sites without a gene are dropped)
  promoters <- fData(input_cds)
  promoters$peak <- as.character(row.names(promoters))
  promoters <- promoters[!is.na(promoters$gene),]
  promoters <- promoters[,c("peak", "gene")]
  promoters$gene <- as.character(promoters$gene)
  promoter_peaks <- unique(promoters$peak)

  # Diagonal matrix of site weights, labelled by site name on both axes
  weight_names <- names(site_weights)
  weight_mat <- as(Matrix::Diagonal(x=as.numeric(site_weights)),
                   "sparseMatrix")
  dimnames(weight_mat) <- list(weight_names, weight_names)

  # Midpoint-to-midpoint distance between the two peaks of each connection,
  # only computed when the table does not already carry a "dist" column
  if (!("dist" %in% colnames(cicero_cons_info))) {
    peak_midpoint <- function(peak_names) {
      parts <- split_peak_names(peak_names)
      round((as.integer(parts[,3]) + as.integer(parts[,2])) / 2)
    }
    bp1 <- peak_midpoint(cicero_cons_info$Peak1)
    bp2 <- peak_midpoint(cicero_cons_info$Peak2)
    cicero_cons_info$dist <- abs(bp2 - bp1)
  }

  # Keep connections that involve a promoter, reach the co-accessibility
  # cutoff and are closer than the distance threshold
  involves_promoter <- cicero_cons_info$Peak1 %in% promoters$peak |
    cicero_cons_info$Peak2 %in% promoters$peak
  keep <- involves_promoter &
    cicero_cons_info$coaccess >= coaccess_cutoff &
    cicero_cons_info$dist < dist_thresh
  cons <- cicero_cons_info[keep,]
  cons <- cons[,c("Peak1", "Peak2", "coaccess")]
  cons <- cons[!duplicated(cons),]

  cons$Peak1 <- as.character(cons$Peak1)
  cons$Peak2 <- as.character(cons$Peak2)

  # Append a zero-score self connection for every promoter peak (presumably
  # so that each promoter is present in the connection matrix below)
  cons <- rbind(cons,
                data.frame(Peak1=promoter_peaks,
                           Peak2=promoter_peaks,
                           coaccess=0))

  # Square connection matrix, then its promoter rows only
  conn_mat <- make_sparse_matrix(cons, x.name="coaccess")
  promoter_conn <- conn_mat[promoter_peaks,]

  # Promoter and distal sites that are present in the accessibility matrix
  promoter_sites <- intersect(rownames(promoter_conn),
                              row.names(access_mat))
  distal_sites <- intersect(colnames(promoter_conn),
                            row.names(access_mat))
  distal_sites <- setdiff(distal_sites, promoter_sites)

  promoter_access <- access_mat[promoter_sites,, drop=FALSE]
  distal_access <- access_mat[distal_sites,, drop=FALSE]

  # Weighted promoter-to-distal links, rescaled by the reciprocal of each
  # promoter's total linked weight
  distal_weights <- weight_mat[distal_sites, distal_sites, drop=FALSE]
  promoter_to_distal <- promoter_conn[,distal_sites, drop=FALSE]

  row_scale <- 1/Matrix::rowSums(promoter_to_distal %*% distal_weights,
                                 na.rm=TRUE)
  # Inf, NA and NaN (e.g. promoters with no linked weight) all become 0
  row_scale[!is.finite(row_scale)] <- 0
  scale_names <- names(row_scale)
  row_scale <- Matrix::Diagonal(x=row_scale)
  dimnames(row_scale) <- list(scale_names, scale_names)

  scaled_links <- row_scale %*% promoter_to_distal %*% distal_weights
  # Cap individual link weights at 1
  scaled_links@x[scaled_links@x > 1] <- 1

  # Distal contribution per promoter, ordered like the promoter matrix
  distal_scores <- scaled_links %*% distal_access
  distal_scores <- distal_scores[row.names(promoter_access),, drop=FALSE]

  # Promoter score = own accessibility + linked distal contribution
  promoter_scores <- distal_scores + promoter_access

  # Gene x promoter-peak indicator matrix used to sum promoters per gene
  peak_levels <- factor(promoters$peak)
  gene_levels <- factor(promoters$gene)
  gene_by_peak <- Matrix::sparseMatrix(j=as.numeric(peak_levels),
                                       i=as.numeric(gene_levels),
                                       x=1)
  colnames(gene_by_peak) <- levels(peak_levels)
  row.names(gene_by_peak) <- levels(gene_levels)
  gene_by_peak <- gene_by_peak[,row.names(promoter_scores)]

  gene_by_peak %*% promoter_scores
}
198 |
#' Normalize gene activities
#'
#' Normalize the output of \code{\link{build_gene_activity_matrix}}. Input is
#' either one or multiple gene activity matrices. Any gene activities to be
#' compared amongst each other should be normalized together.
#'
#'
#' @param activity_matrices A gene activity matrix, output from
#'   \code{\link{build_gene_activity_matrix}}, or a list of gene activity
#'   matrices to be normalized together.
#' @param cell_num_genes A named vector of the total number of accessible sites
#'   per cell. Names should correspond to the cell names in the activity
#'   matrices. These values can be found in the "num_genes_expressed" column
#'   of the pData table of the CDS used to calculate the gene activity matrix.
#'
#' @return Normalized activity matrix or matrices. Genes and cells whose
#'   total activity is zero cannot be normalized and are omitted from the
#'   result.
#' @export
#'
#' @examples
#'   data("cicero_data")
#'   data("human.hg19.genome")
#'   sample_genome <- subset(human.hg19.genome, V1 == "chr18")
#'   sample_genome$V2[1] <- 100000
#'   input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
#'   input_cds <- detectGenes(input_cds)
#'   input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
#'                                reduction_method = 'tSNE',
#'                                norm_method = "none")
#'   tsne_coords <- t(reducedDimA(input_cds))
#'   row.names(tsne_coords) <- row.names(pData(input_cds))
#'   cicero_cds <- make_cicero_cds(input_cds,
#'                                 reduced_coordinates = tsne_coords)
#'   cons <- run_cicero(cicero_cds, sample_genome, sample_num=2)
#'
#'   data(gene_annotation_sample)
#'   gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
#'   names(gene_annotation_sub)[4] <- "gene"
#'   input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
#'   num_genes <- pData(input_cds)$num_genes_expressed
#'   names(num_genes) <- row.names(pData(input_cds))
#'   unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
#'   cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes)
#'
#'
normalize_gene_activities <- function(activity_matrices,
                                      cell_num_genes){
  single_input <- !is.list(activity_matrices)

  # One row per cell, in the same order as the columns of `scores`
  if (single_input) {
    scores <- activity_matrices
    normalization_df <- data.frame(cell = colnames(activity_matrices),
                                   cell_group=1)
  } else {
    scores <- do.call(cbind, activity_matrices)

    normalization_df <-
      do.call(rbind,
              lapply(seq_along(activity_matrices),
                     function(x) {
                       data.frame(cell = colnames(activity_matrices[[x]]),
                                  cell_group=rep(x, ncol(activity_matrices[[x]])))
                     }))
  }
  normalization_df$cell <- as.character(normalization_df$cell)

  # Remove all-zero genes and cells. The per-cell table must be filtered with
  # the same mask, otherwise its rows no longer line up with the columns of
  # `scores`.
  keep_genes <- Matrix::rowSums(scores) != 0
  keep_cells <- Matrix::colSums(scores) != 0
  scores <- scores[keep_genes, keep_cells, drop = FALSE]
  normalization_df <- normalization_df[keep_cells, , drop = FALSE]

  normalization_df$cell_group <- factor(normalization_df$cell_group)
  normalization_df$total_activity <- Matrix::colSums(scores)
  normalization_df$total_sites <- cell_num_genes[normalization_df$cell]

  # A cell without a site count would be silently dropped by lm() and lead to
  # a confusing length mismatch further down; fail early instead.
  no_site_count <- is.na(normalization_df$total_sites)
  if (any(no_site_count)) {
    stop("cell_num_genes has no value for ", sum(no_site_count),
         " cell(s) of the activity matrices, e.g. '",
         normalization_df$cell[which(no_site_count)[1]], "'.")
  }

  # Fit total activity against total accessible sites; use a per-group fit
  # only when more than one group of cells is actually present.
  if (nlevels(normalization_df$cell_group) > 1) {
    activity_model <- stats::lm(log(total_activity) ~ log(total_sites) *
                                  cell_group, data=normalization_df)
  } else {
    activity_model <- stats::lm(log(total_activity) ~ log(total_sites),
                                data=normalization_df)
  }

  normalization_df$fitted_curve <-
    exp(as.vector(stats::predict(activity_model, type="response")))

  size_factors <- log(normalization_df$fitted_curve) /
    mean(log(normalization_df$fitted_curve))

  size_factors <- Matrix::Diagonal(x=1/size_factors)
  row.names(size_factors) <- normalization_df$cell
  colnames(size_factors) <- row.names(size_factors)

  # Adjust the scores by the size factors
  scores <- Matrix::t(size_factors %*% Matrix::t(scores))

  # Exponentiate the stored (non-zero) entries, capped at 1e9
  scores@x <- pmin(1e9, exp(scores@x) - 1)

  # Rescale each cell so that its activities sum to 1
  sum_activity_scores <- Matrix::colSums(scores)

  scale_factors <- Matrix::Diagonal(x=1/sum_activity_scores)
  row.names(scale_factors) <- normalization_df$cell
  colnames(scale_factors) <- row.names(scale_factors)

  scores <- Matrix::t(scale_factors %*% Matrix::t(scores))

  # Hand back each input's genes/cells in their original order, restricted to
  # those that survived the zero filter (indexing by the full original names
  # would fail with "subscript out of bounds").
  subset_like <- function(x) {
    scores[intersect(rownames(x), rownames(scores)),
           intersect(colnames(x), colnames(scores)),
           drop = FALSE]
  }

  if (single_input) {
    ret <- subset_like(activity_matrices)
  } else {
    ret <- lapply(activity_matrices, subset_like)
  }
  return(ret)
}
307 |
--------------------------------------------------------------------------------
/R/aggregate.R:
--------------------------------------------------------------------------------
#' Make an aggregate count cds by collapsing nearby peaks
#'
#' @param cds A CellDataSet (CDS) object. For example, output of
#'   \code{\link{make_atac_cds}}
#' @param distance The distance within which peaks should be collapsed
#'
#' @return A CDS object with aggregated peaks.
#'
#' @details This function takes an input CDS object and collapses features
#'   within a given distance by summing the values for the collapsed features.
#'   Ranges of features are determined by their feature name, so the feature
#'   names must be in the form "chr1:1039013-2309023". Every cell of the input
#'   CDS is kept in the output, including cells without any counts.
#'
#' @export
#'
#' @examples
#'   data("cicero_data")
#'   input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
#'   agg_cds <- aggregate_nearby_peaks(input_cds, distance = 10000)
#'
aggregate_nearby_peaks <- function(cds,
                                   distance = 1000) {
  assertthat::assert_that(assertthat::is.number(distance))
  assertthat::assert_that(is(cds, "CellDataSet"))

  # Label every feature with its merged bin and drop features without one
  fData(cds)$bin <- make_bin_col(cds, distance)
  cds <- cds[!is.na(fData(cds)$bin),]

  # Long-format counts (cell, site, val) annotated with each site's bin
  exprs_dt <- sparse_to_datatable(Matrix::Matrix(exprs(cds), sparse = TRUE))
  bin_info <- data.table::data.table(site = row.names(fData(cds)),
                                     bin = fData(cds)$bin)
  data.table::setkey(bin_info, "site")
  data.table::setkey(exprs_dt, "site")
  exprs_dt <- merge(exprs_dt, bin_info)

  # Sum counts per cell and bin; the summed column is named V1
  data.table::setkey(exprs_dt, "cell", "bin")
  genomic_bins <- exprs_dt[,sum(val), by="cell,bin"]

  # Rows are the bins that received counts; columns are ALL cells of the CDS
  # in their original order. Fixing the cell levels up front keeps cells with
  # no counts as all-zero columns instead of failing when they are looked up.
  bin_levels <- factor(genomic_bins$bin)
  cell_names <- row.names(pData(cds))
  cell_levels <- factor(genomic_bins$cell, levels = cell_names)

  out <- Matrix::sparseMatrix(i = as.integer(bin_levels),
                              j = as.integer(cell_levels),
                              x = genomic_bins$V1,
                              dims = c(nlevels(bin_levels),
                                       length(cell_names)),
                              dimnames = list(levels(bin_levels),
                                              cell_names))

  fdf <- data.frame(site_name = levels(bin_levels),
                    row.names = levels(bin_levels))
  pdf <- pData(cds)

  fd <- new("AnnotatedDataFrame", data = fdf)
  pd <- new("AnnotatedDataFrame", data = pdf)

  # Return the same storage type (sparse or dense) as the input CDS
  if (is(exprs(cds), "dgCMatrix")) {
    compart_cds <- suppressWarnings(newCellDataSet(as(out, "sparseMatrix"),
                                  phenoData = pd,
                                  featureData = fd,
                                  expressionFamily=negbinomial.size(),
                                  lowerDetectionLimit=0))
  } else {
    compart_cds <- suppressWarnings(newCellDataSet(as.matrix(out),
                                  phenoData = pd,
                                  featureData = fd,
                                  expressionFamily=negbinomial.size(),
                                  lowerDetectionLimit=0))
  }

  return(compart_cds)
}
88 |
89 |
# Compute, for every feature of a CDS, the label of the merged genomic bin it
# belongs to.
#
# Feature names (row names of exprs(cds)) are parsed with df_for_coords(),
# turned into genomic ranges, and merged with GenomicRanges::reduce() using
# min.gapwidth = distance. Each feature is labelled with
# "<seqname>_<start>_<end>" of the first merged range it overlaps.
#
# Returns a character vector with one label per feature, in feature order.
make_bin_col <- function(cds, distance) {
  peak_df <- df_for_coords(row.names(exprs(cds)))
  names(peak_df)[2:3] <- c("start", "stop")
  peak_ranges <- GenomicRanges::makeGRangesFromDataFrame(peak_df)
  merged_ranges <- GenomicRanges::reduce(peak_ranges,
                                         min.gapwidth = distance)

  merged_df <- data.frame(seqnames = GenomicRanges::seqnames(merged_ranges),
                          starts = GenomicRanges::start(merged_ranges),
                          ends = GenomicRanges::end(merged_ranges))
  bin_labels <- paste(merged_df$seqnames,
                      merged_df$starts,
                      merged_df$ends, sep = "_")

  # With select = "first" the result is a plain index vector: for each peak,
  # the position of the first merged range it overlaps.
  first_hit <- GenomicRanges::findOverlaps(peak_ranges,
                                           merged_ranges,
                                           select = "first")

  bin_labels[first_hit]
}
112 |
# Convert a sparse matrix into a long-format data.table.
#
# The input is transposed and coerced to triplet form; every stored entry
# becomes one row with columns `cell` (a column name of the input), `site`
# (a row name of the input) and `val` (the stored value). The result is keyed
# by site and cell.
sparse_to_datatable <- function(sparse) {
  triplets <- as(Matrix::t(sparse), "dgTMatrix")
  dim_labels <- triplets@Dimnames

  # The @i / @j slots are zero-based, hence the + 1 when indexing the labels.
  cell_ids <- dim_labels[[1]][triplets@i + 1]
  site_ids <- dim_labels[[2]][triplets@j + 1]

  long_dt <- data.table(cell = cell_ids,
                        site = site_ids,
                        val = triplets@x)
  setkey(long_dt, "site", "cell")
  long_dt
}
121 |
122 | #' Aggregate count CDS by groups of cells
123 | #'
124 | #' Aggregates a CDS based on an indicator column in the \code{pData} table
125 | #'
126 | #' @importFrom dplyr %>%
127 | #' @importFrom plyr .
128 | #' @param cds A CDS object to be aggregated
129 | #' @param group_col The name of the column in the \code{pData} table that
130 | #' indicates the cells assignment to its aggregate bin.
131 | #'
132 | #' @details This function takes an input CDS object and collapses cells based
133 | #' on a column in the \code{pData} table by summing the values within the
134 | #' cell group.
135 | #'
136 | #' @return A count cds aggregated by group_col
137 | #' @export
138 | #'
139 | #' @examples
140 | #' data("cicero_data")
141 | #' #input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
142 | #' #pData(input_cds)$cell_subtype <- rep(1:10, times=20)
143 | #' #binned_input_lin <-aggregate_by_cell_bin(input_cds, "cell_subtype")
144 | #'
aggregate_by_cell_bin <- function(cds, group_col) {
  assertthat::assert_that(is(cds, "CellDataSet"))
  assertthat::assert_that(is.character(group_col))
  assertthat::assert_that(group_col %in% names(pData(cds)),
                          msg = "group_col is missing from your pData table")

  # Cell metadata with the cell names as a regular column, grouped by the
  # requested column.
  pData_grouping <- pData(cds) %>%
    tibble::rownames_to_column() %>%
    dplyr::group_by_at(group_col)

  # Per group: sum the counts of the member cells for every site.
  # drop = FALSE keeps a one-column matrix when a group holds a single cell;
  # without it the subset collapses to a bare vector and agg_cells() fails.
  cell_bins <- pData_grouping %>%
    dplyr::do(agg_cells(exprs(cds)[, .$rowname, drop = FALSE]))
  var_cols <- setdiff(colnames(cell_bins), c("site", "compartment_count"))

  # Wide table: one row per site, one column per group.
  agg_counts <- reshape2::dcast(cell_bins,
                                as.formula(paste("site", "~",
                                                 paste(var_cols, collapse="+"))),
                                value.var="compartment_count")

  # Group-level metadata: the number of cells in the group (column `n` added
  # by add_tally) and the mean of every numeric pData column.
  pData_cols <- as.data.frame(pData_grouping %>%
                                dplyr::add_tally() %>%
                                dplyr::summarise_if(is.numeric,
                                                    mean,
                                                    na.rm = TRUE))

  bin_names <- colnames(agg_counts)[-1]
  rownames(pData_cols) <- bin_names

  fdf <- data.frame(site_name = agg_counts$site, row.names = agg_counts$site)
  pdf <- pData_cols

  fd <- new("AnnotatedDataFrame", data = fdf)
  pd <- new("AnnotatedDataFrame", data = pdf)

  # drop = FALSE keeps the column name when there is only one group.
  out <- agg_counts[, bin_names, drop = FALSE]

  compart_cds <- suppressWarnings(newCellDataSet(as.matrix(out),
                                    phenoData = pd,
                                    featureData = fd,
                                    expressionFamily=negbinomial.size(),
                                    lowerDetectionLimit=0))

  compart_cds <- detectGenes(compart_cds, min_expr=0.1)
  compart_cds <- estimateSizeFactorsSimp(compart_cds)
  compart_cds <- estimateDispersionsSimp(compart_cds)

  fData(compart_cds)$use_for_ordering <- FALSE

  compart_cds
}
196 |
# Sum a sites-by-cells matrix across its columns.
#
# Returns a data frame with one row per site: `compartment_count` holds the
# row sum and `site` holds the corresponding row name of the input.
agg_cells <- function(exprs_mat) {
  site_totals <- Matrix::rowSums(exprs_mat)
  summed <- data.frame(compartment_count = site_totals)
  summed[["site"]] <- row.names(exprs_mat)
  summed
}
202 |
--------------------------------------------------------------------------------
/R/cicero.R:
--------------------------------------------------------------------------------
1 | #' cicero
2 | #'
3 | #' @import monocle
4 | #' @import VGAM
5 | #' @import data.table
6 | #' @import ggplot2
7 | #' @importFrom Biobase exprs pData fData ExpressionSet annotatedDataFrameFrom
8 | #' multiassign assayDataNew "fData<-" "pData<-"
9 | #' @importFrom grDevices col2rgb dev.cur dev.off palette rainbow
10 | #' @importFrom methods as callNextMethod is new
11 | #' @importFrom stats as.formula cov dist filter median
12 | #' @importFrom utils combn read.table
13 | #' @importFrom BiocGenerics estimateDispersions estimateSizeFactors
14 | "_PACKAGE"
15 |
16 | ## quiets concerns of R CMD check re: the .'s that appear in pipelines
17 | #utils::globalVariables(c("."))
18 |
19 | ## temporary until i figure out a fix
20 | #utils::globalVariables(c("val", "value", "CCAN", "V1", "f_id"))
21 |
--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' Example single-cell chromatin accessibility data
2 | #'
3 | #' A dataset containing a subset of a single-cell ATAC-seq
4 | #' dataset collected on Human Skeletal Muscle Myoblasts.
5 | #' Only includes data from chromosome 18.
6 | #'
7 | #' @format A data frame with 35137 rows and 3 variables:
8 | #' \describe{
9 | #' \item{Peak}{Peak information}
10 | #' \item{Cell}{Cell ID}
11 | #' \item{Count}{Reads per cell per peak}
12 | #' }
13 | #' @usage data(cicero_data)
14 | "cicero_data"
15 |
16 | #' Chromosome lengths from human genome hg19
17 | #'
18 | #' A list of the chromosomes in hg19 and their lengths
19 | #' in base pairs.
20 | #'
21 | #' @format A data frame with 93 rows and 2 variables:
22 | #' \describe{
23 | #' \item{V1}{Chromosome}
24 | #' \item{V2}{Chromosome length, base pairs}
25 | #' }
26 | #' @usage data(human.hg19.genome)
27 | "human.hg19.genome"
28 |
29 | #' Example gene annotation information
30 | #'
31 | #' Gencode gene annotation data from chromosome 18 of the
32 | #' human genome (hg19).
33 | #'
34 | #' @format A data frame with 15129 rows and 8 variables:
35 | #' \describe{
36 | #' \item{chromosome}{Chromosome}
37 | #' \item{start}{Exon starting base}
38 | #' \item{end}{Exon ending base}
39 | #' \item{strand}{Exon mapping direction}
40 | #' \item{feature}{Feature type}
41 | #' \item{gene}{Gene ID}
42 | #' \item{transcript}{Transcript ID}
43 | #' \item{symbol}{Gene symbol}
44 | #' }
45 | #' @usage data(gene_annotation_sample)
46 | "gene_annotation_sample"
47 |
48 | #' Metadata for example cells in cicero_data
49 | #'
50 | #' Metadata information for cicero_data
51 | #'
52 | #' @format A data frame with 200 rows and 2 variables:
53 | #' \describe{
54 | #' \item{timepoint}{Time at cell collection}
55 | #' \item{cell}{Cell barcode}
56 | #' }
57 | #' @usage data(cell_data)
58 | "cell_data"
59 |
--------------------------------------------------------------------------------
/R/fromGviz.R:
--------------------------------------------------------------------------------
# S4 generic for drawing an axis for a track object.
setGeneric("drawAxis", function(GdObject, ...) standardGeneric("drawAxis"))

# Fallback method for any GdObject: nothing is drawn and NULL is returned.
setMethod("drawAxis",
          signature(GdObject = "GdObject"),
          function(GdObject, ...) {
            NULL
          })
4 |
# drawAxis method for CustomTrack: draws a y-axis spanning the track's `ylim`
# display parameter in a narrow viewport (x = 0.95, width = 0.2).
#
# GdObject: the CustomTrack whose display parameters are read.
# from, to: accepted for signature compatibility; not used in this method.
#
# The axis colour comes from the "col.axis" display parameter (default
# "white"). The text size comes from "cex.axis"; when that is unset it is
# derived from the available viewport space and never goes below 0.6.
setMethod("drawAxis", signature(GdObject="CustomTrack"), function(GdObject,
                                                                   from,
                                                                   to, ...) {
  ylim <- displayPars(GdObject)$ylim
  hSpaceAvail <- Gviz:::vpLocation()$isize["width"]/6
  #yscale <- extendrange(r=ylim, f=0.05) #extends axis by 5%
  yscale <- ylim
  acex <- Gviz:::.dpOrDefault(GdObject, "cex.axis")
  acol <- Gviz:::.dpOrDefault(GdObject, "col.axis", "white")
  at <- pretty(yscale) #finds breakpoints
  #at <- at[at>=sort(ylim)[1] & at<=sort(ylim)[2]]
  if (is.null(acex)) {
    # No explicit text size: pick the largest size at which the tick labels
    # still fit both vertically and horizontally, with a floor of 0.6.
    vSpaceNeeded <- max(as.numeric(grid::convertWidth(grid::stringHeight(at),
                                                      "inches")))*length(at)*1.5
    hSpaceNeeded <- max(as.numeric(grid::convertWidth(grid::stringWidth(at),
                                                      "inches")))
    vSpaceAvail <- abs(diff(range(at)))/
      abs(diff(yscale))*Gviz:::vpLocation()$isize["height"]

    acex <- max(0.6, min(vSpaceAvail/vSpaceNeeded, hSpaceAvail/hSpaceNeeded))
  }
  vpTitleAxis <- grid::viewport(x=0.95, width=0.2, yscale= yscale, just=0)
  grid::pushViewport(vpTitleAxis)
  suppressWarnings(grid::grid.yaxis(gp=grid::gpar(col=acol, cex=acex), at=at))
  grid::popViewport(1)
})
34 |
35 |
36 |
##-----------------------------------------------------------------------------
## CustomTrack:
##
## A track class that lets the user supply the plotting function.
## Slots:
##   plottingFunction  the function stored for this track
##   variables         a list stored alongside the plotting function
##-----------------------------------------------------------------------------
setClass("CustomTrack",
         contains = "GdObject",
         representation = representation(plottingFunction = "function",
                                         variables = "list"),
         prototype = prototype(dp = DisplayPars()))
47 |
# Initializer for CustomTrack objects: refreshes the display parameters via
# Gviz:::.updatePars, stores the plotting function and its variables in their
# slots, then hands the remaining arguments to the next initialize method.
setMethod("initialize", "CustomTrack",
          function(.Object, plottingFunction, variables, ...) {
            .Object <- Gviz:::.updatePars(.Object, "CustomTrack")
            .Object@plottingFunction <- plottingFunction
            .Object@variables <- variables
            .Object <- callNextMethod(.Object, ...)
            .Object
          })
56 |
57 |
# Constructor for CustomTrack objects.
#
# plottingFunction: function stored in the track (defaults to an empty
#                   function taking GdObject, prepare and ...).
# variables:        list stored in the track's `variables` slot.
# name:             track name passed on to new().
# ...:              further arguments passed on to new().
CustomTrack <- function(plottingFunction=function(GdObject,
                                                  prepare=FALSE, ...){},
                        variables=list(), name="CustomTrack", ...){
  new("CustomTrack",
      plottingFunction = plottingFunction,
      variables = variables,
      name = name,
      ...)
}
64 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | [](https://codecov.io/github/cole-trapnell-lab/cicero-release?branch=master)
3 | # Cicero
4 | ### Predicting the cis-regulatory landscape
5 |
6 | Please see our [website](http://cole-trapnell-lab.github.io/cicero-release/) for information on installing and using Cicero
7 |
--------------------------------------------------------------------------------
/code-of-conduct.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to make participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at coletrap [at] uw.edu. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
78 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | patch:
10 | default:
11 | target: auto
12 | threshold: 1%
13 |
--------------------------------------------------------------------------------
/data/cell_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/cell_data.rda
--------------------------------------------------------------------------------
/data/cicero_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/cicero_data.rda
--------------------------------------------------------------------------------
/data/gene_annotation_sample.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/gene_annotation_sample.rda
--------------------------------------------------------------------------------
/data/human.hg19.genome.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/data/human.hg19.genome.rda
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
# Citation entry for the Cicero paper.
# Uses bibentry() and c() on person objects rather than the old-style
# citEntry() / personList() helpers; the resulting entry is the same.
bibentry(bibtype = "Article",
         title = "Cicero Predicts cis-Regulatory DNA Interactions from Single-Cell Chromatin Accessibility Data",
         journal = "Molecular Cell",
         volume = "71",
         number = "5",
         pages = "858 - 871.e8",
         year = 2018,
         issn = "1097-2765",
         doi = "https://doi.org/10.1016/j.molcel.2018.06.044",
         author = c(as.person("Hannah A. Pliner"),
                    as.person("Jonathan S. Packer"),
                    as.person("José L. McFaline-Figueroa"),
                    as.person("Darren A. Cusanovich"),
                    as.person("Riza M. Daza"),
                    as.person("Delasa Aghamirzaie"),
                    as.person("Sanjay Srivatsan"),
                    as.person("Xiaojie Qiu"),
                    as.person("Dana Jackson"),
                    as.person("Anna Minkina"),
                    as.person("Andrew C. Adey"),
                    as.person("Frank J. Steemers"),
                    as.person("Jay Shendure"),
                    as.person("Cole Trapnell")),
         textVersion =
           paste("Hannah A. Pliner, Jay Shendure & Cole Trapnell et. al. (2018).",
                 "Cicero Predicts cis-Regulatory DNA Interactions from Single-Cell Chromatin Accessibility Data.",
                 "Molecular Cell,", "71,", "858-871.e8." ) )
--------------------------------------------------------------------------------
/man/aggregate_by_cell_bin.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/aggregate.R
3 | \name{aggregate_by_cell_bin}
4 | \alias{aggregate_by_cell_bin}
5 | \title{Aggregate count CDS by groups of cells}
6 | \usage{
7 | aggregate_by_cell_bin(cds, group_col)
8 | }
9 | \arguments{
10 | \item{cds}{A CDS object to be aggregated}
11 |
12 | \item{group_col}{The name of the column in the \code{pData} table that
13 | indicates the cells assignment to its aggregate bin.}
14 | }
15 | \value{
16 | A count cds aggregated by group_col
17 | }
18 | \description{
19 | Aggregates a CDS based on an indicator column in the \code{pData} table
20 | }
21 | \details{
22 | This function takes an input CDS object and collapses cells based
23 | on a column in the \code{pData} table by summing the values within the
24 | cell group.
25 | }
26 | \examples{
27 | data("cicero_data")
28 | #input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
29 | #pData(input_cds)$cell_subtype <- rep(1:10, times=20)
30 | #binned_input_lin <-aggregate_by_cell_bin(input_cds, "cell_subtype")
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/aggregate_nearby_peaks.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/aggregate.R
3 | \name{aggregate_nearby_peaks}
4 | \alias{aggregate_nearby_peaks}
5 | \title{Make an aggregate count cds by collapsing nearby peaks}
6 | \usage{
7 | aggregate_nearby_peaks(cds, distance = 1000)
8 | }
9 | \arguments{
10 | \item{cds}{A CellDataSet (CDS) object. For example, output of
11 | \code{\link{make_atac_cds}}}
12 |
13 | \item{distance}{The distance within which peaks should be collapsed}
14 | }
15 | \value{
16 | A CDS object with aggregated peaks.
17 | }
18 | \description{
19 | Make an aggregate count cds by collapsing nearby peaks
20 | }
21 | \details{
22 | This function takes an input CDS object and collapses features
23 | within a given distance by summing the values for the collapsed features.
24 | Ranges of features are determined by their feature name, so the feature
25 | names must be in the form "chr1:1039013-2309023".
26 | }
27 | \examples{
28 | data("cicero_data")
29 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
30 | agg_cds <- aggregate_nearby_peaks(input_cds, distance = 10000)
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/annotate_cds_by_site.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{annotate_cds_by_site}
4 | \alias{annotate_cds_by_site}
5 | \title{Add feature data columns to fData}
6 | \usage{
7 | annotate_cds_by_site(
8 | cds,
9 | feature_data,
10 | verbose = FALSE,
11 | maxgap = 0,
12 | all = FALSE,
13 | header = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{cds}{A CDS object.}
18 |
19 | \item{feature_data}{Data frame, or a character path to a file of
20 | feature data. If a path, the file should be tab separated. Default assumes
21 | no header; if your file has a header, set \code{header = TRUE}. For
22 | either a data frame or a path, the file should be in bed-like format, with
23 | the first 3 columns containing chromosome, start and stop respectively.
24 | The remaining columns will be added to the \code{fData} table as feature
25 | data.}
26 |
27 | \item{verbose}{Logical, should progress messages be printed?}
28 |
29 | \item{maxgap}{The maximum number of base pairs allowed between the peak and
30 | the feature for the feature and peak to be considered overlapping.
31 | Default = 0 (overlapping). Details in
32 | \code{\link[IRanges]{findOverlaps-methods}}. If \code{maxgap}
33 | is set to "nearest" then the nearest feature will be assigned regardless
34 | of distance.}
35 |
36 | \item{all}{Logical, should all overlapping intervals be reported? If all is
37 | FALSE, the largest overlap is reported.}
38 |
39 | \item{header}{Logical, if reading a file, is there a header?}
40 | }
41 | \value{
42 | A CDS object with updated \code{fData} table.
43 | }
44 | \description{
45 | Annotate the sites of your CDS with feature data based on coordinate overlap.
46 | }
47 | \details{
48 | \code{annotate_cds_by_site} will add columns to the \code{fData}
49 | table of a CDS object based on the overlap of peaks with features in a
50 | data frame or file. An "overlap" column will be added, along with any
51 | columns beyond the three required columns in the feature data. The
52 | "overlap" column is the number of base pairs overlapping the \code{fData}
53 | site. When maxgap is used, the true overlap is still calculated (overlap
54 | will be 0 if the two features only overlap because of maxgap) \code{NA}
55 | means that there was no overlapping feature. If a peak overlaps multiple
56 | data intervals and all is FALSE, the largest overlapping interval will be
57 | chosen (in a tie, the first entry is taken), otherwise all intervals will
58 | be chosen and annotations will be collapsed using a comma as a separator.
59 | }
60 | \examples{
61 | data("cicero_data")
62 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
63 | feat <- data.frame(chr = c("chr18", "chr18", "chr18", "chr18"),
64 | bp1 = c(10000, 10800, 50000, 100000),
65 | bp2 = c(10700, 11000, 60000, 110000),
66 | type = c("Acetylated", "Methylated", "Acetylated",
67 | "Methylated"))
68 | input_cds <- annotate_cds_by_site(input_cds, feat)
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/man/assemble_connections.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{assemble_connections}
4 | \alias{assemble_connections}
5 | \title{Combine and reconcile cicero models}
6 | \usage{
7 | assemble_connections(cicero_model_list, silent = FALSE)
8 | }
9 | \arguments{
10 | \item{cicero_model_list}{A list of cicero output objects, generally, the
11 | output of \code{\link{generate_cicero_models}}.}
12 |
13 | \item{silent}{Logical, should the function run silently?}
14 | }
15 | \value{
16 | A data frame of connections with their cicero co-accessibility
17 | scores.
18 | }
19 | \description{
20 | Function which takes the output of \code{\link{generate_cicero_models}} and
21 | assembles the connections into a data frame with cicero co-accessibility
22 | scores.
23 | }
24 | \details{
25 | This function combines glasso models computed on overlapping windows of the
26 | genome. Pairs of sites whose regularized correlation was calculated twice
27 | are first checked for qualitative concordance (both zero, positive or
28 | negative). If they are not concordant, NA is returned. If they are concordant
29 | the mean is returned.
30 | }
31 | \examples{
32 | data("cicero_data")
33 | data("human.hg19.genome")
34 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
35 | sample_genome$V2[1] <- 100000
36 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
37 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
38 | reduction_method = 'tSNE',
39 | norm_method = "none")
40 | tsne_coords <- t(reducedDimA(input_cds))
41 | row.names(tsne_coords) <- row.names(pData(input_cds))
42 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
43 | model_output <- generate_cicero_models(cicero_cds,
44 | distance_parameter = 0.3,
45 | genomic_coords = sample_genome)
46 | cicero_cons <- assemble_connections(model_output)
47 |
48 | }
49 | \seealso{
50 | \code{\link{generate_cicero_models}}
51 | }
52 |
--------------------------------------------------------------------------------
/man/build_gene_activity_matrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/activityScores.R
3 | \name{build_gene_activity_matrix}
4 | \alias{build_gene_activity_matrix}
5 | \title{Calculate initial Cicero gene activity matrix}
6 | \usage{
7 | build_gene_activity_matrix(
8 | input_cds,
9 | cicero_cons_info,
10 | site_weights = NULL,
11 | dist_thresh = 250000,
12 | coaccess_cutoff = 0.25
13 | )
14 | }
15 | \arguments{
16 | \item{input_cds}{Binary sci-ATAC-seq input CDS. The input CDS must have a
17 | column in the fData table called "gene" which is the gene name if the
18 | site is a promoter, and \code{NA} if the site is distal.}
19 |
20 | \item{cicero_cons_info}{Cicero connections table, generally the output of
21 | \code{\link{run_cicero}}. This table is a data frame with three required
22 | columns named "Peak1", "Peak2", and "coaccess". Peak1 and Peak2 contain
23 | coordinates for the two compared elements, and coaccess contains their
24 | Cicero co-accessibility score.}
25 |
26 | \item{site_weights}{NULL or an individual weight for each site in input_cds.}
27 |
28 | \item{dist_thresh}{The maximum distance in base pairs between pairs of sites
29 | to include in the gene activity calculation.}
30 |
31 | \item{coaccess_cutoff}{The minimum Cicero co-accessibility score that should
32 | be considered connected.}
33 | }
34 | \value{
35 | Unnormalized gene activity matrix.
36 | }
37 | \description{
38 | This function calculates the initial Cicero gene activity matrix. After this
39 | function, the activity matrix should be normalized with any comparison
40 | matrices using the function \code{\link{normalize_gene_activities}}.
41 | }
42 | \examples{
43 | data("cicero_data")
44 | data("human.hg19.genome")
45 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
46 | sample_genome$V2[1] <- 100000
47 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
48 | input_cds <- detectGenes(input_cds)
49 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
50 | reduction_method = 'tSNE',
51 | norm_method = "none")
52 | tsne_coords <- t(reducedDimA(input_cds))
53 | row.names(tsne_coords) <- row.names(pData(input_cds))
54 | cicero_cds <- make_cicero_cds(input_cds,
55 | reduced_coordinates = tsne_coords)
56 | cons <- run_cicero(cicero_cds, sample_genome, sample_num=2)
57 |
58 | data(gene_annotation_sample)
59 | gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
60 | names(gene_annotation_sub)[4] <- "gene"
61 | input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
62 | num_genes <- pData(input_cds)$num_genes_expressed
63 | names(num_genes) <- row.names(pData(input_cds))
64 | unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
65 |
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/man/cell_data.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{cell_data}
5 | \alias{cell_data}
6 | \title{Metadata for example cells in cicero_data}
7 | \format{
8 | A data frame with 200 rows and 2 variables:
9 | \describe{
10 | \item{timepoint}{Time at cell collection}
11 | \item{cell}{Cell barcode}
12 | }
13 | }
14 | \usage{
15 | data(cell_data)
16 | }
17 | \description{
18 | Metadata information for cicero_data
19 | }
20 | \keyword{datasets}
21 |
--------------------------------------------------------------------------------
/man/cicero-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cicero.R
3 | \docType{package}
4 | \name{cicero-package}
5 | \alias{cicero}
6 | \alias{cicero-package}
7 | \title{cicero}
8 | \description{
9 | Cicero computes putative cis-regulatory maps from single-cell chromatin accessibility data. It also extends monocle 2 for use in chromatin accessibility data.
10 | }
11 | \author{
12 | \strong{Maintainer}: Hannah Pliner \email{hpliner@uw.edu}
13 |
14 | Authors:
15 | \itemize{
16 | \item Cole Trapnell \email{coletrap@uw.edu}
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/man/cicero_data.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{cicero_data}
5 | \alias{cicero_data}
6 | \title{Example single-cell chromatin accessibility data}
7 | \format{
8 | A data frame with 35137 rows and 3 variables:
9 | \describe{
10 | \item{Peak}{Peak information}
11 | \item{Cell}{Cell ID}
12 | \item{Count}{Reads per cell per peak}
13 | }
14 | }
15 | \usage{
16 | data(cicero_data)
17 | }
18 | \description{
19 | A dataset containing a subset of a single-cell ATAC-seq
20 | dataset collected on Human Skeletal Muscle Myoblasts.
21 | Only includes data from chromosome 18.
22 | }
23 | \keyword{datasets}
24 |
--------------------------------------------------------------------------------
/man/compare_connections.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{compare_connections}
4 | \alias{compare_connections}
5 | \title{Compare Cicero connections to other datasets}
6 | \usage{
7 | compare_connections(conns1, conns2, maxgap = 0)
8 | }
9 | \arguments{
10 | \item{conns1}{A data frame of Cicero connections, like those output from
11 | \code{assemble_connections}. The first two columns must be the coordinates
12 | of peaks that are connected.}
13 |
14 | \item{conns2}{A data frame of connections to be searched for overlap. The
15 | first two columns must be coordinates of genomic sites that are connected.}
16 |
17 | \item{maxgap}{The number of base pairs between peaks allowed to be called
18 | overlapping. See \code{\link[IRanges]{findOverlaps-methods}} in the IRanges
19 | package for further description.}
20 | }
21 | \value{
22 | A vector of logicals of whether the Cicero pair is present in the
23 | alternate dataset.
24 | }
25 | \description{
26 | Compare two sets of connections and return a vector of logicals for whether
27 | connections in one are present in the other.
28 | }
29 | \examples{
30 | \dontrun{
31 | conns$in_dataset <- compare_connections(conns, alt_data)
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/man/df_for_coords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{df_for_coords}
4 | \alias{df_for_coords}
5 | \title{Construct a data frame of coordinate info from coordinate strings}
6 | \usage{
7 | df_for_coords(coord_strings)
8 | }
9 | \arguments{
10 | \item{coord_strings}{A list of coordinate strings (each like
11 | "chr1:500000-1000000")}
12 | }
13 | \value{
14 | data.frame with three columns, chromosome, starting base pair and
15 | ending base pair
16 | }
17 | \description{
18 | Construct a data frame of coordinate info from coordinate strings
19 | }
20 | \details{
21 | Coordinate strings consist of three pieces of information:
22 | chromosome, start, and stop. These pieces of information can be separated
23 | by the characters ":", "_", or "-". Commas will be removed, not used as
24 | separators (ex: "chr18:8,575,097-8,839,855" is ok).
25 | }
26 | \examples{
27 | df_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293"))
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/man/estimate_distance_parameter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{estimate_distance_parameter}
4 | \alias{estimate_distance_parameter}
5 | \title{Calculate distance penalty parameter}
6 | \usage{
7 | estimate_distance_parameter(
8 | cds,
9 | window = 5e+05,
10 | maxit = 100,
11 | s = 0.75,
12 | sample_num = 100,
13 | distance_constraint = 250000,
14 | distance_parameter_convergence = 1e-22,
15 | max_elements = 200,
16 | genomic_coords = cicero::human.hg19.genome,
17 | max_sample_windows = 500
18 | )
19 | }
20 | \arguments{
21 | \item{cds}{A cicero CDS object generated using \code{\link{make_cicero_cds}}.}
22 |
23 | \item{window}{Size of the genomic window to query, in base pairs.}
24 |
25 | \item{maxit}{Maximum number of iterations for distance_parameter estimation.}
26 |
27 | \item{s}{Power law value. See details for more information.}
28 |
29 | \item{sample_num}{Number of random windows to calculate
30 | \code{distance_parameter} for.}
31 |
32 | \item{distance_constraint}{Maximum distance of expected connections. Must be
33 | smaller than \code{window}.}
34 |
35 | \item{distance_parameter_convergence}{Convergence step size for
36 | \code{distance_parameter} calculation.}
37 |
38 | \item{max_elements}{Maximum number of elements per window allowed. Prevents
39 | very large models from slowing performance.}
40 |
41 | \item{genomic_coords}{Either a data frame or a path (character) to a file
42 | with chromosome lengths. The file should have two columns, the first is
43 | the chromosome name (ex. "chr1") and the second is the chromosome length
44 | in base pairs. See \code{data(human.hg19.genome)} for an example. If a
45 | file, should be tab-separated and without header.}
46 |
47 | \item{max_sample_windows}{Maximum number of random windows to screen to find
48 | sample_num windows for distance calculation. Default 500.}
49 | }
50 | \value{
51 | A list of results of length \code{sample_num}. List members are
52 | numeric \code{distance_parameter} values.
53 | }
54 | \description{
55 | Function to calculate distance penalty parameter (\code{distance_parameter})
56 | for random genomic windows. Used to choose \code{distance_parameter} to pass
57 | to \code{\link{generate_cicero_models}}.
58 | }
59 | \details{
60 | The purpose of this function is to calculate the distance scaling
61 | parameter used to adjust the distance-based penalty function used in
62 | Cicero's model calculation. The scaling parameter, in combination with the
63 | power law value \code{s} determines the distance-based penalty.
64 |
65 | This function chooses random windows of the genome and calculates a
66 | \code{distance_parameter}. The function returns a vector of values
67 | calculated on these random windows. We recommend using the mean value of
68 | this vector moving forward with Cicero analysis.
69 |
70 | The function works by finding the minimum distance scaling parameter such
71 | that no more than 5\% of pairs of sites at a distance greater than
72 | \code{distance_constraint} have non-zero entries after graphical lasso
73 | regularization and such that fewer than 80\% of all output entries are
74 | nonzero.
75 |
76 | If the chosen random window has fewer than 2 or greater than
77 | \code{max_elements} sites, the window is skipped. In addition, the random
78 | window will be skipped if there are insufficient long-range comparisons
79 | (see below) to be made. The \code{max_elements} parameter exists to prevent
80 | very dense windows from slowing the calculation. If you expect that your
81 | data may regularly have this many sites in a window, you will need to
82 | raise this parameter.
83 |
84 | Calculating the \code{distance_parameter} in a sample window requires
85 | peaks in that window that are at a distance greater than the
86 | \code{distance_constraint} parameter. If not enough examples at
87 | high distance have been found, the function will return the warning
88 | \code{"Warning: could not calculate sample_num distance_parameters - see
89 | documentation details"}. When looking for \code{sample_num} example
90 | windows, the function will search \code{max_sample_windows} windows. By
91 | default this is set at 500, which should be well beyond the 100 windows
92 | that need to be found. However, in very sparse datasets, increasing
93 | \code{max_sample_windows} may help avoid the above warning. Increasing
94 | \code{max_sample_windows} may slow performance in sparse datasets. If you
95 | are still not able to get enough example windows, even with a large
96 | \code{max_sample_windows} parameter, this may mean your \code{window}
97 | parameter needs to be larger or your \code{distance_constraint} parameter
98 | needs to be smaller. A less likely possibility is that your
99 | \code{max_elements} parameter needs to be larger. This would occur if your
100 | data is particularly dense.
101 |
102 | The parameter \code{s} is a constant that captures the power-law
103 | distribution of contact frequencies between different locations in the
104 | genome as a function of their linear distance. For a complete discussion
105 | of the various polymer models of DNA packed into the nucleus and of
106 | justifiable values for s, we refer readers to (Dekker et al., 2013).
107 | We use a value of 0.75 by default
108 | in Cicero, which corresponds to the “tension globule” polymer model of DNA
109 | (Sanborn et al., 2015). This parameter must be the same as the s parameter
110 | for \code{\link{generate_cicero_models}}.
111 |
112 | Further details are available in the publication that accompanies this
113 | package. Run \code{citation("cicero")} for publication details.
114 | }
115 | \examples{
116 | data("cicero_data")
117 | data("human.hg19.genome")
118 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
119 | sample_genome$V2[1] <- 100000
120 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
121 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
122 | reduction_method = 'tSNE',
123 | norm_method = "none")
124 | tsne_coords <- t(reducedDimA(input_cds))
125 | row.names(tsne_coords) <- row.names(pData(input_cds))
126 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
127 | distance_parameters <- estimate_distance_parameter(cicero_cds,
128 | sample_num=5,
129 | genomic_coords = sample_genome)
130 |
131 | }
132 | \references{
133 | \itemize{
134 | \item Dekker, J., Marti-Renom, M.A., and Mirny, L.A. (2013). Exploring
135 | the three-dimensional organization of genomes: interpreting chromatin
136 | interaction data. Nat. Rev. Genet. 14, 390–403.
137 | \item Sanborn, A.L., Rao, S.S.P., Huang, S.-C., Durand, N.C., Huntley,
138 | M.H., Jewett, A.I., Bochkov, I.D., Chinnappan, D., Cutkosky, A., Li, J.,
139 | et al. (2015). Chromatin extrusion explains key features of loop and
140 | domain formation in wild-type and engineered genomes. Proc. Natl. Acad.
141 | Sci. U. S. A. 112, E6456–E6465.
142 | }
143 | }
144 | \seealso{
145 | \code{\link{generate_cicero_models}}
146 | }
147 |
--------------------------------------------------------------------------------
/man/find_overlapping_ccans.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{find_overlapping_ccans}
4 | \alias{find_overlapping_ccans}
5 | \title{Find CCANs that overlap each other in genomic coordinates}
6 | \usage{
7 | find_overlapping_ccans(ccan_assignments, min_overlap = 1)
8 | }
9 | \arguments{
10 | \item{ccan_assignments}{A data frame where the first column is the peak and
11 | the second is the CCAN assignment. For example, output of
12 | \code{generate_ccans}.}
13 |
14 | \item{min_overlap}{The minimum base pair overlap to count as overlapping.}
15 | }
16 | \value{
17 | A data frame with two columns, CCAN1 and CCAN2. CCANs in this list
18 | are overlapping. The data frame is reciprocal (if CCAN 2 overlaps CCAN 1,
19 | there will be two rows, 1,2 and 2,1).
20 | }
21 | \description{
22 | Find CCANs that overlap each other in genomic coordinates
23 | }
24 | \examples{
25 | ccan_df <- data.frame(peak = c("chr18_1408345_1408845", "chr18_1779830_1780330",
26 | "chr18_1929095_1929595", "chr18_1954501_1954727",
27 | "chr18_2049865_2050884", "chr18_2083726_2084102",
28 | "chr18_2087935_2088622", "chr18_2104705_2105551",
29 | "chr18_2108641_2108907"),
30 | CCAN = c(1,2,2,2,3,3,3,3,2))
31 | olap_ccans <- find_overlapping_ccans(ccan_df)
32 |
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/man/find_overlapping_coordinates.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{find_overlapping_coordinates}
4 | \alias{find_overlapping_coordinates}
5 | \title{Find peaks that overlap a specific genomic location}
6 | \usage{
7 | find_overlapping_coordinates(coord_list, coord, maxgap = 0)
8 | }
9 | \arguments{
10 | \item{coord_list}{A list of coordinates to be searched for overlap in the
11 | form chr1_100_2000.}
12 |
13 | \item{coord}{The coordinates that you want to find in the form chr1_100_2000.}
14 |
15 | \item{maxgap}{The maximum distance in base pairs between coord and the
16 | coord_list that should count as overlapping. Default is 0.}
17 | }
18 | \value{
19 | A character vector of the peaks that overlap coord.
20 | }
21 | \description{
22 | Find peaks that overlap a specific genomic location
23 | }
24 | \examples{
25 | test_coords <- c("chr18_10025_10225", "chr18_10603_11103",
26 | "chr18_11604_13986",
27 | "chr18_157883_158536", "chr18_217477_218555",
28 | "chr18_245734_246234")
29 | find_overlapping_coordinates(test_coords, "chr18:10,100-1246234")
30 |
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/gene_annotation_sample.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{gene_annotation_sample}
5 | \alias{gene_annotation_sample}
6 | \title{Example gene annotation information}
7 | \format{
8 | A data frame with 15129 rows and 8 variables:
9 | \describe{
10 | \item{chromosome}{Chromosome}
11 | \item{start}{Exon starting base}
12 | \item{end}{Exon ending base}
13 | \item{strand}{Exon mapping direction}
14 | \item{feature}{Feature type}
15 | \item{gene}{Gene ID}
16 | \item{transcript}{Transcript ID}
17 | \item{symbol}{Gene symbol}
18 | }
19 | }
20 | \usage{
21 | data(gene_annotation_sample)
22 | }
23 | \description{
24 | Gencode gene annotation data from chromosome 18 of the
25 | human genome (hg19).
26 | }
27 | \keyword{datasets}
28 |
--------------------------------------------------------------------------------
/man/generate_ccans.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{generate_ccans}
4 | \alias{generate_ccans}
5 | \title{Generate cis-co-accessibility networks (CCANs)}
6 | \usage{
7 | generate_ccans(
8 | connections_df,
9 | coaccess_cutoff_override = NULL,
10 | tolerance_digits = 2
11 | )
12 | }
13 | \arguments{
14 | \item{connections_df}{Data frame of connections with columns: Peak1, Peak2,
15 | coaccess. Generally, the output of \code{\link{run_cicero}} or
16 | \code{\link{assemble_connections}}}
17 |
18 | \item{coaccess_cutoff_override}{Numeric, co-accessibility score threshold to
19 | impose. Overrides automatic calculation.}
20 |
21 | \item{tolerance_digits}{The number of digits to calculate cutoff to. Default
22 | is 2 (0.01 tolerance)}
23 | }
24 | \value{
25 | Data frame with two columns - Peak and CCAN. CCAN column indicates
26 | CCAN assignment. Peaks not included in a CCAN are not returned.
27 | }
28 | \description{
29 | Post-process cicero co-accessibility scores to extract modules of sites that
30 | are co-accessible.
31 | }
32 | \details{
33 | CCANs are calculated by first specifying a minimum co-accessibility
34 | score and then using the Louvain community detection algorithm on the
35 | subgraph induced by excluding edges below this score. For this function,
36 | either the user can specify the minimum co-accessibility using
37 | \code{coaccess_cutoff_override}, or the cutoff can be calculated
38 | automatically by optimizing for CCAN number. The cutoff calculation can be
39 | slow, so users may wish to use the \code{coaccess_cutoff_override} after
40 | initially calculating the cutoff to speed future runs.
41 | }
42 | \examples{
43 | \dontrun{
44 | data("cicero_data")
45 | set.seed(18)
46 | data("human.hg19.genome")
47 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
48 | sample_genome$V2[1] <- 100000
49 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
50 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
51 | reduction_method = 'tSNE',
52 | norm_method = "none")
53 | tsne_coords <- t(reducedDimA(input_cds))
54 | row.names(tsne_coords) <- row.names(pData(input_cds))
55 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
56 | cicero_cons <- run_cicero(cicero_cds, sample_genome, sample_num = 2)
57 | ccan_assigns <- generate_ccans(cicero_cons)
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/man/generate_cicero_models.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{generate_cicero_models}
4 | \alias{generate_cicero_models}
5 | \title{Generate cicero models}
6 | \usage{
7 | generate_cicero_models(
8 | cds,
9 | distance_parameter,
10 | s = 0.75,
11 | window = 5e+05,
12 | max_elements = 200,
13 | genomic_coords = cicero::human.hg19.genome
14 | )
15 | }
16 | \arguments{
17 | \item{cds}{A cicero CDS object generated using \code{\link{make_cicero_cds}}.}
18 |
19 | \item{distance_parameter}{Distance based penalty parameter value. Generally,
20 | the mean of the calculated \code{distance_parameter} values from
21 | \code{\link{estimate_distance_parameter}}.}
22 |
23 | \item{s}{Power law value. See details.}
24 |
25 | \item{window}{Size of the genomic window to query, in base pairs.}
26 |
27 | \item{max_elements}{Maximum number of elements per window allowed. Prevents
28 | very large models from slowing performance.}
29 |
30 | \item{genomic_coords}{Either a data frame or a path (character) to a file
31 | with chromosome lengths. The file should have two columns, the first is
32 | the chromosome name (ex. "chr1") and the second is the chromosome length
33 | in base pairs. See \code{data(human.hg19.genome)} for an example. If a
34 | file, should be tab-separated and without header.}
35 | }
36 | \value{
37 | A list of results for each window. Either a \code{glasso} object, or
38 | a character description of why the window was skipped. This list can be
39 | directly input into \code{\link{assemble_connections}} to create a
40 | reconciled list of cicero co-accessibility scores.
41 | }
42 | \description{
43 | Function to generate graphical lasso models on all sites in a CDS object
44 | within overlapping genomic windows.
45 | }
46 | \details{
47 | The purpose of this function is to compute the raw covariances
48 | between each pair of sites within overlapping windows of the genome.
49 | Within each window, the function then estimates a regularized correlation
50 | matrix using the graphical LASSO (Friedman et al., 2008), penalizing pairs
51 | of distant sites more than proximal sites. The scaling parameter,
52 | \code{distance_parameter}, in combination with the power law value \code{s}
53 | determines the distance-based penalty.
54 |
55 | The parameter \code{s} is a constant that captures the power-law
56 | distribution of contact frequencies between different locations in the
57 | genome as a function of their linear distance. For a complete discussion
58 | of the various polymer models of DNA packed into the nucleus and of
59 | justifiable values for s, we refer readers to (Dekker et al., 2013).
60 | We use a value of 0.75 by default
61 | in Cicero, which corresponds to the “tension globule” polymer model of DNA
62 | (Sanborn et al., 2015). This parameter must be the same as the s parameter
63 | for \code{\link{estimate_distance_parameter}}.
64 |
65 | Further details are available in the publication that accompanies this
66 | package. Run \code{citation("cicero")} for publication details.
67 | }
68 | \examples{
69 | data("cicero_data")
70 | data("human.hg19.genome")
71 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
72 | sample_genome$V2[1] <- 100000
73 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
74 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
75 | reduction_method = 'tSNE',
76 | norm_method = "none")
77 | tsne_coords <- t(reducedDimA(input_cds))
78 | row.names(tsne_coords) <- row.names(pData(input_cds))
79 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
80 | model_output <- generate_cicero_models(cicero_cds,
81 | distance_parameter = 0.3,
82 | genomic_coords = sample_genome)
83 |
84 | }
85 | \references{
86 | \itemize{
87 | \item Dekker, J., Marti-Renom, M.A., and Mirny, L.A. (2013). Exploring
88 | the three-dimensional organization of genomes: interpreting chromatin
89 | interaction data. Nat. Rev. Genet. 14, 390–403.
90 | \item Friedman, J., Hastie, T., and Tibshirani, R. (2008). Sparse
91 | inverse covariance estimation with the graphical lasso. Biostatistics 9,
92 | 432–441.
93 | \item Sanborn, A.L., Rao, S.S.P., Huang, S.-C., Durand, N.C., Huntley,
94 | M.H., Jewett, A.I., Bochkov, I.D., Chinnappan, D., Cutkosky, A., Li, J.,
95 | et al. (2015). Chromatin extrusion explains key features of loop and
96 | domain formation in wild-type and engineered genomes. Proc. Natl. Acad.
97 | Sci. U. S. A. 112, E6456–E6465.
98 | }
99 | }
100 | \seealso{
101 | \code{\link{estimate_distance_parameter}}
102 | }
103 |
--------------------------------------------------------------------------------
/man/human.hg19.genome.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{human.hg19.genome}
5 | \alias{human.hg19.genome}
6 | \title{Chromosome lengths from human genome hg19}
7 | \format{
8 | A data frame with 93 rows and 2 variables:
9 | \describe{
10 | \item{V1}{Chromosome}
11 | \item{V2}{Chromosome length, base pairs}
12 | }
13 | }
14 | \usage{
15 | data(human.hg19.genome)
16 | }
17 | \description{
18 | A list of the chromosomes in hg19 and their lengths
19 | in base pairs.
20 | }
21 | \keyword{datasets}
22 |
--------------------------------------------------------------------------------
/man/make_atac_cds.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{make_atac_cds}
4 | \alias{make_atac_cds}
5 | \title{Make ATAC CDS object}
6 | \usage{
7 | make_atac_cds(input, binarize = FALSE)
8 | }
9 | \arguments{
10 | \item{input}{Either a data frame or a path to input data. If a file, it
11 | should be a tab-delimited text file with three columns and no header. For
12 | either a file or a data frame, the first column is the peak coordinates in
13 | the form "chr10_100013372_100013596", the second column is the cell name,
14 | and the third column is an integer that represents the number of reads
15 | from that cell overlapping that peak. Zero values do not need to be
16 | included (sparse matrix format).}
17 |
18 | \item{binarize}{Logical. Should the count matrix be converted to binary?}
19 | }
20 | \value{
21 | A CDS object containing your ATAC data in proper format.
22 | }
23 | \description{
24 | This function takes as input a data frame or a path to a file in a sparse
25 | matrix format and returns a properly formatted \code{CellDataSet} (CDS)
26 | object.
27 | }
28 | \examples{
29 | data("cicero_data")
30 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/make_cicero_cds.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{make_cicero_cds}
4 | \alias{make_cicero_cds}
5 | \title{Create cicero input CDS}
6 | \usage{
7 | make_cicero_cds(
8 | cds,
9 | reduced_coordinates,
10 | k = 50,
11 | summary_stats = NULL,
12 | size_factor_normalize = TRUE,
13 | silent = FALSE,
14 | return_agg_info = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{cds}{Input CDS object.}
19 |
20 | \item{reduced_coordinates}{A data frame with columns representing the
21 | coordinates of each cell in reduced dimension space (generally 2-3
22 | dimensions). \code{row.names(reduced_coordinates)} should match the cell
23 | names in the CDS object. If dimension reduction was done using monocle,
24 | tSNE coordinates can be accessed by \code{t(reducedDimA(cds))}, and
25 | DDRTree coordinates can be accessed by \code{t(reducedDimS(cds))}.}
26 |
27 | \item{k}{Number of cells to aggregate per bin.}
28 |
29 | \item{summary_stats}{Which numeric \code{pData(cds)} columns you would like
30 | summarized (mean) by bin in the resulting CDS object.}
31 |
32 | \item{size_factor_normalize}{Logical, should accessibility values be
33 | normalized by size factor?}
34 |
35 | \item{silent}{Logical, should warning and info messages be suppressed?}
36 |
37 | \item{return_agg_info}{Logical, should a list of the assignments of cells to
38 | aggregated bins be output? When \code{TRUE}, this function returns a list
39 | of two items, first, the aggregated CDS object and second, a data.frame
40 | with the binning information.}
41 | }
42 | \value{
43 | Aggregated CDS object. If return_agg_info is \code{TRUE}, a list
44 | of the aggregated CDS object and a data.frame of aggregation info.
45 | }
46 | \description{
47 | Function to generate an aggregated input CDS for cicero. \code{run_cicero}
48 | takes as input an aggregated cicero CDS object. This function will generate
49 | the CDS given an input CDS (perhaps generated by \code{make_atac_cds}) and
50 | a value for k, which is the number of cells to be aggregated per bin. The
51 | default value for k is 50.
52 | }
53 | \details{
54 | Aggregation of similar cells is done using a k-nearest-neighbors
55 | graph and a randomized "bagging" procedure. Details are available in the
56 | publication that accompanies this package. Run \code{citation("cicero")}
57 | for publication details. KNN is calculated using
58 | \code{\link[FNN]{knn.index}}.
59 | }
60 | \examples{
61 | \dontrun{
62 | data("cicero_data")
63 |
64 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
65 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
66 | reduction_method = 'tSNE',
67 | norm_method = "none")
68 | tsne_coords <- t(reducedDimA(input_cds))
69 | row.names(tsne_coords) <- row.names(pData(input_cds))
70 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/man/make_sparse_matrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{make_sparse_matrix}
4 | \alias{make_sparse_matrix}
5 | \title{Make a symmetric square sparse matrix from data frame}
6 | \usage{
7 | make_sparse_matrix(data, i.name = "Peak1", j.name = "Peak2", x.name = "value")
8 | }
9 | \arguments{
10 | \item{data}{data frame}
11 |
12 | \item{i.name}{name of i column}
13 |
14 | \item{j.name}{name of j column}
15 |
16 | \item{x.name}{name of value column}
17 | }
18 | \value{
19 | sparse matrix
20 | }
21 | \description{
22 | Convert a data frame into a square sparse matrix (all versus all)
23 | }
24 |
--------------------------------------------------------------------------------
/man/normalize_gene_activities.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/activityScores.R
3 | \name{normalize_gene_activities}
4 | \alias{normalize_gene_activities}
5 | \title{Normalize gene activities}
6 | \usage{
7 | normalize_gene_activities(activity_matrices, cell_num_genes)
8 | }
9 | \arguments{
10 | \item{activity_matrices}{A gene activity matrix, output from
11 | \code{\link{build_gene_activity_matrix}}, or a list of gene activity
12 | matrices to be normalized together.}
13 |
14 | \item{cell_num_genes}{A named vector of the total number of accessible sites
15 | per cell. Names should correspond to the cell names in the activity
16 | matrices. These values can be found in the "num_genes_expressed" column
17 | of the pData table of the CDS used to calculate the gene activity matrix.}
18 | }
19 | \value{
20 | Normalized activity matrix or matrices.
21 | }
22 | \description{
23 | Normalize the output of \code{\link{build_gene_activity_matrix}}. Input is
24 | either one or multiple gene activity matrices. Any gene activities to be
25 | compared amongst each other should be normalized together.
26 | }
27 | \examples{
28 | data("cicero_data")
29 | data("human.hg19.genome")
30 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
31 | sample_genome$V2[1] <- 100000
32 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
33 | input_cds <- detectGenes(input_cds)
34 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
35 | reduction_method = 'tSNE',
36 | norm_method = "none")
37 | tsne_coords <- t(reducedDimA(input_cds))
38 | row.names(tsne_coords) <- row.names(pData(input_cds))
39 | cicero_cds <- make_cicero_cds(input_cds,
40 | reduced_coordinates = tsne_coords)
41 | cons <- run_cicero(cicero_cds, sample_genome, sample_num=2)
42 |
43 | data(gene_annotation_sample)
44 | gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
45 | names(gene_annotation_sub)[4] <- "gene"
46 | input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
47 | num_genes <- pData(input_cds)$num_genes_expressed
48 | names(num_genes) <- row.names(pData(input_cds))
49 | unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
50 | cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes)
51 |
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/man/plot_accessibility_in_pseudotime.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plotting.R
3 | \name{plot_accessibility_in_pseudotime}
4 | \alias{plot_accessibility_in_pseudotime}
5 | \title{Plot accessibility by pseudotime}
6 | \usage{
7 | plot_accessibility_in_pseudotime(cds_subset, breaks = 10)
8 | }
9 | \arguments{
10 | \item{cds_subset}{Subset of the CDS object you want to plot. The CDS must
11 | have a column in the pData table called "Pseudotime".}
12 |
13 | \item{breaks}{Number of breaks along pseudotime. Controls the coarseness of
14 | the plot.}
15 | }
16 | \value{
17 | ggplot object
18 | }
19 | \description{
20 | Make a barplot of chromatin accessibility across pseudotime
21 | }
22 | \details{
23 | This function plots each site in the CDS subset by cell pseudotime
24 | as a barplot. Cells are divided into bins by pseudotime (number determined
25 | by \code{breaks}) and the percent of cells in each bin that are accessible
26 | is represented by bar height. In addition, the black line represents the
27 | pseudotime-dependent average accessibility from a smoothed binomial
28 | regression.
29 | }
30 | \examples{
31 | \dontrun{
32 | plot_accessibility_in_pseudotime(input_cds_lin[c("chr18_38156577_38158261",
33 | "chr18_48373358_48374180",
34 | "chr18_60457956_60459080")])
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/man/plot_connections.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plotting.R
3 | \name{plot_connections}
4 | \alias{plot_connections}
5 | \title{Plot connections}
6 | \usage{
7 | plot_connections(
8 | connection_df,
9 | chr,
10 | minbp,
11 | maxbp,
12 | coaccess_cutoff = 0,
13 | peak_color = "#B4656F",
14 | connection_color = "#7F7CAF",
15 | connection_color_legend = TRUE,
16 | alpha_by_coaccess = FALSE,
17 | connection_width = 2,
18 | connection_ymax = NULL,
19 | gene_model = NULL,
20 | gene_model_color = "#81D2C7",
21 | gene_model_shape = c("smallArrow", "box"),
22 | comparison_track = NULL,
23 | comparison_coaccess_cutoff = 0,
24 | comparison_peak_color = "#B4656F",
25 | comparison_connection_color = "#7F7CAF",
26 | comparison_connection_color_legend = TRUE,
27 | comparison_connection_width = 2,
28 | comparison_ymax = NULL,
29 | collapseTranscripts = FALSE,
30 | include_axis_track = TRUE,
31 | return_as_list = FALSE,
32 | viewpoint = NULL,
33 | comparison_viewpoint = TRUE,
34 | viewpoint_color = "#F0544F",
35 | viewpoint_fill = "#EFD8D7",
36 | viewpoint_alpha = 0.5
37 | )
38 | }
39 | \arguments{
40 | \item{connection_df}{Data frame of connections, which must include the
41 | columns 'Peak1', 'Peak2', and 'coaccess'. Generally, the output of
42 | run_cicero or assemble_connections.}
43 |
44 | \item{chr}{The chromosome of the region you would like to plot in the form
45 | 'chr10'.}
46 |
47 | \item{minbp}{The base pair coordinate of the start of the region to be
48 | plotted.}
49 |
50 | \item{maxbp}{The base pair coordinate of the end of the region to be plotted.}
51 |
52 | \item{coaccess_cutoff}{The minimum cicero co-accessibility score you would
53 | like to be plotted. Default is 0.}
54 |
55 | \item{peak_color}{Color for peak annotations - a single color, the name of a
56 | column containing color values that correspond to Peak1, or the name of
57 | a column containing a character or factor to base peak colors on.}
58 |
59 | \item{connection_color}{Color for connection lines. A single color, the name
60 | of a column containing color values, or the name of a column containing a
61 | character or factor to base connection colors on.}
62 |
63 | \item{connection_color_legend}{Logical, should connection color legend be
64 | shown?}
65 |
66 | \item{alpha_by_coaccess}{Logical, should the transparency of connection
67 | lines be scaled based on co-accessibility score?}
68 |
69 | \item{connection_width}{Width of connection lines.}
70 |
71 | \item{connection_ymax}{Connection y-axis height. If \code{NULL}, chosen
72 | automatically.}
73 |
74 | \item{gene_model}{Either \code{NULL} or a data.frame. The data.frame should
75 | be in a form compatible with the Gviz function
76 | \code{\link[Gviz]{GeneRegionTrack-class}} (cannot have NA as column names).}
77 |
78 | \item{gene_model_color}{Color for gene annotations.}
79 |
80 | \item{gene_model_shape}{Shape for gene models, passed to
81 | \code{\link[Gviz]{GeneRegionTrack-class}}. Options described at
82 | \code{\link[Gviz]{GeneRegionTrack-class}}.}
83 |
84 | \item{comparison_track}{Either \code{NULL} or a data frame. If a data frame,
85 | a second track of connections will be plotted based on this data. This
86 | data frame has the same requirements as connection_df (Peak1, Peak2 and
87 | coaccess columns).}
88 |
89 | \item{comparison_coaccess_cutoff}{The minimum cicero co-accessibility score
90 | you would like to be plotted for the comparison dataset. Default = 0.}
91 |
92 | \item{comparison_peak_color}{Color for comparison peak annotations - a
93 | single color, the name of a column containing color values that correspond
94 | to Peak1, or the name of a column containing a character or factor to base
95 | peak colors on.}
96 |
97 | \item{comparison_connection_color}{Color for comparison connection lines. A
98 | single color, the name of a column containing color values, or the name of
99 | a column containing a character or factor to base connection colors on.}
100 |
101 | \item{comparison_connection_color_legend}{Logical, should comparison
102 | connection color legend be shown?}
103 |
104 | \item{comparison_connection_width}{Width of comparison connection lines.}
105 |
106 | \item{comparison_ymax}{Connection y-axis height for comparison track. If
107 | \code{NULL}, chosen automatically.}
108 |
109 | \item{collapseTranscripts}{Logical or character scalar. Can be one of
110 | \code{gene}, \code{longest}, \code{shortest} or \code{meta}. Variable is
111 | passed to the \code{\link[Gviz]{GeneRegionTrack-class}} function of Gviz.
112 | Determines whether and how to collapse related transcripts. See Gviz
113 | documentation for details.}
114 |
115 | \item{include_axis_track}{Logical, should a genomic axis be plotted?}
116 |
117 | \item{return_as_list}{Logical, if TRUE, the function will not plot, but will
118 | return the plot components as a list. Allows user to add/customize Gviz
119 | components and plot them separately using \code{\link[Gviz]{plotTracks}}.}
120 |
121 | \item{viewpoint}{\code{NULL} or coordinates in the form "chr1_10000_10020". Use
122 | viewpoint if you would like to plot cicero connections "4C-seq style".
123 | Only connections originating in the viewpoint will be shown. Ideal for
124 | comparisons with 4C-seq data. If comparison_viewpoint is \code{TRUE}, any
125 | comparison track will be subsetted as well.}
126 |
127 | \item{comparison_viewpoint}{Logical, should viewpoint apply to comparison
128 | track as well?}
129 |
130 | \item{viewpoint_color}{Color for the highlight border.}
131 |
132 | \item{viewpoint_fill}{Color for the highlight fill.}
133 |
134 | \item{viewpoint_alpha}{Alpha value for the highlight fill.}
135 | }
136 | \value{
137 | A gene region plot, or list of components if return_as_list is
138 | \code{TRUE}.
139 | }
140 | \description{
141 | Plotting function for Cicero connections. Uses \code{\link[Gviz]{plotTracks}}
142 | as its basis.
143 | }
144 | \examples{
145 | cicero_cons <- data.frame(
146 | Peak1 = c("chr18_10034652_10034983", "chr18_10034652_10034983",
147 | "chr18_10034652_10034983", "chr18_10034652_10034983",
148 | "chr18_10087586_10087901", "chr18_10120685_10127115",
149 | "chr18_10097718_10097934", "chr18_10087586_10087901",
150 | "chr18_10154818_10155215", "chr18_10238762_10238983",
151 | "chr18_10198959_10199183", "chr18_10250985_10251585"),
152 | Peak2 = c("chr18_10097718_10097934", "chr18_10087586_10087901",
153 | "chr18_10154818_10155215", "chr18_10238762_10238983",
154 | "chr18_10198959_10199183", "chr18_10250985_10251585",
155 | "chr18_10034652_10034983", "chr18_10034652_10034983",
156 | "chr18_10034652_10034983", "chr18_10034652_10034983",
157 | "chr18_10087586_10087901", "chr18_10120685_10127115"),
158 | coaccess = c(0.0051121787, 0.0016698617, 0.0006570246,
159 | 0.0013466927, 0.0737935011, 0.3264019452,
160 | 0.0051121787, 0.0016698617, 0.0006570246,
161 | 0.0013466927, 0.0737935011, 0.3264019452))
162 | plot_connections(cicero_cons, chr = "chr18",
163 | minbp = 10034652,
164 | maxbp = 10251585,
165 | peak_color = "purple")
166 |
167 | }
168 |
--------------------------------------------------------------------------------
/man/ranges_for_coords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{ranges_for_coords}
4 | \alias{ranges_for_coords}
5 | \title{Construct GRanges objects from coordinate strings}
6 | \usage{
7 | ranges_for_coords(coord_strings, meta_data_df = NULL, with_names = FALSE)
8 | }
9 | \arguments{
10 | \item{coord_strings}{A list of coordinate strings (in the form
11 | "chr1:500000-1000000")}
12 |
13 | \item{meta_data_df}{A data frame with any meta data columns you want
14 | included with the ranges. Must be in the same order as coord_strings.}
15 |
16 | \item{with_names}{logical - should meta data include coordinate string
17 | (field coord_string)?}
18 | }
19 | \value{
20 | GRanges object of the input strings
21 | }
22 | \description{
23 | Construct GRanges objects from coordinate strings
24 | }
25 | \details{
26 | Coordinate strings consist of three pieces of information:
27 | chromosome, start, and stop. These pieces of information can be separated
28 | by the characters ":", "_", or "-". Commas will be removed, not used as
29 | separators (ex: "chr18:8,575,097-8,839,855" is ok).
30 | }
31 | \examples{
32 | ran1 <- ranges_for_coords("chr1:2039-30239", with_names = TRUE)
33 | ran2 <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"),
34 | meta_data_df = data.frame(dat = c("1", "X")))
35 | ran3 <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"),
36 | with_names = TRUE,
37 | meta_data_df = data.frame(dat = c("1", "X"),
38 | stringsAsFactors = FALSE))
39 |
40 | }
41 | \seealso{
42 | \code{\link[GenomicRanges]{GRanges-class}}
43 | }
44 |
--------------------------------------------------------------------------------
/man/run_cicero.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runCicero.R
3 | \name{run_cicero}
4 | \alias{run_cicero}
5 | \title{Run Cicero}
6 | \usage{
7 | run_cicero(
8 | cds,
9 | genomic_coords,
10 | window = 5e+05,
11 | silent = FALSE,
12 | sample_num = 100
13 | )
14 | }
15 | \arguments{
16 | \item{cds}{Cicero CDS object, created using \code{\link{make_cicero_cds}}}
17 |
18 | \item{genomic_coords}{Either a data frame or a path (character) to a file
19 | with chromosome lengths. The file should have two columns, the first is
20 | the chromosome name (ex. "chr1") and the second is the chromosome length
21 | in base pairs. See \code{data(human.hg19.genome)} for an example. If a
22 | file, should be tab-separated and without header.}
23 |
24 | \item{window}{Size of the genomic window to query, in base pairs.}
25 |
26 | \item{silent}{Whether to suppress progress messages}
27 |
28 | \item{sample_num}{How many sample genomic windows to use to generate the
29 | \code{distance_parameter} parameter. Default: 100.}
30 | }
31 | \value{
32 | A table of co-accessibility scores
33 | }
34 | \description{
35 | A wrapper function that runs the primary functions of the Cicero pipeline
36 | with default parameters. Runs \code{\link{estimate_distance_parameter}},
37 | \code{\link{generate_cicero_models}} and \code{\link{assemble_connections}}.
38 | See the manual pages of these functions for details about their function and
39 | parameter options. Defaults in this function are designed for mammalian data;
40 | those with non-mammalian data should read about parameters in the above
41 | functions.
42 | }
43 | \examples{
44 | data("cicero_data")
45 | data("human.hg19.genome")
46 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
47 | sample_genome$V2[1] <- 100000
48 | input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
49 | input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
50 | reduction_method = 'tSNE',
51 | norm_method = "none")
52 | tsne_coords <- t(reducedDimA(input_cds))
53 | row.names(tsne_coords) <- row.names(pData(input_cds))
54 | cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
55 | cons <- run_cicero(cicero_cds, sample_genome, sample_num = 2)
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/tests/feat.txt:
--------------------------------------------------------------------------------
1 | chr18 10000 10700 Acetylated
2 | chr18 10800 11000 Methylated
3 | chr18 50000 60000 Acetylated
4 | chr18 1e+05 110000 Methylated
5 |
--------------------------------------------------------------------------------
/tests/feat_head.txt:
--------------------------------------------------------------------------------
1 | chr bp1 bp2 type
2 | chr18 10000 10700 Acetylated
3 | chr18 10800 11000 Methylated
4 | chr18 50000 60000 Acetylated
5 | chr18 1e+05 110000 Methylated
6 |
--------------------------------------------------------------------------------
/tests/figs/deps.txt:
--------------------------------------------------------------------------------
1 | - vdiffr-svg-engine: 1.0
2 | - vdiffr: 0.3.1
3 | - freetypeharfbuzz: 0.2.5
4 |
--------------------------------------------------------------------------------
/tests/figs/plotting/basic-bar-one.svg:
--------------------------------------------------------------------------------
1 |
2 |
74 |
--------------------------------------------------------------------------------
/tests/figs/plotting/connection-ymax-plus-cutoff.svg:
--------------------------------------------------------------------------------
1 |
2 |
426 |
--------------------------------------------------------------------------------
/tests/human.hg19.genome_sub.txt:
--------------------------------------------------------------------------------
1 | chr18 78077248
2 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(cicero)
3 |
4 | test_check("cicero")
5 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/plotting/basic-bar-one.svg:
--------------------------------------------------------------------------------
1 |
2 |
79 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/plotting/basic-connections-high-cutoff.svg:
--------------------------------------------------------------------------------
1 |
2 |
248 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/plotting/basic-connections-plot-cutoff.svg:
--------------------------------------------------------------------------------
1 |
2 |
249 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/plotting/connection-ymax-plus-cutoff.svg:
--------------------------------------------------------------------------------
1 |
2 |
246 |
--------------------------------------------------------------------------------
/tests/testthat/test-aggregate.R:
--------------------------------------------------------------------------------
1 | context("test-aggregate.R")
2 |
3 | data("cicero_data")
4 | test_cds <- suppressWarnings(make_atac_cds(cicero_data))
5 |
6 | test_cds2 <- suppressWarnings(make_atac_cds(cicero_data))
7 | exprs(test_cds2) <- as.matrix(exprs(test_cds2))
8 |
9 | test_that("aggregate_nearby_peaks makes a valid cds object", {
10 | #skip_on_bioc()
11 | agg_cds <- aggregate_nearby_peaks(test_cds, 10000)
12 | expect_is(agg_cds, "CellDataSet")
13 | expect_equal(nrow(exprs(agg_cds)), 1690)
14 | expect_equal(ncol(exprs(agg_cds)), 200)
15 | expect_equal(exprs(agg_cds)[1,4], 4)
16 | expect_match(row.names(agg_cds)[1], "chr18_10006196_10017274")
17 | expect_match(colnames(agg_cds)[1], "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT")
18 | expect_is(exprs(agg_cds), "dgCMatrix")
19 | })
20 |
21 | test_that("aggregate_nearby_peaks makes a valid cds object not sparse", {
22 | #skip_on_bioc()
23 | agg_cds <- aggregate_nearby_peaks(test_cds2, 10000)
24 | expect_is(agg_cds, "CellDataSet")
25 | expect_equal(nrow(exprs(agg_cds)), 1690)
26 | expect_equal(ncol(exprs(agg_cds)), 200)
27 | expect_equal(exprs(agg_cds)[1,4], 4)
28 | expect_match(row.names(agg_cds)[1], "chr18_10006196_10017274")
29 | expect_match(colnames(agg_cds)[1], "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT")
30 | expect_is(exprs(agg_cds), "matrix")
31 | })
32 |
33 | test_that("aggregate_by_cell_bin makes a valid cds object", {
34 | #skip_on_bioc()
35 | pData(test_cds)$cell_subtype <- rep(1:10, times= 20)
36 | agg_cds2 <- suppressMessages(aggregate_by_cell_bin(test_cds, "cell_subtype"))
37 | expect_is(agg_cds2, "CellDataSet")
38 | expect_equal(nrow(exprs(agg_cds2)), 6146)
39 | expect_equal(ncol(exprs(agg_cds2)), 10)
40 | expect_equal(exprs(agg_cds2)[1,4], 2)
41 | expect_match(row.names(agg_cds2)[1], "chr18_10006196_10006822")
42 | expect_match(colnames(agg_cds2)[1], "1")
43 | expect_is(exprs(agg_cds2), "matrix")
44 | })
45 |
46 | test_that("aggregate_by_cell_bin makes a valid cds object not sparse", {
47 | #skip_on_bioc()
48 | pData(test_cds2)$cell_subtype <- rep(1:10, times= 20)
49 | agg_cds2 <- suppressMessages(aggregate_by_cell_bin(test_cds2, "cell_subtype"))
50 | expect_is(agg_cds2, "CellDataSet")
51 | expect_equal(nrow(exprs(agg_cds2)), 6146)
52 | expect_equal(ncol(exprs(agg_cds2)), 10)
53 | expect_equal(exprs(agg_cds2)[1,4], 2)
54 | expect_match(row.names(agg_cds2)[1], "chr18_10006196_10006822")
55 | expect_match(colnames(agg_cds2)[1], "1")
56 | expect_is(exprs(agg_cds2), "matrix")
57 | })
58 |
--------------------------------------------------------------------------------
/tests/testthat/test-runCicero.R:
--------------------------------------------------------------------------------
1 | context("runCicero")
2 |
3 | #### make_cicero_cds ####
4 |
5 | data(cicero_data)
6 | load("../tsne_coord.Rdata")
7 | data("human.hg19.genome")
8 |
9 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
10 | input_cds <- make_atac_cds(cicero_data)
11 |
12 | set.seed(2017)
13 | input_cds <- detectGenes(input_cds, min_expr = .1)
14 | input_cds <- estimateSizeFactors(input_cds)
15 | input_cds <- suppressWarnings(suppressMessages(estimateDispersions(input_cds)))
16 |
17 | set.seed(2018)
18 | cicero_cds <- make_cicero_cds(input_cds,
19 | reduced_coordinates = tsne_coords,
20 | silent = TRUE,
21 | summary_stats = c("num_genes_expressed"))
22 |
23 | test_that("make_cicero_cds aggregates correctly", {
24 | #skip_on_bioc()
25 | expect_is(cicero_cds, "CellDataSet")
26 | expect_equal(nrow(fData(cicero_cds)), nrow(fData(input_cds)))
27 | expect_named(pData(cicero_cds),c("agg_cell", "mean_num_genes_expressed",
28 | "Size_Factor", "num_genes_expressed"))
29 | expect_equal(nrow(exprs(cicero_cds)), 6146)
30 | expect_equal(ncol(exprs(cicero_cds)), 34)
31 |
32 | set.seed(2018)
33 | expect_warning(cicero_cds <- make_cicero_cds(input_cds,
34 | reduced_coordinates = tsne_coords,
35 | silent = FALSE,
36 | summary_stats = c("num_genes_expressed")))
37 | input_cds2 <- input_cds
38 | fData(input_cds2)$bp1 <- NULL
39 | set.seed(2018)
40 | cicero_cds <- make_cicero_cds(input_cds2,
41 | reduced_coordinates = tsne_coords,
42 | silent = TRUE,
43 | size_factor_normalize = FALSE,
44 | summary_stats = c("num_genes_expressed"))
45 | expect_is(cicero_cds, "CellDataSet")
46 | expect_equal(nrow(fData(cicero_cds)), nrow(fData(input_cds)))
47 | expect_named(pData(cicero_cds),c("agg_cell", "mean_num_genes_expressed",
48 | "Size_Factor", "num_genes_expressed"))
49 | expect_equal(nrow(exprs(cicero_cds)), 6146)
50 | expect_equal(ncol(exprs(cicero_cds)), 34)
51 | })
52 |
53 | set.seed(2018)
54 | cicero_cds_temp <- make_cicero_cds(input_cds,
55 | reduced_coordinates = tsne_coords,
56 | silent = TRUE,
57 | summary_stats = c("num_genes_expressed"),
58 | return_agg_info = TRUE,
59 | size_factor_normalize = FALSE)
60 | cicero_cds2 <- cicero_cds_temp[[1]]
61 | agg_info <- cicero_cds_temp[[2]]
62 |
63 | test_that("make_cicero_cds returns agg_info", {
64 | expect_is(cicero_cds2, "CellDataSet")
65 | expect_equal(nrow(fData(cicero_cds2)), nrow(fData(input_cds)))
66 | expect_named(pData(cicero_cds2),c("agg_cell", "mean_num_genes_expressed",
67 | "Size_Factor", "num_genes_expressed"))
68 | expect_equal(nrow(exprs(cicero_cds2)), 6146)
69 | expect_equal(ncol(exprs(cicero_cds2)), 34)
70 |
71 | expect_is(agg_info, "data.frame")
72 |
73 | agg_test_cell <- agg_info$agg_cell[[1]]
74 | test_agg <- as.character(agg_info[agg_info$agg_cell == agg_test_cell,]$cell)
75 | temp_exprs <- exprs(input_cds)[,test_agg]
76 | test_agg <- Matrix::rowSums(temp_exprs)
77 | expect_equal(sum(exprs(cicero_cds2)[,agg_test_cell] != test_agg), 0)
78 |
79 | })
80 |
81 | #### estimate_distance_parameter ####
82 |
83 | test_that("estimate_distance_parameter gives correct mean", {
84 | #skip_on_bioc()
85 | set.seed(200)
86 | alphas <- estimate_distance_parameter(cicero_cds, window=500000,
87 | maxit=100, sample_num = 2,
88 | distance_constraint = 250000,
89 | distance_parameter_convergence = 1e-22,
90 | genomic_coords = sample_genome,
91 | max_sample_windows = 6)
92 | mean_alpha <- mean(alphas)
93 | expect_equal(length(alphas), 2)
94 | expect_equal(mean_alpha, 2.25, tolerance = 1e-2)
95 | set.seed(200)
96 | alphas <- estimate_distance_parameter(cicero_cds, window=500000,
97 | maxit=100, sample_num = 2,
98 | distance_constraint = 250000,
99 | distance_parameter_convergence = 1e-22,
100 | genomic_coords = "../human.hg19.genome_sub.txt",
101 | max_sample_windows = 6)
102 | mean_alpha <- mean(alphas)
103 | expect_equal(length(alphas), 2)
104 | expect_equal(mean_alpha, 2.25, tolerance = 1e-2)
105 | set.seed(200)
106 | expect_error(expect_warning(alphas <- estimate_distance_parameter(cicero_cds,
107 | window=500000,
108 | maxit=100, sample_num = 2,
109 | max_elements = 2,
110 | distance_constraint = 250000,
111 | distance_parameter_convergence = 1e-22,
112 | genomic_coords = sample_genome,
113 | max_sample_windows = 6)))
114 | testthat::expect_error(alphas <- estimate_distance_parameter(cicero_cds,
115 | window=10000,
116 | maxit=100, sample_num = 2,
117 | distance_constraint = 250000,
118 | distance_parameter_convergence = 1e-22,
119 | genomic_coords = sample_genome,
120 | max_sample_windows = 6),
121 | "distance_constraint not less than window")
122 | set.seed(205)
123 | testthat::expect_warning(alphas <- estimate_distance_parameter(cicero_cds,
124 | window=10000,
125 | maxit=100, sample_num = 2,
126 | distance_constraint = 5000,
127 | distance_parameter_convergence = 1e-22,
128 | genomic_coords = sample_genome,
129 | max_sample_windows = 6))
130 | })
131 |
132 | #### generate_cicero_models ####
133 | set.seed(203)
134 | mean_alpha <- 2.030655
135 | con_list <- generate_cicero_models(cicero_cds,
136 | mean_alpha,
137 | s=.75,
138 | genomic_coords = sample_genome)
139 |
140 | test_that("generate_cicero_models gives output", { #slow
141 | skip_on_bioc()
142 | expect_is(con_list, "list")
143 | expect_equal(length(con_list), 313)
144 | expect_equal(con_list[[1]]$w[1,2], 0.866, tolerance = 1e-3)
145 | set.seed(203)
146 | con_list <- generate_cicero_models(cicero_cds,
147 | mean_alpha,
148 | s=.75,
149 | genomic_coords = "../human.hg19.genome_sub.txt")
150 | expect_is(con_list, "list")
151 | expect_equal(length(con_list), 313)
152 | expect_equal(con_list[[1]]$w[1,2], 0.866, tolerance = 1e-3)
153 | set.seed(203)
154 | con_list <- generate_cicero_models(cicero_cds,
155 | mean_alpha,
156 | window = 5000000,
157 | s=0.75,
158 | genomic_coords = sample_genome)
159 | expect_equal(length(con_list), 32)
160 | expect_equal(con_list[[1]], "Too many elements in range")
161 |
162 | set.seed(203)
163 | expect_error(con_list <- generate_cicero_models(cicero_cds,
164 | mean_alpha,
165 | window = 5000000,
166 | s=1,
167 | genomic_coords = sample_genome),
168 | "s not less than 1")
169 |
170 | set.seed(203)
171 | con_list <- generate_cicero_models(cicero_cds,
172 | mean_alpha,
173 | window = 500000,
174 | s=0.1,
175 | genomic_coords = sample_genome)
176 | expect_equal(con_list[[1]]$w[1,3], -3.7, tolerance = 1e-2)
177 | })
178 |
179 | #### assemble_connections ####
180 |
181 | test_that("assemble_connections gives output", {
182 | #skip_on_bioc()
183 | expect_is(con_list, "list")
184 | expect_equal(length(con_list), 313)
185 | expect_equal(con_list[[1]]$w[1,2], 0.866, tolerance = 1e-3)
186 | set.seed(203)
187 | con_list <- generate_cicero_models(cicero_cds,
188 | mean_alpha,
189 | s=.75,
190 | genomic_coords = "../human.hg19.genome_sub.txt")
191 |
192 | cons <- assemble_connections(con_list, silent = FALSE)
193 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" &
194 | cons$Peak2 == "chr18_10603_11103",]$coaccess, 0.877,
195 | tolerance = 1e-3)
196 | expect_equal(ncol(cons), 3)
197 | expect_equal(nrow(cons), 543286)
198 | })
199 |
200 | #### run_cicero ####
201 | set.seed(2000)
202 | cons <- run_cicero(cicero_cds, sample_genome, window = 500000, silent=TRUE,
203 | sample_num = 2)
204 |
205 | test_that("run_cicero gives output", {
206 | #skip_on_bioc()
207 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" &
208 | cons$Peak2 == "chr18_10603_11103",]$coaccess, 0.877,
209 | tolerance = 1e-3)
210 | expect_equal(ncol(cons), 3)
211 | expect_equal(nrow(cons), 543286)
212 | cons <- run_cicero(cicero_cds, window = 500000, silent=TRUE, sample_num = 2,
213 | genomic_coords = "../human.hg19.genome_sub.txt")
214 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" &
215 | cons$Peak2 == "chr18_10603_11103",]$coaccess, 0.877,
216 | tolerance = 1e-3)
217 | expect_equal(ncol(cons), 3)
218 | expect_equal(nrow(cons), 543286)
219 | })
220 |
221 | test_that("run_cicero gives output bad chromosomes", {
222 | sample_genome <- subset(human.hg19.genome, V1 == "chr18")
223 | input_cds <- make_atac_cds(cicero_data)
224 |
225 | fdata <- fData(input_cds)
226 | mtx <- exprs(input_cds)
227 | pdata <- pData(input_cds)
228 | row.names(fdata) <- gsub("chr", "A0", row.names(fdata))
229 | fdata$site_name <- row.names(fdata)
230 | row.names(mtx) <- row.names(fdata)
231 | pdata <- new("AnnotatedDataFrame", data = pdata)
232 | fdata <- new("AnnotatedDataFrame", data = fdata)
233 | new_inp <- suppressWarnings(newCellDataSet(mtx, pdata, fdata))
234 |
235 | set.seed(2017)
236 | new_inp <- detectGenes(new_inp, min_expr = .1)
237 | new_inp <- estimateSizeFactors(new_inp)
238 |
239 | set.seed(2018)
240 | cicero_cds <- make_cicero_cds(new_inp,
241 | reduced_coordinates = tsne_coords,
242 | silent = TRUE,
243 | summary_stats = c("num_genes_expressed"))
244 |
245 | cons <- run_cicero(cicero_cds, window = 500000, silent=TRUE, sample_num = 2,
246 | genomic_coords = "../human.hg19.genome_sub.txt")
247 |
248 | #skip_on_bioc()
249 | expect_equal(cons[cons$Peak1 == "A018_10025_10225" &
250 | cons$Peak2 == "A018_10603_11103",]$coaccess, 0.877,
251 | tolerance = 1e-3)
252 | expect_equal(ncol(cons), 3)
253 | expect_equal(nrow(cons), 543286)
254 | })
255 |
256 | #### generate_ccans ####
257 |
258 | test_that("generate_ccans gives output", { #slow
259 | skip_on_bioc()
260 | expect_output(CCAN_assigns <- generate_ccans(cons),
261 | "Coaccessibility cutoff used: 0.47")
262 | #expect_equal(CCAN_assigns["chr18_217477_218555",]$CCAN, 3, tolerance = 1e-7)
263 | expect_equal(ncol(CCAN_assigns), 2)
264 | expect_equal(nrow(CCAN_assigns), 1905)
265 | expect_equal(length(unique(CCAN_assigns$CCAN)), 116)
266 |
267 | expect_output(CCAN_assigns <- generate_ccans(cons,
268 | coaccess_cutoff_override = 0.25),
269 | "Coaccessibility cutoff used: 0.25")
270 | expect_output(CCAN_assigns <- generate_ccans(cons, tolerance_digits = 1),
271 | "Coaccessibility cutoff used: 0.5")
272 | expect_output(CCAN_assigns <- generate_ccans(cons, tolerance_digits = 1,
273 | coaccess_cutoff_override = .25),
274 | "Coaccessibility cutoff used: 0.25")
275 | })
276 |
277 | #### compare_connections ####
278 |
279 | test_that("compare_connections works", {
280 | #skip_on_bioc()
281 | chia_conns <- data.frame(Peak1 = c("chr18_10000_10200", "chr18_10000_10200",
282 | "chr18_49500_49600"),
283 | Peak2 = c("chr18_10600_10700", "chr18_111700_111800",
284 | "chr18_10600_10700"))
285 | cons$in_dataset <- compare_connections(cons, chia_conns)
286 | cons$in_dataset2 <- compare_connections(cons, chia_conns, maxgap=1000)
287 |
288 | expect_is(cons, "data.frame")
289 | expect_equal(sum(cons$in_dataset), 4)
290 | expect_equal(sum(cons$in_dataset2), 22)
291 | expect_equal(cons[cons$Peak1 == "chr18_10025_10225" &
292 | cons$Peak2 == "chr18_10603_11103",]$in_dataset[1], TRUE)
293 | })
294 |
295 | #### find_overlapping_ccans ####
296 |
297 | test_that("find_overlapping_ccans works", {
298 | CCAN_assigns <- generate_ccans(cons, coaccess_cutoff_override = 0.25)
299 | over <- find_overlapping_ccans(CCAN_assigns)
300 | expect_is(over, "data.frame")
301 | expect_equal(ncol(over), 2)
302 | skip_on_bioc()
303 | expect_equal(nrow(over), 98)
304 | over <- find_overlapping_ccans(CCAN_assigns, min_overlap = 3000000)
305 | expect_equal(nrow(over), 2)
306 | })
307 |
308 | #### activity scores ####
309 |
310 | input_cds <- make_atac_cds(cicero_data, binarize=TRUE)
311 | input_cds <- detectGenes(input_cds, min_expr = .1)
312 |
313 | data(gene_annotation_sample)
314 | gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)]
315 | names(gene_annotation_sub)[4] <- "gene"
316 |
317 | input_cds <- suppressWarnings(annotate_cds_by_site(input_cds,
318 | gene_annotation_sub))
319 | unnorm_ga <- build_gene_activity_matrix(input_cds, cons) # built before exprs() is densified below
320 | test_that("build_gene_activity_matrix works on sparse input", { # named so failures are attributed
321 | expect_equal(c(nrow(unnorm_ga), ncol(unnorm_ga)), c(626, 200))
322 | expect_equal(unnorm_ga[1,1], 1.19, tolerance = 1e-2)
323 | })
324 | exprs(input_cds) <- as.matrix(exprs(input_cds))
325 | unnorm_ga <- build_gene_activity_matrix(input_cds, cons)
326 |
327 | test_that("build_gene_activity_matrix works", {
328 | #skip_on_bioc()
329 | expect_equal(nrow(unnorm_ga), 626)
330 | expect_equal(ncol(unnorm_ga), 200)
331 | expect_equal(unnorm_ga[1,1], 1.19, tolerance = 1e-2)
332 | })
333 |
334 | test_that("normalize_gene_activities works", {
335 | #skip_on_bioc()
336 |
337 | num_genes <- pData(input_cds)$num_genes_expressed
338 | names(num_genes) <- row.names(pData(input_cds))
339 |
340 | cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes)
341 | expect_equal(nrow(cicero_gene_activities), 626)
342 | expect_equal(ncol(cicero_gene_activities), 200)
343 | expect_equal(cicero_gene_activities[1,1], 0.0086, tolerance = 1e-4)
344 |
345 | cicero_gene_activities <- normalize_gene_activities(list(unnorm_ga,
346 | unnorm_ga),
347 | num_genes)
348 | expect_is(cicero_gene_activities, "list")
349 | expect_equal(length(cicero_gene_activities), 2)
350 | cicero_gene_activities1 <- cicero_gene_activities[[1]]
351 | cicero_gene_activities2 <- cicero_gene_activities[[2]]
352 | expect_equal(nrow(cicero_gene_activities1), 626)
353 | expect_equal(ncol(cicero_gene_activities1), 200)
354 | expect_equal(cicero_gene_activities1[1,1], 0.0086, tolerance = 1e-4)
355 |
356 | expect_equal(nrow(cicero_gene_activities2), 626)
357 | expect_equal(ncol(cicero_gene_activities2), 200)
358 | expect_equal(cicero_gene_activities2[1,1], 0.0086, tolerance = 1e-4)
359 |
360 |
361 | unnorm_ga <- as.matrix(unnorm_ga)
362 | cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes)
363 | expect_equal(nrow(cicero_gene_activities), 626)
364 | expect_equal(ncol(cicero_gene_activities), 200)
365 | expect_equal(cicero_gene_activities[1,1], 0.0086, tolerance = 1e-4)
366 |
367 | cicero_gene_activities <- normalize_gene_activities(list(unnorm_ga,
368 | unnorm_ga),
369 | num_genes)
370 | expect_is(cicero_gene_activities, "list")
371 | expect_equal(length(cicero_gene_activities), 2)
372 | cicero_gene_activities1 <- cicero_gene_activities[[1]]
373 | cicero_gene_activities2 <- cicero_gene_activities[[2]]
374 | expect_equal(nrow(cicero_gene_activities1), 626)
375 | expect_equal(ncol(cicero_gene_activities1), 200)
376 | expect_equal(cicero_gene_activities1[1,1], 0.0086, tolerance = 1e-4)
377 |
378 | expect_equal(nrow(cicero_gene_activities2), 626)
379 | expect_equal(ncol(cicero_gene_activities2), 200)
380 | expect_equal(cicero_gene_activities2[1,1], 0.0086, tolerance = 1e-4)
381 |
382 | })
383 |
384 |
--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
1 | context("test-utils.R")
2 |
3 |
4 | test_that("make_atac_cds makes a valid cds object", {
5 | #skip_on_bioc()
6 | data("cicero_data")
7 | #### make_atac_cds ####
8 | test_cds <- make_atac_cds(cicero_data)
9 | expect_is(test_cds, "CellDataSet")
10 | expect_equal(nrow(exprs(test_cds)), 6146)
11 | expect_equal(ncol(exprs(test_cds)), 200)
12 | expect_match(row.names(test_cds)[1], "chr18_10025_10225")
13 | expect_match(colnames(test_cds)[1], "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT")
14 | expect_is(exprs(test_cds), "dgCMatrix")
15 | test_cds <-make_atac_cds(cicero_data, binarize=TRUE)
16 | expect_is(test_cds, "CellDataSet")
17 | expect_equal(nrow(exprs(test_cds)), 6146)
18 | expect_equal(ncol(exprs(test_cds)), 200)
19 | expect_match(row.names(test_cds)[1], "chr18_10025_10225")
20 | expect_match(colnames(test_cds)[1], "AGCGATAGAACGAATTCGGCGCAATGACCCTATCCT")
21 | expect_is(exprs(test_cds), "dgCMatrix")
22 | expect_error(test_cds <- make_atac_cds(3),
23 | "Input must be file path, matrix, or data.frame")
24 | test_cds <-make_atac_cds("../cicero_data_sub.txt", binarize=TRUE)
25 | expect_is(test_cds, "CellDataSet")
26 | expect_equal(nrow(exprs(test_cds)), 2149)
27 | expect_equal(ncol(exprs(test_cds)), 7)
28 | expect_match(row.names(test_cds)[1], "chr18_10025_10225")
29 | expect_match(colnames(test_cds)[1], "AGCGATAGGCGCTATGGTGGAATTCAGTCAGGACGT")
30 | expect_is(exprs(test_cds), "dgCMatrix")
31 | })
32 |
33 | #### ranges_for_coords ####
34 |
35 | test_that("ranges_for_coords works", {
36 |   #skip_on_bioc()
37 |   # Fixtures: names only (wn), metadata only (wmd), names plus metadata (wmdn).
38 |   wn <- ranges_for_coords("chr1:2039-30239", with_names = TRUE)
39 |   wmd <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"),
40 |                            meta_data_df = data.frame(dat = c("1", "X")))
41 |   wmdn <- ranges_for_coords(c("chr1:2049-203902", "chrX:489249-1389389"),
42 |                             with_names = TRUE,
43 |                             meta_data_df = data.frame(dat = c("1", "X"),
44 |                                                       stringsAsFactors = FALSE))
45 |   # Strings with "_", ":", "-" separators and thousands commas must all parse.
46 |   expect_is(ranges_for_coords("chr1_2039_30239"), "GRanges")
47 |   expect_is(ranges_for_coords("chr1_random_2039_30239"), "GRanges")
48 |   expect_is(ranges_for_coords("chr1:2039:30239"), "GRanges")
49 |   expect_is(ranges_for_coords("chr1-2039-30239"), "GRanges")
50 |   expect_is(ranges_for_coords("chr1:2,039-30,239"), "GRanges")
51 |   expect_is(ranges_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293")),
52 |             "GRanges")
53 |   expect_is(ranges_for_coords(c("chr1:2,039-30,239", "chrX:28884:101293"),
54 |                               with_names = TRUE), "GRanges")
55 |   expect_is(wn, "GRanges")
56 |   expect_is(wmd, "GRanges")
57 |   expect_is(wmdn, "GRanges")
58 |   expect_match(wn$coord_string, "chr1:2039-30239")
59 |   expect_match(as.character(wmd$dat[2]), "X")
60 |   expect_match(wmdn$coord_string[1], "chr1:2049-203902")
61 |   expect_match(as.character(wmdn$dat[2]), "X")
62 | })
63 |
64 | #### df_for_coords ####
65 |
66 | test_that("df_for_coords works", {
67 |   #skip_on_bioc()
68 |   # Two coordinate strings using different separators.
69 |   two_coords <- c("chr1:2,039-30,239", "chrX:28884:101293")
70 |   expect_is(df_for_coords(two_coords), "data.frame")
71 |   expect_equal(df_for_coords(two_coords)$bp2[1], 30239)
72 |
73 |   # A contig name that itself contains an underscore must still parse.
74 |   random_contig <- "chr1_random_2039_30239"
75 |   expect_is(df_for_coords(c(two_coords, random_contig)), "data.frame")
76 |   expect_equal(df_for_coords(c("chr1:2,039-30,238", "chrX:28884:101293",
77 |                                random_contig))$bp2[3], 30239)
78 | })
79 |
80 | #### annotate_cds_by_site ####
81 |
82 |
83 |
84 | test_that("annotate_cds_by_site works", {
85 |   #skip_on_bioc()
86 |   data("cicero_data")
87 |   test_cds <- make_atac_cds(cicero_data)
88 |
89 |   feat <- data.frame(chr = rep("chr18", 4),
90 |                      bp1 = c(10000, 10800, 50000, 100000),
91 |                      bp2 = c(10700, 11000, 60000, 110000),
92 |                      type = rep(c("Acetylated", "Methylated"), 2),
93 |                      stringsAsFactors = FALSE)
94 |
95 |   # Shared assertions. `single` is the default annotation, `multi` the
96 |   # all=TRUE one; `label_col` names the fData column holding the feature label.
97 |   check_annotation <- function(single, multi, label_col) {
98 |     expect_is(single, "CellDataSet")
99 |     expect_is(multi, "CellDataSet")
100 |     base_fd <- fData(test_cds)
101 |     single_fd <- fData(single)
102 |     multi_fd <- fData(multi)
103 |     expect_equal(nrow(single_fd), nrow(base_fd))
104 |     expect_equal(nrow(multi_fd), nrow(base_fd))
105 |     expect_equal(ncol(single_fd), ncol(base_fd) + 2)
106 |     expect_equal(ncol(multi_fd), ncol(base_fd) + 2)
107 |
108 |     # Row 2 overlaps two features; `multi` reports both, comma-joined.
109 |     expect_equal(single_fd$overlap[2], 201)
110 |     expect_equal(multi_fd$overlap[2], "98,201")
111 |     expect_equal(single_fd[[label_col]][2], "Methylated")
112 |     expect_equal(multi_fd[[label_col]][2], "Acetylated,Methylated")
113 |
114 |     # Row 3 overlaps no feature.
115 |     expect_true(is.na(single_fd$overlap[3]))
116 |     expect_true(is.na(multi_fd$overlap[3]))
117 |     expect_true(is.na(single_fd[[label_col]][3]))
118 |     expect_true(is.na(multi_fd[[label_col]][3]))
119 |   }
120 |
121 |   # Data frame input, verbose.
122 |   single_ann <- annotate_cds_by_site(test_cds, feat, verbose = TRUE)
123 |   multi_ann <- annotate_cds_by_site(test_cds, feat, all=TRUE, verbose = TRUE)
124 |   check_annotation(single_ann, multi_ann, "type")
125 |
126 |   # Data frame input, default verbosity.
127 |   single_ann <- annotate_cds_by_site(test_cds, feat)
128 |   multi_ann <- annotate_cds_by_site(test_cds, feat, all=TRUE)
129 |   check_annotation(single_ann, multi_ann, "type")
130 |
131 |   # File read without header = TRUE: the label column is V4.
132 |   single_ann <- annotate_cds_by_site(test_cds, "../feat.txt", verbose =TRUE)
133 |   multi_ann <- annotate_cds_by_site(test_cds, "../feat.txt", all=TRUE)
134 |   check_annotation(single_ann, multi_ann, "V4")
135 |
136 |   # File read with header = TRUE: the label column is named type.
137 |   single_ann <- annotate_cds_by_site(test_cds, "../feat_head.txt",
138 |                                      header = TRUE)
139 |   multi_ann <- annotate_cds_by_site(test_cds, "../feat_head.txt",
140 |                                     header = TRUE, all=TRUE)
141 |   check_annotation(single_ann, multi_ann, "type")
142 |
143 |   # check tie: the first two features cover the same interval
144 |   feat2 <- data.frame(chr = c(rep("chr18", 4), "chr18_GL456216_random"),
145 |                       bp1 = c(10125, 10125, 50000, 100000, 32820116),
146 |                       bp2 = c(10703, 10703, 60000, 110000, 32820118),
147 |                       type = c("Acetylated", "Methylated",
148 |                                "Acetylated", "Methylated", "Other"),
149 |                       stringsAsFactors = FALSE)
150 |   ann <- annotate_cds_by_site(test_cds, feat2, all=FALSE)
151 |   expect_equal(fData(ann)$type[2], "Acetylated")
152 |   ann <- annotate_cds_by_site(test_cds, feat2, all=FALSE, maxgap = 901)
153 |   expect_equal(fData(ann)$type[3], "Acetylated")
154 |
155 |   # check maxgap = "nearest": no row may be left without a type
156 |   ann <- annotate_cds_by_site(test_cds, feat2, all=FALSE, maxgap = "nearest")
157 |   expect_equal(sum(is.na(fData(ann)$type)), 0)
158 | })
196 |
197 |
198 | #### make_sparse_matrix ####
199 |
200 | test_that("make_sparse_matrix works", {
201 |   #skip_on_bioc()
202 |   i_peaks <- c("chr18_30209631_30210783", "chr18_45820294_45821666",
203 |                "chr18_32820116_32820994")
204 |   j_peaks <- c("chr18_41888433_41890138", "chr18_33038287_33039444",
205 |                "chr18_random_25533921_25534483")
206 |   pairs_df <- data.frame(icol = i_peaks, jcol = j_peaks, xcol = c(1,2,3))
207 |   sm <- make_sparse_matrix(pairs_df, "icol", "jcol", "xcol")
208 |   # Values are looked up by peak name; the third pair is queried with its
209 |   # row/column swapped relative to the input, and an unlisted pair reads 0.
210 |   expect_equal(sm[i_peaks[1], j_peaks[1]], 1)
211 |   expect_equal(sm[i_peaks[2], j_peaks[2]], 2)
212 |   expect_equal(sm[j_peaks[3], i_peaks[3]], 3)
213 |   expect_equal(sm[j_peaks[3], i_peaks[1]], 0)
214 |   expect_error(make_sparse_matrix(pairs_df, "icol", "xcol", "jcol"),
215 |                "x.name column must be numeric")
216 |   expect_error(make_sparse_matrix(pairs_df, "icol", "hannah", "jcol"),
217 |                "i.name, j.name, and x.name must be columns in data")
218 | })
219 |
220 | #### compare_connections ####
221 | # Tests for compare_connections are in test-runCicero.R
222 |
223 | #### find_overlapping_coordinates ####
224 |
225 |
226 |
227 | test_that("find_overlapping_coordinates works", {
228 |   #skip_on_bioc()
229 |   peak_set <- c("chr18_10025_10225", "chr18_10603_11103", "chr18_11604_13986",
230 |                 "chr18_157883_158536", "chr18_217477_218555",
231 |                 "chr18_245734_246234", "chr18_random_245734_246234")
232 |
233 |   # Run the query against peak_set, forwarding optional arguments (maxgap).
234 |   overlaps <- function(query, ...) {
235 |     find_overlapping_coordinates(peak_set, query, ...)
236 |   }
237 |
238 |   # Single query region, with and without maxgap.
239 |   expect_equal(length(overlaps("chr18:10,100-1246234")), 6)
240 |   expect_equal(length(overlaps("chr18_10227_10601")), 0)
241 |   expect_equal(length(overlaps("chr18_10227_10601", maxgap = 1)), 2)
242 |   expect_equal(length(overlaps("chr18_random_10227_245736", maxgap = 1)), 1)
243 |
244 |   # Several query regions at once.
245 |   expect_equal(length(overlaps(c("chr18_10227_10602",
246 |                                  "chr18:11604-246234"))), 5)
247 |   expect_equal(length(overlaps(c("chr18_10226_10602", "chr18:11604-246234"),
248 |                                maxgap = 1)), 6)
249 |
250 |   # Queries on a chromosome absent from peak_set, or past its last peak,
251 |   # must yield nothing but NA.
252 |   expect_true(all(is.na(overlaps(c("chr19_10226_10602", "chr19:11604-246234"),
253 |                                  maxgap = 1))))
254 |   expect_true(all(is.na(overlaps(c("chr18_1022600_1060200",
255 |                                    "chr18:1160400-24623400"),
256 |                                  maxgap = 1))))
257 | })
258 |
259 |
260 |
261 |
262 |
263 |
264 |
--------------------------------------------------------------------------------
/tests/tsne_coord.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cole-trapnell-lab/cicero-release/07f8731b2d2029ab774621b768b20259238ede4d/tests/tsne_coord.Rdata
--------------------------------------------------------------------------------