├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── Annotate.R
    ├── count_polyA.R
    ├── data_util.R
    ├── dataset_merging.R
    ├── differential_usage.R
    ├── plotting_functions.R
    └── split_bams.R
├── README.md
├── inst
    └── extdata
    │   ├── Cxcl12_example.RData
    │   ├── Cycling_vs_resting_fibro_UTR_length_res.RData
    │   ├── TIP_cell_info.RData
    │   ├── TIP_merged_peaks.txt
    │   ├── TIP_vignette_gene_Seurat.RData
    │   ├── Vignette_cellranger_genes_subset.gtf
    │   ├── Vignette_example_TIP_MI_junctions.bed
    │   ├── Vignette_example_TIP_mi.bai
    │   ├── Vignette_example_TIP_mi.bam
    │   ├── Vignette_example_TIP_sham.bam
    │   ├── Vignette_example_TIP_sham.bam.bai
    │   ├── Vignette_example_TIP_sham_junctions.bed
    │   ├── example_TIP_MI_whitelist_barcodes.tsv
    │   └── example_TIP_sham_whitelist_barcodes.tsv
├── man
    ├── AggregatePeakCounts.Rd
    ├── AnnotatePeaksFromGTF.Rd
    ├── BaseComposition.Rd
    ├── CountPeaks.Rd
    ├── DUTest.Rd
    ├── DetectAEU.Rd
    ├── DetectUTRLengthShift.Rd
    ├── FindPeaks.Rd
    ├── GetExpressedPeaks.Rd
    ├── GetRelativeExpression.Rd
    ├── MergePeakCoordinates.Rd
    ├── NewPeakSCE.Rd
    ├── NewPeakSeurat.Rd
    ├── PeakSeuratFromTransfer.Rd
    ├── PlotCoverage.Rd
    ├── PlotRelativeExpressionBox.Rd
    ├── PlotRelativeExpressionTSNE.Rd
    ├── PlotRelativeExpressionUMAP.Rd
    ├── PlotRelativeExpressionViolin.Rd
    ├── PlotUTRLengthShift.Rd
    ├── ReadPeakCounts.Rd
    ├── SelectGenePeaks.Rd
    ├── SplitBam.Rd
    ├── annotate_gr_from_gtf.Rd
    ├── apply_DEXSeq_test_sce.Rd
    ├── apply_DEXSeq_test_seurat.Rd
    ├── do_arrow_plot.Rd
    ├── fit_gaussian.Rd
    ├── geneToGR.Rd
    ├── gene_Labels.Rd
    ├── generate_merged_peak_table.Rd
    ├── generate_self_merged_peaks.Rd
    ├── generate_self_similarity_table.Rd
    ├── generate_similarity_table.Rd
    ├── get_expressed_peaks_sce.Rd
    ├── get_expressed_peaks_seurat.Rd
    ├── get_relative_expression_sce.Rd
    ├── get_relative_expression_seurat.Rd
    ├── make_exons.Rd
    ├── make_reference.Rd
    ├── merge_bam_coverage.Rd
    ├── relative_location.Rd
    └── rle_to_WIG.Rd
└── vignettes
    ├── Cxcl12_coverage_annotated.png
    ├── DimPlot.png
    ├── PlotCoverage_CXCL12.png
    ├── PlotRelativeExpressionBox.png
    ├── PlotRelativeExpressionTSNE.png
    ├── PlotRelativeExpressionViolin.png
    ├── Seurat.FeaturePlot.png
    ├── Sierra_vignette.rmd
    └── UTRLengthPlot.png


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | doc
6 | Meta
7 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: Sierra
 2 | Type: Package
 3 | Title: PolyA counting and differential transcript usage analysis for scRNA-seq data
 4 | Version: 0.99.27
 5 | Authors@R: c(person("Ralph", "Patrick", role = c("aut", "ctb"), email = "r.patrick@victorchang.edu.au"),
 6 |              person("David", "Humphreys", role = c("aut", "cre"), email = "d.humphreys@victorchang.edu.au"),
 7 |              person("Kitty", "Lo", role = c("aut", "ctb"), email = "kitty.lo@gmail.com"))
 8 | Description: Sierra is a pipeline for detecting differential transcript usage from barcoded polyA-enriched scRNA-seq experiments.
 9 | License: file LICENSE
10 | Encoding: UTF-8
11 | LazyData: true
12 | biocViews: Software,Transcriptomics, SingleCell, RNASeq, GeneExpression, Sequencing, Visualization, StatisticalMethod, TranscriptomeVariant 
13 | Imports:
14 |     GenomicRanges,
15 |     GenomicAlignments,
16 |     reshape2,
17 |     S4Vectors,
18 |     plyr,
19 |     dplyr,
20 |     utils,
21 |     foreach,
22 |     parallel,
23 |     doParallel,
24 |     Matrix,
25 |     MLmetrics,
26 |     progress,
27 |     ggplot2,
28 |     cowplot,
29 |     Gviz,
30 |     BiocStyle,
31 |     SingleCellExperiment,
32 |     DEXSeq,
33 |     flock,
34 |     magrittr,
35 |     R.utils,
36 |     BSgenome, BiocGenerics, BiocParallel, Biostrings, GenomeInfoDb,
37 |     GenomicFeatures, IRanges, Rsamtools, SummarizedExperiment,
38 |     data.table, genefilter, rtracklayer, scales
39 | RoxygenNote: 7.1.1
40 | Suggests: 
41 |     Seurat,
42 |     knitr,
43 |     gggenes,
44 |     rmarkdown,
45 |     BSgenome.Mmusculus.UCSC.mm10
46 | VignetteBuilder: knitr
47 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(AggregatePeakCounts)
 4 | export(AnnotatePeaksFromGTF)
 5 | export(BaseComposition)
 6 | export(CountPeaks)
 7 | export(DUTest)
 8 | export(DetectAEU)
 9 | export(DetectUTRLengthShift)
10 | export(FindPeaks)
11 | export(GetExpressedPeaks)
12 | export(GetRelativeExpression)
13 | export(MergePeakCoordinates)
14 | export(NewPeakSCE)
15 | export(NewPeakSeurat)
16 | export(PeakSeuratFromTransfer)
17 | export(PlotCoverage)
18 | export(PlotRelativeExpressionBox)
19 | export(PlotRelativeExpressionTSNE)
20 | export(PlotRelativeExpressionUMAP)
21 | export(PlotRelativeExpressionViolin)
22 | export(PlotUTRLengthShift)
23 | export(ReadPeakCounts)
24 | export(SelectGenePeaks)
25 | export(SplitBam)
26 | export(annotate_gr_from_gtf)
27 | export(geneToGR)
28 | import(GenomicRanges)
29 | import(Gviz)
30 | import(SingleCellExperiment)
31 | import(dplyr)
32 | import(ggplot2)
33 | import(utils)
34 | importFrom(Matrix,writeMM)
35 | importFrom(foreach,"%dopar%")
36 | importFrom(genefilter,plot)
37 | importFrom(magrittr,"%>%")
38 | 


--------------------------------------------------------------------------------
/R/data_util.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ################################################
  4 | #'
  5 | #' Read in peak data saved in MEX format
  6 | #'
  7 | #' Read in peak data saved in MEX format. Files can be in a gzipped (.gz) format. 
  8 | #'
  9 | #' @param data.dir directory where output from CountPeaks is stored
 10 | #' @param mm.file count matrix in MEX format
 11 | #' @param barcodes.file file containing cell barcodes corresponding to columns in the matrix
 12 | #' @param sites.file file containing peak coordinate names corresponding to rows in the matrix
 13 | #' @return a sparseMatrix
 14 | #' @examples
 15 | #' # Following commands can be used to generate a new random sample data set
 16 | #' # barcode_seq <- stringi::stri_rand_strings(12,14,pattern="[ACTG]")
 17 | #' # barcode_seq <- paste0(barcode_seq,"-1")
 18 | #' # Below is hard coded example
 19 | #' 
 20 | #' barcode_seq <- c("TCCCAGTACTGGGC-1", "CCAGAGAAAAACTT-1", "CGATAGGGGTAACA-1", 
 21 | #' "GGCGGATGGAGATT-1", "ATCAGTACATCTAT-1", "TTTCCCGTACCACA-1", "TTGTGTACGGGATG-1", 
 22 | #' "CAGGGCATAGTCTA-1", "GCTCTTTGGCTGAG-1", "AGTCGTATCACTAA-1", "CGGTTGGCTGGTAT-1", 
 23 | #' "TGACCTGGAGCTGC-1")
 24 | #' 
 25 | #' # Note: siteNames could be genes
 26 | #' siteNames <- cbind( paste0("Gene_",letters[1:12]))
 27 | #'                  
 28 | #'  # For this working example set site_names to be peak coordinates                
 29 | #' siteNames <- c("Sash1:10:8722219-8722812:-1", "Sash1:10:8813689-8814157:-1", 
 30 | #'              "Lamp2:X:38419489-38419901:-1", "Lamp2:X:38405042-38405480:-1", 
 31 | #'              "Lamp2:X:38455818-38456298:-1", "Pecam1:11:106654217-106654585:-1", 
 32 | #'              "Ly6e:15:74958936-74959338:1", "Ly6e:15:74956076-74956512:1", 
 33 | #'              "Pnkd:1:74285960-74287456:1", "Pdgfra:5:75197715-75198215:1", 
 34 | #'              "Dlc1:8:36567751-36568049:-1", "Dlc1:8:36568379-36568865:-1")
 35 | #' 
 36 | #' # Randomly generate a matrix that contains a bunch of zeros.
 37 | #' # Columns are cells, rows are peaks (sites)
 38 | #' matrix_A <- matrix(round(rexp(144,rate = 1),digits = 0), nrow = 12,ncol = 12)
 39 | #' matrix_B <- matrix(round(rexp(144,rate = 0.7),digits = 0), nrow = 12,ncol = 12)
 40 | #' matrix_mtx <- matrix_A * matrix_B
 41 | #' matrix_mtx <- Matrix::Matrix(matrix_mtx, sparse=TRUE)
 42 | #' 
 43 | #' # Save example to appropriate named files in temporary location
 44 | #' data.dir <- tempdir()
 45 | #' barcodes.file <- paste0(data.dir,"/barcodes.tsv")
 46 | #' writeLines(barcode_seq, barcodes.file)
 47 | #' mm.file <- paste0(data.dir,"/matrix.mtx")
 48 | #' Matrix::writeMM(matrix_mtx, mm.file)
 49 | #' sites.file <- paste0(data.dir,"/sitenames.tsv")
 50 | #' writeLines(siteNames,sites.file)
 51 | #' 
 52 | #' # Now read in using Sierra ReadPeakCounts by passing just directory name
 53 | #' count.matrix <- Sierra::ReadPeakCounts(data.dir=data.dir)  
 54 | #' 
 55 | #' # Or by passing full length file names
 56 | #' count.matrix <- Sierra::ReadPeakCounts(barcodes.file=barcodes.file, mm.file=mm.file, sites.file=sites.file)   
 57 | #'  
 58 | #'  
 59 | #' @export
 60 | #'
 61 | ReadPeakCounts <- function(data.dir = NULL, 
 62 |                            mm.file = NULL, 
 63 |                            barcodes.file = NULL, 
 64 |                            sites.file = NULL) {
 65 | 
 66 |   ## Read a MEX-format count matrix and label it with barcodes and site names.
 67 |   ##
 68 |   ## data.dir: directory holding matrix.mtx, barcodes.tsv and sitenames.tsv, or
 69 |   ##   their .gz counterparts; when given, the three file arguments are ignored.
 70 |   ## mm.file, barcodes.file, sites.file: explicit paths, all three required
 71 |   ##   when data.dir is NULL; a path ending in .gz is read as gzip.
 72 |   ## Returns the matrix read by Matrix::readMM, with colnames set to the
 73 |   ## barcodes and rownames set to the site names.
 74 | 
 75 |   ## Scalar conditions, so use short-circuiting && / || rather than & / |
 76 |   if (is.null(data.dir) && is.null(mm.file) && is.null(barcodes.file) && is.null(sites.file)) {
 77 |     stop("Please provide either a directory or file names.")
 78 |   }
 79 | 
 80 |   if (!is.null(data.dir)) {
 81 |     ## Use the gzipped trio only when all three expected .gz files exist.
 82 |     ## Counting every .gz file in the directory, as done previously, made the
 83 |     ## decision depend on unrelated compressed files stored alongside.
 84 |     plain.files <- paste0(data.dir, c("/matrix.mtx", "/barcodes.tsv", "/sitenames.tsv"))
 85 |     gz.files <- paste0(plain.files, ".gz")
 86 |     input.files <- if (all(file.exists(gz.files))) gz.files else plain.files
 87 | 
 88 |     mm.file <- input.files[1]
 89 |     barcodes.file <- input.files[2]
 90 |     sites.file <- input.files[3]
 91 |   } else if (is.null(mm.file) || is.null(barcodes.file) || is.null(sites.file)) {
 92 |     stop("No directory provided, but an input file appears to be missing. Please check.")
 93 |   }
 94 | 
 95 |   ## Compression is decided per file from its .gz suffix, so a mixture of
 96 |   ## compressed and uncompressed inputs is handled as well.
 97 |   is.gz <- function(path) endsWith(path, ".gz")
 98 | 
 99 |   ## Read all lines of a plain or gzipped text file. The gzip connection is
100 |   ## closed on exit, even if readLines fails. (The previous code closed
101 |   ## barcodes.con twice and never closed the connection to sites.file.)
102 |   read.lines <- function(path) {
103 |     if (!is.gz(path)) {
104 |       return(readLines(path))
105 |     }
106 |     con <- gzfile(path)
107 |     on.exit(close(con))
108 |     readLines(con)
109 |   }
110 | 
111 |   ## A gzipped matrix is passed to readMM as an unopened gzfile connection and
112 |   ## a plain one as a path, i.e. the same two call forms used before.
113 |   mm.input <- if (is.gz(mm.file)) gzfile(mm.file) else mm.file
114 |   count.mat <- Matrix::readMM(mm.input)
115 | 
116 |   barcodes <- read.lines(barcodes.file)
117 |   peaks <- read.lines(sites.file)
118 | 
119 |   ## Check sizes before labelling so that a mismatch gives a readable error
120 |   if (length(barcodes) != ncol(count.mat) || length(peaks) != nrow(count.mat)) {
121 |     stop("Number of barcodes and/or sites does not match the dimensions of the count matrix.")
122 |   }
123 | 
124 |   colnames(count.mat) <- barcodes
125 |   rownames(count.mat) <- peaks
126 | 
127 |   return(count.mat)
128 | }
129 | 
130 | 
131 | ################################################
132 | #'
133 | #' Create a peak count Seurat object using a gene-level object
134 | #'
135 | #' Creates a new peak Seurat object, importing information on clustering and dimensionality reduction,
136 | #' such as t-SNE and UMAP coordinates, from a Seurat object that has been processed at the gene level.
137 | #'
138 | #' @param peak.data matrix of peak counts
139 | #' @param genes.seurat a Seurat object
140 | #' @param annot.info peak annotation information
141 | #' @param project.name project name passed to the Seurat object creation
142 | #' @param min.cells minimum number of cells for retaining a peak
143 | #' @param min.peaks minimum number of peaks for retaining a cell
144 | #' @param norm.scale.factor scale factor for Seurat NormalizeData function
145 | #' @param filter.gene.mismatch whether to filter out peaks with ambiguous gene mappings
146 | #'
147 | #' @return a new peak-level Seurat object
148 | #'
149 | #' @examples
150 | #'      
151 | #' ## Load example data for two peaks from the Cxcl12 gene
152 | #' extdata_path <- system.file("extdata",package = "Sierra")
153 | #' load(paste0(extdata_path, "/Cxcl12_example.RData"))
154 | #' load(paste0(extdata_path, "/TIP_cell_info.RData"))
155 | #' 
156 | #' ## Create a Seurat object holding the peak data
157 | #' peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
158 | #'                         annot.info = peak.annotations, 
159 | #'                         cell.idents = tip.populations, 
160 | #'                         tsne.coords = tip.tsne.coordinates,
161 | #'                         min.cells = 0, min.peaks = 0)
162 | #'                         
163 | #' ## Transfer cell identities and embeddings from an existing Seurat object
164 | #' peaks.seurat.transfer <- PeakSeuratFromTransfer(peak.data = peak.counts, 
165 | #'                         genes.seurat = peaks.seurat, 
166 | #'                         annot.info = peak.annotations)
167 | #'      
168 | #'
169 | #' @export
170 | #'
171 | PeakSeuratFromTransfer <- function(peak.data, 
172 |                                    genes.seurat, 
173 |                                    annot.info, 
174 |                                    project.name = "PolyA",
175 |                                    min.cells = 10, 
176 |                                    min.peaks = 200,
177 |                                    norm.scale.factor = 10000,
178 |                                    filter.gene.mismatch = TRUE) {
179 |   ## Build a peak-level Seurat object with NewPeakSeurat from the cells shared
180 |   ## with genes.seurat, then copy across its identities and t-SNE/UMAP embeddings.
181 | 
182 |   ## Seurat is an optional dependency: check it is installed before its version
183 |   if (!requireNamespace("Seurat", quietly = TRUE) || packageVersion("Seurat") < '3.0.0') {
184 |     stop("Seurat 3.0.0 or above is required for this function. Either upgrage or see ?NewPeakSCE")
185 |   }
186 | 
187 |   ## Remove any cells not in the gene-level object
188 |   cells.keep <- intersect(colnames(peak.data), colnames(genes.seurat))
189 |   if (length(cells.keep) == 0) {
190 |     stop("No cells overlapping with the Seurat object - please check that cell barcodes are matching.")
191 |   }
192 | 
193 |   ## drop = FALSE keeps the matrix shape even if a single cell overlaps
194 |   peak.data <- peak.data[, cells.keep, drop = FALSE]
195 | 
196 |   peaks.seurat <- NewPeakSeurat(peak.data = peak.data, 
197 |                                 annot.info = annot.info,
198 |                                 project.name = project.name, 
199 |                                 min.cells = min.cells,
200 |                                 min.peaks = min.peaks, 
201 |                                 norm.scale.factor = norm.scale.factor,
202 |                                 filter.gene.mismatch = filter.gene.mismatch,
203 |                                 verbose = FALSE)
204 | 
205 |   ## Add cluster identities (aligned to the peak object's cells) as "geneLvlID"
206 |   cells.overlap <- intersect(colnames(peaks.seurat), colnames(genes.seurat))
207 |   clusters.overlap <- Seurat::Idents(genes.seurat)[cells.overlap]
208 |   clusters.overlap <- clusters.overlap[colnames(peaks.seurat)]
209 |   peaks.seurat <- Seurat::AddMetaData(object = peaks.seurat, metadata = clusters.overlap, col.name = "geneLvlID")
210 |   Seurat::Idents(peaks.seurat) <- peaks.seurat@meta.data$geneLvlID
211 | 
212 |   ## Copy t-SNE and UMAP embeddings where possible. This is best-effort: any
213 |   ## error while transferring a reduction is reported as "not detected".
214 |   reduction.info <- list(
215 |     tsne = list(key = "tSNE_", added = "t-SNE coordinates added", missing = "No t-SNE coodinates detected"),
216 |     umap = list(key = "UMAP_", added = "UMAP coordinates added", missing = "No UMAP coordinates detected")
217 |   )
218 |   for (reduction in names(reduction.info)) {
219 |     info <- reduction.info[[reduction]]
220 |     tryCatch({
221 |       embeddings <- Seurat::Embeddings(genes.seurat, reduction = reduction)
222 |       embeddings <- embeddings[colnames(peaks.seurat), , drop = FALSE]
223 |       new.embedding <- Seurat::CreateDimReducObject(embeddings = embeddings, key = info$key, assay = "RNA")
224 |       peaks.seurat@reductions[[reduction]] <- new.embedding
225 |       print(info$added)
226 |     }, error = function(err) {
227 |       print(info$missing)
228 |     })
229 |   }
230 | 
231 |   return(peaks.seurat)
232 | }
233 | 
234 | 
235 | 
236 | ################################################
237 | #'
238 | #' Create a new peak-level Seurat object from the peak counts
239 | #'
240 | #' Creates a new peak-level Seurat object from the peak counts and annotation table
241 | #'
242 | #' @param peak.data matrix of peak counts
243 | #' @param annot.info peak annotation information
244 | #' @param project.name project name passed to the Seurat object creation
245 | #' @param cell.idents a list of cell identities (optional)
246 | #' @param tsne.coords a data-frame of t-SNE coordinates (optional)
247 | #' @param umap.coords a data-frame of UMAP coordinates (optional)
248 | #' @param min.cells minimum number of cells for retaining a peak
249 | #' @param min.peaks minimum number of peaks for retaining a cell
250 | #' @param norm.scale.factor scale factor for Seurat NormalizeData function
251 | #' @param filter.gene.mismatch whether to filter out peaks with ambiguous gene mappings
252 | #' @param verbose whether to print output 
253 | #'
254 | #' @return a new peak-level Seurat object
255 | #'
256 | #' @examples
257 | #'                              
258 | #' ## Load example data for two peaks from the Cxcl12 gene
259 | #' extdata_path <- system.file("extdata",package = "Sierra")
260 | #' load(paste0(extdata_path, "/Cxcl12_example.RData"))
261 | #' load(paste0(extdata_path, "/TIP_cell_info.RData"))
262 | #' 
263 | #' ## Create a Seurat object holding the peak data
264 | #' peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
265 | #'                         annot.info = peak.annotations, 
266 | #'                         cell.idents = tip.populations, 
267 | #'                         tsne.coords = tip.tsne.coordinates,
268 | #'                         min.cells = 0, min.peaks = 0)
269 | #'                              
270 | #' 
271 | #' @export
272 | #'
273 | NewPeakSeurat <- function(peak.data, annot.info, project.name = "PolyA", cell.idents = NULL,
274 |                           tsne.coords = NULL, umap.coords = NULL, min.cells = 10,
275 |                           min.peaks = 200, norm.scale.factor = 10000, 
276 |                           filter.gene.mismatch = TRUE, verbose = TRUE) {
277 |   ## Build a peak-level Seurat object from peak counts and peak annotations.
278 |   ##
279 |   ## Steps performed:
280 |   ##   1. optionally drop peaks whose name-derived gene disagrees with gene_id
281 |   ##   2. keep only peaks present in both peak.data and annot.info
282 |   ##   3. create the Seurat object (min.cells / min.peaks are passed to Seurat)
283 |   ##   4. attach cell identities as meta.data column "CellIdent", if given
284 |   ##   5. assemble the per-peak feature table, including the Gene_part and
285 |   ##      Peak_number IDs prepared for DEXSeq, and store it as @tools$Sierra
286 |   ##   6. log-normalise, then attach t-SNE / UMAP embeddings, if given
287 |   ##
288 |   ## Returns the Seurat object. Stops if Seurat (>= 3.0.0) is unavailable and
289 |   ## warns if annot.info has no pA_motif column. Progress messages are printed
290 |   ## only when verbose is TRUE.
291 | 
292 |   ## Seurat is an optional dependency: check it is installed before its version
293 |   if (!requireNamespace("Seurat", quietly = TRUE) || packageVersion("Seurat") < '3.0.0') {
294 |     stop("Seurat 3.0.0 or above is required for this function. Either upgrage or see ?NewPeakSCE")
295 |   }
296 | 
297 |   if (filter.gene.mismatch) {
298 |     ## Check that gene names according to the peak calling match gene
299 |     ## names according to feature annotation - remove any discrepancies.
300 |     ## The capture group extracts the leading gene part of each peak name.
301 |     peak.gene.names <- sub("(.*).*:.*:.*-.*:.*", "\\1", rownames(annot.info))
302 |     annot.info$peak_assigned_gene <- peak.gene.names
303 |     annot.info$gene_coverage <- annot.info$gene_id
304 | 
305 |     ## A peak passes when it has no annotated gene, or when every gene in its
306 |     ## comma-separated gene_coverage equals the gene taken from the peak name
307 |     gene.checks <- apply(annot.info, 1, function(x) {
308 |       gene.cov <- as.character(x["gene_coverage"])
309 |       if (gene.cov == "") {
310 |         return(TRUE)
311 |       }
312 |       covered.genes <- strsplit(gene.cov, split = ',')[[1]]
313 |       all(covered.genes == as.character(x["peak_assigned_gene"]))
314 |     })
315 |     peaks.keep <- names(gene.checks[which(gene.checks == TRUE)])
316 |     annot.info <- annot.info[peaks.keep, ]
317 |   }
318 | 
319 |   ## Peak names to add to the Seurat object
320 |   annot.peaks <- rownames(annot.info)
321 | 
322 |   ## Keep only peaks that have annotations; drop = FALSE preserves the matrix
323 |   ## shape when a single peak remains
324 |   peaks.use <- intersect(rownames(peak.data), annot.peaks)
325 |   peak.data <- peak.data[peaks.use, , drop = FALSE]
326 | 
327 |   if (verbose) {
328 |     print(paste("Creating Seurat object with", nrow(peak.data), "peaks and", ncol(peak.data), "cells"))
329 |   }
330 | 
331 |   ## Create a Seurat object for polyA counts
332 |   peaks.seurat <- Seurat::CreateSeuratObject(peak.data, min.cells = min.cells,
333 |                                              min.features = min.peaks,
334 |                                              project = project.name)
335 | 
336 |   ## Add cell annotation information if provided
337 |   ## NOTE(review): presumably cell.idents is named by cell barcode so that the
338 |   ## metadata rows can be matched to cells - confirm against callers
339 |   if (!is.null(cell.idents)) {
340 |     cell.data <- data.frame(CellIdent = cell.idents)
341 |     peaks.seurat <- Seurat::AddMetaData(peaks.seurat, cell.data, "CellIdent")
342 |     Seurat::Idents(peaks.seurat) <- peaks.seurat$CellIdent
343 |   }
344 | 
345 |   ## Add peak annotations to the Seurat object, restricted to the peaks still
346 |   ## present after the Seurat object was created
347 |   annot.info <- as.data.frame(annot.info, stringsAsFactors = FALSE)
348 |   peaks.use <- intersect(annot.peaks, rownames(Seurat::GetAssayData(peaks.seurat)))
349 |   annot.info <- annot.info[peaks.use, ]
350 |   feature.names <- c("UTR3", "UTR5", "intron", "exon")
351 |   feature.mat <- annot.info[peaks.use, feature.names]
352 | 
353 |   ## Collapse the feature types flagged "YES" for each peak, e.g. "UTR3;exon"
354 |   features.collapsed <- apply(feature.mat, 1, function(x) {
355 |     paste(feature.names[which(x == "YES")], collapse = ";")})
356 | 
357 |   feature.mat$FeaturesCollapsed <- features.collapsed
358 |   feature.mat$Gene_name <- annot.info$gene_id
359 |   feature.mat$start <- annot.info$start
360 |   feature.mat$end <- annot.info$end
361 |   feature.mat$chr <- annot.info$seqnames
362 |   feature.mat$strand <- annot.info$strand
363 | 
364 |   if (!is.null(annot.info$pA_motif)) {
365 |     feature.mat$pA_motif <- annot.info$pA_motif
366 |     feature.mat$pA_stretch <- annot.info$pA_stretch
367 |   } else {
368 |     warning("Motif information not found in annotation data - some Sierra functions will be unavailable.")
369 |   }
370 | 
371 |   feature.mat$Junctions <- annot.info$Junctions
372 | 
373 |   ## Add additional peak IDs for input to DEXSeq. Within each gene, peaks are
374 |   ## numbered 1..n by increasing start coordinate (ties keep table order).
375 |   ## Working on row indices per gene avoids the repeated subset() / rbind() of
376 |   ## the earlier loop, which also built unused strings from a gene_id column
377 |   ## that feature.mat does not have.
378 |   if (verbose) {
379 |     print("Preparing feature table for DEXSeq")
380 |   }
381 |   gene.names <- as.character(feature.mat$Gene_name)
382 |   peak.index <- integer(nrow(feature.mat))
383 |   for (gene.rows in split(seq_len(nrow(feature.mat)), gene.names)) {
384 |     start.order <- order(feature.mat$start[gene.rows], decreasing = FALSE)
385 |     peak.index[gene.rows[start.order]] <- seq_along(gene.rows)
386 |   }
387 | 
388 |   feature.mat$Gene_part <- paste0(feature.mat$Gene_name, ":", peak.index)
389 |   feature.mat$Peak_number <- paste0("Peak", peak.index)
390 | 
391 |   ## Store the feature table in the Seurat @tools slot under the name "Sierra"
392 |   peaks.seurat@tools <- list(Sierra = feature.mat)
393 | 
394 |   ## Log-normalise the peak counts
395 |   peaks.seurat <- Seurat::NormalizeData(object = peaks.seurat, normalization.method = "LogNormalize",
396 |                               scale.factor = norm.scale.factor)
397 | 
398 |   ## Add t-SNE coordinates to peak count object, with rows selected and
399 |   ## ordered to match the cells of the Seurat object
400 |   if (!is.null(tsne.coords)) {
401 |     tsne.coords <- tsne.coords[colnames(peaks.seurat), , drop = FALSE]
402 |     new.embedding <- Seurat::CreateDimReducObject(embeddings = tsne.coords, key = "tSNE_", assay = "RNA")
403 |     peaks.seurat@reductions$tsne <- new.embedding
404 |     if (verbose) print("t-SNE coordinates added")
405 |   } else {
406 |     if (verbose) print("No t-SNE coodinates included")
407 |   }
408 | 
409 |   ## Add UMAP coordinates to peak count object, with rows selected and
410 |   ## ordered to match the cells of the Seurat object
411 |   if (!is.null(umap.coords)) {
412 |     umap.coords <- umap.coords[colnames(peaks.seurat), , drop = FALSE]
413 |     new.embedding <- Seurat::CreateDimReducObject(embeddings = umap.coords, key = "UMAP_", assay = "RNA")
414 |     peaks.seurat@reductions$umap <- new.embedding
415 |     if (verbose) print("UMAP coordinates added")
416 |   } else {
417 |     if (verbose) print("No UMAP coordinates included")
418 |   }
419 | 
420 |   return(peaks.seurat)
421 | }
422 | 
423 | 
424 | 
425 | ################################################
426 | #'
427 | #' Create a new peak-counts single-cell experiment object from the peak counts
428 | #'
429 | #' Creates a new peak-counts single-cell experiment object from the peak counts and annotation table
430 | #'
431 | #' @param peak.data matrix of peak counts
432 | #' @param annot.info peak annotation information
433 | #' @param cell.idents named list of cell identities to be used for DU analysis
434 | #' @param tsne.coords data-frame of t-SNE coordinates. Rownames should correspond to cell names.
435 | #' @param umap.coords data-frame of UMAP coordinates. Rownames should correspond to cell names.
436 | #' @param min.cells minimum number of cells for retaining a peak
437 | #' @param min.peaks minimum number of peaks for retaining a cell
438 | #' @param norm.scale.factor scale factor for log normalisation  function
439 | #' @param filter.gene.mismatch whether to filter out peaks with ambiguous gene mappings
440 | #' @param verbose whether to print output 
441 | #'
442 | #' @return a new peak-level SCE object
443 | #'
444 | #' @examples
445 | #' 
446 | #' 
447 | #'  ## Load example data for two peaks from the Cxcl12 gene
448 | #' extdata_path <- system.file("extdata",package = "Sierra")
449 | #' load(paste0(extdata_path, "/Cxcl12_example.RData"))
450 | #' load(paste0(extdata_path, "/TIP_cell_info.RData"))
451 | #' 
452 | #' ## Create an SCE object holding the peak data
453 | #' peaks.sce <- NewPeakSCE(peak.data = peak.counts, 
454 | #'                         annot.info = peak.annotations, 
455 | #'                         cell.idents = tip.populations, 
456 | #'                         tsne.coords = tip.tsne.coordinates,
457 | #'                         min.cells = 0, min.peaks = 0)
458 | #' @export
459 | #'
460 | #' @import SingleCellExperiment
461 | #'
462 | NewPeakSCE <- function(peak.data, annot.info, cell.idents = NULL, 
463 |                        tsne.coords = NULL, umap.coords = NULL,
464 |                        min.cells = 10, min.peaks = 200, norm.scale.factor = 10000, 
465 |                        filter.gene.mismatch = TRUE, verbose = TRUE) {
466 | 
467 |   ## Check that peak.data is of dgCMatrix format
468 |   if ( class(peak.data) != "dgcMatrix" )
469 |     peak.data <- as(peak.data, "dgCMatrix")
470 |   
471 |   if (filter.gene.mismatch) {
472 |     ## Check that gene names according to the peak calling match gene
473 |     ## names according to feature annotation - remove any discrepancies
474 |     peak.gene.names = sub("(.*).*:.*:.*-.*:.*", "\\1", rownames(annot.info))
475 |     annot.info$peak_assigned_gene <- peak.gene.names
476 |     annot.info$gene_coverage = annot.info$gene_id
477 |     
478 |     #assigned.genes = strsplit(annot.info$gene_id[1], split = ',')[[1]]
479 |     #annot.info <- subset(annot.info, seqnames =="M")
480 |     gene.checks = apply(annot.info, 1, function(x) {
481 |       peak.gene = as.character(x["peak_assigned_gene"])
482 |       
483 |       gene.cov <- as.character(x["gene_coverage"])
484 |       
485 |       if (gene.cov != "") {
486 |         gene.cov = strsplit(as.character(x["gene_coverage"]), split = ',')[[1]]
487 |         if (length(gene.cov) > 1) {
488 |           sum.diff <- sum(gene.cov != peak.gene)
489 |         } else {
490 |           sum.diff <- ifelse(gene.cov == peak.gene, 0, 1)
491 |         }
492 |       } else {
493 |         sum.diff <- 0
494 |       }
495 |       #print(x)
496 |       if (sum.diff > 0) {
497 |         return(FALSE)
498 |       } else {
499 |         return(TRUE)
500 |       }
501 |     })
502 |     peaks.keep = names(gene.checks[which(gene.checks == TRUE)])
503 |     annot.info = annot.info[peaks.keep, ]
504 |   }
505 | 
506 |   ## Read in annotations to add to the SCE object
507 |   annot.peaks = rownames(annot.info)
508 | 
509 |   ## Check if there are annotations for peaks
510 |   peaks.use = intersect(rownames(peak.data), annot.peaks)
511 |   peak.data = peak.data[peaks.use, ]
512 | 
513 |   if(verbose) print(paste("Creating SCE object with", nrow(peak.data), "peaks and", ncol(peak.data), "cells"))
514 | 
515 |   ## filter peaks and cells
516 |   #rows.keep <- which(rowSums(peak.data > 0) >= min.cells)
517 |   nz.row.counts <- tabulate(peak.data@i + 1, nbins = nrow(peak.data))
518 |   peaks.keep <- rownames(peak.data)[which(nz.row.counts >= min.cells)]
519 | 
520 |   nz.col.counts <- diff(peak.data@p)
521 |   cells.keep <- colnames(peak.data)[which(nz.col.counts >= min.peaks)]
522 | 
523 |   ## filter the matrix and corresponding cell identities
524 |   peak.data <- peak.data[peaks.keep, cells.keep]
525 | 
526 |   if (!is.null(cell.idents)) {
527 |     cell.idents <- cell.idents[cells.keep]
528 |   }
529 | 
530 |   ## create a log-normalised matrix
531 |   if(verbose) print("Log-normalising data")
532 |   peak.data.norm <- peak.data
533 |   peak.data.norm@x <- peak.data.norm@x / rep.int(Matrix::colSums(peak.data.norm), diff(peak.data.norm@p))
534 |   peak.data.norm <- peak.data.norm * norm.scale.factor
535 |   peak.data.norm@x <- log(peak.data.norm@x + 1)
536 | 
537 |   dim.reductions.list <- S4Vectors::SimpleList()
538 | 
539 |   ## check if t-SNE/UMAP coordinates have been provided
540 |   if (!is.null(tsne.coords)) {
541 |     tsne.coords <- tsne.coords[cells.keep, ]
542 |     dim.reductions.list[['tsne']] <- tsne.coords
543 |   }
544 |   if (!is.null(umap.coords)) {
545 |     umap.coords <- umap.coords[cells.keep, ]
546 |     dim.reductions.list[['umap']] <- umap.coords
547 |   }
548 | 
549 |   ## Create an SCE object for peak counts
550 |   #peaks.sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = peak.data,
551 |   #                                                                      lnorm_counts = peak.data.norm),
552 |   #                                                        reducedDims = dim.reductions.list)
553 |   
554 |   peaks.sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = peak.data,
555 |                                                                         logcounts = peak.data.norm),
556 |                                                           reducedDims = dim.reductions.list)
557 | 
558 |   ## Add peak annotations to the SCE object
559 |   annot.info = as.data.frame(annot.info, stringsAsFactors = FALSE)
560 | 
561 |   peaks.use = intersect(annot.peaks, rownames(peaks.sce))
562 |   annot.info = annot.info[peaks.use, ]
563 |   feature.names = c("UTR3", "UTR5", "intron", "exon")
564 |   feature.mat = annot.info[peaks.use, feature.names]
565 | 
566 |   features.collapsed = apply(feature.mat, 1, function(x) {
567 |     paste(feature.names[which(x == "YES")], collapse = ";")})
568 | 
569 |   feature.mat$FeaturesCollapsed = features.collapsed
570 |   feature.mat$Gene_name = annot.info$gene_id
571 |   feature.mat$start = annot.info$start
572 |   feature.mat$end = annot.info$end
573 |   feature.mat$chr = annot.info$seqnames
574 |   feature.mat$strand = annot.info$strand
575 | 
576 |   if (!is.null(annot.info$pA_motif)) {
577 |     feature.mat$pA_motif <- annot.info$pA_motif
578 |     feature.mat$pA_stretch <- annot.info$pA_stretch
579 |   } else {
580 |     warning("Motif information not found in annotation data - some Sierra functions will be unavailable.")
581 |   }
582 |   
583 |   feature.mat$Junctions <- annot.info$Junctions
584 | 
585 |   ## Add additional peak IDs for input to DEXSeq
586 |   if (verbose) print("Preparing feature table for DEXSeq")
587 |   gene.set = unique(as.character(feature.mat$Gene_name))
588 |   dexseq.feature.table = c()
589 |   for (this.gene in gene.set) {
590 | 
591 |     ## collect peaks
592 |     peak.subset = subset(feature.mat, Gene_name == this.gene)
593 |     peak.subset = peak.subset[order(peak.subset$start, decreasing = FALSE), ]
594 | 
595 |     transcript.names = paste0('transcripts "', rownames(peak.subset), '"')
596 |     gene.ids = paste0('gene_id "', peak.subset$gene_id, '"')
597 |     exonic.part.numbers = paste0('exonic_part_number "', 1:nrow(peak.subset), '"')
598 |     info.part = paste(transcript.names, exonic.part.numbers, gene.ids, sep = "; ")
599 | 
600 |     dexseq.feature.set = data.frame(Gene_name = peak.subset$Gene_name,
601 |                                     Gene_part = paste0(peak.subset$Gene_name, ":", 1:nrow(peak.subset)),
602 |                                     Peak_number = paste0("Peak", 1:nrow(peak.subset)),
603 |                                     Peak_name = rownames(peak.subset), stringsAsFactors = FALSE)
604 |     rownames(dexseq.feature.set) <- dexseq.feature.set$Peak_name
605 | 
606 |     dexseq.feature.table = rbind(dexseq.feature.table, dexseq.feature.set)
607 |   }
608 |   dexseq.feature.table <- dexseq.feature.table[rownames(feature.mat), ]
609 | 
610 |   feature.mat$Gene_part <- dexseq.feature.table$Gene_part
611 |   feature.mat$Peak_number <- dexseq.feature.table$Peak_number
612 | 
613 |   ## Store the data in the SCE @metadata slot
614 |   peaks.sce@metadata$Sierra <- feature.mat
615 | 
616 |   ## Add cell annotation information
617 |   if (!is.null(cell.idents)) {
618 |     cell.data <- S4Vectors::DataFrame(CellIdent = cell.idents)
619 |     SummarizedExperiment::colData(peaks.sce) <- cell.data
620 |   } else{
621 |     warning("Cell identities not provided. DU testing will not be possible without these")
622 |   }
623 | 
624 |   return(peaks.sce)
625 | }
626 | 
627 | 
628 | ################################################
629 | #'
630 | #' Return peaks associated with a select gene.
631 | #'
632 | #' Looks up the Sierra peak annotation table stored in the object and returns
633 | #' the IDs of the peaks assigned to \code{gene} that are flagged "YES" for at
634 | #' least one of the requested feature types.
635 | #'
636 | #' @param peaks.object Peaks SCE or Seurat object.
637 | #' @param gene Gene name
638 | #' @param feature.type type of genomic features to use
639 | #' @return a character vector of peak IDs (NULL, with a warning, if
640 | #'   \code{peaks.object} is neither a Seurat nor a SingleCellExperiment object)
641 | #' @examples
642 | #' extdata_path <- system.file("extdata",package = "Sierra")
643 | #' load(paste0(extdata_path, "/Cxcl12_example.RData"))
644 | #' load(paste0(extdata_path, "/TIP_cell_info.RData"))
645 | #' 
646 | #' ## Create a Seurat object holding the peak data
647 | #' peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
648 | #'                         annot.info = peak.annotations, 
649 | #'                         cell.idents = tip.populations, 
650 | #'                         tsne.coords = tip.tsne.coordinates,
651 | #'                         min.cells = 0, min.peaks = 0)
652 | #' 
653 | #' peak.list <- SelectGenePeaks(peaks.object =  peaks.seurat ,gene = "Cxcl12")
654 | #' 
655 | #' @export
656 | #'
657 | SelectGenePeaks <- function(peaks.object, gene, feature.type = c("UTR3", "UTR5", "exon", "intron")) {
658 |   ## inherits() copes with multi-element class vectors, where class(x) == "..." inside if() fails
659 |   if (inherits(peaks.object, "Seurat")) {
660 |     annot <- Seurat::Tool(peaks.object, "Sierra")
661 |   } else if (inherits(peaks.object, "SingleCellExperiment")) {
662 |     annot <- peaks.object@metadata$Sierra
663 |   } else {
664 |     warning("peaks.object must be a Seurat or SingleCellExperiment object")
665 |     return(NULL)
666 |   }
667 | 
668 |   ## index directly rather than via subset(), so `gene` can never be masked by a column name
669 |   annot.subset <- annot[which(annot$Gene_name == gene), , drop = FALSE]
670 | 
671 |   ## keep peaks flagged "YES" for at least one requested feature; NA flags count as "not YES"
672 |   is.hit <- as.matrix(annot.subset[, feature.type, drop = FALSE]) == "YES"
673 |   annot.subset <- annot.subset[which(rowSums(is.hit, na.rm = TRUE) >= 1), , drop = FALSE]
674 |   return(rownames(annot.subset))
675 | }
676 | 


--------------------------------------------------------------------------------
/R/split_bams.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #############################################################################
  3 | #' Utility to split a bam file into multiple bam files based on the barcode
  4 | #'
  5 | #' Given a bam file that was processed by CellRanger, SplitBam splits the
  6 | #' bam into multiple bam files, one per cell type listed in cellbc.df.
  7 | #' Bam file needs to have the barcode stored in the "CB" field.
  8 | #'
  9 | #' @param bam CellRanger outputted bam file with the CB field
 10 | #' @param cellbc.df data frame of the cell barcode, needs to have the column names: "celltype" and "cellbc"
 11 | #' @param outdir directory to output the bam files. The bam files will be called [celltype].[geneSymbol].bam ("all" takes the place of the gene symbol when the whole file is split). If NULL no BAM file created.
 12 | #' @param yieldSize number of lines of bam files to load. Default: 1000000
 13 | #' @param gtf_gr gene model genomic ranges. Only used if geneSymbol is defined.
 14 | #' @param geneSymbol Gene symbol. Used to identify the genomic coordinates to extract reads from.
 15 | #' @param gi_ext The number of nucleotides to extend the genomic interval in extracting reads from (default 50).
 16 | #' @param rle_output If TRUE will generate and return rle_list object
 17 | #' @param exportFastqHeader If TRUE will generate a txt output file that has same prefix as bam file containing fastq header IDs
 18 | #' @param genomicRegion Granges object of genomic region to extract. Only used if geneSymbol not defined.
 19 | #' @param bamTags BAM field tag identifiers to extract. Default is c("CB", "UB").
 20 | #' @param what  What BAM fields to copy into new file. Default is c('qname', 'flag', 'rname', 'strand', 'pos')
 21 | #'
 22 | #' @return a rleList of coverage for each cell type
 23 | #'
 24 | #' @examples
 25 | #' library('Sierra')
 26 | #' 
 27 | #' # Example 1 split the entire BAM file for each cell type
 28 | #' \dontrun{
 29 | #' extdata_path <- system.file("extdata",package = "Sierra")
 30 | #' load(paste(extdata_path,"TIP_vignette_gene_Seurat.RData",sep="/"))
 31 | #' cellbc.df <- data.frame(celltype=genes.seurat@active.ident, 
 32 | #'                         cellbc= names(genes.seurat@active.ident))
 33 | #' bamfile <- paste0(extdata_path,"/Vignette_example_TIP_sham.bam")
 34 | #' 
 35 | #' SplitBam(bamfile, cellbc.df)
 36 | #' }
 37 | #'
 38 | #' # Example 2 extract reads that overlap a gene
 39 | #' 
 40 | #' extdata_path <- system.file("extdata",package = "Sierra")
 41 | #' gtf.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
 42 | #' gtf.gr <- rtracklayer::import(gtf.file)
 43 | #' 
 44 | #' load(paste(extdata_path,"TIP_vignette_gene_Seurat.RData",sep="/"))
 45 | #' cellbc.df <- data.frame(celltype=genes.seurat@active.ident, 
 46 | #'                        cellbc= names(genes.seurat@active.ident))
 47 | #'   
 48 | #' # Modify cellbc.df so that the barcodes match what is in the BAM file                     
 49 | #' cellbc.df$cellbc <- sub("(.*)-.*", "\\1", cellbc.df$cellbc)
 50 | #' cellbc.df$cellbc <- paste0(cellbc.df$cellbc, "-1")
 51 | #'                        
 52 | #'                        
 53 | #' bam.file <- paste0(extdata_path,"/Vignette_example_TIP_mi.bam")
 54 | #' outdir <-  tempdir()  # change this to a meaningful location
 55 | #' SplitBam(bam.file, cellbc.df, outdir=outdir, gtf_gr=gtf.gr, geneSymbol="Dnajc19")
 56 | #' 
 57 | #'
 58 | #' @export
 59 | #' 
 60 | SplitBam <- function(bam, cellbc.df, outdir=NULL, yieldSize = 1000000,
 61 |                      gtf_gr = NULL, geneSymbol=NULL, gi_ext = 50,
 62 |                      rle_output=FALSE, exportFastqHeader=FALSE, genomicRegion=NULL,
 63 |                      bamTags=c("CB", "UB"),  what=c('qname', 'flag', 'rname', 'strand', 'pos')) {
 64 |   message("splitting bam file: ", bam)
 65 | 
 66 |   # Three modes: one gene (geneSymbol + gtf_gr), an explicit genomicRegion, or the whole file.
 67 |   gene_strand <- NULL   # set in gene mode only; reads are then filtered to this strand
 68 |   chrom <- NULL         # chromosome whose coverage is kept when rle_output = TRUE
 69 |   file.tag <- "all"     # label embedded in the output file names
 70 | 
 71 |   if (! is.null(geneSymbol)) {
 72 |     # Assumes gtf_gr carries a gene_name metadata column
 73 |     idx <- which(gtf_gr$gene_name == geneSymbol)
 74 |     if (length(idx) == 0) {
 75 |       warning("Could not find gene name. Please check spelling (and case)")
 76 |       return(NULL)
 77 |     }
 78 |     # Match the annotation's chromosome naming style to that of the BAM file
 79 |     sls <- GenomeInfoDb::seqlevelsStyle(Rsamtools::BamFile(bam))
 80 |     GenomeInfoDb::seqlevelsStyle(gtf_gr) <- if ("UCSC" %in% sls) "UCSC" else "NCBI"
 81 | 
 82 |     # Interval spanning every record of the gene, extended by gi_ext on both sides
 83 |     gene_gr <- gtf_gr[idx]
 84 |     start <- min(GenomicRanges::start(gene_gr))
 85 |     end <- max(GenomicRanges::end(gene_gr))
 86 |     chrom <- as.character(GenomicRanges::seqnames(gene_gr))[1]
 87 |     gene_strand <- as.character(GenomicRanges::strand(gene_gr))[1]
 88 |     toExtract_gr <- GenomicRanges::GRanges(seqnames=chrom, strand=gene_strand,
 89 |                       ranges=IRanges::IRanges(start=max(1, start - gi_ext), end=end + gi_ext))
 90 |     param <- Rsamtools::ScanBamParam(tag=bamTags, which = toExtract_gr, what=what)
 91 |     file.tag <- geneSymbol
 92 |   } else if (! is.null(genomicRegion)) {
 93 |     tryCatch({
 94 |       param <- Rsamtools::ScanBamParam(tag=bamTags, which = genomicRegion, what=what)
 95 |       file.tag <- gsub(pattern = ":",replacement = "_",x = as.character(genomicRegion))
 96 |       chrom <- as.character(GenomicRanges::seqnames(genomicRegion))[1]
 97 |     }, error = function(err) {
 98 |       stop(paste0("Problem detected with provided genomic region. Please ensure genomicRegion is a Granges object"))
 99 |     })
100 |   } else {
101 |     param <- Rsamtools::ScanBamParam(tag=bamTags, what=what)
102 |   }
103 | 
104 |   cov_rle <- IRanges::RleList(compress=FALSE)     # Coverage list (i.e wig like). Populated for each cell type
105 |   if (rle_output && is.null(chrom)) {
106 |     # Coverage is reported for one chromosome, which is only known in gene/region mode
107 |     warning("rle_output requires geneSymbol or genomicRegion; coverage will not be returned")
108 |     rle_output <- FALSE
109 |   }
110 | 
111 |   ctypes <- unique(cellbc.df$celltype)
112 |   print(ctypes)
113 |   for(eachtype in ctypes) {
114 |     message("processing cell type ", eachtype)
115 |     aln.per.type <- NULL
116 |     cellbc <- cellbc.df$cellbc[which(cellbc.df$celltype == eachtype)]
117 |     bamfile <- Rsamtools::BamFile(bam, yieldSize=yieldSize)
118 |     open(bamfile)
119 |     # finally= releases the file handle even when reading a chunk fails
120 |     tryCatch({
121 |       # read in yieldSize number of records per iteration
122 |       while (length(chunk0 <- GenomicAlignments::readGAlignments(bamfile,param=param))) {
123 |         if (! is.null(gene_strand)) {
124 |           # Only want reads that are on the same strand as the gene
125 |           chunk0 <- chunk0[which(as.character(GenomicRanges::strand(chunk0)) == gene_strand)]
126 |         }
127 |         cat("chunk0:", length(chunk0), "length of aln: ", length(aln.per.type), "\n")
128 |         # keep the alignments whose CB tag belongs to this cell type
129 |         keep <- chunk0[which(S4Vectors::mcols(chunk0)$CB %in% cellbc)]
130 |         if (length(aln.per.type) == 0)
131 |           aln.per.type <- keep
132 |         else
133 |           aln.per.type <- c(aln.per.type, keep)
134 |       }
135 |     }, finally = close(bamfile))
136 | 
137 |     if (length(aln.per.type) == 0) {
138 |       message("No data found for ", eachtype)
139 |       next   # nothing to write, and coverage cannot be computed without alignments
140 |     }
141 | 
142 |     out.name <- paste0(eachtype, ".", file.tag)
143 |     if (is.null(outdir)) {
144 |       message(eachtype)
145 |     } else {
146 |       # file.path() inserts the separator that plain concatenation with outdir omitted
147 |       outfile <- file.path(outdir, paste0(out.name, ".bam"))
148 |       message("Writing to ", outfile)
149 |       rtracklayer::export(aln.per.type, Rsamtools::BamFile(outfile))
150 |       if (exportFastqHeader)
151 |         write(as.data.frame(aln.per.type)$qname, file=file.path(outdir, paste0(out.name, ".txt")))
152 |     }
153 |     if (rle_output) {
154 |       cov_rle <- c(cov_rle, GenomicRanges::coverage(aln.per.type)[chrom])
155 |       names(cov_rle)[length(cov_rle)] <- paste0(out.name, ".bam")
156 |     }
157 |   } ## Loop over cell types
158 |   invisible(cov_rle)
159 | }
160 | 
161 | 
162 | 
163 | 
164 | #######################################################################
165 | #' merge_bam_coverage
166 | #' 
167 | #' Computes the read coverage of each BAM file and adds the coverages up.
168 | #' Still experimental: a warning is raised on every call.
169 | #'
170 | #' @param bamfiles : A list of BAM files that are to be merged
171 | #' @return the summed coverage of all files, or NULL if \code{bamfiles} is empty
172 | #'
173 | merge_bam_coverage <- function(bamfiles)
174 | {
175 |   warning("Function is not finished .. expect errors?")
176 | 
177 |   # Running total; it was never assigned before, so return() always failed
178 |   bam_coverage <- NULL
179 |   for(i in bamfiles)
180 |   {
181 |     bf <- Rsamtools::BamFile(i)
182 |     # Coverage of all alignments in this file (the whole BAM is read into memory)
183 |     this_coverage <- GenomicRanges::coverage(GenomicAlignments::readGAlignments(bf))
184 |     # NOTE(review): `+` presumes every file has the same reference sequences - confirm
185 |     if (is.null(bam_coverage))
186 |       bam_coverage <- this_coverage
187 |     else
188 |       bam_coverage <- bam_coverage + this_coverage
189 |   }
190 | 
191 |   return (bam_coverage)
192 | }
193 | 
194 | ##################################################################
195 | #' geneToGR converts a gene symbol to genomic ranges coordinate
196 | #' 
197 | #' Returns one range spanning all annotation records of the gene (both ends inclusive).
198 | #'
199 | #' @param geneSymbol : Gene symbol
200 | #' @param gtf_gr : Granges object of a gtf file
201 | #' @return a one-range GRanges object, or NULL with a warning if the gene is NULL or not found
202 | #' @examples
203 | #'     library('Sierra')
204 | #'     extdata_path <- system.file("extdata",package = "Sierra")
205 | #'     gtf.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
206 | #'     gtf.gr <- rtracklayer::import(gtf.file)
207 | #'     
208 | #'     geneGR  <- geneToGR(geneSymbol= "Dnajc19",gtf_gr=gtf.gr)
209 | #' @export
210 | geneToGR <- function(geneSymbol, gtf_gr)
211 | {
212 |   if (is.null(geneSymbol))
213 |   { warning("No gene symbol provided")   # used to fail later on an undefined `gr`
214 |     return(NULL)
215 |   }
216 |   idx <- which(gtf_gr$gene_name == geneSymbol)   # assumes a gene_name metadata column
217 |   if (length(idx) == 0)
218 |   { warning("Could not find gene name. Please check spelling (and case)")
219 |     return(NULL)
220 |   }
221 |   GenomeInfoDb::seqlevelsStyle(gtf_gr) <- "NCBI"
222 |   # Span of the gene; start/end (not width = end - start) so the last base is included
223 |   gene_gr <- gtf_gr[idx]
224 |   start <- min(GenomicRanges::start(gene_gr))
225 |   end <- max(GenomicRanges::end(gene_gr))
226 |   chrom <- as.character(GenomicRanges::seqnames(gene_gr))[1]   # taken from the first record
227 |   gene_strand <- as.character(GenomicRanges::strand(gene_gr))[1]
228 |   gr <- GenomicRanges::GRanges(seqnames=chrom, ranges=IRanges::IRanges(start=start, end=end), strand=gene_strand)
229 |   return(gr)
230 | }
231 | 
232 | 
233 | ##############################################################3
234 | # Converts a SeqMonk coverage table into a list with one rle per sample column
235 | # in col_idx plus a GRanges object (element "gr") built from columns 2-5
236 | # (chromosome, start, end, strand). The list is returned invisibly.
237 | # Example of how to use function:
238 | # library("data.table")
239 | # endothelial_cov <- fread(file="c:/BAM/Harvey/scpolyA/Porrello_Support_Files/Porrello_Endothelial.F-CycCl_vs_F-Act.wig.txt.gz", sep = "\t", header = TRUE)
240 | # EC_coverage.rle <- seqmonk_to_rle(endothelial_cov, col_idx = 13:28)
241 | seqmonk_to_rle <- function(df, col_idx = 13:28)
242 | {
243 |   # A plain data.frame is indexed the same way whether a data.frame or data.table came in
244 |   df <- as.data.frame(df, stringsAsFactors = FALSE)
245 |   colnames(df)[2:5] <- c("chrom", "start","end", "strand")
246 |   coverage.rle <- list()
247 |   for(i in col_idx)
248 |   { # rle() needs an atomic vector: take the column itself, not a one-column table
249 |     coverage.rle[[length(coverage.rle)+1]] <- rle(as.numeric(df[[i]]))
250 |     print(length(coverage.rle))
251 |   }
252 |   names(coverage.rle) <- colnames(df)[col_idx]
253 |   coverage.rle$gr <- GenomicRanges::makeGRangesFromDataFrame(df[,2:5])
254 |   invisible(coverage.rle)   # previously only the GRanges object was handed back
255 | }
256 | 
257 | ##################################################################
258 | # Reads a SeqMonk tab-delimited export into a list: the probe coordinates as a GRanges
259 | # object (element "gr") plus one rle of numeric values for every column from the 13th on.
260 | seqmonk_file_to_rle <- function(fn)
261 | {
262 |   # Quick scan of file to identify column names
263 |   all_col_names <- colnames(data.table::fread(file=fn, sep = "\t",nrows = 2,header = TRUE))
264 |   # tail(, -12) drops the 12 leading columns and is empty (not 13:12) for a narrower file
265 |   sample_cols <- utils::tail(all_col_names, -12)
266 | 
267 |   # Read in genomic coordinates and generate a genomic range object
268 |   col_to_keep <- c("Chromosome","Start","End","Probe Strand")
269 |   df <- data.table::fread(file=fn, sep = "\t", header = TRUE, select = col_to_keep)
270 | 
271 |   # Pick the columns by name so the result does not depend on the order they come back in
272 |   coords <- data.frame(chrom = df[["Chromosome"]], start = df[["Start"]],
273 |                        end = df[["End"]], strand = df[["Probe Strand"]],
274 |                        stringsAsFactors = FALSE)
275 |   coverage.rle <- list(gr = GenomicRanges::makeGRangesFromDataFrame(coords))
276 | 
277 |   # Now extract all columns as rle objects (the file is re-read once per column)
278 |   loaded <- character(0)   # columns actually stored, so names stay aligned with entries
279 |   for(i in sample_cols)
280 |   {
281 |     print(paste("Starting :",i))
282 |     df <- data.table::fread(file=fn, sep = "\t", header = TRUE, select = i)
283 |     if (! is.null(df))
284 |     { coverage.rle[[length(coverage.rle)+1]] <- rle(as.numeric(unlist(df)))
285 |       loaded <- c(loaded, i)
286 |       print(paste(i , ": complete"))
287 |     }
288 |     else
289 |     { print(paste("NO DATA for:",i))}
290 |   }
291 |   names(coverage.rle) <- c("gr", loaded)
292 |   return(coverage.rle)
293 | }
294 | 
295 | ################################################################3
296 | #' Locate the coverage ranges overlapping a gene (unfinished)
297 | #'
298 | #' Finds which ranges of \code{rle_input$gr} overlap the gene. Extracting the
299 | #' matching coverage values from \code{rle_input} is not implemented yet.
300 | #'
301 | #' @param rle_input   rle input object; its \code{gr} element holds the genomic ranges
302 | #' @param gtf_gr   GTF file as genomics ranges object
303 | #' @param geneSymbol name of gene to interrogate
304 | #' @return invisibly, the index of the last range overlapping the gene
305 | #'   (NULL if the gene is not found)
306 | #' @examples
307 | #' \dontrun{
308 | #' load(file="c:/BAM/scRNA_polyA/FC.RData")   # presumably provides the coverage object FC
309 | #' gtf_file <- "u:/Reference/mm10/cellranger_genes.gtf.gz"
310 | #' gtf_gr <- rtracklayer::import(gtf_file)
311 | #' rle_to_WIG(FC, gtf_gr = gtf_gr)
312 | #' }
313 | #'
314 | rle_to_WIG <- function(rle_input, gtf_gr=NULL, geneSymbol="Dnajc19")
315 | {
316 |   # The former default (gtf_gr=gtf_gr) referred to itself and failed whenever it was used
317 |   if (is.null(gtf_gr))
318 |     stop("gtf_gr must be provided")
319 |   toExtract <- geneToGR(geneSymbol=geneSymbol, gtf_gr)
320 |   if (is.null(toExtract))
321 |     return(invisible(NULL))   # geneToGR warns when it cannot find the gene
322 |   # Search the ranges of the supplied object rather than a global `FC`
323 |   tmp <- GenomicRanges::findOverlaps(rle_input$gr,toExtract)
324 |   idx <- S4Vectors::queryHits(tmp)
325 |   min_idx <- min(idx)
326 |   max_idx <- max(idx)
327 |   # Next need to extract idx coordinates from rle_input
328 |   invisible(max_idx)
329 | }
330 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Sierra
 3 | 
 4 | Sierra is an R package designed for detecting differential transcript usage from polyA-captured single cell (sc)RNA-seq data. Sierra identifies coordinates of read pileups (i.e. peaks) and performs UMI counting, followed by differential usage analysis between defined cell populations. Please read the vignette for a demonstration on how to use this software.
 5 | 
 6 | ## Installation
 7 | 
 8 | To install Sierra on your local machine, open an R session and use the following commands:
 9 | 
10 | ```
11 | install.packages("devtools")
12 | devtools::install_github("VCCRI/Sierra", build = TRUE, build_vignettes = TRUE, build_opts = c("--no-resave-data", "--no-manual"))
13 | ```
 14 | The vignette contains a detailed walk-through of how to use Sierra. The vignette can be accessed from R using:
15 | 
16 | ```
17 | library(Sierra)
18 | browseVignettes("Sierra")
19 | ```
20 | 
21 | ## Alternative installation
22 | 
23 | If you have trouble building the vignette, an alternative is to view it on the [wiki](https://github.com/VCCRI/Sierra/wiki/Sierra-Vignette) and install the R package without it:
24 | 
25 | ```
26 | devtools::install_github("VCCRI/Sierra", build = TRUE)
27 | ```
28 | 
29 | ## Method overview
30 | 
31 | The manuscript describing Sierra, including validation and example applications, is published in [Genome Biology](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02071-7).
32 | 
33 | Briefly, the Sierra pipeline requires as input a BAM file, such as produced by the 10x Genomics CellRanger software, the reference GTF file used for alignment and a BED file of junctions derived from the BAM file as produced by [RegTools](https://regtools.readthedocs.io/en/latest/).
34 | 
35 | 1. Splice-aware peak calling is used to identify read pileups corresponding to potential polyA sites in the dataset.
36 | 
37 | 2. UMI counting is performed against a set of peak coordinates, first unified in the case of multiple experiments. 
38 | 
39 | 3. Peak coordinates are annotated with various features, including the genomic features they fall on.
40 | 
41 | 4. Differential transcript usage (DTU) is evaluated between defined cell populations by applying the differential exon method DEXSeq to test for differences in the relative usage of peaks, with pseudo-bulk profiles of cells used to define replicates.
42 | 
43 | 5. DTU genes can be visualised using read coverage plots, or by plotting the relative expression of peaks. 
44 | 
45 | ## Citation
46 | 
47 | If you find Sierra useful in your research, please cite the following paper:
48 | 
49 | Patrick, R., Humphreys, D.T., Janbandhu, V., Oshlack A., Ho J.W.K., Harvey, R.P. and Lo K.K. Sierra: discovery of differential transcript usage from polyA-captured single-cell RNA-seq data. Genome Biol 21, 167 (2020). https://doi.org/10.1186/s13059-020-02071-7
50 | 
51 | ## Contact
52 | 
53 | Sierra is maintained by Ralph Patrick, David Humphreys and Kitty Lo. For questions or feedback you can contact them at:
54 | 
55 | * Ralph Patrick: ralph.patrick at imb dot uq dot edu dot au
56 | 
57 | * David Humphreys: d.humphreys at victorchang dot edu dot au
58 | 
59 | * Kitty Lo: kitty.lo at gmail dot com
60 | 
61 | ## Additional implementation
62 | 
63 | For a command line version of Sierra that can be used, for example, in a high performance computing environment, see [this implementation](https://github.com/GeertvanGeest/Sierra-commands) by Geert van Geest. 
64 | 
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/inst/extdata/Cxcl12_example.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/Cxcl12_example.RData


--------------------------------------------------------------------------------
/inst/extdata/Cycling_vs_resting_fibro_UTR_length_res.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/Cycling_vs_resting_fibro_UTR_length_res.RData


--------------------------------------------------------------------------------
/inst/extdata/TIP_cell_info.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/TIP_cell_info.RData


--------------------------------------------------------------------------------
/inst/extdata/TIP_merged_peaks.txt:
--------------------------------------------------------------------------------
  1 | Gene	Chr	Strand	Fit.start	Fit.end	polyA_ID	PeakClass	OriginalPeak	DataOrigin	exon.intron
  2 | Lamp2	X	-1	38419492	38419899	Lamp2:X:38419492-38419899:-1	Merged	Lamp2:X:38419492-38419888:-1	TIP-example-Sham	non-juncs
  3 | Lamp2	X	-1	38405044	38405482	Lamp2:X:38405044-38405482:-1	Merged	Lamp2:X:38405044-38405482:-1	TIP-example-Sham	non-juncs
  4 | Lamp2	X	-1	38455814	38456306	Lamp2:X:38455814-38456306:-1	Merged	Lamp2:X:38455824-38456298:-1	TIP-example-Sham	non-juncs
  5 | Sash1	10	-1	8722219	8722809	Sash1:10:8722219-8722809:-1	Merged	Sash1:10:8722219-8722809:-1	TIP-example-Sham	non-juncs
  6 | Sash1	10	-1	8786216	8886070	Sash1:10:8786216-8886070:-1	Merged	Sash1:10:8789351-8886070:-1	TIP-example-Sham	non-juncs
  7 | Sash1	10	-1	8813694	8814144	Sash1:10:8813694-8814144:-1	Merged	Sash1:10:8813700-8814138:-1	TIP-example-Sham	junctions
  8 | Sash1	10	-1	8884995	8885451	Sash1:10:8884995-8885451:-1	Merged	Sash1:10:8884995-8885445:-1	TIP-example-Sham	junctions
  9 | Ly6e	15	1	74958936	74959340	Ly6e:15:74958936-74959340:1	Merged	Ly6e:15:74958936-74959338:1	TIP-example-Sham	non-juncs
 10 | Ly6e	15	1	74956073	74956513	Ly6e:15:74956073-74956513:1	Merged	Ly6e:15:74956075-74956513:1	TIP-example-Sham	non-juncs
 11 | Pecam1	11	-1	106654217	106654578	Pecam1:11:106654217-106654578:-1	Merged	Pecam1:11:106654217-106654578:-1	TIP-example-Sham	non-juncs
 12 | Pecam1	11	-1	106661210	106661630	Pecam1:11:106661210-106661630:-1	Merged	Pecam1:11:106661219-106661609:-1	TIP-example-Sham	non-juncs
 13 | Pecam1	11	-1	106660246	106660702	Pecam1:11:106660246-106660702:-1	Merged	Pecam1:11:106660246-106660702:-1	TIP-example-Sham	non-juncs
 14 | Pnkd	1	1	74285960	74287456	Pnkd:1:74285960-74287456:1	Merged	Pnkd:1:74285960-74287456:1	TIP-example-Sham	non-juncs
 15 | Pnkd	1	1	74353281	74353694	Pnkd:1:74353281-74353694:1	Merged	Pnkd:1:74353289-74353691:1	TIP-example-Sham	non-juncs
 16 | Thy1	9	1	44048153	44048579	Thy1:9:44048153-44048579:1	Merged	Thy1:9:44048153-44048579:1	TIP-example-Sham	non-juncs
 17 | Dlc1	8	-1	36567751	36568043	Dlc1:8:36567751-36568043:-1	Merged	Dlc1:8:36567751-36568041:-1	TIP-example-Sham	non-juncs
 18 | Dlc1	8	-1	36568399	36568849	Dlc1:8:36568399-36568849:-1	Merged	Dlc1:8:36568399-36568849:-1	TIP-example-Sham	non-juncs
 19 | Dlc1	8	-1	36593214	36613672	Dlc1:8:36593214-36613672:-1	Merged	Dlc1:8:36593228-36613672:-1	TIP-example-Sham	non-juncs
 20 | Pdgfra	5	1	75197708	75198215	Pdgfra:5:75197708-75198215:1	Merged	Pdgfra:5:75197717-75198215:1	TIP-example-Sham	non-juncs
 21 | Lrrc58	16	1	37888444	37888858	Lrrc58:16:37888444-37888858:1	Merged	Lrrc58:16:37888449-37888858:1	TIP-example-Sham	non-juncs
 22 | Lrrc58	16	1	37883336	37883588	Lrrc58:16:37883336-37883588:1	Merged	Lrrc58:16:37883336-37883588:1	TIP-example-Sham	non-juncs
 23 | Lrrc58	16	1	37883896	37884199	Lrrc58:16:37883896-37884199:1	Merged	Lrrc58:16:37883896-37884190:1	TIP-example-Sham	non-juncs
 24 | Lrrc58	16	1	37869210	37869714	Lrrc58:16:37869210-37869714:1	Merged	Lrrc58:16:37869210-37869714:1	TIP-example-Sham	non-juncs
 25 | Mast4	13	-1	102905759	102906224	Mast4:13:102905759-102906224:-1	Merged	Mast4:13:102905768-102906224:-1	TIP-example-Sham	non-juncs
 26 | Mast4	13	-1	102732486	102732787	Mast4:13:102732486-102732787:-1	Merged	Mast4:13:102732486-102732787:-1	TIP-example-Sham	non-juncs
 27 | Mast4	13	-1	103018616	103019012	Mast4:13:103018616-103019012:-1	Merged	Mast4:13:103018622-103019000:-1	TIP-example-Sham	non-juncs
 28 | Mast4	13	-1	102852533	102852851	Mast4:13:102852533-102852851:-1	Merged	Mast4:13:102852542-102852842:-1	TIP-example-Sham	non-juncs
 29 | Mast4	13	-1	102964311	102964803	Mast4:13:102964311-102964803:-1	Merged	Mast4:13:102964311-102964803:-1	TIP-example-Sham	non-juncs
 30 | Mast4	13	-1	103260732	103261145	Mast4:13:103260732-103261145:-1	Merged	Mast4:13:103260743-103261145:-1	TIP-example-Sham	non-juncs
 31 | Mast4	13	-1	103132882	103133308	Mast4:13:103132882-103133308:-1	Merged	Mast4:13:103132882-103133290:-1	TIP-example-Sham	non-juncs
 32 | Mast4	13	-1	103104541	103104943	Mast4:13:103104541-103104943:-1	Merged	Mast4:13:103104545-103104935:-1	TIP-example-Sham	non-juncs
 33 | Mast4	13	-1	102981850	102982282	Mast4:13:102981850-102982282:-1	Merged	Mast4:13:102981850-102982282:-1	TIP-example-Sham	non-juncs
 34 | Mast4	13	-1	102977931	102978447	Mast4:13:102977931-102978447:-1	Merged	Mast4:13:102977931-102978447:-1	TIP-example-Sham	non-juncs
 35 | Mast4	13	-1	103023134	103023530	Mast4:13:103023134-103023530:-1	Merged	Mast4:13:103023134-103023530:-1	TIP-example-Sham	non-juncs
 36 | Mast4	13	-1	102940068	102940476	Mast4:13:102940068-102940476:-1	Merged	Mast4:13:102940068-102940476:-1	TIP-example-Sham	non-juncs
 37 | Mast4	13	-1	102896792	102897188	Mast4:13:102896792-102897188:-1	Merged	Mast4:13:102896798-102897170:-1	TIP-example-Sham	non-juncs
 38 | Mast4	13	-1	102976035	102976454	Mast4:13:102976035-102976454:-1	Merged	Mast4:13:102976035-102976437:-1	TIP-example-Sham	non-juncs
 39 | Mast4	13	-1	103292146	103292554	Mast4:13:103292146-103292554:-1	Merged	Mast4:13:103292146-103292554:-1	TIP-example-Sham	non-juncs
 40 | Mast4	13	-1	103002494	103002902	Mast4:13:103002494-103002902:-1	Merged	Mast4:13:103002501-103002897:-1	TIP-example-Sham	non-juncs
 41 | Mast4	13	-1	103094175	103094613	Mast4:13:103094175-103094613:-1	Merged	Mast4:13:103094197-103094587:-1	TIP-example-Sham	non-juncs
 42 | Mast4	13	-1	103024151	103024607	Mast4:13:103024151-103024607:-1	Merged	Mast4:13:103024151-103024607:-1	TIP-example-Sham	non-juncs
 43 | Mast4	13	-1	102875537	102875961	Mast4:13:102875537-102875961:-1	Merged	Mast4:13:102875537-102875945:-1	TIP-example-Sham	non-juncs
 44 | Arhgap10	8	-1	77250366	77250689	Arhgap10:8:77250366-77250689:-1	Merged	Arhgap10:8:77250366-77250689:-1	TIP-example-Sham	non-juncs
 45 | Arhgap10	8	-1	77432739	77517834	Arhgap10:8:77432739-77517834:-1	Merged	Arhgap10:8:77432739-77517834:-1	TIP-example-Sham	non-juncs
 46 | Cd47	16	1	49911286	49911685	Cd47:16:49911286-49911685:1	Merged	Cd47:16:49911286-49911682:1	TIP-example-Sham	non-juncs
 47 | Cd47	16	1	49896389	49911089	Cd47:16:49896389-49911089:1	Merged	Cd47:16:49896389-49911089:1	TIP-example-Sham	non-juncs
 48 | Cd47	16	1	49914619	49915010	Cd47:16:49914619-49915010:1	Merged	Cd47:16:49914627-49915010:1	TIP-example-Sham	non-juncs
 49 | Cd47	16	1	49911894	49912242	Cd47:16:49911894-49912242:1	Merged	Cd47:16:49911894-49912242:1	TIP-example-Sham	non-juncs
 50 | Rgs3	4	1	62702619	62703022	Rgs3:4:62702619-62703022:1	Merged	Rgs3:4:62702620-62703022:1	TIP-example-Sham	non-juncs
 51 | Rgs3	4	1	62625745	62626153	Rgs3:4:62625745-62626153:1	Merged	Rgs3:4:62625745-62626153:1	TIP-example-Sham	non-juncs
 52 | Cxcl12	6	1	117168553	117176146	Cxcl12:6:117168553-117176146:1	Merged	Cxcl12:6:117168553-117176146:1	TIP-example-Sham	non-juncs
 53 | Cxcl12	6	1	117180974	117181367	Cxcl12:6:117180974-117181367:1	Merged	Cxcl12:6:117180974-117181367:1	TIP-example-Sham	non-juncs
 54 | Cxcl12	6	1	117174603	117175050	Cxcl12:6:117174603-117175050:1	Merged	Cxcl12:6:117174606-117175050:1	TIP-example-Sham	junctions
 55 | Kdr	5	-1	75932827	75933118	Kdr:5:75932827-75933118:-1	Merged	Kdr:5:75932827-75933114:-1	TIP-example-Sham	non-juncs
 56 | Kdr	5	-1	75933283	75933685	Kdr:5:75933283-75933685:-1	Merged	Kdr:5:75933283-75933685:-1	TIP-example-Sham	non-juncs
 57 | Lamp2	X	-1	38429865	38430285	Lamp2:X:38429865-38430285:-1	Unique_TIP-example-Sham	Lamp2:X:38429865-38430285:-1	TIP-example-Sham	non-juncs
 58 | Lamp2	X	-1	38421590	38424492	Lamp2:X:38421590-38424492:-1	Unique_TIP-example-Sham	Lamp2:X:38421590-38424492:-1	TIP-example-Sham	non-juncs
 59 | Lamp2	X	-1	38421120	38421540	Lamp2:X:38421120-38421540:-1	Unique_TIP-example-Sham	Lamp2:X:38421120-38421540:-1	TIP-example-Sham	non-juncs
 60 | Sash1	10	-1	8749069	8749465	Sash1:10:8749069-8749465:-1	Unique_TIP-example-Sham	Sash1:10:8749069-8749465:-1	TIP-example-Sham	non-juncs
 61 | Sash1	10	-1	8756580	8757084	Sash1:10:8756580-8757084:-1	Unique_TIP-example-Sham	Sash1:10:8756580-8757084:-1	TIP-example-Sham	non-juncs
 62 | Sash1	10	-1	8762000	8762384	Sash1:10:8762000-8762384:-1	Unique_TIP-example-Sham	Sash1:10:8762000-8762384:-1	TIP-example-Sham	non-juncs
 63 | Sash1	10	-1	8752303	8752735	Sash1:10:8752303-8752735:-1	Unique_TIP-example-Sham	Sash1:10:8752303-8752735:-1	TIP-example-Sham	non-juncs
 64 | Sash1	10	-1	8726857	8727229	Sash1:10:8726857-8727229:-1	Unique_TIP-example-Sham	Sash1:10:8726857-8727229:-1	TIP-example-Sham	non-juncs
 65 | Sash1	10	-1	8766973	8767381	Sash1:10:8766973-8767381:-1	Unique_TIP-example-Sham	Sash1:10:8766973-8767381:-1	TIP-example-Sham	non-juncs
 66 | Sash1	10	-1	8841933	8842323	Sash1:10:8841933-8842323:-1	Unique_TIP-example-Sham	Sash1:10:8841933-8842323:-1	TIP-example-Sham	junctions
 67 | Pecam1	11	-1	106667192	106667612	Pecam1:11:106667192-106667612:-1	Unique_TIP-example-Sham	Pecam1:11:106667192-106667612:-1	TIP-example-Sham	non-juncs
 68 | Pecam1	11	-1	106712387	106712819	Pecam1:11:106712387-106712819:-1	Unique_TIP-example-Sham	Pecam1:11:106712387-106712819:-1	TIP-example-Sham	non-juncs
 69 | Pecam1	11	-1	106662336	106662726	Pecam1:11:106662336-106662726:-1	Unique_TIP-example-Sham	Pecam1:11:106662336-106662726:-1	TIP-example-Sham	non-juncs
 70 | Pecam1	11	-1	106696701	106697193	Pecam1:11:106696701-106697193:-1	Unique_TIP-example-Sham	Pecam1:11:106696701-106697193:-1	TIP-example-Sham	non-juncs
 71 | Pecam1	11	-1	106699602	106700004	Pecam1:11:106699602-106700004:-1	Unique_TIP-example-Sham	Pecam1:11:106699602-106700004:-1	TIP-example-Sham	non-juncs
 72 | Pecam1	11	-1	106680262	106681282	Pecam1:11:106680262-106681282:-1	Unique_TIP-example-Sham	Pecam1:11:106680262-106681282:-1	TIP-example-Sham	non-juncs
 73 | Pecam1	11	-1	106662956	106663364	Pecam1:11:106662956-106663364:-1	Unique_TIP-example-Sham	Pecam1:11:106662956-106663364:-1	TIP-example-Sham	non-juncs
 74 | Pecam1	11	-1	106714376	106714820	Pecam1:11:106714376-106714820:-1	Unique_TIP-example-Sham	Pecam1:11:106714376-106714820:-1	TIP-example-Sham	non-juncs
 75 | Pecam1	11	-1	106686817	106687195	Pecam1:11:106686817-106687195:-1	Unique_TIP-example-Sham	Pecam1:11:106686817-106687195:-1	TIP-example-Sham	non-juncs
 76 | Pnkd	1	1	74287464	74287842	Pnkd:1:74287464-74287842:1	Unique_TIP-example-Sham	Pnkd:1:74287464-74287842:1	TIP-example-Sham	non-juncs
 77 | Thy1	9	1	44046560	44047022	Thy1:9:44046560-44047022:1	Unique_TIP-example-Sham	Thy1:9:44046560-44047022:1	TIP-example-Sham	non-juncs
 78 | Dlc1	8	-1	36745799	36746189	Dlc1:8:36745799-36746189:-1	Unique_TIP-example-Sham	Dlc1:8:36745799-36746189:-1	TIP-example-Sham	non-juncs
 79 | Dlc1	8	-1	36728396	36728828	Dlc1:8:36728396-36728828:-1	Unique_TIP-example-Sham	Dlc1:8:36728396-36728828:-1	TIP-example-Sham	non-juncs
 80 | Dlc1	8	-1	36667954	36668386	Dlc1:8:36667954-36668386:-1	Unique_TIP-example-Sham	Dlc1:8:36667954-36668386:-1	TIP-example-Sham	non-juncs
 81 | Dlc1	8	-1	36902368	36903118	Dlc1:8:36902368-36903118:-1	Unique_TIP-example-Sham	Dlc1:8:36902368-36903118:-1	TIP-example-Sham	non-juncs
 82 | Dlc1	8	-1	36701605	36702073	Dlc1:8:36701605-36702073:-1	Unique_TIP-example-Sham	Dlc1:8:36701605-36702073:-1	TIP-example-Sham	non-juncs
 83 | Dlc1	8	-1	36849117	36849513	Dlc1:8:36849117-36849513:-1	Unique_TIP-example-Sham	Dlc1:8:36849117-36849513:-1	TIP-example-Sham	non-juncs
 84 | Dlc1	8	-1	36580940	36581372	Dlc1:8:36580940-36581372:-1	Unique_TIP-example-Sham	Dlc1:8:36580940-36581372:-1	TIP-example-Sham	non-juncs
 85 | Dlc1	8	-1	36687377	36687815	Dlc1:8:36687377-36687815:-1	Unique_TIP-example-Sham	Dlc1:8:36687377-36687815:-1	TIP-example-Sham	non-juncs
 86 | Dlc1	8	-1	36758787	36759201	Dlc1:8:36758787-36759201:-1	Unique_TIP-example-Sham	Dlc1:8:36758787-36759201:-1	TIP-example-Sham	non-juncs
 87 | Dlc1	8	-1	36690939	36691419	Dlc1:8:36690939-36691419:-1	Unique_TIP-example-Sham	Dlc1:8:36690939-36691419:-1	TIP-example-Sham	non-juncs
 88 | Dlc1	8	-1	36730752	36731082	Dlc1:8:36730752-36731082:-1	Unique_TIP-example-Sham	Dlc1:8:36730752-36731082:-1	TIP-example-Sham	non-juncs
 89 | Dlc1	8	-1	36931702	36932116	Dlc1:8:36931702-36932116:-1	Unique_TIP-example-Sham	Dlc1:8:36931702-36932116:-1	TIP-example-Sham	non-juncs
 90 | Dlc1	8	-1	36644989	36645397	Dlc1:8:36644989-36645397:-1	Unique_TIP-example-Sham	Dlc1:8:36644989-36645397:-1	TIP-example-Sham	non-juncs
 91 | Dlc1	8	-1	36720058	36720454	Dlc1:8:36720058-36720454:-1	Unique_TIP-example-Sham	Dlc1:8:36720058-36720454:-1	TIP-example-Sham	non-juncs
 92 | Dlc1	8	-1	36738886	36739318	Dlc1:8:36738886-36739318:-1	Unique_TIP-example-Sham	Dlc1:8:36738886-36739318:-1	TIP-example-Sham	non-juncs
 93 | Dlc1	8	-1	36902190	36902496	Dlc1:8:36902190-36902496:-1	Unique_TIP-example-Sham	Dlc1:8:36902190-36902496:-1	TIP-example-Sham	non-juncs
 94 | Dlc1	8	-1	36731114	36731564	Dlc1:8:36731114-36731564:-1	Unique_TIP-example-Sham	Dlc1:8:36731114-36731564:-1	TIP-example-Sham	non-juncs
 95 | Dlc1	8	-1	36573599	36574001	Dlc1:8:36573599-36574001:-1	Unique_TIP-example-Sham	Dlc1:8:36573599-36574001:-1	TIP-example-Sham	non-juncs
 96 | Dlc1	8	-1	36649404	36649788	Dlc1:8:36649404-36649788:-1	Unique_TIP-example-Sham	Dlc1:8:36649404-36649788:-1	TIP-example-Sham	non-juncs
 97 | Dlc1	8	-1	36644533	36644995	Dlc1:8:36644533-36644995:-1	Unique_TIP-example-Sham	Dlc1:8:36644533-36644995:-1	TIP-example-Sham	non-juncs
 98 | Dlc1	8	-1	36573063	36573489	Dlc1:8:36573063-36573489:-1	Unique_TIP-example-Sham	Dlc1:8:36573063-36573489:-1	TIP-example-Sham	non-juncs
 99 | Dlc1	8	-1	36633840	36634242	Dlc1:8:36633840-36634242:-1	Unique_TIP-example-Sham	Dlc1:8:36633840-36634242:-1	TIP-example-Sham	non-juncs
100 | Dlc1	8	-1	36722363	36722789	Dlc1:8:36722363-36722789:-1	Unique_TIP-example-Sham	Dlc1:8:36722363-36722789:-1	TIP-example-Sham	non-juncs
101 | Dlc1	8	-1	36680292	36680640	Dlc1:8:36680292-36680640:-1	Unique_TIP-example-Sham	Dlc1:8:36680292-36680640:-1	TIP-example-Sham	non-juncs
102 | Dlc1	8	-1	36755685	36756099	Dlc1:8:36755685-36756099:-1	Unique_TIP-example-Sham	Dlc1:8:36755685-36756099:-1	TIP-example-Sham	non-juncs
103 | Dlc1	8	-1	36713205	36713613	Dlc1:8:36713205-36713613:-1	Unique_TIP-example-Sham	Dlc1:8:36713205-36713613:-1	TIP-example-Sham	non-juncs
104 | Dlc1	8	-1	36631141	36631399	Dlc1:8:36631141-36631399:-1	Unique_TIP-example-Sham	Dlc1:8:36631141-36631399:-1	TIP-example-Sham	non-juncs
105 | Dlc1	8	-1	36901655	36902039	Dlc1:8:36901655-36902039:-1	Unique_TIP-example-Sham	Dlc1:8:36901655-36902039:-1	TIP-example-Sham	non-juncs
106 | Dlc1	8	-1	36936594	36937194	Dlc1:8:36936594-36937194:-1	Unique_TIP-example-Sham	Dlc1:8:36936594-36937194:-1	TIP-example-Sham	non-juncs
107 | Dlc1	8	-1	36917518	36917860	Dlc1:8:36917518-36917860:-1	Unique_TIP-example-Sham	Dlc1:8:36917518-36917860:-1	TIP-example-Sham	non-juncs
108 | Dlc1	8	-1	36749592	36750030	Dlc1:8:36749592-36750030:-1	Unique_TIP-example-Sham	Dlc1:8:36749592-36750030:-1	TIP-example-Sham	non-juncs
109 | Dlc1	8	-1	36762912	36763578	Dlc1:8:36762912-36763578:-1	Unique_TIP-example-Sham	Dlc1:8:36762912-36763578:-1	TIP-example-Sham	non-juncs
110 | Dlc1	8	-1	36741209	36741617	Dlc1:8:36741209-36741617:-1	Unique_TIP-example-Sham	Dlc1:8:36741209-36741617:-1	TIP-example-Sham	non-juncs
111 | Dlc1	8	-1	36635485	36635917	Dlc1:8:36635485-36635917:-1	Unique_TIP-example-Sham	Dlc1:8:36635485-36635917:-1	TIP-example-Sham	non-juncs
112 | Dlc1	8	-1	36792386	36792878	Dlc1:8:36792386-36792878:-1	Unique_TIP-example-Sham	Dlc1:8:36792386-36792878:-1	TIP-example-Sham	non-juncs
113 | Dlc1	8	-1	36879016	36879478	Dlc1:8:36879016-36879478:-1	Unique_TIP-example-Sham	Dlc1:8:36879016-36879478:-1	TIP-example-Sham	non-juncs
114 | Dlc1	8	-1	36662310	36662718	Dlc1:8:36662310-36662718:-1	Unique_TIP-example-Sham	Dlc1:8:36662310-36662718:-1	TIP-example-Sham	non-juncs
115 | Dlc1	8	-1	36774968	36775334	Dlc1:8:36774968-36775334:-1	Unique_TIP-example-Sham	Dlc1:8:36774968-36775334:-1	TIP-example-Sham	non-juncs
116 | Dlc1	8	-1	36811236	36811806	Dlc1:8:36811236-36811806:-1	Unique_TIP-example-Sham	Dlc1:8:36811236-36811806:-1	TIP-example-Sham	non-juncs
117 | Dlc1	8	-1	36928037	36928511	Dlc1:8:36928037-36928511:-1	Unique_TIP-example-Sham	Dlc1:8:36928037-36928511:-1	TIP-example-Sham	non-juncs
118 | Dlc1	8	-1	36594963	36595491	Dlc1:8:36594963-36595491:-1	Unique_TIP-example-Sham	Dlc1:8:36594963-36595491:-1	TIP-example-Sham	junctions
119 | Dlc1	8	-1	36610695	36611145	Dlc1:8:36610695-36611145:-1	Unique_TIP-example-Sham	Dlc1:8:36610695-36611145:-1	TIP-example-Sham	junctions
120 | Pdgfra	5	1	75158897	75159293	Pdgfra:5:75158897-75159293:1	Unique_TIP-example-Sham	Pdgfra:5:75158897-75159293:1	TIP-example-Sham	non-juncs
121 | Pdgfra	5	1	75195294	75195966	Pdgfra:5:75195294-75195966:1	Unique_TIP-example-Sham	Pdgfra:5:75195294-75195966:1	TIP-example-Sham	non-juncs
122 | Pdgfra	5	1	75183070	75183532	Pdgfra:5:75183070-75183532:1	Unique_TIP-example-Sham	Pdgfra:5:75183070-75183532:1	TIP-example-Sham	non-juncs
123 | Pdgfra	5	1	75167082	75167490	Pdgfra:5:75167082-75167490:1	Unique_TIP-example-Sham	Pdgfra:5:75167082-75167490:1	TIP-example-Sham	non-juncs
124 | Pdgfra	5	1	75176944	75177394	Pdgfra:5:75176944-75177394:1	Unique_TIP-example-Sham	Pdgfra:5:75176944-75177394:1	TIP-example-Sham	non-juncs
125 | Lrrc58	16	1	37871486	37871846	Lrrc58:16:37871486-37871846:1	Unique_TIP-example-Sham	Lrrc58:16:37871486-37871846:1	TIP-example-Sham	non-juncs
126 | Mast4	13	-1	103300497	103300887	Mast4:13:103300497-103300887:-1	Unique_TIP-example-Sham	Mast4:13:103300497-103300887:-1	TIP-example-Sham	non-juncs
127 | Mast4	13	-1	103232618	103233026	Mast4:13:103232618-103233026:-1	Unique_TIP-example-Sham	Mast4:13:103232618-103233026:-1	TIP-example-Sham	non-juncs
128 | Mast4	13	-1	103263865	103264273	Mast4:13:103263865-103264273:-1	Unique_TIP-example-Sham	Mast4:13:103263865-103264273:-1	TIP-example-Sham	non-juncs
129 | Mast4	13	-1	103224743	103224965	Mast4:13:103224743-103224965:-1	Unique_TIP-example-Sham	Mast4:13:103224743-103224965:-1	TIP-example-Sham	non-juncs
130 | Mast4	13	-1	102895816	102896218	Mast4:13:102895816-102896218:-1	Unique_TIP-example-Sham	Mast4:13:102895816-102896218:-1	TIP-example-Sham	non-juncs
131 | Mast4	13	-1	103128647	103129073	Mast4:13:103128647-103129073:-1	Unique_TIP-example-Sham	Mast4:13:103128647-103129073:-1	TIP-example-Sham	non-juncs
132 | Mast4	13	-1	102942471	102942777	Mast4:13:102942471-102942777:-1	Unique_TIP-example-Sham	Mast4:13:102942471-102942777:-1	TIP-example-Sham	non-juncs
133 | Mast4	13	-1	103313671	103314079	Mast4:13:103313671-103314079:-1	Unique_TIP-example-Sham	Mast4:13:103313671-103314079:-1	TIP-example-Sham	non-juncs
134 | Mast4	13	-1	102958417	102958753	Mast4:13:102958417-102958753:-1	Unique_TIP-example-Sham	Mast4:13:102958417-102958753:-1	TIP-example-Sham	non-juncs
135 | Mast4	13	-1	102944469	102944847	Mast4:13:102944469-102944847:-1	Unique_TIP-example-Sham	Mast4:13:102944469-102944847:-1	TIP-example-Sham	non-juncs
136 | Mast4	13	-1	103063688	103063970	Mast4:13:103063688-103063970:-1	Unique_TIP-example-Sham	Mast4:13:103063688-103063970:-1	TIP-example-Sham	non-juncs
137 | Mast4	13	-1	102967491	102967881	Mast4:13:102967491-102967881:-1	Unique_TIP-example-Sham	Mast4:13:102967491-102967881:-1	TIP-example-Sham	non-juncs
138 | Mast4	13	-1	102924501	102924831	Mast4:13:102924501-102924831:-1	Unique_TIP-example-Sham	Mast4:13:102924501-102924831:-1	TIP-example-Sham	non-juncs
139 | Mast4	13	-1	102907936	102908332	Mast4:13:102907936-102908332:-1	Unique_TIP-example-Sham	Mast4:13:102907936-102908332:-1	TIP-example-Sham	non-juncs
140 | Mast4	13	-1	103060752	103061112	Mast4:13:103060752-103061112:-1	Unique_TIP-example-Sham	Mast4:13:103060752-103061112:-1	TIP-example-Sham	non-juncs
141 | Mast4	13	-1	102808120	102808546	Mast4:13:102808120-102808546:-1	Unique_TIP-example-Sham	Mast4:13:102808120-102808546:-1	TIP-example-Sham	non-juncs
142 | Mast4	13	-1	102963988	102964306	Mast4:13:102963988-102964306:-1	Unique_TIP-example-Sham	Mast4:13:102963988-102964306:-1	TIP-example-Sham	non-juncs
143 | Mast4	13	-1	102733339	102733783	Mast4:13:102733339-102733783:-1	Unique_TIP-example-Sham	Mast4:13:102733339-102733783:-1	TIP-example-Sham	non-juncs
144 | Mast4	13	-1	103262715	103263093	Mast4:13:103262715-103263093:-1	Unique_TIP-example-Sham	Mast4:13:103262715-103263093:-1	TIP-example-Sham	non-juncs
145 | Mast4	13	-1	102953709	102954411	Mast4:13:102953709-102954411:-1	Unique_TIP-example-Sham	Mast4:13:102953709-102954411:-1	TIP-example-Sham	non-juncs
146 | Mast4	13	-1	102973666	102974080	Mast4:13:102973666-102974080:-1	Unique_TIP-example-Sham	Mast4:13:102973666-102974080:-1	TIP-example-Sham	non-juncs
147 | Mast4	13	-1	102823880	102824258	Mast4:13:102823880-102824258:-1	Unique_TIP-example-Sham	Mast4:13:102823880-102824258:-1	TIP-example-Sham	non-juncs
148 | Mast4	13	-1	103100922	103101426	Mast4:13:103100922-103101426:-1	Unique_TIP-example-Sham	Mast4:13:103100922-103101426:-1	TIP-example-Sham	non-juncs
149 | Mast4	13	-1	102968584	102968962	Mast4:13:102968584-102968962:-1	Unique_TIP-example-Sham	Mast4:13:102968584-102968962:-1	TIP-example-Sham	non-juncs
150 | Mast4	13	-1	102997862	102998210	Mast4:13:102997862-102998210:-1	Unique_TIP-example-Sham	Mast4:13:102997862-102998210:-1	TIP-example-Sham	non-juncs
151 | Mast4	13	-1	102866493	102866889	Mast4:13:102866493-102866889:-1	Unique_TIP-example-Sham	Mast4:13:102866493-102866889:-1	TIP-example-Sham	non-juncs
152 | Mast4	13	-1	103279029	103279437	Mast4:13:103279029-103279437:-1	Unique_TIP-example-Sham	Mast4:13:103279029-103279437:-1	TIP-example-Sham	non-juncs
153 | Mast4	13	-1	102853290	102853710	Mast4:13:102853290-102853710:-1	Unique_TIP-example-Sham	Mast4:13:102853290-102853710:-1	TIP-example-Sham	non-juncs
154 | Mast4	13	-1	102973169	102973541	Mast4:13:102973169-102973541:-1	Unique_TIP-example-Sham	Mast4:13:102973169-102973541:-1	TIP-example-Sham	non-juncs
155 | Mast4	13	-1	103032751	103033285	Mast4:13:103032751-103033285:-1	Unique_TIP-example-Sham	Mast4:13:103032751-103033285:-1	TIP-example-Sham	non-juncs
156 | Mast4	13	-1	102986253	102986817	Mast4:13:102986253-102986817:-1	Unique_TIP-example-Sham	Mast4:13:102986253-102986817:-1	TIP-example-Sham	non-juncs
157 | Mast4	13	-1	103138989	103139379	Mast4:13:103138989-103139379:-1	Unique_TIP-example-Sham	Mast4:13:103138989-103139379:-1	TIP-example-Sham	non-juncs
158 | Mast4	13	-1	102839011	102839449	Mast4:13:102839011-102839449:-1	Unique_TIP-example-Sham	Mast4:13:102839011-102839449:-1	TIP-example-Sham	non-juncs
159 | Mast4	13	-1	102769485	102769959	Mast4:13:102769485-102769959:-1	Unique_TIP-example-Sham	Mast4:13:102769485-102769959:-1	TIP-example-Sham	non-juncs
160 | Mast4	13	-1	103233060	103233456	Mast4:13:103233060-103233456:-1	Unique_TIP-example-Sham	Mast4:13:103233060-103233456:-1	TIP-example-Sham	non-juncs
161 | Mast4	13	-1	102977039	102977471	Mast4:13:102977039-102977471:-1	Unique_TIP-example-Sham	Mast4:13:102977039-102977471:-1	TIP-example-Sham	non-juncs
162 | Mast4	13	-1	102964777	102965191	Mast4:13:102964777-102965191:-1	Unique_TIP-example-Sham	Mast4:13:102964777-102965191:-1	TIP-example-Sham	non-juncs
163 | Mast4	13	-1	102991269	102991671	Mast4:13:102991269-102991671:-1	Unique_TIP-example-Sham	Mast4:13:102991269-102991671:-1	TIP-example-Sham	non-juncs
164 | Mast4	13	-1	102920906	102921260	Mast4:13:102920906-102921260:-1	Unique_TIP-example-Sham	Mast4:13:102920906-102921260:-1	TIP-example-Sham	non-juncs
165 | Mast4	13	-1	103280714	103281152	Mast4:13:103280714-103281152:-1	Unique_TIP-example-Sham	Mast4:13:103280714-103281152:-1	TIP-example-Sham	non-juncs
166 | Mast4	13	-1	102900167	102900581	Mast4:13:102900167-102900581:-1	Unique_TIP-example-Sham	Mast4:13:102900167-102900581:-1	TIP-example-Sham	non-juncs
167 | Mast4	13	-1	103139933	103171547	Mast4:13:103139933-103171547:-1	Unique_TIP-example-Sham	Mast4:13:103139933-103171547:-1	TIP-example-Sham	non-juncs
168 | Mast4	13	-1	103011222	103011672	Mast4:13:103011222-103011672:-1	Unique_TIP-example-Sham	Mast4:13:103011222-103011672:-1	TIP-example-Sham	non-juncs
169 | Mast4	13	-1	102762621	102763059	Mast4:13:102762621-102763059:-1	Unique_TIP-example-Sham	Mast4:13:102762621-102763059:-1	TIP-example-Sham	non-juncs
170 | Mast4	13	-1	102966086	102966500	Mast4:13:102966086-102966500:-1	Unique_TIP-example-Sham	Mast4:13:102966086-102966500:-1	TIP-example-Sham	non-juncs
171 | Mast4	13	-1	103267028	103267388	Mast4:13:103267028-103267388:-1	Unique_TIP-example-Sham	Mast4:13:103267028-103267388:-1	TIP-example-Sham	non-juncs
172 | Mast4	13	-1	102932837	102933203	Mast4:13:102932837-102933203:-1	Unique_TIP-example-Sham	Mast4:13:102932837-102933203:-1	TIP-example-Sham	non-juncs
173 | Mast4	13	-1	103009957	103010653	Mast4:13:103009957-103010653:-1	Unique_TIP-example-Sham	Mast4:13:103009957-103010653:-1	TIP-example-Sham	non-juncs
174 | Mast4	13	-1	102953540	102953828	Mast4:13:102953540-102953828:-1	Unique_TIP-example-Sham	Mast4:13:102953540-102953828:-1	TIP-example-Sham	non-juncs
175 | Mast4	13	-1	102849452	102849866	Mast4:13:102849452-102849866:-1	Unique_TIP-example-Sham	Mast4:13:102849452-102849866:-1	TIP-example-Sham	non-juncs
176 | Mast4	13	-1	102766810	102767194	Mast4:13:102766810-102767194:-1	Unique_TIP-example-Sham	Mast4:13:102766810-102767194:-1	TIP-example-Sham	non-juncs
177 | Mast4	13	-1	103115210	103115600	Mast4:13:103115210-103115600:-1	Unique_TIP-example-Sham	Mast4:13:103115210-103115600:-1	TIP-example-Sham	non-juncs
178 | Mast4	13	-1	103086588	103086984	Mast4:13:103086588-103086984:-1	Unique_TIP-example-Sham	Mast4:13:103086588-103086984:-1	TIP-example-Sham	non-juncs
179 | Mast4	13	-1	103299080	103299620	Mast4:13:103299080-103299620:-1	Unique_TIP-example-Sham	Mast4:13:103299080-103299620:-1	TIP-example-Sham	non-juncs
180 | Mast4	13	-1	103320988	103321702	Mast4:13:103320988-103321702:-1	Unique_TIP-example-Sham	Mast4:13:103320988-103321702:-1	TIP-example-Sham	non-juncs
181 | Mast4	13	-1	102982231	102982627	Mast4:13:102982231-102982627:-1	Unique_TIP-example-Sham	Mast4:13:102982231-102982627:-1	TIP-example-Sham	non-juncs
182 | Mast4	13	-1	103013292	103013658	Mast4:13:103013292-103013658:-1	Unique_TIP-example-Sham	Mast4:13:103013292-103013658:-1	TIP-example-Sham	non-juncs
183 | Mast4	13	-1	102867976	102868480	Mast4:13:102867976-102868480:-1	Unique_TIP-example-Sham	Mast4:13:102867976-102868480:-1	TIP-example-Sham	non-juncs
184 | Mast4	13	-1	102734876	102735260	Mast4:13:102734876-102735260:-1	Unique_TIP-example-Sham	Mast4:13:102734876-102735260:-1	TIP-example-Sham	non-juncs
185 | Mast4	13	-1	103059597	103059981	Mast4:13:103059597-103059981:-1	Unique_TIP-example-Sham	Mast4:13:103059597-103059981:-1	TIP-example-Sham	non-juncs
186 | Mast4	13	-1	102898435	102898825	Mast4:13:102898435-102898825:-1	Unique_TIP-example-Sham	Mast4:13:102898435-102898825:-1	TIP-example-Sham	non-juncs
187 | Mast4	13	-1	102984821	102985217	Mast4:13:102984821-102985217:-1	Unique_TIP-example-Sham	Mast4:13:102984821-102985217:-1	TIP-example-Sham	non-juncs
188 | Mast4	13	-1	103237633	103238263	Mast4:13:103237633-103238263:-1	Unique_TIP-example-Sham	Mast4:13:103237633-103238263:-1	TIP-example-Sham	non-juncs
189 | Mast4	13	-1	102801742	102802180	Mast4:13:102801742-102802180:-1	Unique_TIP-example-Sham	Mast4:13:102801742-102802180:-1	TIP-example-Sham	non-juncs
190 | Mast4	13	-1	102761247	102761625	Mast4:13:102761247-102761625:-1	Unique_TIP-example-Sham	Mast4:13:102761247-102761625:-1	TIP-example-Sham	non-juncs
191 | Mast4	13	-1	103159786	103160212	Mast4:13:103159786-103160212:-1	Unique_TIP-example-Sham	Mast4:13:103159786-103160212:-1	TIP-example-Sham	junctions
192 | Arhgap10	8	-1	77425933	77426353	Arhgap10:8:77425933-77426353:-1	Unique_TIP-example-Sham	Arhgap10:8:77425933-77426353:-1	TIP-example-Sham	non-juncs
193 | Arhgap10	8	-1	77368847	77369237	Arhgap10:8:77368847-77369237:-1	Unique_TIP-example-Sham	Arhgap10:8:77368847-77369237:-1	TIP-example-Sham	non-juncs
194 | Arhgap10	8	-1	77274287	77274713	Arhgap10:8:77274287-77274713:-1	Unique_TIP-example-Sham	Arhgap10:8:77274287-77274713:-1	TIP-example-Sham	non-juncs
195 | Arhgap10	8	-1	77365490	77365880	Arhgap10:8:77365490-77365880:-1	Unique_TIP-example-Sham	Arhgap10:8:77365490-77365880:-1	TIP-example-Sham	non-juncs
196 | Arhgap10	8	-1	77387191	77387581	Arhgap10:8:77387191-77387581:-1	Unique_TIP-example-Sham	Arhgap10:8:77387191-77387581:-1	TIP-example-Sham	non-juncs
197 | Arhgap10	8	-1	77366975	77367335	Arhgap10:8:77366975-77367335:-1	Unique_TIP-example-Sham	Arhgap10:8:77366975-77367335:-1	TIP-example-Sham	non-juncs
198 | Arhgap10	8	-1	77389286	77389532	Arhgap10:8:77389286-77389532:-1	Unique_TIP-example-Sham	Arhgap10:8:77389286-77389532:-1	TIP-example-Sham	non-juncs
199 | Arhgap10	8	-1	77514504	77514894	Arhgap10:8:77514504-77514894:-1	Unique_TIP-example-Sham	Arhgap10:8:77514504-77514894:-1	TIP-example-Sham	junctions
200 | Rgs3	4	1	62673911	62674175	Rgs3:4:62673911-62674175:1	Unique_TIP-example-Sham	Rgs3:4:62673911-62674175:1	TIP-example-Sham	non-juncs
201 | Rgs3	4	1	62628587	62629007	Rgs3:4:62628587-62629007:1	Unique_TIP-example-Sham	Rgs3:4:62628587-62629007:1	TIP-example-Sham	non-juncs
202 | Kdr	5	-1	75962632	75964713	Kdr:5:75962632-75964713:-1	Unique_TIP-example-Sham	Kdr:5:75962632-75964713:-1	TIP-example-Sham	non-juncs
203 | Pdgfra	5	1	75152292	75152574	Pdgfra:5:75152292-75152574:1	Unique_TIP-example-MI	Pdgfra:5:75152292-75152574:1	TIP-example-MI	non-juncs
204 | Dlc1	8	-1	36902520	36902910	Dlc1:8:36902520-36902910:-1	Unique_TIP-example-MI	Dlc1:8:36902520-36902910:-1	TIP-example-MI	non-juncs
205 | Kdr	5	-1	75961761	75964707	Kdr:5:75961761-75964707:-1	Unique_TIP-example-MI	Kdr:5:75961761-75964707:-1	TIP-example-MI	non-juncs
206 | 


--------------------------------------------------------------------------------
/inst/extdata/TIP_vignette_gene_Seurat.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/TIP_vignette_gene_Seurat.RData


--------------------------------------------------------------------------------
/inst/extdata/Vignette_example_TIP_mi.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/Vignette_example_TIP_mi.bai


--------------------------------------------------------------------------------
/inst/extdata/Vignette_example_TIP_mi.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/Vignette_example_TIP_mi.bam


--------------------------------------------------------------------------------
/inst/extdata/Vignette_example_TIP_sham.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/Vignette_example_TIP_sham.bam


--------------------------------------------------------------------------------
/inst/extdata/Vignette_example_TIP_sham.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/inst/extdata/Vignette_example_TIP_sham.bam.bai


--------------------------------------------------------------------------------
/inst/extdata/example_TIP_MI_whitelist_barcodes.tsv:
--------------------------------------------------------------------------------
   1 | AACTCCCCAGGTTTCA-1
   2 | TCTATTGAGTAACCCT-1
   3 | CTAGCCTGTTATGCGT-1
   4 | GTGCTTCAGATTACCC-1
   5 | TCTTTCCTCTACCTGC-1
   6 | GCTGCGAGTAAGGGAA-1
   7 | TCATTTGCATGGAATA-1
   8 | TATGCCCTCGCTTAGA-1
   9 | CTCGGAGCAGCTCCGA-1
  10 | ATTCTACAGTTCCACA-1
  11 | CCTTTCTCAATAACGA-1
  12 | GTTCGGGGTCCGAACC-1
  13 | GGCTCGACAAACAACA-1
  14 | CGCGGTATCTACTCAT-1
  15 | TTAGTTCCAATCGGTT-1
  16 | ATCTACTAGGCATGTG-1
  17 | ACTATCTAGGCTACGA-1
  18 | ATCCACCAGTCCAGGA-1
  19 | AGCCTAACAAAGTGCG-1
  20 | ACTGATGTCCAAGCCG-1
  21 | AGCGGTCAGAATTCCC-1
  22 | AGGTCATGTTTGTTGG-1
  23 | ACTGATGGTTACCAGT-1
  24 | TGAGGGAGTCGATTGT-1
  25 | CATATTCTCCGTTGCT-1
  26 | GGGACCTGTTGGTGGA-1
  27 | CTCTACGCATTCACTT-1
  28 | GCGGGTTAGTTTCCTT-1
  29 | ACGGGCTGTTACTGAC-1
  30 | AACACGTCATATACGC-1
  31 | GTGCGGTCAGTTAACC-1
  32 | TGACGGCCAAGCGATG-1
  33 | GGACGTCCACGTCAGC-1
  34 | TACAGTGAGCACCGTC-1
  35 | TCGGGACAGTAGATGT-1
  36 | GAACCTACATTGGGCC-1
  37 | ACCTTTACACATGGGA-1
  38 | GACTAACCATGGATGG-1
  39 | TCATTACTCATTGCGA-1
  40 | GGCCGATAGTTTGCGT-1
  41 | TAAGCGTGTCTTCAAG-1
  42 | CAGCAGCAGATGTCGG-1
  43 | TCGGTAATCGTCTGCT-1
  44 | CAGCTAAGTTTCGCTC-1
  45 | ACATCAGTCCAAACAC-1
  46 | GCGCAACTCTCGTTTA-1
  47 | AGCGTATTCATACGGT-1
  48 | GAACATCTCACATGCA-1
  49 | AAAGATGCAATGGAAT-1
  50 | TACTTGTAGACCTTTG-1
  51 | GCATACATCTTTACGT-1
  52 | GCTGCTTCAGCCACCA-1
  53 | ACTATCTTCCGCAAGC-1
  54 | CTAAGACGTAGCGATG-1
  55 | TTGCGTCAGTAGATGT-1
  56 | AGATCTGGTCTAACGT-1
  57 | GTACTCCCAGATCTGT-1
  58 | GAATGAAAGTGTACTC-1
  59 | ACGGAGATCCTCTAGC-1
  60 | GGCGACTGTGAGTGAC-1
  61 | TCGCGAGCATACGCCG-1
  62 | CGAGCACGTAAACGCG-1
  63 | CATCCACGTTTGTGTG-1
  64 | ACGATACCACACTGCG-1
  65 | CTTACCGGTAATAGCA-1
  66 | GGGATGATCCGTAGGC-1
  67 | CTAACTTCAGGGTACA-1
  68 | CAAGAAAGTCTGCAAT-1
  69 | TGACGGCCACGGTAGA-1
  70 | CCGTTCACAAGCTGGA-1
  71 | TGACTTTTCCCTGACT-1
  72 | CGTCTACCACATGGGA-1
  73 | TCAGGTACACGAAATA-1
  74 | GGGCATCGTAGGAGTC-1
  75 | CTACACCTCCCATTTA-1
  76 | TGATTTCTCTTACCTA-1
  77 | GCGCAACAGTCGTACT-1
  78 | TGTATTCCACAGCCCA-1
  79 | GAGGTGACAAGGTTTC-1
  80 | GCTCTGTAGTTAGCGG-1
  81 | TACCTTAGTAGATTAG-1
  82 | GGGCATCAGAGAACAG-1
  83 | ATTTCTGGTAGCTGCC-1
  84 | CGAGCACTCTTACCGC-1
  85 | CACTCCACAATCAGAA-1
  86 | ACTGAGTGTCCTGCTT-1
  87 | GGGATGATCAGTCCCT-1
  88 | CCTTCCCAGTGGGCTA-1
  89 | ACGCAGCCAGATTGCT-1
  90 | CTGCTGTCACCTATCC-1
  91 | CATATTCTCAGAGCTT-1
  92 | TGGCTGGGTCACACGC-1
  93 | ACGATACGTATAGGTA-1
  94 | TGGTTAGGTCCGAAGA-1
  95 | CGGACACAGCACCGCT-1
  96 | CGAACATAGCAATCTC-1
  97 | GAAATGAGTGGTACAG-1
  98 | ACGCCAGTCCCATTAT-1
  99 | ACGATGTCAATAACGA-1
 100 | TGCCCATTCTGCGTAA-1
 101 | CCTCAGTGTACATGTC-1
 102 | TAGCCGGTCTTGCATT-1
 103 | CAAGATCCAGTAAGAT-1
 104 | TTGGCAATCGGAAACG-1
 105 | ACTGTCCAGCTGTTCA-1
 106 | CTAGCCTCACGACTCG-1
 107 | CGCTTCAAGATCTGCT-1
 108 | GATGCTACACAACGTT-1
 109 | TTGGCAACACGCGAAA-1
 110 | GACTAACTCTTTCCTC-1
 111 | CCGTACTGTTCTGGTA-1
 112 | CTGATAGGTACCCAAT-1
 113 | TACGGATTCTCATTCA-1
 114 | GTCATTTGTAGGCATG-1
 115 | AGTCTTTTCGTAGATC-1
 116 | CACAAACAGATCCCAT-1
 117 | GTGAAGGTCTAACGGT-1
 118 | TATGCCCAGGGTCTCC-1
 119 | CAACCAACAATACGCT-1
 120 | CTCACACTCCAAGTAC-1
 121 | CATCGAATCCAAAGTC-1
 122 | CGTCAGGGTGCCTGGT-1
 123 | GCTCCTAAGTGAAGAG-1
 124 | AAAGTAGGTATCGCAT-1
 125 | CAGATCACACTATCTT-1
 126 | ACTGATGTCACTCTTA-1
 127 | GCATGCGCAATGAATG-1
 128 | CTCTAATAGACTAGGC-1
 129 | GGACAAGTCATTTGGG-1
 130 | CGGGTCAAGCTACCGC-1
 131 | TGAGAGGGTCCATCCT-1
 132 | GACGTTAGTTTAAGCC-1
 133 | TTATGCTGTCATTAGC-1
 134 | AAGGCAGCAACACCCG-1
 135 | TCGGGACAGGGCATGT-1
 136 | TTTGGTTTCAGCATGT-1
 137 | CCTAGCTCATCACCCT-1
 138 | GCTTCCACAGCGTAAG-1
 139 | GCCTCTACAAGTACCT-1
 140 | CGTTCTGCATATACGC-1
 141 | TCTGGAATCGTCTGCT-1
 142 | TGCGTGGCAATGGAGC-1
 143 | TCAGGTAGTCACCCAG-1
 144 | AGCTCCTCACGGATAG-1
 145 | ACCTTTACATAGGATA-1
 146 | GGCGTGTGTCAGAAGC-1
 147 | GAAACTCGTTAAAGTG-1
 148 | CATTATCAGAAGCCCA-1
 149 | TATCTCAAGCGCTCCA-1
 150 | TACGGATGTTTAGCTG-1
 151 | GTACTCCGTAAGTTCC-1
 152 | ACGAGCCGTAGCGTAG-1
 153 | CCTACCATCGGTTAAC-1
 154 | AGGGTGACAGACGCAA-1
 155 | TTGTAGGTCGATAGAA-1
 156 | TCGTAGACAGTGGGAT-1
 157 | CCTCAGTTCCCTCAGT-1
 158 | GTGAAGGAGTCTTGCA-1
 159 | CTTTGCGAGTCCAGGA-1
 160 | GACCAATCACGGTTTA-1
 161 | ATAGACCCATCATCCC-1
 162 | CGTCAGGGTTGCCTCT-1
 163 | TCAGGTAAGTGTCCCG-1
 164 | GATGAGGCAGGTCCAC-1
 165 | GTGCATATCTGACCTC-1
 166 | TCCCGATAGGGAACGG-1
 167 | CCCAGTTGTCACCCAG-1
 168 | GCAGCCAGTTCCAACA-1
 169 | GTACGTAAGACTAAGT-1
 170 | GGCCGATCATCACAAC-1
 171 | GACACGCAGATCTGCT-1
 172 | TTGGAACCAGGATCGA-1
 173 | CATGGCGCACCTGGTG-1
 174 | AAGGAGCCAATAAGCA-1
 175 | AGTGGGAAGAGGGATA-1
 176 | GCGCAACGTTCTCATT-1
 177 | CATTCGCAGCTGTCTA-1
 178 | GACAGAGGTTGACGTT-1
 179 | CATATGGTCTCATTCA-1
 180 | GCTCTGTCAAATTGCC-1
 181 | CTGATAGTCGCCGTGA-1
 182 | GTTAAGCTCAGTTGAC-1
 183 | GTACTTTAGTCTCAAC-1
 184 | CTCCTAGAGAAACGCC-1
 185 | CTCGTCATCAGTGCAT-1
 186 | CCACTACTCCGCATAA-1
 187 | CTACGTCGTAGCCTCG-1
 188 | TTGGAACAGGAACTGC-1
 189 | TTTGCGCAGCTGAACG-1
 190 | GGTGAAGGTCATGCAT-1
 191 | TAGGCATCAGACGTAG-1
 192 | AACCGCGTCGCCAAAT-1
 193 | CTACACCTCAAGGTAA-1
 194 | GATCAGTTCCTAGAAC-1
 195 | GTCCTCAAGGTACTCT-1
 196 | AACCGCGAGTTAGCGG-1
 197 | CCCATACTCAGGCAAG-1
 198 | CTCCTAGAGCCGATTT-1
 199 | CTGAAGTGTCTGATTG-1
 200 | GCGAGAAAGTGAATTG-1
 201 | CGGACGTTCGCAGGCT-1
 202 | CACAGGCAGCAGACTG-1
 203 | CCTTCGAAGGGAACGG-1
 204 | GAACATCTCTGCGACG-1
 205 | CCTTTCTGTCAGAATA-1
 206 | CCATTCGGTAGCTAAA-1
 207 | GTCTTCGCAGCGATCC-1
 208 | GCACTCTCATTACGAC-1
 209 | ATAGACCTCCTTTACA-1
 210 | GCTGCAGGTAATCACC-1
 211 | GTGTTAGGTTGTTTGG-1
 212 | GACACGCGTCCGAAGA-1
 213 | ATAAGAGAGACAAAGG-1
 214 | GTGTGCGGTCTTCGTC-1
 215 | GATCAGTTCACCGTAA-1
 216 | GACTGCGTCTGGTATG-1
 217 | CCGTTCATCAAGGTAA-1
 218 | AGGGATGCATGGTCAT-1
 219 | TTCGAAGAGGCAAAGA-1
 220 | CTGAAACAGACTCGGA-1
 221 | AACTCTTAGAGTAATC-1
 222 | TACTCATCAGTCCTTC-1
 223 | ACAGCCGGTCTTCTCG-1
 224 | ACACCGGTCCGCGGTA-1
 225 | AATCGGTGTGAAAGAG-1
 226 | AGAGCTTCAGGTTTCA-1
 227 | CTTCTCTAGGCGCTCT-1
 228 | CCCAATCGTAGAGGAA-1
 229 | CGATGTATCCGTCATC-1
 230 | GCATACATCAACCATG-1
 231 | CGCTTCAGTGTCTGAT-1
 232 | AACCATGTCTTTACGT-1
 233 | GATCTAGTCTGTCAAG-1
 234 | TCAGCTCTCTGTCAAG-1
 235 | AACTCTTCATAGACTC-1
 236 | CGTCCATGTTTAGGAA-1
 237 | GATCGTAAGCTAGGCA-1
 238 | CAAGAAATCCTACAGA-1
 239 | AACACGTTCACTCCTG-1
 240 | CGTCAGGCAAGCGAGT-1
 241 | GAAACTCGTTCAGTAC-1
 242 | GCTGCAGGTTGGTAAA-1
 243 | CGTTCTGTCATTGCGA-1
 244 | CATATTCCATATGCTG-1
 245 | TCGCGAGGTGGCGAAT-1
 246 | TAGGCATAGGTGCACA-1
 247 | TATTACCTCATGGTCA-1
 248 | CTAGTGAAGGACTGGT-1
 249 | AGGCCGTGTCTAGTCA-1
 250 | GCACATACACTGTGTA-1
 251 | GGGCACTTCAGCTGGC-1
 252 | AACTCTTGTTGATTGC-1
 253 | GTACTCCCATCCTTGC-1
 254 | AGCTCCTCATCTACGA-1
 255 | CGCGGTATCACGCGGT-1
 256 | TGAGCCGCACAGGTTT-1
 257 | GCCTCTACAGGAATCG-1
 258 | GATCAGTGTTCCACAA-1
 259 | GTTTCTAGTTCGCTAA-1
 260 | AACTCTTGTAGAAGGA-1
 261 | TGCCAAAGTGGTCCGT-1
 262 | GTTACAGCAATCACAC-1
 263 | GCACATAGTCCGTGAC-1
 264 | TGGTTCCCAAGCGATG-1
 265 | TGTGTTTTCGGGAGTA-1
 266 | GCGCAACCAGGGATTG-1
 267 | GATGCTACAGATAATG-1
 268 | ATTCTACTCCTAAGTG-1
 269 | TCGGGACTCCGAACGC-1
 270 | ATGCGATCAATGGACG-1
 271 | GGAAAGCAGCCACTAT-1
 272 | TACTTGTAGAGTAAGG-1
 273 | CGCGTTTAGGACGAAA-1
 274 | GCAGTTAAGCGATTCT-1
 275 | GGATTACAGAGCTTCT-1
 276 | ACTGTCCGTATATGGA-1
 277 | ACCGTAACAAGCCATT-1
 278 | TACACGATCGGATGTT-1
 279 | CATCAAGGTTTCCACC-1
 280 | CCACTACGTCTTCTCG-1
 281 | AAGGCAGGTCGGATCC-1
 282 | AGGCCACAGGACCACA-1
 283 | CCTATTAGTCAAGCGA-1
 284 | CCTTACGGTAGCTTGT-1
 285 | GTGCATAGTCTCATCC-1
 286 | ATAACGCAGGTGTTAA-1
 287 | CCGTGGAGTAGTGAAT-1
 288 | CACCTTGGTCGCTTCT-1
 289 | AAATGCCCATTTCAGG-1
 290 | AGAGCGACACATGGGA-1
 291 | CGCGGTATCTGTCCGT-1
 292 | ATTTCTGCAGTAAGAT-1
 293 | TCAACGAGTTCAGGCC-1
 294 | TTGGCAATCCAATGGT-1
 295 | ACGGGTCAGTAGGTGC-1
 296 | TGGTTCCGTTCCACAA-1
 297 | GCTGCGATCAAAGACA-1
 298 | ACGAGGAGTGAGTGAC-1
 299 | GTTCTCGTCGAACTGT-1
 300 | CGTAGGCTCATGTGGT-1
 301 | CACAGTAAGACCCACC-1
 302 | AAACGGGGTCTCTCTG-1
 303 | TGGACGCGTCTCATCC-1
 304 | AGGGATGCAATGGATA-1
 305 | CGTGTAAAGTAGATGT-1
 306 | TCCCGATCAATGACCT-1
 307 | GTAACGTGTATGAATG-1
 308 | CAGAGAGAGTCATCCA-1
 309 | ATCTACTTCCCTCAGT-1
 310 | TACCTATCACTCGACG-1
 311 | ACATACGAGCAACGGT-1
 312 | ACACCGGTCGGAAATA-1
 313 | TATGCCCAGCACCGTC-1
 314 | GCAAACTGTAGGAGTC-1
 315 | GCTCCTATCTGCCAGG-1
 316 | AGAGCTTAGTGAACAT-1
 317 | GCTCTGTAGGCAGTCA-1
 318 | CGAGAAGCAAGTTCTG-1
 319 | CCACGGACAGGGTACA-1
 320 | ATGCGATGTCCGAGTC-1
 321 | AAGGAGCGTTACCGAT-1
 322 | TCGCGAGTCGTCCAGG-1
 323 | ATCACGATCTCTTGAT-1
 324 | AGTGGGATCACGGTTA-1
 325 | CGTTAGAAGAGTGAGA-1
 326 | TGCTACCAGCAATCTC-1
 327 | AGTGTCAAGTAGATGT-1
 328 | TGAGCCGCAGACGCAA-1
 329 | CAGATCATCACATACG-1
 330 | TTAACTCTCACCGGGT-1
 331 | AGATTGCAGTTCGATC-1
 332 | GGTATTGTCCATGAAC-1
 333 | TCGTAGATCACAGGCC-1
 334 | CGTGAGCTCTCGCTTG-1
 335 | GTCTCGTAGACATAAC-1
 336 | CGGACTGCAACAACCT-1
 337 | GAAGCAGCACTAAGTC-1
 338 | GTATTCTTCCCTCTTT-1
 339 | TCTTTCCAGTATCTCG-1
 340 | GACAGAGGTACTTAGC-1
 341 | ACGTCAAAGCAGCGTA-1
 342 | GGCAATTCAGGAATCG-1
 343 | GATCGCGGTTGTGGCC-1
 344 | GGCAATTTCAAGGCTT-1
 345 | GGCTGGTTCATCACCC-1
 346 | TCAGCAACACTTAAGC-1
 347 | CGTAGCGTCACTTATC-1
 348 | GAACCTAGTGTTTGGT-1
 349 | TGGGAAGGTCGCTTTC-1
 350 | ACGAGGACATGATCCA-1
 351 | CGGTTAACACAAGCCC-1
 352 | CATTATCGTTACCAGT-1
 353 | AGCTCTCAGACGCACA-1
 354 | CAGAATCGTTCATGGT-1
 355 | AGTGTCAGTTCAGACT-1
 356 | CTAGCCTAGTGAATTG-1
 357 | ATCTGCCGTCTCGTTC-1
 358 | CATTATCGTCATGCCG-1
 359 | CTGAAGTAGAAGAAGC-1
 360 | TGTGTTTAGCTTTGGT-1
 361 | GTGAAGGTCTTGGGTA-1
 362 | TCTTTCCAGTTAGGTA-1
 363 | CTCGAGGCAGTCTTCC-1
 364 | AGGTCCGCAATCGGTT-1
 365 | ACGCCAGGTCGCTTTC-1
 366 | CGCTATCCAAGACACG-1
 367 | CTGATAGTCCGGCACA-1
 368 | TTCTCCTCACCCATTC-1
 369 | AACTCAGTCACCTCGT-1
 370 | CATCGAACACCTGGTG-1
 371 | AGGTCATCAAGGCTCC-1
 372 | AGTGTCACATCAGTAC-1
 373 | CCGTTCAGTAGGCATG-1
 374 | TATCTCACATTAACCG-1
 375 | ACCGTAAGTGAGTATA-1
 376 | GTGCGGTGTCAGTGGA-1
 377 | GGGCATCAGGACACCA-1
 378 | GTTCATTGTCGGCTCA-1
 379 | CAGCAGCAGATCTGAA-1
 380 | CCATTCGTCTACTATC-1
 381 | GTTAAGCCAAGCCGCT-1
 382 | ATCCGAAAGAATTCCC-1
 383 | AAGGTTCGTGCGAAAC-1
 384 | GGATGTTTCCGTCATC-1
 385 | GGGACCTGTTATGCGT-1
 386 | CCTTTCTTCTCTGTCG-1
 387 | TGGTTCCGTCCTCCAT-1
 388 | CATGACAAGAGTAATC-1
 389 | AGGGAGTGTTCAGCGC-1
 390 | ACTGAGTAGAGCTTCT-1
 391 | TGCGTGGAGATGTGGC-1
 392 | GGTATTGGTTGGTTTG-1
 393 | CAGCCGAGTCGGCACT-1
 394 | TAAGTGCGTGTGACGA-1
 395 | CGTGAGCGTAGAAGGA-1
 396 | GGGACCTAGGAGTACC-1
 397 | ATCGAGTCAGGAATGC-1
 398 | CCTTCCCCATTGTGCA-1
 399 | TTCTCCTCACATCTTT-1
 400 | TGGGAAGCAAGTCTAC-1
 401 | TTGAACGGTCAGAATA-1
 402 | TTTGGTTTCATGTCCC-1
 403 | GCGACCACAATGAAAC-1
 404 | TTCTCCTGTTGAGTTC-1
 405 | TGCGTGGTCGTCACGG-1
 406 | TTCCCAGGTTGCCTCT-1
 407 | GCTCTGTTCTGCCCTA-1
 408 | TGCTACCCAGAAGCAC-1
 409 | GGAGCAATCTCTTATG-1
 410 | GCATGATGTGGTTTCA-1
 411 | CTGAAGTGTCCCGACA-1
 412 | TACTCATTCTGGTTCC-1
 413 | GTCACAATCACATAGC-1
 414 | GTATTCTGTGCAACTT-1
 415 | ACTGAGTTCATGCTCC-1
 416 | TGTGGTACAAGCGATG-1
 417 | GTACTTTTCTTGAGAC-1
 418 | ATCGAGTTCTTGGGTA-1
 419 | ACATGGTGTTACCAGT-1
 420 | GATGCTACAGTGGGAT-1
 421 | CATCGAACACTTACGA-1
 422 | TAGTTGGCACCACGTG-1
 423 | TGCCCATTCTCTAAGG-1
 424 | AAACGGGGTTACCGAT-1
 425 | TCAATCTAGAGGTTGC-1
 426 | TGCCAAACAGAAGCAC-1
 427 | CTGAAGTAGCTCCCAG-1
 428 | TCTTTCCGTTACCAGT-1
 429 | CGGACTGCATGTAAGA-1
 430 | ACTGAACTCGCAAACT-1
 431 | GTGCATATCTTCCTTC-1
 432 | ACGGGCTTCTGAGTGT-1
 433 | TGTATTCGTCAGAAGC-1
 434 | GACCTGGCACCAGATT-1
 435 | CGTCTACCAAGTCATC-1
 436 | CGATTGAGTACCATCA-1
 437 | CGATTGATCATCATTC-1
 438 | ATCACGAAGCCTATGT-1
 439 | GCTCTGTTCAGCTCTC-1
 440 | TTAGGCATCTTGGGTA-1
 441 | TAAGTGCCAACTGCGC-1
 442 | CATTCGCAGATCACGG-1
 443 | GTAGTCATCTAACTCT-1
 444 | TGATTTCAGATTACCC-1
 445 | CATCGGGCATGACGGA-1
 446 | GGGAATGGTGTCGCTG-1
 447 | TCGTACCAGAATGTTG-1
 448 | TACTTGTCACGGTTTA-1
 449 | GACTACAGTGATGATA-1
 450 | GGGAGATCAGGTCTCG-1
 451 | CATATTCTCTTTAGTC-1
 452 | TCAGATGAGGCAGTCA-1
 453 | CTTAGGAAGGTGCACA-1
 454 | TAAGAGATCTGTTGAG-1
 455 | TGCCCTAGTCCGAGTC-1
 456 | CATCAGAAGGAATTAC-1
 457 | AGTAGTCGTGTTCTTT-1
 458 | CGCTATCAGTGAAGTT-1
 459 | TCCCGATAGACTAGAT-1
 460 | CAGCATAGTTCGCGAC-1
 461 | CGTAGCGGTAGCAAAT-1
 462 | GCATGCGTCCTACAGA-1
 463 | AGATTGCAGATGTCGG-1
 464 | ACATGGTAGTGAACGC-1
 465 | GGCCGATCATATGCTG-1
 466 | CCTATTAAGTCACGCC-1
 467 | CATCGGGGTAGGCTGA-1
 468 | GCGACCAGTGTTTGTG-1
 469 | TGCGGGTGTCATCCCT-1
 470 | GTATCTTTCAAACCAC-1
 471 | CAGCTAAGTCCAACTA-1
 472 | CCCAGTTGTGAAATCA-1
 473 | ATCGAGTAGGACAGAA-1
 474 | GATGAGGCAGATGGGT-1
 475 | CAGCTAAGTGAGTATA-1
 476 | GTCATTTCAGAGCCAA-1
 477 | CGTCCATGTTTCGCTC-1
 478 | CGGACTGGTTGCTCCT-1
 479 | TATGCCCTCTGTCAAG-1
 480 | CATCGGGTCGGCGCAT-1
 481 | AAGACCTGTTCACGGC-1
 482 | ACTGAACTCTCTTATG-1
 483 | CGGCTAGTCATGTCCC-1
 484 | CTTAACTAGATGCGAC-1
 485 | GTGTGCGTCAATAAGG-1
 486 | ATCCACCAGAGGGATA-1
 487 | TCAGCAAGTGTTCGAT-1
 488 | CTGAAGTCATCCCACT-1
 489 | GTGCGGTTCTCTTATG-1
 490 | TCCACACTCCCTTGTG-1
 491 | AAACCTGCACTACAGT-1
 492 | AAACGGGAGTACATGA-1
 493 | GCATACATCTCGCTTG-1
 494 | GCACATAAGTCCGTAT-1
 495 | ACGGGCTGTTATCGGT-1
 496 | AGACGTTTCATGGTCA-1
 497 | CCTTACGCACGGCGTT-1
 498 | TACTCGCCATCGATTG-1
 499 | CAGCATATCTGGTGTA-1
 500 | GTATCTTCAAAGGTGC-1
 501 | ACGGGCTGTTAAGTAG-1
 502 | AAGCCGCAGACACTAA-1
 503 | GGAGCAAGTCAGAAGC-1
 504 | GGAGCAATCCATGAGT-1
 505 | GCAATCAAGGAGTAGA-1
 506 | CAGTAACGTCTTCTCG-1
 507 | ACCCACTTCGCCCTTA-1
 508 | CATATTCGTAGCCTAT-1
 509 | CTAAGACAGGATGGTC-1
 510 | GCATACAGTGCGATAG-1
 511 | GGCGTGTTCTTACCGC-1
 512 | GCACATACAGTGACAG-1
 513 | AGTGGGACAAGTTGTC-1
 514 | CTTAACTAGTATCTCG-1
 515 | TTGACTTAGTATCGAA-1
 516 | CTGAAGTAGACTCGGA-1
 517 | GACAGAGGTAGCACGA-1
 518 | GAATGAAGTCATGCAT-1
 519 | TCCCGATTCGCCATAA-1
 520 | AACTGGTCATGCAACT-1
 521 | AACTCCCTCCGCATAA-1
 522 | CTGATAGAGGGTGTTG-1
 523 | CATGACACACGGCTAC-1
 524 | GGAACTTTCGTCACGG-1
 525 | TGCTGCTCATCGATTG-1
 526 | CTGTTTATCTATCCTA-1
 527 | ACAGCTATCGCGCCAA-1
 528 | AGCGGTCGTGTGGCTC-1
 529 | CATCGAACACGAGAGT-1
 530 | CACACCTGTCGTCTTC-1
 531 | CACACCTAGGTGGGTT-1
 532 | AGTAGTCCAACGCACC-1
 533 | TACGGATAGGAATTAC-1
 534 | AACTCCCCAGGCTGAA-1
 535 | GCAGTTATCAGTACGT-1
 536 | AACCGCGAGGTGCAAC-1
 537 | AATCGGTGTTCGCTAA-1
 538 | CTGCGGACATGGTCTA-1
 539 | ATGCGATAGAAACGAG-1
 540 | CGCCAAGTCGAGAACG-1
 541 | AAATGCCAGCGATAGC-1
 542 | ACACCGGTCCTGTACC-1
 543 | CCTATTATCCAACCAA-1
 544 | ACTTTCAGTCAGCTAT-1
 545 | GCAGTTACACAGGTTT-1
 546 | AGCTCTCTCTGGTTCC-1
 547 | AGCCTAAGTCAAGCGA-1
 548 | CCTTCGATCCTCAACC-1
 549 | AGAGTGGTCTCGTATT-1
 550 | GCAATCATCGCAAGCC-1
 551 | TAAGAGATCCGTACAA-1
 552 | CATCGGGGTCATCCCT-1
 553 | AGGGAGTCATCCCACT-1
 554 | AGACGTTAGACTCGGA-1
 555 | GTGCATAGTTAAGACA-1
 556 | TGTATTCTCGTTGCCT-1
 557 | GCTGGGTGTACCGGCT-1
 558 | TTTGTCAGTGAAATCA-1
 559 | TGAGAGGTCTTCATGT-1
 560 | GTGCGGTAGAATGTTG-1
 561 | TCTTCGGTCAGGCAAG-1
 562 | GTTCTCGTCGTTTAGG-1
 563 | CAACTAGGTCTAGTGT-1
 564 | CCTCTGATCGATGAGG-1
 565 | ACTGTCCCATGTAGTC-1
 566 | CAAGATCGTAGCGTCC-1
 567 | CATTATCTCGTTGACA-1
 568 | CAACTAGCATGCAACT-1
 569 | ATTTCTGGTACTTCTT-1
 570 | CCAATCCCACAACTGT-1
 571 | ACTGAGTAGTCGTTTG-1
 572 | CTACGTCCAAGGACTG-1
 573 | GGCGACTTCTGTCAAG-1
 574 | CTGCGGACAATGGAAT-1
 575 | TATTACCAGATCTGCT-1
 576 | TGGTTCCTCGGATGTT-1
 577 | AGCGTCGCACGAAGCA-1
 578 | CCGTACTAGTGGAGTC-1
 579 | TTCTACAAGTGGGTTG-1
 580 | ATAGACCGTGCGCTTG-1
 581 | ACGCCAGCAGTGAGTG-1
 582 | GTGCATACACTTAACG-1
 583 | CGGTTAACAAGAAGAG-1
 584 | GTTACAGAGCCCAATT-1
 585 | CTCTACGAGACCGGAT-1
 586 | ATTCTACAGTAAGTAC-1
 587 | CGCTATCGTAAGGGAA-1
 588 | ACCAGTAGTGAGCGAT-1
 589 | CGGGTCACAATCGAAA-1
 590 | TACGGATTCACGCATA-1
 591 | CAAGGCCCAGCTGGCT-1
 592 | GCGCAGTCAGTCACTA-1
 593 | ACTGAGTCAGTTCCCT-1
 594 | CGAGCCACAATCCGAT-1
 595 | TGTGTTTCATTAACCG-1
 596 | GCGCAACTCTTTACGT-1
 597 | TCATTACGTTTGTTTC-1
 598 | CTCGTACGTTGGTAAA-1
 599 | GTCATTTGTGCTGTAT-1
 600 | TAGACCAAGCAGATCG-1
 601 | CTCTACGAGACTAGGC-1
 602 | GTCGGGTGTGTAACGG-1
 603 | TGCCCTATCTCTTGAT-1
 604 | CGAGAAGCAATGTAAG-1
 605 | GACGTGCGTTCCACGG-1
 606 | ATCTGCCCATTGGCGC-1
 607 | CGGACACTCGAACGGA-1
 608 | AGTCTTTTCAAACCGT-1
 609 | AGGCCGTGTCATCCCT-1
 610 | CAGCCGAGTCGCTTTC-1
 611 | CCACTACAGTCACGCC-1
 612 | AGGGAGTCATTCTCAT-1
 613 | CAAGATCCACTGTCGG-1
 614 | ATAACGCGTTATCACG-1
 615 | GGCTGGTGTGACTCAT-1
 616 | CTGCCTACACCTTGTC-1
 617 | AGCGGTCCAGTTAACC-1
 618 | GAGGTGATCACCCTCA-1
 619 | GGCGACTCACCAGCAC-1
 620 | TCGAGGCCATGTTGAC-1
 621 | ACGCAGCCAGGGTACA-1
 622 | GATCAGTGTAGCTGCC-1
 623 | TGTCCCAGTCGCATAT-1
 624 | AAAGATGTCGTTTATC-1
 625 | TGTTCCGGTGGTACAG-1
 626 | CAACCAAGTCTAGAGG-1
 627 | CTAATGGGTTCGGGCT-1
 628 | CACACCTCAACACGCC-1
 629 | GATTCAGCATGGGAAC-1
 630 | GGACAAGGTACTTGAC-1
 631 | AACTGGTCAATGTTGC-1
 632 | CGGACTGCAGTATAAG-1
 633 | CGTTGGGAGGTCATCT-1
 634 | CAGTAACAGCCAACAG-1
 635 | GTCGGGTCATTGGTAC-1
 636 | GATGAAAAGGACCACA-1
 637 | GACCAATAGATAGGAG-1
 638 | GACTAACTCTACTATC-1
 639 | CGGACACGTCTAGCCG-1
 640 | AGATCTGAGATGCGAC-1
 641 | GATCGATAGACTACAA-1
 642 | GGCTGGTAGCATGGCA-1
 643 | GCTCTGTGTCCTAGCG-1
 644 | CCTACCACATTCTTAC-1
 645 | TCGTAGAGTTGGGACA-1
 646 | ATTTCTGGTTGTACAC-1
 647 | ACGTCAACAAAGTCAA-1
 648 | CTAGTGAAGAGTAAGG-1
 649 | GTTAAGCCAATCAGAA-1
 650 | AGACGTTTCATTCACT-1
 651 | AGCAGCCGTATCGCAT-1
 652 | TGTGTTTGTGTAATGA-1
 653 | GTAACTGAGAAACCGC-1
 654 | CGATGTACACGAAACG-1
 655 | GACGGCTAGAGTGACC-1
 656 | TGAGAGGGTTTCCACC-1
 657 | ACTTTCACATCACAAC-1
 658 | AGGTCCGGTTGCTCCT-1
 659 | CTGCCTACACAGCGTC-1
 660 | CGACCTTAGCCCAACC-1
 661 | TCGGGACCAGTCTTCC-1
 662 | TATCAGGTCTGGTATG-1
 663 | CGGCTAGCATAAGACA-1
 664 | ACCGTAATCGAGAGCA-1
 665 | CGCTTCATCCCAACGG-1
 666 | CTTCTCTTCGACAGCC-1
 667 | TTGCGTCTCACCACCT-1
 668 | AACTTTCGTGGGTATG-1
 669 | TCTTTCCCAACGCACC-1
 670 | TTAGGCATCGCATGAT-1
 671 | CTCACACTCTGAGTGT-1
 672 | CGCCAAGAGTTGTAGA-1
 673 | TAAACCGCATCACGAT-1
 674 | TACCTATAGAAGGTGA-1
 675 | TACAGTGTCAGGTAAA-1
 676 | GCAATCAAGATGCCTT-1
 677 | GGCGACTAGGTCGGAT-1
 678 | TGTTCCGGTCAAAGCG-1
 679 | ATCGAGTAGCAGCGTA-1
 680 | CTTGGCTAGTGTACGG-1
 681 | TCTCTAAGTTAAGACA-1
 682 | TAGTTGGTCAACGCTA-1
 683 | TGGCCAGCAAGTCATC-1
 684 | CAGCTAATCTTGCCGT-1
 685 | TACGGTAAGCGACGTA-1
 686 | CCCAATCCAAGAGGCT-1
 687 | TATTACCAGGAATTAC-1
 688 | TCAGCTCAGTCAAGGC-1
 689 | GGCTGGTTCGCGTAGC-1
 690 | CATCAAGAGTCCTCCT-1
 691 | ACATCAGCACAACTGT-1
 692 | ACCAGTAGTGTTCGAT-1
 693 | GCTGGGTTCATACGGT-1
 694 | CTTGGCTAGCTAGTCT-1
 695 | GAATAAGAGAGGTTAT-1
 696 | CCAGCGAGTGATGTGG-1
 697 | TACGGATGTCACTGGC-1
 698 | AGTGTCATCGATGAGG-1
 699 | CTACACCAGACAGAGA-1
 700 | GCGCCAAGTTCACGGC-1
 701 | CGATGTAGTAAGTGGC-1
 702 | TGCTACCCAGATGGGT-1
 703 | TACTTACCAGTATCTG-1
 704 | AGAGCTTAGAAGGCCT-1
 705 | ACTGAGTTCTGCTGCT-1
 706 | GTATCTTGTCTAGTCA-1
 707 | GTGTGCGTCGCGATCG-1
 708 | TATTACCGTATAGTAG-1
 709 | GCGACCAAGGCCCGTT-1
 710 | GAAATGAGTCTGCAAT-1
 711 | CTTAACTGTACAGACG-1
 712 | GACGGCTGTCCGTGAC-1
 713 | CCTCAGTCAAGCTGAG-1
 714 | CGCGTTTCATTAGCCA-1
 715 | CTAGAGTAGTGTGGCA-1
 716 | ATCCACCAGAGCTGGT-1
 717 | ACTGAACTCCAGTAGT-1
 718 | CGGACGTAGTGAATTG-1
 719 | CATTATCTCTTATCTG-1
 720 | TTTATGCGTTCCTCCA-1
 721 | TGTGTTTAGGGTTTCT-1
 722 | CTAAGACAGCTAACAA-1
 723 | GTGCATACAAGTTAAG-1
 724 | AGGGTGAAGTGTACCT-1
 725 | TTGTAGGGTCTAACGT-1
 726 | AAGACCTTCAGGTAAA-1
 727 | GCGACCAGTGGCGAAT-1
 728 | ACGGGTCAGTACGATA-1
 729 | TGGTTCCTCCACGCAG-1
 730 | AAACGGGGTAGCTCCG-1
 731 | TCTTTCCCACTCGACG-1
 732 | ATTATCCCAAGCCGTC-1
 733 | CGGGTCATCGTATCAG-1
 734 | TGGCCAGCAGACGCTC-1
 735 | GTCTTCGAGGGCTTCC-1
 736 | CGAGCCAGTTCAGACT-1
 737 | GTAGGCCAGTGCGTGA-1
 738 | ATGTGTGGTTCCGGCA-1
 739 | GTATTCTGTAACGACG-1
 740 | GCTGGGTAGCCAGTAG-1
 741 | GTATTCTAGTGGAGTC-1
 742 | CGAGCACCACCATCCT-1
 743 | ATTGGTGAGGTGATTA-1
 744 | GACGGCTTCTTGTCAT-1
 745 | CTCTGGTAGACTGGGT-1
 746 | GAAGCAGTCTGTCTCG-1
 747 | CAAGATCAGCTAGTCT-1
 748 | GACTACAAGAATTGTG-1
 749 | TGAGGGATCCATGCTC-1
 750 | ATGTGTGTCCGTACAA-1
 751 | TCGGTAAGTCTCTCGT-1
 752 | GAATAAGGTGCTTCTC-1
 753 | CCTTTCTGTATAGGGC-1
 754 | TGGGAAGGTCAGAGGT-1
 755 | TACTTACCAGCTGTGC-1
 756 | ACAGCTAGTGCAACGA-1
 757 | TTTCCTCTCGCCCTTA-1
 758 | CCTTTCTCAAAGCAAT-1
 759 | CTTAGGAGTCCCTACT-1
 760 | ACAGCTATCTCTAAGG-1
 761 | CACACTCTCCAGATCA-1
 762 | CTGTGCTAGGGTTTCT-1
 763 | TCTATTGTCCACGTTC-1
 764 | GCGCGATAGGCATGTG-1
 765 | AACCGCGTCAACTCTT-1
 766 | AAATGCCAGCCAACAG-1
 767 | TATTACCTCGTTGCCT-1
 768 | CTGGTCTGTACCATCA-1
 769 | TAGCCGGGTTGGTTTG-1
 770 | TGTCCCATCCTTCAAT-1
 771 | GTAACTGGTGGGTATG-1
 772 | GTCAAGTCAGGCTCAC-1
 773 | TAAACCGTCGCGCCAA-1
 774 | CGCGGTAGTACATGTC-1
 775 | CACTCCACACGTTGGC-1
 776 | AAGACCTTCGATAGAA-1
 777 | CATGCCTAGACAGGCT-1
 778 | CTCACACTCCTAGTGA-1
 779 | TGGCCAGAGACAATAC-1
 780 | CAGCTGGTCGCATGGC-1
 781 | CGCTGGACATCGACGC-1
 782 | CAGCCGAGTCGACTAT-1
 783 | GGCTCGACACGAAACG-1
 784 | AAAGCAACACGTCTCT-1
 785 | CGTAGGCTCTCGGACG-1
 786 | CGGCTAGTCACTCTTA-1
 787 | AGGGTGAAGATCGGGT-1
 788 | CATGGCGAGCCTATGT-1
 789 | GGTGCGTGTAATAGCA-1
 790 | ATTACTCCACCACGTG-1
 791 | ATTACTCTCGTCACGG-1
 792 | GTTAAGCTCAATCTCT-1
 793 | GCTCTGTTCAAACAAG-1
 794 | GAGCAGACATTGCGGC-1
 795 | CATCAGATCCCAAGAT-1
 796 | AACTCTTTCGGCCGAT-1
 797 | CATTCGCCAGTGGAGT-1
 798 | CATGGCGCATGCGCAC-1
 799 | AGGTCCGTCGGCGGTT-1
 800 | TATCTCACAGCTTAAC-1
 801 | TCAACGACACATGACT-1
 802 | TTCCCAGAGGACTGGT-1
 803 | GAGGTGAAGCGTTGCC-1
 804 | GTTTCTAAGAGTGACC-1
 805 | TTGCCGTAGAGAACAG-1
 806 | CTCGTACTCCGAATGT-1
 807 | CTCATTACATGTTGAC-1
 808 | TACTCGCCACTTCGAA-1
 809 | AAGTCTGAGATACACA-1
 810 | TAAGAGATCAAACAAG-1
 811 | TGCCAAAGTCGCGAAA-1
 812 | CATGGCGGTGCAGGTA-1
 813 | CGTTAGAAGTGTCTCA-1
 814 | TCACAAGCATCCCACT-1
 815 | CTAATGGCATTGGTAC-1
 816 | ACGCCAGTCTAACCGA-1
 817 | TCTTTCCCAGCGTCCA-1
 818 | CCACCTATCAAACCGT-1
 819 | TCAATCTTCAACACGT-1
 820 | CAGAGAGTCAGGATCT-1
 821 | GAGGTGAGTGCGAAAC-1
 822 | CGTAGCGTCAAACGGG-1
 823 | TCGCGAGAGCCACTAT-1
 824 | TCAATCTCATGTAAGA-1
 825 | CAGAGAGTCGCCGTGA-1
 826 | ACGCCGATCATGTCCC-1
 827 | GACTGCGAGGATGTAT-1
 828 | AACCATGAGTATTGGA-1
 829 | CGGACACCACCGGAAA-1
 830 | GGCCGATAGCCACCTG-1
 831 | GCTGCTTGTGAAATCA-1
 832 | TGAGAGGCAGGCTCAC-1
 833 | CCCATACTCTAACTGG-1
 834 | GTCGTAAGTGGACGAT-1
 835 | TTCTCCTGTAGTAGTA-1
 836 | TGACAACAGTACCGGA-1
 837 | ATTACTCGTTAAGATG-1
 838 | ACTGTCCGTGTTCTTT-1
 839 | ACTTTCAAGTGCGTGA-1
 840 | GACGTTAAGGTGATTA-1
 841 | CGGAGCTTCACGAAGG-1
 842 | TTCGAAGTCGTACGGC-1
 843 | GTCCTCAAGTACGTAA-1
 844 | CCGTTCACAGTCGTGC-1
 845 | CCTCTGACACGACTCG-1
 846 | GCACTCTAGAGGGATA-1
 847 | AACTCCCAGACGCAAC-1
 848 | GTACGTAAGACTCGGA-1
 849 | CACATAGAGTCGAGTG-1
 850 | CGGCTAGTCCTGCAGG-1
 851 | GGGAATGCATGTCCTC-1
 852 | TTTATGCTCCAAGTAC-1
 853 | GTTCATTCAAACTGTC-1
 854 | GGACATTCATCCTTGC-1
 855 | TGTTCCGAGCTAGTCT-1
 856 | GGCGACTAGCGTTTAC-1
 857 | TGACTAGCAGCATACT-1
 858 | GCACTCTCAGTAACGG-1
 859 | GTGTGCGCATTGGCGC-1
 860 | CCTCAGTCAGCTCGAC-1
 861 | TTAGGACTCCGTTGCT-1
 862 | AACTGGTCAGTGGGAT-1
 863 | CCGGGATCACGGCTAC-1
 864 | CGCTTCACATTGGTAC-1
 865 | TGGCTGGGTAGGGTAC-1
 866 | GCGCCAACACGGCGTT-1
 867 | GGATTACGTGTGCGTC-1
 868 | GGGTTGCTCGTGGACC-1
 869 | TGACAACCACCATCCT-1
 870 | AAACGGGAGGGATCTG-1
 871 | CATCAAGCACTTACGA-1
 872 | CCGGTAGAGGAGTTGC-1
 873 | ACATGGTAGGATGCGT-1
 874 | CATCCACTCACCACCT-1
 875 | CGGCTAGGTGAGGCTA-1
 876 | GGAATAAGTTCCACAA-1
 877 | TTATGCTGTGTTAAGA-1
 878 | TAGGCATAGTGACTCT-1
 879 | TTGACTTGTTGTCGCG-1
 880 | AATCGGTAGTCGCCGT-1
 881 | ATCCGAATCCGAACGC-1
 882 | TGGGCGTGTCTCGTTC-1
 883 | CTCCTAGGTTGAGGTG-1
 884 | CGTCTACAGAGTACAT-1
 885 | CGGGTCACACCTCGTT-1
 886 | GATCGCGGTACTTAGC-1
 887 | AGCTTGAGTTGAGGTG-1
 888 | CGCTTCAGTTTCGCTC-1
 889 | CTTAACTCAGTTAACC-1
 890 | TGCACCTTCCAAATGC-1
 891 | CGATTGACAGCCACCA-1
 892 | AGCTCCTCACTCTGTC-1
 893 | TTCTTAGTCCCGGATG-1
 894 | TGCGGGTTCAGGATCT-1
 895 | CAGCTAAAGTGGAGTC-1
 896 | TTCTACAGTGGCAAAC-1
 897 | GGAACTTTCACGGTTA-1
 898 | GCTGCGATCAACGGCC-1
 899 | GGAAAGCTCAACACGT-1
 900 | TGTGTTTTCTCTTGAT-1
 901 | AGCTTGATCCTGTAGA-1
 902 | ACGCAGCGTAGCCTAT-1
 903 | TATCTCAAGCAAATCA-1
 904 | TTGGCAAGTCCGAAGA-1
 905 | TCGAGGCAGAGTTGGC-1
 906 | GAACATCTCGTAGGAG-1
 907 | AGCTCCTTCAGGCCCA-1
 908 | TTCTACATCCAACCAA-1
 909 | CACTCCATCGGCGCTA-1
 910 | ACGGGTCCACAAGTAA-1
 911 | GGCGTGTCAGTATAAG-1
 912 | TGACGGCGTTGTACAC-1
 913 | CATCGGGCATGGTAGG-1
 914 | TCGGGACAGTCGTTTG-1
 915 | TTGCGTCCATGACGGA-1
 916 | TCGTACCTCACTCTTA-1
 917 | AGGCCGTAGAGGTACC-1
 918 | GTCACGGTCAGGCAAG-1
 919 | TGACTAGCATCACAAC-1
 920 | GCATGTACATACTCTT-1
 921 | CGAACATCAGCTCGCA-1
 922 | TGGTTCCCATTAACCG-1
 923 | CAGCATATCTGCAGTA-1
 924 | TGTGGTATCCTTAATC-1
 925 | TGTGGTAGTTCGTCTC-1
 926 | CGGGTCAGTACCGTTA-1
 927 | CGTCACTAGGATATAC-1
 928 | CAGCATATCGCAAGCC-1
 929 | AAAGCAAAGGTACTCT-1
 930 | TCCACACTCTGGTATG-1
 931 | CAGCATATCACAGGCC-1
 932 | TGTCCCAAGGACAGCT-1
 933 | TGGCTGGCACGAAACG-1
 934 | ACATCAGGTCTCGTTC-1
 935 | TGTGGTACATTCTTAC-1
 936 | CGGAGTCGTCTGCAAT-1
 937 | CACACTCTCGCATGAT-1
 938 | CAGTAACCAAGCCATT-1
 939 | CTGAAACGTATAAACG-1
 940 | CAACCAAAGGAATCGC-1
 941 | AGGCCGTGTTGATTGC-1
 942 | GAGGTGACAGACGCCT-1
 943 | TGGTTCCGTCAGTGGA-1
 944 | TACGGTATCACATGCA-1
 945 | ACGCCGATCCTCCTAG-1
 946 | CATATGGCAATGCCAT-1
 947 | TAGTGGTTCGGCGGTT-1
 948 | GGCTCGATCATCTGTT-1
 949 | GTACTTTAGATGTGGC-1
 950 | CACCAGGGTTACGCGC-1
 951 | GCGGGTTTCAACGGCC-1
 952 | ACACCGGGTCTTCAAG-1
 953 | GTACGTAGTGGAAAGA-1
 954 | ATCACGATCAACGCTA-1
 955 | CTGCTGTCAAGCTGGA-1
 956 | AACACGTCACCAGATT-1
 957 | CTGCGGACACAGACAG-1
 958 | CCTACCAAGTCTCGGC-1
 959 | CACTCCAGTATCTGCA-1
 960 | CCATTCGCATCGTCGG-1
 961 | ACCCACTGTGATGATA-1
 962 | GTCACAAGTGCACGAA-1
 963 | TACTTACTCAGGTTCA-1
 964 | ACTTACTAGGCATGGT-1
 965 | ACGATGTGTGTGAATA-1
 966 | TGACTAGGTACAGCAG-1
 967 | CTGAAGTCACACGCTG-1
 968 | CGGCTAGGTCGAACAG-1
 969 | CCACGGATCTGTTTGT-1
 970 | CGGACTGGTCATGCAT-1
 971 | CGTCAGGTCTAAGCCA-1
 972 | TTAGGACAGCTTCGCG-1
 973 | CCTATTAAGATCGGGT-1
 974 | AGCATACTCTCGAGTA-1
 975 | GTGAAGGAGGAATGGA-1
 976 | GGTATTGCACGTAAGG-1
 977 | AACACGTCATCACGAT-1
 978 | GGCCGATAGACGACGT-1
 979 | ATCTACTTCGAGAACG-1
 980 | CGGCTAGGTTGCGTTA-1
 981 | GTCTCGTGTGATAAGT-1
 982 | AGAGTGGTCGCTTAGA-1
 983 | TGCCAAACAGGTGCCT-1
 984 | TAGAGCTAGCTGAACG-1
 985 | CTTACCGAGCCAACAG-1
 986 | GGACAAGGTTTGGGCC-1
 987 | AGAGCGAGTACCCAAT-1
 988 | CTACACCCAGCTGTGC-1
 989 | TCATTACGTTGCGCAC-1
 990 | ATGTGTGAGACTAGGC-1
 991 | ATCATCTGTGATGTCT-1
 992 | CGACCTTAGTGTACTC-1
 993 | TGCACCTGTAATCGTC-1
 994 | CTAAGACGTGCGGTAA-1
 995 | CGAGCACAGCGATAGC-1
 996 | TGACTAGGTCTTCTCG-1
 997 | TCAGGTAAGCTGAAAT-1
 998 | TTGCGTCTCTGCTTGC-1
 999 | CAGAGAGCAAGAGGCT-1
1000 | ACATGGTCAATTCCTT-1
1001 | 


--------------------------------------------------------------------------------
/inst/extdata/example_TIP_sham_whitelist_barcodes.tsv:
--------------------------------------------------------------------------------
   1 | AGCGTATAGTGAACGC-1
   2 | GGATTACCATGGTCAT-1
   3 | CCCTCCTAGCGCCTCA-1
   4 | TACAGTGAGCGGCTTC-1
   5 | TAAGTGCTCAAGCCTA-1
   6 | AACTTTCGTCCATCCT-1
   7 | TAGTGGTAGACAAGCC-1
   8 | TCAGATGCACTCTGTC-1
   9 | AGTCTTTAGACGCACA-1
  10 | CATGCCTGTCTCCACT-1
  11 | TTGACTTTCACCACCT-1
  12 | TTGGAACGTAGATTAG-1
  13 | CGTTCTGCAAGTAGTA-1
  14 | GAGCAGACATAGAAAC-1
  15 | AGCTCCTTCTAAGCCA-1
  16 | GGTGAAGCAATAAGCA-1
  17 | TACCTTAGTCAAACTC-1
  18 | TTCGGTCTCAATCACG-1
  19 | CGGACGTGTACGAAAT-1
  20 | CTTGGCTCAGGCGATA-1
  21 | GGACATTGTGCAGACA-1
  22 | CGCTTCAAGTTGAGAT-1
  23 | GCTCTGTGTGTGACGA-1
  24 | TGACAACTCTTCTGGC-1
  25 | TGCCAAAGTATAGGTA-1
  26 | GTCACGGGTGCACGAA-1
  27 | TACACGACAGTCGTGC-1
  28 | TATGCCCTCGCGCCAA-1
  29 | TTTACTGCAGCTCCGA-1
  30 | TACTCGCAGAGTTGGC-1
  31 | AACCGCGTCATATCGG-1
  32 | TTAGGCAGTCACACGC-1
  33 | CGTTCTGCAAATTGCC-1
  34 | TAAACCGCACAGGCCT-1
  35 | GCTGCTTGTGGCCCTA-1
  36 | CATCGGGCAATAGAGT-1
  37 | GTAACGTCACGGACAA-1
  38 | CCACGGACAACGCACC-1
  39 | AGTAGTCTCTCAAGTG-1
  40 | TTAACTCAGTGGACGT-1
  41 | TGACTAGGTTCCAACA-1
  42 | TTTACTGAGAACAATC-1
  43 | CGATGGCCATTCCTCG-1
  44 | GCGGGTTTCCTGTACC-1
  45 | ATTCTACTCACTTCAT-1
  46 | ATCTGCCTCTCGTATT-1
  47 | AACTCCCAGGGTTTCT-1
  48 | GCTGGGTGTTATCCGA-1
  49 | GTCACAATCTACCTGC-1
  50 | CGAACATCAAGTCTAC-1
  51 | TGTTCCGGTAACGACG-1
  52 | AACACGTAGCCGGTAA-1
  53 | CGAGAAGGTAGCCTCG-1
  54 | CACACCTTCACTTATC-1
  55 | GAAATGATCAACGCTA-1
  56 | TGGACGCAGCTACCTA-1
  57 | GCTGCAGGTCGATTGT-1
  58 | ACTTACTAGAGCTATA-1
  59 | CATTATCGTCTGATTG-1
  60 | GCACTCTTCACTTCAT-1
  61 | TAGCCGGGTATAGGGC-1
  62 | TATGCCCGTCTTGATG-1
  63 | CTCGAAAAGGGATCTG-1
  64 | GGGAGATTCTCAAGTG-1
  65 | CGGCTAGGTGACGCCT-1
  66 | TCCCGATTCCTGTACC-1
  67 | GGAACTTCACGCTTTC-1
  68 | CCACCTATCGGCGGTT-1
  69 | TAAGCGTCAAGTCTGT-1
  70 | CGTAGCGGTTCCACAA-1
  71 | ACTGATGGTGGTCCGT-1
  72 | ATTACTCAGAGTAAGG-1
  73 | TCTGAGAAGCTATGCT-1
  74 | GAGCAGACATACGCCG-1
  75 | CTGATAGGTGATAAAC-1
  76 | CAGGTGCTCATTGCGA-1
  77 | ACACCGGCACGCTTTC-1
  78 | CTTAGGAGTACACCGC-1
  79 | CGCCAAGCAGTAAGCG-1
  80 | CGAACATAGACGACGT-1
  81 | GATTCAGTCATGCAAC-1
  82 | GCCAAATTCCTCCTAG-1
  83 | GGCGTGTGTGGTGTAG-1
  84 | AACTCAGTCTTGGGTA-1
  85 | GTATTCTAGACTACAA-1
  86 | AAACGGGCACTGTTAG-1
  87 | GTCTCGTGTGTTAAGA-1
  88 | CTGCTGTGTGTAATGA-1
  89 | ACACCAAAGCGACGTA-1
  90 | TGAGAGGTCTGCAAGT-1
  91 | GTCAAGTCATGAAGTA-1
  92 | CTGATCCAGCTAACTC-1
  93 | ATCGAGTAGTGGGTTG-1
  94 | TAGCCGGTCTTTACGT-1
  95 | ACTTGTTAGCAGCCTC-1
  96 | ATCTACTGTGTGACCC-1
  97 | TACTCATTCTTAGCCC-1
  98 | TATCTCAAGGTTACCT-1
  99 | GTTCATTCAGGGTATG-1
 100 | TTGAACGTCGAATGGG-1
 101 | CGATGGCTCCGTAGTA-1
 102 | GTAACTGAGGCATGGT-1
 103 | AAGCCGCCAACACCTA-1
 104 | CTGATCCAGCTCCCAG-1
 105 | GCTCTGTTCTCCGGTT-1
 106 | CATCGGGGTACAAGTA-1
 107 | GTAGTCATCGACCAGC-1
 108 | TCGCGAGTCTGCGGCA-1
 109 | GAGGTGAGTGCGATAG-1
 110 | ACGGCCATCTCGTTTA-1
 111 | AGATCTGCACCGTTGG-1
 112 | AAGGAGCCACATTAGC-1
 113 | CGAGAAGAGGCTAGCA-1
 114 | TCGGTAATCTGTACGA-1
 115 | GGACATTCATCGACGC-1
 116 | TGTATTCTCTCAAACG-1
 117 | CAACCTCTCCAATGGT-1
 118 | CTACATTGTGTGGTTT-1
 119 | GACAGAGCATTACCTT-1
 120 | TAGAGCTGTCAAAGCG-1
 121 | CATCGGGAGACTAGAT-1
 122 | CGCTATCGTTTAGGAA-1
 123 | CACATAGGTCTTCGTC-1
 124 | GACACGCTCTCGCTTG-1
 125 | ACATACGTCACCTTAT-1
 126 | ACTGAGTAGCGCTTAT-1
 127 | CGCTATCGTGGAAAGA-1
 128 | AATCGGTTCCGAATGT-1
 129 | TGAGAGGCAGTATCTG-1
 130 | TCGCGTTAGAACTGTA-1
 131 | GCCTCTATCACGGTTA-1
 132 | GAGGTGAGTACGCTGC-1
 133 | CTCGAGGCAAGCCCAC-1
 134 | TCAACGAGTGGTCTCG-1
 135 | TGAAAGACAATACGCT-1
 136 | CCTAGCTCATTTCACT-1
 137 | ACATCAGAGAGGGATA-1
 138 | ACTTTCAGTCTACCTC-1
 139 | CAGTAACCACGCATCG-1
 140 | GTTCATTGTAAGTGTA-1
 141 | ATTTCTGAGTGTACTC-1
 142 | GTCATTTCAATCTACG-1
 143 | GCCTCTAGTAATCACC-1
 144 | GCTGGGTAGTGTACTC-1
 145 | AAAGATGAGCGTGAGT-1
 146 | GGGTCTGCATCGGGTC-1
 147 | AAGACCTTCGCCAAAT-1
 148 | TCACGAAAGTACGTAA-1
 149 | CCTACCAAGATACACA-1
 150 | GGACAAGAGTCTCAAC-1
 151 | GACTAACAGGCAAAGA-1
 152 | TCACAAGAGTACCGGA-1
 153 | AGCCTAATCTTAGCCC-1
 154 | TGAGCCGAGCTGCCCA-1
 155 | CGTCTACGTACCCAAT-1
 156 | CAACTAGTCCCGGATG-1
 157 | TGCGCAGGTAACGCGA-1
 158 | TTGCCGTAGCTATGCT-1
 159 | GTGTGCGCAGGTCGTC-1
 160 | CGGAGTCAGATCCTGT-1
 161 | ACACTGACATGCTGGC-1
 162 | TGAGAGGAGCCACTAT-1
 163 | TCTGAGAGTGGTTTCA-1
 164 | AAAGTAGAGCCGGTAA-1
 165 | ACGGGCTGTTAAAGTG-1
 166 | GGACAGACATTAACCG-1
 167 | GGACATTTCAACCATG-1
 168 | CATTCGCGTCGGCACT-1
 169 | CCGGTAGAGTACGCGA-1
 170 | TTAGGCACACACTGCG-1
 171 | AAGGTTCTCAGCGATT-1
 172 | TAGTGGTCACCAGTTA-1
 173 | ACTGCTCCACGAAACG-1
 174 | CTGGTCTTCTGGTATG-1
 175 | ACACCAATCTGTACGA-1
 176 | CTACACCAGTAGGTGC-1
 177 | TACTCGCAGTAGGTGC-1
 178 | TGCACCTTCAGAGCTT-1
 179 | AGTTGGTAGAGCTGCA-1
 180 | CATGCCTGTGCAGGTA-1
 181 | ATTGGTGCACGTTGGC-1
 182 | CCACCTATCTTGTCAT-1
 183 | ATCGAGTCAGCCAATT-1
 184 | GCTGCAGTCAGTGCAT-1
 185 | GAAATGACAAGAGTCG-1
 186 | CCTTTCTTCGAATGCT-1
 187 | GCATACATCAGTCAGT-1
 188 | AAATGCCTCTTTCCTC-1
 189 | CTACGTCCACGAAGCA-1
 190 | GATGAGGAGCCCAGCT-1
 191 | ACTTACTCAGTCACTA-1
 192 | TAAGTGCAGTTCGCAT-1
 193 | GTTCGGGGTAAGGGCT-1
 194 | GTTCATTGTTCCACGG-1
 195 | CAACCAAAGCCAGAAC-1
 196 | GTAACTGCAGCTCGCA-1
 197 | TCAGGATCAGATAATG-1
 198 | GAGGTGATCAACACCA-1
 199 | CGAACATAGAACAACT-1
 200 | GGGATGAGTTGGTTTG-1
 201 | CATGGCGGTTGCGTTA-1
 202 | ACGTCAAAGGCATTGG-1
 203 | GGCGTGTCACGACTCG-1
 204 | TTAGGACTCTGTCCGT-1
 205 | TGCTGCTAGGAGCGTT-1
 206 | CATCAGATCCGTCAAA-1
 207 | TAAGCGTAGTGCTGCC-1
 208 | TTTGCGCGTCCAGTTA-1
 209 | AAGGTTCCAAGTCTAC-1
 210 | CCTTCGAAGTAGATGT-1
 211 | TCAGCTCAGGGAGTAA-1
 212 | GTTAAGCAGTACGTAA-1
 213 | CCGGTAGGTACGCTGC-1
 214 | CCTTCCCAGCAATCTC-1
 215 | TGTCCCACAAAGGTGC-1
 216 | TTTGGTTTCACCTTAT-1
 217 | GCGCAGTAGAGCTGCA-1
 218 | TGACTTTTCTTCCTTC-1
 219 | ATAGACCTCGTACCGG-1
 220 | CACAAACAGCTAACTC-1
 221 | TACTCGCCAACTGGCC-1
 222 | TGTGGTACAGATGGCA-1
 223 | CACAAACAGAGGTAGA-1
 224 | CTAATGGCAAAGCGGT-1
 225 | CGTAGGCAGGATGTAT-1
 226 | GTACGTACAGTTCCCT-1
 227 | GTAACTGTCCGCGGTA-1
 228 | GTTTCTACAAAGTCAA-1
 229 | TGTCCCAGTCCGACGT-1
 230 | ATCGAGTAGACTACAA-1
 231 | CGGGTCAAGCTAAGAT-1
 232 | CATCAAGTCCTAAGTG-1
 233 | GTTCGGGTCTAAGCCA-1
 234 | TGAGCCGTCTCTGTCG-1
 235 | TTTACTGAGGCTAGAC-1
 236 | TTGCGTCAGCGATCCC-1
 237 | CTACATTCAGGGTTAG-1
 238 | CATATTCTCAGCACAT-1
 239 | TAGTGGTCAAGGACTG-1
 240 | TAGGCATGTCTCCACT-1
 241 | CCACTACCATTCCTGC-1
 242 | GTATCTTAGGGAGTAA-1
 243 | TGGTTAGAGCGTTGCC-1
 244 | TCGTAGAAGCTAGTCT-1
 245 | AAGGTTCGTGAAATCA-1
 246 | CGTTAGAGTGGTCTCG-1
 247 | GCGCGATAGACCGGAT-1
 248 | TGTATTCGTGTGTGCC-1
 249 | GGACGTCTCATGTCTT-1
 250 | TCAGGTAGTTCACCTC-1
 251 | GCGGGTTTCTGCAGTA-1
 252 | CGCTATCGTTCGTCTC-1
 253 | CAGTCCTAGGGCTTGA-1
 254 | CTGGTCTAGCTCTCGG-1
 255 | TTGTAGGTCATCACCC-1
 256 | AGAGCGAGTCCAGTGC-1
 257 | CACCAGGGTTGACGTT-1
 258 | GGTGTTATCTGCGACG-1
 259 | AGATTGCTCTCTTATG-1
 260 | TATCAGGAGCTGCAAG-1
 261 | AAACCTGCAAGGACTG-1
 262 | GTATCTTGTCAAAGAT-1
 263 | TTAACTCGTTGTCTTT-1
 264 | GGTGTTACACACTGCG-1
 265 | CTTAACTCATCGATTG-1
 266 | GAGCAGATCGGAGGTA-1
 267 | TGACAACTCGGCTTGG-1
 268 | GATCGATCAAACCTAC-1
 269 | CTCTACGGTGATGTGG-1
 270 | ACTTTCATCAGTACGT-1
 271 | AGGGATGAGGTGACCA-1
 272 | TGCGTGGCAACCGCCA-1
 273 | ATGCGATCACGAAAGC-1
 274 | TGCCCTACAAAGAATC-1
 275 | CGAGCACGTGGTGTAG-1
 276 | ATTGGTGCATACTACG-1
 277 | CTAGAGTAGTCATGCT-1
 278 | CTACATTCAAAGGAAG-1
 279 | CACTCCATCAGATAAG-1
 280 | CTCCTAGGTCAACATC-1
 281 | GGCGACTGTCTGATTG-1
 282 | CCTCTGAGTCGCATCG-1
 283 | GGGTCTGAGACGCAAC-1
 284 | ACGCCAGCAGTGACAG-1
 285 | CTGCGGAAGTGGTAGC-1
 286 | GGTGCGTAGACCTTTG-1
 287 | CATGCCTCAATAGCAA-1
 288 | GAGTCCGCAGGGATTG-1
 289 | CTGATAGGTCGGCACT-1
 290 | TTTACTGGTCACTTCC-1
 291 | CTCTGGTAGCCGATTT-1
 292 | AAGCCGCGTCTCACCT-1
 293 | CGATCGGGTCGAATCT-1
 294 | GTTAAGCCAGCCTGTG-1
 295 | CCACTACTCTCACATT-1
 296 | TTTGCGCCAGCCAATT-1
 297 | CTCTAATTCGGCTTGG-1
 298 | CACAGTACAGGTCCAC-1
 299 | AGGGTGAAGAACAATC-1
 300 | AGTTGGTGTTGTTTGG-1
 301 | ACTGCTCTCACCTCGT-1
 302 | CCTACCAGTAGCACGA-1
 303 | TTCTCCTTCACTGGGC-1
 304 | TGGCGCATCTGGTGTA-1
 305 | GTTTCTAAGTGGTAGC-1
 306 | CTCGTACAGAATAGGG-1
 307 | CATATGGGTATAGGTA-1
 308 | TCTCATACAGTTCCCT-1
 309 | GATGAGGAGTGTACCT-1
 310 | CTAGAGTGTAGAGCTG-1
 311 | CTTAACTAGACTACAA-1
 312 | GTTCATTGTGTATGGG-1
 313 | AGTGTCATCACCCTCA-1
 314 | TGGCCAGTCAGCTCTC-1
 315 | CACCAGGTCACGCGGT-1
 316 | TACAGTGTCGTTGACA-1
 317 | CCATGTCGTAGAGGAA-1
 318 | CCTCTGATCTACTATC-1
 319 | CGTAGCGTCCATGAGT-1
 320 | GATGAAAGTTCGTCTC-1
 321 | GAAACTCCACGAGAGT-1
 322 | CTACGTCTCCTGCTTG-1
 323 | AGAGTGGGTACTTGAC-1
 324 | GCCAAATGTAGCTCCG-1
 325 | ACATCAGCAGCAGTTT-1
 326 | GCGCCAAAGTAGGTGC-1
 327 | CATCAAGCATCAGTCA-1
 328 | AGCATACAGTAGGTGC-1
 329 | CGGAGCTCACACCGAC-1
 330 | GGTATTGCATGGGAAC-1
 331 | CGTGTCTTCTACTCAT-1
 332 | TTGCGTCAGAGACTAT-1
 333 | GACTAACTCACTCCTG-1
 334 | GGCTGGTGTATGGTTC-1
 335 | CGCTTCAGTGTTTGGT-1
 336 | ATGAGGGGTAATCACC-1
 337 | ACGGAGAAGCTGAAAT-1
 338 | TAAGAGAGTCAGGACA-1
 339 | AAAGCAAGTGTGTGCC-1
 340 | ACATCAGGTCGACTAT-1
 341 | TACCTATGTTCCGGCA-1
 342 | AAGGAGCCATATGAGA-1
 343 | TCACGAAGTTTGTGTG-1
 344 | CATGACACAATGACCT-1
 345 | CTACCCATCTTGCCGT-1
 346 | ACTGCTCCACGAAATA-1
 347 | CGAGCCAGTTCAGCGC-1
 348 | GGACGTCCAATGTAAG-1
 349 | GGACAGAGTTCGAATC-1
 350 | TCATTACTCTCTAAGG-1
 351 | CATTATCGTCATATCG-1
 352 | GGGCATCTCTCAAACG-1
 353 | AACGTTGAGCTTTGGT-1
 354 | ATCCGAAAGTGTGGCA-1
 355 | CGGCTAGTCGAGAGCA-1
 356 | GAATAAGGTTACGTCA-1
 357 | TAGAGCTCACTTGGAT-1
 358 | AGAGCTTGTCGAGATG-1
 359 | GCAGCCAAGAAGCCCA-1
 360 | CGTTCTGGTATAGTAG-1
 361 | CACAAACTCTGCAAGT-1
 362 | CCCAGTTTCACATGCA-1
 363 | GGTGAAGAGCTACCTA-1
 364 | CGCTGGAGTTCGTCTC-1
 365 | ACATGGTCATGCCCGA-1
 366 | GCATACATCCAAACTG-1
 367 | GAGCAGACAAGCCCAC-1
 368 | TCGCGTTTCGGATGTT-1
 369 | CGCTGGATCTCTGTCG-1
 370 | CGAGCCAAGATGCCTT-1
 371 | GTTCGGGAGGCAAAGA-1
 372 | GGACAAGAGTCCATAC-1
 373 | CTCTGGTGTATCTGCA-1
 374 | CTGAAGTCACATCTTT-1
 375 | TTAGGCAAGATGTCGG-1
 376 | TACCTTAGTAGGCATG-1
 377 | AACGTTGTCCAGTATG-1
 378 | ACGATACGTATGGTTC-1
 379 | ATCCGAACAACACCCG-1
 380 | GACTAACTCGGCGGTT-1
 381 | TTAGGCAAGTCTCGGC-1
 382 | CCTTCCCTCGACAGCC-1
 383 | TACTCGCAGGATGTAT-1
 384 | TACTTACCATTAGCCA-1
 385 | CGCTATCGTTCACCTC-1
 386 | ATGCGATCAAGCGATG-1
 387 | CATTATCCAAGCCATT-1
 388 | TGAGGGAGTATTACCG-1
 389 | CTACGTCTCATCGCTC-1
 390 | GAGTCCGCACAGGCCT-1
 391 | TTCTCCTTCCGAATGT-1
 392 | CTGCGGAGTGGTCCGT-1
 393 | CCCAATCGTATCAGTC-1
 394 | CATATGGTCTACGAGT-1
 395 | CAGATCACAGCTGTAT-1
 396 | GGAAAGCAGGCGATAC-1
 397 | GCAGTTAGTTTGTGTG-1
 398 | TACGGTAGTCTAAAGA-1
 399 | TTGACTTCACTTAAGC-1
 400 | GGATTACCATCACGAT-1
 401 | TTGGCAAAGTCTCCTC-1
 402 | AGTAGTCCACGCTTTC-1
 403 | GAAACTCAGCCAACAG-1
 404 | CAACCTCCAGCATGAG-1
 405 | TGAGCCGTCGACAGCC-1
 406 | GTGAAGGTCAGTTCGA-1
 407 | GAGGTGAGTCTCACCT-1
 408 | GCAGCCAGTGACGCCT-1
 409 | CGGACTGTCAGTTGAC-1
 410 | GTTCATTTCTTTCCTC-1
 411 | ATTCTACAGAAGATTC-1
 412 | CTCTACGGTCGAACAG-1
 413 | GCGAGAATCATCTGTT-1
 414 | GTAACGTGTAGCTCCG-1
 415 | CGTGTAAAGCCCGAAA-1
 416 | CTGCCTAGTATATGGA-1
 417 | CAGCTGGTCTATCCCG-1
 418 | CACTCCATCCGTAGTA-1
 419 | GATTCAGAGCAGGCTA-1
 420 | CGAACATCATGGTCTA-1
 421 | ATAAGAGAGATGGCGT-1
 422 | TGTGTTTCACGAAAGC-1
 423 | TACGGGCGTCAAACTC-1
 424 | GGCTGGTCAGCCTATA-1
 425 | GCTGGGTAGACTCGGA-1
 426 | TCACAAGTCGGTGTTA-1
 427 | ACACTGACATGTCGAT-1
 428 | GACGCGTTCACTTATC-1
 429 | GGACGTCTCAGTCCCT-1
 430 | TATTACCAGGCTAGCA-1
 431 | GCTGGGTTCTTATCTG-1
 432 | CGAATGTAGTCACGCC-1
 433 | TTGCCGTCAATGGAGC-1
 434 | AAGGAGCTCTTTAGTC-1
 435 | CTAAGACAGACGCACA-1
 436 | CGCCAAGCAATCGAAA-1
 437 | CTTAACTGTACCGTAT-1
 438 | GGAATAATCTTATCTG-1
 439 | GATCGATCATCTGGTA-1
 440 | GTGCGGTTCAGTTTGG-1
 441 | GCGCGATGTCTAGCGC-1
 442 | TACCTTAGTATGCTTG-1
 443 | AAGACCTAGAGGTACC-1
 444 | ACATACGTCTGGAGCC-1
 445 | TCATTTGGTTCTCATT-1
 446 | TGTCCCAAGTGGGCTA-1
 447 | CCCTCCTGTTACCGAT-1
 448 | CACACAACAACACCTA-1
 449 | TCAACGATCAGTCAGT-1
 450 | GGTGCGTAGTAGCCGA-1
 451 | TTAGTTCCATTAGGCT-1
 452 | CACATAGTCATAACCG-1
 453 | GCTCCTAAGAGACTTA-1
 454 | GTGAAGGCAGGGCATA-1
 455 | TATCTCATCTGGTATG-1
 456 | TTCGAAGGTCTACCTC-1
 457 | AGACGTTTCACCGTAA-1
 458 | TCAGATGTCCGTTGTC-1
 459 | TTTGTCATCATCTGTT-1
 460 | CACATAGCATTATCTC-1
 461 | CATGGCGGTCTAGCCG-1
 462 | TAGGCATTCAGGCGAA-1
 463 | TGCACCTAGCTAACTC-1
 464 | ACACCAATCCACGAAT-1
 465 | ATAACGCGTAAACCTC-1
 466 | TCACAAGGTACCGAGA-1
 467 | GAGTCCGTCTGTTGAG-1
 468 | TTCTACACAAAGCAAT-1
 469 | GTCACGGCAGCTATTG-1
 470 | GAGCAGAAGCCAGTAG-1
 471 | TAAGCGTGTCGAAAGC-1
 472 | ACGAGGAAGGATCGCA-1
 473 | TGGTTAGTCCGCAAGC-1
 474 | TGACAACGTGCATCTA-1
 475 | CCTCAGTCAATCTACG-1
 476 | AGTAGTCGTCGCATAT-1
 477 | GTACTCCCACAAGACG-1
 478 | TCAGATGTCGTTACGA-1
 479 | CGATGTAGTGGTGTAG-1
 480 | CCAGCGATCATTCACT-1
 481 | CAGAATCTCAGCAACT-1
 482 | ACTGAGTGTCAGAGGT-1
 483 | GCATGTAAGATGCCAG-1
 484 | GGCTCGACAGTAACGG-1
 485 | AAGGAGCCAGTCAGCC-1
 486 | GTGAAGGAGAATCTCC-1
 487 | ATTGGTGCATCGGTTA-1
 488 | TGCTACCAGTGAACAT-1
 489 | AACTGGTGTTGCGTTA-1
 490 | CGACCTTAGGGCTTGA-1
 491 | AAAGATGGTCCGTGAC-1
 492 | TTGAACGTCGGAGCAA-1
 493 | TGTGTTTAGTCTCAAC-1
 494 | GACCAATAGCCAGGAT-1
 495 | GTACGTAGTTTCCACC-1
 496 | CGACCTTCATTAGCCA-1
 497 | CACATTTAGAGTACAT-1
 498 | AGGTCCGTCCAAGCCG-1
 499 | ACAGCTAAGCCTCGTG-1
 500 | CAACTAGCATGGTCAT-1
 501 | CCTTCCCTCGTTACAG-1
 502 | CGCTATCTCACCAGGC-1
 503 | TGGACGCAGCGATCCC-1
 504 | CTAGTGACATCACAAC-1
 505 | AAAGATGTCTACGAGT-1
 506 | TCAGATGAGTTCGCGC-1
 507 | ACACCGGGTACCGTAT-1
 508 | GTGTTAGTCACAAACC-1
 509 | CCTCAGTCATCGTCGG-1
 510 | ACGTCAATCCTTGGTC-1
 511 | TTAGGACTCACAAACC-1
 512 | GTGCAGCGTTAAAGTG-1
 513 | GGCCGATGTTAAAGTG-1
 514 | TAGACCACATTGTGCA-1
 515 | CTCTACGCAGGGTTAG-1
 516 | TAAACCGAGATCTGCT-1
 517 | CAGAATCAGCCACCTG-1
 518 | CGACTTCCAAGGGTCA-1
 519 | AGAGTGGCATCCCATC-1
 520 | GACTGCGGTGATGCCC-1
 521 | GGTGAAGTCACCTTAT-1
 522 | GCTGCGAGTACATCCA-1
 523 | ACGCCAGCACCCAGTG-1
 524 | ATTTCTGCACCGAAAG-1
 525 | GTCACAAGTGTGGCTC-1
 526 | AGCGTCGTCAGAGCTT-1
 527 | GTTTCTAGTTGGTTTG-1
 528 | GGGAATGGTATTACCG-1
 529 | CCGGTAGTCAACGGCC-1
 530 | ACTATCTGTACCGTAT-1
 531 | AATCGGTAGATATGGT-1
 532 | GATCGTACACGTAAGG-1
 533 | ACACCGGCACTGAAGG-1
 534 | ATTATCCCATATGAGA-1
 535 | ATGGGAGAGGGCACTA-1
 536 | ACGGCCATCACGAAGG-1
 537 | CTTTGCGGTCGGCACT-1
 538 | CTAACTTCATCGGACC-1
 539 | GTCTTCGAGTTAGGTA-1
 540 | CGTGTAACACCGGAAA-1
 541 | AGATTGCCATCTACGA-1
 542 | GGACAAGGTGCCTGTG-1
 543 | GCTCTGTGTATCAGTC-1
 544 | GTCATTTAGTAGTGCG-1
 545 | AAGACCTAGATCTGAA-1
 546 | AGCATACGTATGAAAC-1
 547 | TCAGGATGTTGTCTTT-1
 548 | CTAGAGTAGTAGGTGC-1
 549 | GTCTTCGCAGCTCGAC-1
 550 | GTTCGGGTCCACGAAT-1
 551 | TGGCCAGAGTATCTCG-1
 552 | ACCCACTTCTTCAACT-1
 553 | GATGCTACAGACTCGC-1
 554 | TATTACCAGCCCAACC-1
 555 | CAAGTTGAGGTAAACT-1
 556 | GAAATGACAGGCAGTA-1
 557 | CTAGCCTGTACCCAAT-1
 558 | GCTGCGACATCCTAGA-1
 559 | AAGCCGCCAGGGTTAG-1
 560 | TGTTCCGCAGACGTAG-1
 561 | TGGACGCCAGCTGGCT-1
 562 | ACGGGTCAGTCCGGTC-1
 563 | CGTAGGCCAAGGGTCA-1
 564 | CAGATCAAGGGTGTTG-1
 565 | GAGCAGAGTGTGAAAT-1
 566 | AGATCTGAGTTGAGAT-1
 567 | GCGAGAACAATACGCT-1
 568 | AAAGATGGTACTTCTT-1
 569 | CAGCAGCCAAGCCGCT-1
 570 | GCAAACTCACCACCAG-1
 571 | TCAATCTCAAAGAATC-1
 572 | TACGGGCGTACGCACC-1
 573 | TCCACACGTCAAAGCG-1
 574 | TGTGTTTGTCTCCCTA-1
 575 | TACACGATCCTTGCCA-1
 576 | TGTCCCACATCCGCGA-1
 577 | ATTACTCCACTCAGGC-1
 578 | CCGGTAGCAAGTACCT-1
 579 | CTCAGAACATACTCTT-1
 580 | ACGATACGTGCGAAAC-1
 581 | TTTGCGCAGCCACGCT-1
 582 | ACACCGGGTGATAAAC-1
 583 | CAACTAGTCGGAAATA-1
 584 | CATATTCGTCTTCAAG-1
 585 | ACGGGTCTCGGTCTAA-1
 586 | GATCAGTAGCAGACTG-1
 587 | GGACAAGAGCTCAACT-1
 588 | TAGGCATAGCGTTCCG-1
 589 | CATGCCTTCCTGCAGG-1
 590 | CGGCTAGGTCTGATTG-1
 591 | TTGCGTCAGAGACTTA-1
 592 | CTTACCGAGGCTCTTA-1
 593 | TATCAGGAGCGTGAGT-1
 594 | TGGCGCATCTGATTCT-1
 595 | AGGGAGTTCTTGCAAG-1
 596 | CTTGGCTTCCAGATCA-1
 597 | TGCTACCAGGAGCGTT-1
 598 | ATTGGACGTGCGATAG-1
 599 | ACACTGAAGTTGTCGT-1
 600 | CACACTCCAGTCGTGC-1
 601 | AGCTTGAGTTAGATGA-1
 602 | GTAGTCACATGTCTCC-1
 603 | AAGGAGCGTGAGGCTA-1
 604 | GAGGTGAGTCAATACC-1
 605 | ACATACGGTTGATTCG-1
 606 | ATGGGAGAGTCAAGGC-1
 607 | GACTAACCATCTCGCT-1
 608 | TTCTCAAAGCGAGAAA-1
 609 | TGACTAGTCTGCGTAA-1
 610 | GTATCTTGTATAATGG-1
 611 | CATGACATCGGATGTT-1
 612 | CGTCTACGTTTCCACC-1
 613 | CGCTGGACATAGACTC-1
 614 | CTGAAGTCACTCGACG-1
 615 | GTCACGGCACCCATGG-1
 616 | GCATGCGCAAGCCTAT-1
 617 | TTGTAGGTCCTAGGGC-1
 618 | GGTGCGTTCCCTAATT-1
 619 | TGAGAGGGTTCGCGAC-1
 620 | CCGTGGATCAGTGTTG-1
 621 | ACCCACTTCGTGGGAA-1
 622 | AAGGCAGCAGGTCCAC-1
 623 | GGGCATCCACTAGTAC-1
 624 | CAAGGCCCAAACCCAT-1
 625 | GAGGTGACAATTCCTT-1
 626 | AGCTCCTTCCTTGACC-1
 627 | GATTCAGGTCCAGTGC-1
 628 | TCTGAGAGTCCGAATT-1
 629 | CACCTTGGTGTGGTTT-1
 630 | GACCTGGAGCACACAG-1
 631 | TCTTTCCCAGGTCCAC-1
 632 | TATCTCATCAACTCTT-1
 633 | ACGGAGATCACTTACT-1
 634 | CAGCTGGGTGCTCTTC-1
 635 | CTGCTGTCACCAGATT-1
 636 | CGTGTCTCACCAGGCT-1
 637 | GTACGTATCAACACCA-1
 638 | AACTCTTAGCTGAAAT-1
 639 | GATCTAGGTGCACTTA-1
 640 | TTCCCAGGTAAACGCG-1
 641 | GTCCTCACACAACGCC-1
 642 | TAGAGCTAGAGTGAGA-1
 643 | GCGACCACAAAGCAAT-1
 644 | TCTGGAACACAAGCCC-1
 645 | TCAGCAACAACCGCCA-1
 646 | TGCCCTAGTTGCGTTA-1
 647 | GGCGACTTCAGCGATT-1
 648 | CTCATTATCTCTAAGG-1
 649 | TGAGCCGAGTGAACAT-1
 650 | CGATGTATCATGTCTT-1
 651 | TTGTAGGAGTCCTCCT-1
 652 | TACTCATCACTGCCAG-1
 653 | CCGTGGAGTCACAAGG-1
 654 | CTACATTAGACAAGCC-1
 655 | ACGATGTAGGAGCGTT-1
 656 | AAAGATGTCCAAGCCG-1
 657 | TGGTTCCAGGCAAAGA-1
 658 | GCGCGATCACAACGTT-1
 659 | TCGCGAGAGAAACCGC-1
 660 | GATCGATCACATTAGC-1
 661 | CTACCCATCTTCAACT-1
 662 | TACTCGCTCTGGTATG-1
 663 | GCAAACTGTAGCACGA-1
 664 | TCGTAGAGTACCCAAT-1
 665 | GGCTCGAGTAAAGTCA-1
 666 | GGATGTTGTAGGCATG-1
 667 | CTCAGAACATTACCTT-1
 668 | CCTTTCTAGACTGGGT-1
 669 | ATGAGGGGTCATACTG-1
 670 | GTTCATTGTCGACTAT-1
 671 | GACGTTAGTCTCCATC-1
 672 | GGACATTTCGGCATCG-1
 673 | CGCGTTTAGTGTACGG-1
 674 | GGAGCAATCTGAGGGA-1
 675 | GGAATAACAGTCAGCC-1
 676 | GGTGCGTGTGGGTCAA-1
 677 | TTTGGTTGTCGTTGTA-1
 678 | ACACTGAGTGAACCTT-1
 679 | CTAACTTCAATCTACG-1
 680 | TCATTACAGATAGTCA-1
 681 | ACCTTTAGTTGGTAAA-1
 682 | ACCGTAAAGAAGAAGC-1
 683 | GCGACCATCCCTAACC-1
 684 | AAACGGGTCCTCAATT-1
 685 | TCGCGAGTCGATAGAA-1
 686 | GTAGTCAAGACCTTTG-1
 687 | TGCACCTGTCTTCGTC-1
 688 | CGGTTAACAGCATGAG-1
 689 | ACATACGTCATGCAAC-1
 690 | CGAGCCAGTCCGAGTC-1
 691 | ACTGTCCCATATGGTC-1
 692 | CGCTATCGTACAGTTC-1
 693 | GTAACTGCATCTCCCA-1
 694 | CTAATGGTCGGAATCT-1
 695 | TCATTACAGTGGTAAT-1
 696 | GTGTTAGGTCGCTTTC-1
 697 | CGGACTGAGAGCCCAA-1
 698 | CCTAGCTAGAGGGATA-1
 699 | CTTAACTTCGGAAACG-1
 700 | CGCGTTTGTCAGAATA-1
 701 | TACAGTGAGCCAGTAG-1
 702 | AGAGCTTGTCTAAAGA-1
 703 | CATATGGTCGTCTGCT-1
 704 | TGCCCATAGCATCATC-1
 705 | TGTGGTACACCAGGTC-1
 706 | CATATTCTCAACGAAA-1
 707 | TGGTTCCTCCCAACGG-1
 708 | GTAGTCACATTGCGGC-1
 709 | TAGACCACATGAAGTA-1
 710 | AACTGGTTCGTGGTCG-1
 711 | ACACCGGTCCCGACTT-1
 712 | ACGATACGTCTAGAGG-1
 713 | CTAGCCTCAGGCGATA-1
 714 | AAATGCCCAATCGGTT-1
 715 | AGAGTGGGTTCCCTTG-1
 716 | GTTTCTAAGCTGTTCA-1
 717 | CTGCTGTGTTCGGCAC-1
 718 | CATGGCGCAAGAGTCG-1
 719 | TGTATTCAGCCGATTT-1
 720 | AGCGGTCGTCTGCCAG-1
 721 | CAACCAATCTTAGAGC-1
 722 | GTTCATTCACCAGATT-1
 723 | GACCTGGTCTTACCGC-1
 724 | AACCATGCAAGGTGTG-1
 725 | CTACGTCCAGCTGCTG-1
 726 | CAGCAGCCATGTCGAT-1
 727 | CATTATCAGAGCTATA-1
 728 | GGTGTTAGTGCTTCTC-1
 729 | GCTCCTACATGTAGTC-1
 730 | GACGTTAAGTGGTAAT-1
 731 | ATTGGTGAGGATGTAT-1
 732 | CAGTCCTAGCACCGCT-1
 733 | CTCAGAACATAGGATA-1
 734 | CGTCACTAGCTTCGCG-1
 735 | CCTTCCCTCCCAAGTA-1
 736 | GACGGCTAGGGCTTGA-1
 737 | ACCTTTACACATGACT-1
 738 | TGTTCCGAGATCGGGT-1
 739 | AACCATGCACAACGCC-1
 740 | TGACAACTCTTAACCT-1
 741 | CACACTCTCCCTGACT-1
 742 | CTCCTAGCACCGTTGG-1
 743 | GATGCTACACGACTCG-1
 744 | CTGCGGACAGCCAATT-1
 745 | TGAAAGAAGACTACAA-1
 746 | GATGAAACAGCCTATA-1
 747 | TGTTCCGAGTTTCCTT-1
 748 | CAACTAGTCACGGTTA-1
 749 | GTGCGGTCATAGACTC-1
 750 | CATTATCTCGGACAAG-1
 751 | TGAGCATGTAAGTAGT-1
 752 | CACACCTGTTGGTAAA-1
 753 | TCAATCTTCACAAACC-1
 754 | ATGAGGGTCCCATTAT-1
 755 | GGTATTGGTCTGCGGT-1
 756 | CCTTACGTCTCAAGTG-1
 757 | CACAGTATCGAATCCA-1
 758 | CTGATAGAGGAGTTTA-1
 759 | CAGCAGCTCCTTTCGG-1
 760 | GGGATGACACGGTGTC-1
 761 | AACTTTCGTCACAAGG-1
 762 | GGCGACTAGCTACCTA-1
 763 | AAACGGGGTTGCGTTA-1
 764 | ATAACGCTCGGTTAAC-1
 765 | TTCTCAATCACGATGT-1
 766 | ATCCGAATCACCAGGC-1
 767 | GCGCGATTCTTTAGGG-1
 768 | TGGGAAGGTTTGACAC-1
 769 | TTGACTTTCAGGTAAA-1
 770 | ACGGCCAGTTCCTCCA-1
 771 | GGAGCAATCGCCATAA-1
 772 | GTCGTAAAGCACAGGT-1
 773 | GGTGAAGGTTTAGGAA-1
 774 | GCATGCGCATGTCGAT-1
 775 | CGGACTGCACCATGTA-1
 776 | GGAAAGCCAGGCGATA-1
 777 | CCTATTAGTCTCCATC-1
 778 | GGGTCTGGTCGCGTGT-1
 779 | CAGATCAGTGCACCAC-1
 780 | CTTACCGTCAACCATG-1
 781 | CTACCCACAGGGTTAG-1
 782 | AACGTTGAGACTGGGT-1
 783 | CCTATTAAGAGACTTA-1
 784 | CAACCTCAGAAACCTA-1
 785 | GATTCAGAGAACAATC-1
 786 | CCTTTCTTCAACACGT-1
 787 | CACAGTATCAAGGTAA-1
 788 | CTTAACTTCTGTTTGT-1
 789 | CACATAGCATAGAAAC-1
 790 | AGGTCCGAGAAACCGC-1
 791 | ACACCGGAGTTCCACA-1
 792 | TGGGAAGAGGTTCCTA-1
 793 | GCCTCTACACTAGTAC-1
 794 | CACAGGCCAAGCCGCT-1
 795 | GGATTACCAGACACTT-1
 796 | CTACACCTCGTGGTCG-1
 797 | CACAGTACACCTCGTT-1
 798 | GGATTACGTACAGTTC-1
 799 | GTGTTAGTCTACCAGA-1
 800 | ATTACTCCAATTCCTT-1
 801 | TTTGCGCGTCCGCTGA-1
 802 | AGATCTGCACGTGAGA-1
 803 | CCTAAAGCATACCATG-1
 804 | CCCAGTTCATCGATTG-1
 805 | TCGTACCGTACCCAAT-1
 806 | TCTGGAATCCATGAGT-1
 807 | CTCGTCATCCTTGCCA-1
 808 | GACCAATAGCCGTCGT-1
 809 | GCTTCCAGTGCCTTGG-1
 810 | TCGTACCCAATAGCGG-1
 811 | GAATGAATCATCTGTT-1
 812 | AGTGGGACATCGGACC-1
 813 | CAGCAGCGTTGAACTC-1
 814 | GGACGTCCATGCCCGA-1
 815 | AGCTCCTGTCAGAGGT-1
 816 | GAAACTCCATGTCTCC-1
 817 | CTCGGGATCTATGTGG-1
 818 | ATCCGAAGTCCGAATT-1
 819 | TACGGGCCAAGCCTAT-1
 820 | GCCAAATGTGCCTGCA-1
 821 | AGCTCTCGTATATCCG-1
 822 | AGGTCCGGTTGCGCAC-1
 823 | TTAGTTCGTACAGTTC-1
 824 | TTGACTTCAGTATCTG-1
 825 | ACGGAGAAGTTCCACA-1
 826 | TTAGGCAGTTCCACTC-1
 827 | CACAGGCGTTCCAACA-1
 828 | TTGCCGTCATAGTAAG-1
 829 | AAGTCTGAGTCGTACT-1
 830 | ACGAGGAGTGTGAATA-1
 831 | GTCATTTCAAGCTGGA-1
 832 | CTCTACGCAAGGTTCT-1
 833 | GATCGATTCGATAGAA-1
 834 | CCGTACTTCAAACCAC-1
 835 | ACTGAACAGGTTCCTA-1
 836 | GTTCATTCAAGTACCT-1
 837 | GTGCGGTGTAAATGAC-1
 838 | CCCAGTTGTGTGACCC-1
 839 | AGAGCTTAGTGCCATT-1
 840 | GTTCATTCAGTCGATT-1
 841 | TCAGGTAAGTACGCCC-1
 842 | TTAGTTCTCGAGGTAG-1
 843 | CATATGGCACACCGAC-1
 844 | TCTCTAACACTGAAGG-1
 845 | GCTCTGTGTAGCCTAT-1
 846 | CTCGAGGCATAGAAAC-1
 847 | TACTTACAGGCGCTCT-1
 848 | GGTGTTAAGGACTGGT-1
 849 | CTAACTTCAATAGAGT-1
 850 | AATCCAGTCTCGAGTA-1
 851 | ACTGTCCGTTAAGTAG-1
 852 | TCGTACCTCATACGGT-1
 853 | AGGGATGCAGACAGGT-1
 854 | CACTCCACACGTGAGA-1
 855 | TATCAGGTCTCACATT-1
 856 | TAGACCACAATGTAAG-1
 857 | AGTTGGTTCGTCCGTT-1
 858 | CTCGAAAAGCGGATCA-1
 859 | ATTGGTGTCGGACAAG-1
 860 | CTGAAACCACCGATAT-1
 861 | AAGACCTTCGTTGACA-1
 862 | GGGACCTCAGCGAACA-1
 863 | CACAAACGTAGTACCT-1
 864 | ATCTGCCTCGATGAGG-1
 865 | AGGGTGATCAACACCA-1
 866 | GTCATTTGTCCTCTTG-1
 867 | ATCCGAATCGAGCCCA-1
 868 | ACAGCTATCCGTAGGC-1
 869 | GGGAGATAGCTAGCCC-1
 870 | AGGTCATAGTGTTTGC-1
 871 | GGATTACTCATTTGGG-1
 872 | GCACTCTCAGGTTTCA-1
 873 | AAAGTAGCACGCCAGT-1
 874 | CCTATTATCTCATTCA-1
 875 | TTAGGACTCAGAAATG-1
 876 | ATCCACCGTTTGACTG-1
 877 | ATTGGACTCGAGAACG-1
 878 | GGGTTGCTCAGAAATG-1
 879 | GAATAAGTCCAATGGT-1
 880 | CTGTGCTAGGTGATAT-1
 881 | TCGGGACAGAACTCGG-1
 882 | AACGTTGTCAACGCTA-1
 883 | TTCTCCTAGGTAGCCA-1
 884 | ATTGGTGGTCTCGTTC-1
 885 | GGAATAATCACATGCA-1
 886 | CGAGAAGTCGATGAGG-1
 887 | CCACGGAGTGAACCTT-1
 888 | GAACCTATCTACCAGA-1
 889 | CTGTTTACATGCGCAC-1
 890 | GCCTCTAGTTACGTCA-1
 891 | GAAATGATCCTGCAGG-1
 892 | CGTGTAATCAATCACG-1
 893 | AAGGAGCCATACGCCG-1
 894 | TGCGCAGTCGCACTCT-1
 895 | ACAGCTACACGGCTAC-1
 896 | CCTAGCTGTCAGAAGC-1
 897 | TGAGCATTCATGCATG-1
 898 | TCTGGAACAAGGTTTC-1
 899 | CGGACGTAGGTCATCT-1
 900 | CTTACCGTCCACTCCA-1
 901 | TACACGACAGTATAAG-1
 902 | GTCAAGTAGCTAACAA-1
 903 | GTGCATATCTGTCAAG-1
 904 | AGCAGCCGTTATGCGT-1
 905 | ACTTGTTGTCTAAACC-1
 906 | CACATTTGTCTAGTGT-1
 907 | TGTATTCTCCGCGGTA-1
 908 | CCACCTACATCGGAAG-1
 909 | TCTTCGGTCCGCGTTT-1
 910 | ACCGTAATCATGTAGC-1
 911 | TGGACGCCACGAGAGT-1
 912 | TACCTTATCAAACAAG-1
 913 | AGCGTCGCATCTCCCA-1
 914 | AAGGCAGGTAAGAGGA-1
 915 | TGACTAGCATGAGCGA-1
 916 | AAGGCAGGTTGGTGGA-1
 917 | CCACGGATCGCCTGAG-1
 918 | TGGTTCCTCCCATTTA-1
 919 | ACAGCTACATCGGAAG-1
 920 | CTACCCACAGGCTCAC-1
 921 | GTCACAAGTCAATACC-1
 922 | CATCGGGAGATATGGT-1
 923 | GCGCCAAAGACTAGAT-1
 924 | CACAAACTCAGTTAGC-1
 925 | TACCTTAGTCTAGCCG-1
 926 | AATCCAGGTCTCATCC-1
 927 | CACAAACCAGGTGCCT-1
 928 | ATAAGAGAGCGTAGTG-1
 929 | TGCCAAATCCTTGGTC-1
 930 | GAATGAAAGTTCGCAT-1
 931 | CAGGTGCTCCATGCTC-1
 932 | GTGCGGTAGGTAGCTG-1
 933 | CGGGTCAGTATGGTTC-1
 934 | AAACCTGTCCAAAGTC-1
 935 | CATCAAGGTAAAGTCA-1
 936 | CGTAGGCGTTGGACCC-1
 937 | TCGGTAAAGCCGTCGT-1
 938 | GGATGTTCACCCTATC-1
 939 | CGTTAGAAGCACCGCT-1
 940 | CTTGGCTTCAATACCG-1
 941 | GCGAGAAAGAGACTAT-1
 942 | GCTTCCAGTGAGGGTT-1
 943 | GATGCTAGTCCAGTAT-1
 944 | TAGTGGTGTACCAGTT-1
 945 | CTTAACTAGGGAAACA-1
 946 | CACATAGTCTGCTGTC-1
 947 | GTTCTCGGTTCCGGCA-1
 948 | AAACCTGTCACTCTTA-1
 949 | CCACTACTCAACACTG-1
 950 | AGGGATGTCTGTGCAA-1
 951 | TATGCCCCAGAGCCAA-1
 952 | GGATTACAGGAGCGTT-1
 953 | CGTTGGGTCCAGATCA-1
 954 | GACGGCTGTCCGTGAC-1
 955 | ATCATCTTCTCAACTT-1
 956 | GACACGCGTACATCCA-1
 957 | TTCGGTCGTCCCTTGT-1
 958 | TTGCGTCTCCACGAAT-1
 959 | ACTGCTCGTTTAGGAA-1
 960 | TAGAGCTGTAAGTAGT-1
 961 | GCTTCCATCGGCTACG-1
 962 | AAAGCAAGTTAGGGTG-1
 963 | AACTTTCAGATGCCTT-1
 964 | TTCTCCTCATGGTTGT-1
 965 | CTAGTGAAGCCGGTAA-1
 966 | CATCAAGGTACCGAGA-1
 967 | ACAGCTAGTAGAGGAA-1
 968 | CTTACCGTCAAAGACA-1
 969 | ACGCCAGGTGTAACGG-1
 970 | CATTATCCAACTGCTA-1
 971 | TTCTCAATCGTGGTCG-1
 972 | TACACGAAGGGTCTCC-1
 973 | AGTGTCAGTACTCAAC-1
 974 | CGGACGTTCGCCAGCA-1
 975 | ACTTACTTCCAGTATG-1
 976 | GATCGTAAGATCCCAT-1
 977 | CACCAGGGTCTACCTC-1
 978 | AGTAGTCTCCTTTCTC-1
 979 | CAACCTCAGACACTAA-1
 980 | GTACTTTTCATTGCCC-1
 981 | GGGAGATAGATCCCGC-1
 982 | CACCTTGAGGCTAGAC-1
 983 | CATGGCGGTGATGCCC-1
 984 | CGTGAGCCATGCGCAC-1
 985 | TTCTCAAAGTACGCGA-1
 986 | AAGCCGCAGACCTAGG-1
 987 | ACGTCAAAGCATCATC-1
 988 | TGACAACTCTGGAGCC-1
 989 | GACACGCAGCCTCGTG-1
 990 | GCGCAACGTATTCTCT-1
 991 | TTAACTCGTGTTCTTT-1
 992 | CTACGTCAGTACATGA-1
 993 | GCCTCTAAGCAGCCTC-1
 994 | TACTCATAGTCGAGTG-1
 995 | ACGGGCTTCGGCGCTA-1
 996 | TCCACACTCATCTGTT-1
 997 | GTCATTTGTCCGAACC-1
 998 | CCTACACCATTAGCCA-1
 999 | GGAACTTGTACGCTGC-1
1000 | TTCTACACACTGTGTA-1
1001 | 


--------------------------------------------------------------------------------
/man/AggregatePeakCounts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_polyA.R
 3 | \name{AggregatePeakCounts}
 4 | \alias{AggregatePeakCounts}
 5 | \title{Aggregate multiple peak count outputs together}
 6 | \usage{
 7 | AggregatePeakCounts(
 8 |   peak.sites.file,
 9 |   count.dirs,
10 |   output.dir,
11 |   update.labels = TRUE,
12 |   exp.sep = "-",
13 |   exp.labels = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{peak.sites.file}{a file containing peak site coordinates}
18 | 
19 | \item{count.dirs}{a list of output directories from CountPeaks}
20 | 
21 | \item{output.dir}{output directory for aggregate count matrix}
22 | 
23 | \item{update.labels}{whether to append an experiment label to the cell barcode (default: TRUE)}
24 | 
25 | \item{exp.sep}{a character separating the cell barcode from experiment label (default: '-')}
26 | 
27 | \item{exp.labels}{optional labels to append to cell barcodes corresponding to count.dirs}
28 | }
29 | \value{
30 | NULL. Writes counts to file.
31 | }
32 | \description{
33 | Aggregate the output from multiple runs of CountPeaks together. By default, this function will
34 | update the cell barcodes in a format consistent with the CellRanger aggr program. That is, for
35 | n experiments to aggregate, cell barcodes will be appended with '-1', '-2',...,'-n'. For downstream 
36 | analysis, if using the PeakSeuratFromTransfer function, it is important to ensure that these match what is
37 | in the gene count matrix. The name of the expected separator character can be updated with the 'exp.sep'
38 | parameter and preferred labels can be specified with the 'exp.labels' parameter. The barcode updates can 
39 | be turned off by setting update.labels = FALSE if manual setting is preferred.
40 | }
41 | \examples{
42 | 
43 | library(Sierra)
44 | extdata_path <- system.file("extdata",package = "Sierra")
45 | reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
46 | junctions.file <- paste0(extdata_path,"/Vignette_example_TIP_sham_junctions.bed")
47 | bamfile <- c(paste0(extdata_path,"/Vignette_example_TIP_sham.bam"),
48 |             paste0(extdata_path,"/Vignette_example_TIP_mi.bam") )
49 | whitelist.bc.file <- c(paste0(extdata_path,"/example_TIP_sham_whitelist_barcodes.tsv"),
50 |                       paste0(extdata_path,"/example_TIP_MI_whitelist_barcodes.tsv"))
51 | 
52 | ### Peak calling
53 | peak.output.file <- c("Vignette_example_TIP_sham_peaks.txt",
54 |                      "Vignette_example_TIP_MI_peaks.txt")
55 | FindPeaks(output.file = peak.output.file[1],   # output filename
56 |          gtf.file = reference.file,           # gene model as a GTF file
57 |          bamfile = bamfile[1],                # BAM alignment filename.
58 |          junctions.file = junctions.file,     # BED filename of splice junctions existing in BAM file.
59 |          ncores = 1)                          # number of cores to use
60 | 
61 | 
62 | FindPeaks(output.file = peak.output.file[2],   # output filename
63 |          gtf.file = reference.file,           # gene model as a GTF file
64 |          bamfile = bamfile[2],                # BAM alignment filename.
65 |          junctions.file = junctions.file,     # BED filename of splice junctions existing in BAM file.
66 |          ncores = 1)
67 | 
68 | #### Peak merging
69 | peak.dataset.table = data.frame(Peak_file = peak.output.file,
70 |                                Identifier = c("TIP-example-Sham", "TIP-example-MI"),
71 |                                stringsAsFactors = FALSE)
72 | 
73 | peak.merge.output.file = "TIP_merged_peaks.txt"
74 | MergePeakCoordinates(peak.dataset.table, output.file = peak.merge.output.file, ncores = 1)
75 | 
76 | count.dirs <- c("example_TIP_sham_counts", "example_TIP_MI_counts")
77 | #sham data set
78 | CountPeaks(peak.sites.file = peak.merge.output.file,  gtf.file = reference.file,
79 |           bamfile = bamfile[1], whitelist.file = whitelist.bc.file[1],
80 |           output.dir = count.dirs[1],  countUMI = TRUE, ncores = 1)
81 |  # MI data set
82 |  CountPeaks(peak.sites.file = peak.merge.output.file,  gtf.file = reference.file,
83 |             bamfile = bamfile[2], whitelist.file = whitelist.bc.file[2],
84 |             output.dir = count.dirs[2],  countUMI = TRUE, ncores = 1)
85 |             
86 |  out.dir <- "example_TIP_aggregate"
87 |  AggregatePeakCounts(peak.sites.file = peak.merge.output.file, count.dirs = count.dirs,
88 |                    exp.labels = c("Sham", "MI"), output.dir = out.dir)      
89 |  
90 | }
91 | 


--------------------------------------------------------------------------------
/man/AnnotatePeaksFromGTF.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Annotate.R
 3 | \name{AnnotatePeaksFromGTF}
 4 | \alias{AnnotatePeaksFromGTF}
 5 | \title{Annotates a set of peak coordinates from a GTF}
 6 | \usage{
 7 | AnnotatePeaksFromGTF(
 8 |   peak.sites.file,
 9 |   gtf.file,
10 |   output.file,
11 |   genome = NULL,
12 |   invert_strand = FALSE,
13 |   annotationType = "any",
14 |   transcriptDetails = TRUE,
15 |   annotation_correction = TRUE,
16 |   pA_motif_max_position = 50,
17 |   AAA_motif_min_position = 10,
18 |   polystretch_length = 13,
19 |   max_mismatch = 1,
20 |   append.chr.peaks = TRUE,
21 |   check.chr = TRUE
22 | )
23 | }
24 | \arguments{
25 | \item{peak.sites.file}{a file of peak coordinates.}
26 | 
27 | \item{gtf.file}{GTF reference file.}
28 | 
29 | \item{output.file}{file to write the annotations to.}
30 | 
31 | \item{genome}{genome object. If NOT NULL then will perform pA motif analysis.}
32 | 
33 | \item{invert_strand}{Boolean to signify if strand of gr peaks should be inverted}
34 | 
35 | \item{annotationType}{can be assigned "any" or "within". Default is "any" which states that the peak with gr must overlap annotation feature (eg exon)}
36 | 
37 | \item{transcriptDetails}{Boolean. If false will only return gene name. If true will return internal transcript position feature (eg exon/intron)}
38 | 
39 | \item{annotation_correction}{Boolean. When multiple overlapping genes are identified will prioritise gene based on annotation. 3'UTR annotation trumps all other annotation.}
40 | 
41 | \item{pA_motif_max_position}{Any AAUAAA after this position are not considered (default 50nt)}
42 | 
43 | \item{AAA_motif_min_position}{Any polyA/polyT stretches before this position are not considered (default 10)}
44 | 
45 | \item{polystretch_length}{: the length of A or T to search for (default 13)}
46 | 
47 | \item{max_mismatch}{number of allowed mismatches for motif matching (default 1)}
48 | 
49 | \item{append.chr.peaks}{: When TRUE (default) appends the character "chr" on chromosome entry in peaks file.}
50 | 
51 | \item{check.chr}{if TRUE (default) and append.chr.peaks is also TRUE, check whether "chr" characters have already been added.}
52 | }
53 | \value{
54 | NULL. writes output to file
55 | }
56 | \description{
57 | Annotate a set of peak coordinates according to genomic features the coordinates fall on -
58 | 3'UTR, exon, intron and 5'UTR, and annotate proximity to motifs. Motifs include the
59 | canonical polyA motif, A-rich regions and T-rich regions.
60 | }
61 | \examples{
62 | 
63 | extdata_path <- system.file("extdata",package = "Sierra")
64 | peak.merge.output.file <- paste0(extdata_path, "/TIP_merged_peaks.txt")
65 | reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
66 | 
67 | 
68 |  genome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
69 | 
70 | 
71 |  AnnotatePeaksFromGTF(peak.sites.file = peak.merge.output.file, 
72 |                     gtf.file = reference.file, 
73 |                     output.file = "TIP_merged_peak_annotations.txt", 
74 |                     genome = genome)
75 | 
76 | 
77 | }
78 | 


--------------------------------------------------------------------------------
/man/BaseComposition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Annotate.R
 3 | \name{BaseComposition}
 4 | \alias{BaseComposition}
 5 | \title{Identify polyA motif and/or polyA stretches from provided genomic coordinates}
 6 | \usage{
 7 | BaseComposition(
 8 |   genome = NULL,
 9 |   chrom = NULL,
10 |   start = NULL,
11 |   stop = NULL,
12 |   strand = NULL,
13 |   coord = NULL,
14 |   offset = -50,
15 |   length = 250,
16 |   mismatch = 1,
17 |   AT_length = 13
18 | )
19 | }
20 | \arguments{
21 | \item{genome}{genome object of organism.}
22 | 
23 | \item{chrom}{chromosome}
24 | 
25 | \item{start}{Upstream start position}
26 | 
27 | \item{stop}{downstream end position}
28 | 
29 | \item{strand}{'+' or '-'. This will define the applied direction of offset}
30 | 
31 | \item{coord}{coordinates}
32 | 
33 | \item{offset}{The offset to apply, in the direction defined by strand (default -50)}
34 | 
35 | \item{length}{How many nucleotides of DNA sequence to return}
36 | 
37 | \item{mismatch}{: The max number of mismatches allowed in poly A/T stretch (default 1)}
38 | 
39 | \item{AT_length}{: length of A/T to search for within input sequence (default 13)}
40 | }
41 | \value{
42 | a dataframe with appended columns containing annotation
43 | 
44 | chrom <- 'chr16'
45 | start <- 49896378
46 | stop  <- 49911102
47 | strand <- '+'
48 | genome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
49 | 
50 | # Tmem126a intronic peak that coincides with a poly(A) rich region.
51 | chrom  <- 'chr7'
52 | start  <- 90451180
53 | stop   <- 90451380
54 | strand <- '-'
55 | output <-BaseComposition(genome=genome, chrom=chrom, start=start, stop=stop, strand=strand)
56 | 
57 | # Dync1h1 intronic peak that coincides with a long poly(T) rich region
58 | coord <- "chr12:110609400-110609800:1"
59 | output <-BaseComposition(genome=genome, coord=coord)
60 | 
61 | TO DO:
62 | * If peak falls at end of exon then need to obtain sequence from next exon. This
63 | would require passing exon junction information.
64 | }
65 | \description{
66 | chrom <- chr8
67 | }
68 | \details{
69 | start <- 70331172
70 | 
71 |       stop <- 70331574
72 | 
73 |       strand <- "+"
74 | 
75 | 
76 | You need to run the following code:
77 |      genome <-  GenomicFeatures::makeTxDbFromGFF(gtf_file, format="gtf")
78 | 
79 | annotationType can be c("any", "start", "end", "within", "equal"),
80 | }
81 | 


--------------------------------------------------------------------------------
/man/CountPeaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_polyA.R
 3 | \name{CountPeaks}
 4 | \alias{CountPeaks}
 5 | \title{Generate a peak x cell UMI count matrix}
 6 | \usage{
 7 | CountPeaks(
 8 |   peak.sites.file,
 9 |   gtf.file,
10 |   bamfile,
11 |   whitelist.file,
12 |   output.dir,
13 |   countUMI = TRUE,
14 |   ncores = 1,
15 |   chr.names = NULL,
16 |   filter.chr = FALSE,
17 |   gene.symbol.ref = "gene_name",
18 |   CBtag = "CB",
19 |   UMItag = "UB"
20 | )
21 | }
22 | \arguments{
23 | \item{peak.sites.file}{a file containing peak coordinates generated by FindPeaks}
24 | 
25 | \item{gtf.file}{reference (GTF) file}
26 | 
27 | \item{bamfile}{scRNA-seq BAM file}
28 | 
29 | \item{whitelist.file}{file of cell barcodes to count}
30 | 
31 | \item{output.dir}{name of directory to write output (will be created if it doesn't exist)}
32 | 
33 | \item{countUMI}{whether to count UMIs (default: TRUE)}
34 | 
35 | \item{ncores}{Number of cores for multithreading}
36 | 
37 | \item{chr.names}{names of chromosomes}
38 | 
39 | \item{filter.chr}{names of chromosomes to filter}
40 | 
41 | \item{gene.symbol.ref}{field in the GTF file containing the gene symbol}
42 | 
43 | \item{CBtag}{cell barcode tag identifier present in BAM file. Default 'CB'.}
44 | 
45 | \item{UMItag}{UMI barcode tag identifier present in BAM file. Default 'UB'.}
46 | }
47 | \value{
48 | NULL. Writes counts to file.
49 | }
50 | \description{
51 | Generates a UMI count matrix where rows are the peaks and columns are the cells. Counts cells 
52 | that are identified through a provided 'white list' of cell barcodes. If alignment done using CellRanger,
53 | this will be the barcodes.tsv file contained in the 'filtered_gene_matrices_mex' folder for example.
54 | }
55 | \examples{
56 | 
57 | extdata_path <- system.file("extdata",package = "Sierra")
58 | reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
59 | 
60 | bamfile <- c(paste0(extdata_path,"/Vignette_example_TIP_sham.bam"),
61 |              paste0(extdata_path,"/Vignette_example_TIP_mi.bam") )
62 |              
63 | whitelist.bc.file <- paste0(extdata_path,"/example_TIP_sham_whitelist_barcodes.tsv")
64 |   
65 | peak.merge.output.file = paste0(extdata_path, "/TIP_merged_peaks.txt")
66 |  
67 | \dontrun{                                
68 | CountPeaks(peak.sites.file = peak.merge.output.file, 
69 |              gtf.file = reference.file,
70 |              bamfile = bamfile[1], 
71 |              whitelist.file = whitelist.bc.file[1],
72 |              output.dir = count.dirs[1], 
73 |              countUMI = TRUE, 
74 |              ncores = 1)
75 |  }
76 | 
77 | 
78 | }
79 | 


--------------------------------------------------------------------------------
/man/DUTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{DUTest}
 4 | \alias{DUTest}
 5 | \title{Apply DEXSeq to detect differential peak usage}
 6 | \usage{
 7 | DUTest(
 8 |   peaks.object,
 9 |   population.1 = NULL,
10 |   population.2 = NULL,
11 |   exp.thresh = 0.1,
12 |   fc.thresh = 0.25,
13 |   adj.pval.thresh = 0.05,
14 |   num.splits = 6,
15 |   seed.use = 1,
16 |   feature.type = c("UTR3", "exon"),
17 |   replicates.1 = NULL,
18 |   replicates.2 = NULL,
19 |   include.annotations = FALSE,
20 |   filter.pA.stretch = FALSE,
21 |   verbose = TRUE,
22 |   do.MAPlot = FALSE,
23 |   return.dexseq.res = FALSE,
24 |   ncores = 1
25 | )
26 | }
27 | \arguments{
28 | \item{peaks.object}{Either a Seurat or SCE object of peaks}
29 | 
30 | \item{population.1}{a target population of cells (can be an ID/cluster label or a set of cell barcode IDs)}
31 | 
32 | \item{population.2}{comparison population of cells. If NULL (default), uses all non-population.1 cells}
33 | 
34 | \item{exp.thresh}{minimum percent expression threshold (for a population of cells) to include a peak}
35 | 
36 | \item{fc.thresh}{threshold for log2 fold-change difference for returned results}
37 | 
38 | \item{adj.pval.thresh}{threshold for adjusted P-value for returned results}
39 | 
40 | \item{num.splits}{the number of pseudo-bulk profiles to create per identity class (default: 6)}
41 | 
42 | \item{seed.use}{seed to set the randomised assignment of cells to pseudo-bulk profiles}
43 | 
44 | \item{feature.type}{genomic feature types to run analysis on (default: UTR3, exon)}
45 | 
46 | \item{replicates.1}{an optional list to define the cells used as replicates for population.1. 
47 | Will override anything set for the population.1 parameter.}
48 | 
49 | \item{replicates.2}{an optional list to define the cells used as replicates for population.2. 
50 | Will override anything set for the population.2 parameter.}
51 | 
52 | \item{include.annotations}{whether to include junction, polyA motif and stretch annotations in output (default: FALSE)}
53 | 
54 | \item{filter.pA.stretch}{whether to filter out peaks annotated as proximal to an A-rich region (default: FALSE)}
55 | 
56 | \item{verbose}{whether to print outputs (TRUE by default)}
57 | 
58 | \item{do.MAPlot}{make an MA plot of results (FALSE by default)}
59 | 
60 | \item{return.dexseq.res}{return the raw and unfiltered DEXSeq results object (FALSE by default)}
61 | 
62 | \item{ncores}{number of cores to run DEXSeq with}
63 | }
64 | \value{
65 | The results are returned as a DataFrame where each row corresponds to a peak coordinate. 
66 |  The default table contains the following columns:
67 |  gene_name, genomic_feature(s), population1_pct, population2_pct, pvalue, padj and Log2_fold_change.
68 |  genomic_feature(s) indicates the genomic feature type(s) that the peak overlaps. population1_pct and 
69 |  population2_pct indicate the percentage of cells expressing the peak in the target and comparison population
70 |  of cells, respectively. The pvalue, padj and Log2_fold_change values are derived from the results table 
71 |  returned by the DEXSeq::DEXSeqResults function.
72 | }
73 | \description{
74 | Apply DEXSeq to detect differential peak usage between select populations. Works by building
75 | a 'pseudo-bulk' profile of cell populations by aggregating counts from individual cells
76 | into a smaller number of profiles, defined by num.splits.
77 | }
78 | \examples{
79 | 
80 | 
81 | 
82 | extdata_path <- system.file("extdata",package = "Sierra")
83 | load(paste0(extdata_path,"/TIP_cell_info.RData"))
84 | \dontrun{
85 | peak.annotations <- read.table("TIP_merged_peak_annotations.txt", header = TRUE,sep = "\t",
86 |                                       row.names = 1,stringsAsFactors = FALSE)
87 | peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
88 |                              annot.info = peak.annotations, 
89 |                              cell.idents = tip.populations,
90 |                              tsne.coords = tip.tsne.coordinates,
91 |                              min.cells = 0, min.peaks = 0)
92 | 
93 | res.table = DUTest(peaks.seurat, population.1 = "F-SL", population.2 = "EC1",
94 |                          exp.thresh = 0.1,  feature.type = c("UTR3", "exon"))
95 | }
96 | 
97 | }
98 | 


--------------------------------------------------------------------------------
/man/DetectAEU.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{DetectAEU}
 4 | \alias{DetectAEU}
 5 | \title{Find alternative 3' end usage between two single-cell populations}
 6 | \usage{
 7 | DetectAEU(
 8 |   peaks.object,
 9 |   gtf_gr,
10 |   gtf_TxDb,
11 |   population.1,
12 |   population.2 = NULL,
13 |   exp.thresh = 0.1,
14 |   fc.thresh = 0.25,
15 |   adj.pval.thresh = 0.05,
16 |   num.splits = 6,
17 |   seed.use = 1,
18 |   verbose = TRUE,
19 |   do.MAPlot = FALSE,
20 |   ncores = 1
21 | )
22 | }
23 | \arguments{
24 | \item{peaks.object}{Either a Seurat or SCE object of peaks}
25 | 
26 | \item{gtf_gr}{GenomicRanges object from a GTF file}
27 | 
28 | \item{gtf_TxDb}{TxDb from gtf file}
29 | 
30 | \item{population.1}{a target population of cells (can be an ID/cluster label or a set of cell barcode IDs)}
31 | 
32 | \item{population.2}{comparison population of cells. If NULL (default), uses all non-population.1 cells}
33 | 
34 | \item{exp.thresh}{minimum percent expression threshold (for a population of cells) to include a peak}
35 | 
36 | \item{fc.thresh}{threshold for log2 fold-change difference for returned results}
37 | 
38 | \item{adj.pval.thresh}{threshold for adjusted P-value for returned results}
39 | 
40 | \item{num.splits}{the number of pseudo-bulk profiles to create per identity class (default: 6)}
41 | 
42 | \item{seed.use}{seed to set the randomised assignment of cells to pseudo-bulk profiles}
43 | 
44 | \item{verbose}{whether to print outputs (TRUE by default)}
45 | 
46 | \item{do.MAPlot}{make an MA plot of results (FALSE by default)}
47 | 
48 | \item{ncores}{Number of cores for multithreading}
49 | }
50 | \value{
51 | a data-frame of results.
52 | }
53 | \description{
54 | Wrapper function to DUTest for detecting differential 3' end use. First applies DUTest to
55 | test for differential usage between 3'UTRs. For DU 3'UTR peaks, evaluates whether the DU peaks
56 | fall in different 3'UTRs.
57 | }
58 | \examples{
59 | \dontrun{
60 |      DetectAEU(apa.seurat.object, population.1 = "1", population.2 = "2")
61 |  }
62 | }
63 | 


--------------------------------------------------------------------------------
/man/DetectUTRLengthShift.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{DetectUTRLengthShift}
 4 | \alias{DetectUTRLengthShift}
 5 | \title{Detect shifts in 3'UTR length usage between cell populations}
 6 | \usage{
 7 | DetectUTRLengthShift(
 8 |   peaks.object,
 9 |   gtf_gr,
10 |   gtf_TxDb,
11 |   population.1,
12 |   population.2 = NULL,
13 |   exp.thresh = 0.1,
14 |   fc.thresh = 0.25,
15 |   adj.pval.thresh = 0.05,
16 |   num.splits = 6,
17 |   seed.use = 1,
18 |   verbose = TRUE,
19 |   do.MAPlot = FALSE,
20 |   ncores = 1
21 | )
22 | }
23 | \arguments{
24 | \item{peaks.object}{Either a Seurat or SCE object of peaks}
25 | 
26 | \item{gtf_gr}{GenomicRanges object from a GTF file}
27 | 
28 | \item{gtf_TxDb}{TxDb from gtf file}
29 | 
30 | \item{population.1}{a target population of cells (can be an ID/cluster label or a set of cell barcode IDs)}
31 | 
32 | \item{population.2}{comparison population of cells. If NULL (default), uses all non-population.1 cells}
33 | 
34 | \item{exp.thresh}{minimum percent expression threshold (for a population of cells) to include a peak}
35 | 
36 | \item{fc.thresh}{threshold for log2 fold-change difference for returned results}
37 | 
38 | \item{adj.pval.thresh}{threshold for adjusted P-value for returned results}
39 | 
40 | \item{num.splits}{the number of pseudo-bulk profiles to create per identity class (default: 6)}
41 | 
42 | \item{seed.use}{seed to set the randomised assignment of cells to pseudo-bulk profiles}
43 | 
44 | \item{verbose}{whether to print outputs (TRUE by default)}
45 | 
46 | \item{do.MAPlot}{make an MA plot of results (FALSE by default)}
47 | 
48 | \item{ncores}{Number of cores for multithreading}
49 | }
50 | \value{
51 | a data-frame of results.
52 | }
53 | \description{
54 | Detect global shifts in 3'UTR length usage between defined cell populations.
55 | First applies the DUTest function to detect differential usage (DU) peaks on 3'UTRs, 
56 | after filtering out peaks annotated as proximal to A-rich regions. Identifies peaks
57 | on the same 3'UTR as each DU peak, and determines a position of the DU peak on the
58 | 3'UTR relative to the terminating exon. Returns a table of DU results, with the location 
59 | of each peak relative to the total number of peaks on the corresponding 3'UTR. Results 
60 | table can be input to the PlotUTRLengthShift function to visualise the results, 
61 | and evaluate global shifts.
62 | }
63 | 


--------------------------------------------------------------------------------
/man/FindPeaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_polyA.R
 3 | \name{FindPeaks}
 4 | \alias{FindPeaks}
 5 | \title{Perform splice-aware peak calling on a BAM file produced from a scRNA-seq experiment}
 6 | \usage{
 7 | FindPeaks(
 8 |   output.file,
 9 |   gtf.file,
10 |   bamfile,
11 |   junctions.file,
12 |   min.jcutoff = 50,
13 |   min.jcutoff.prop = 0.05,
14 |   min.cov.cutoff = 500,
15 |   min.cov.prop = 0.05,
16 |   min.peak.cutoff = 200,
17 |   min.peak.prop = 0.05,
18 |   ncores = 1,
19 |   chr.names = NULL,
20 |   filter.chr = FALSE,
21 |   gene.symbol.ref = "gene_name",
22 |   fit.method = "NLS"
23 | )
24 | }
25 | \arguments{
26 | \item{output.file}{a file containing polyA sites}
27 | 
28 | \item{gtf.file}{reference (GTF) file}
29 | 
30 | \item{bamfile}{scRNA-seq BAM file}
31 | 
32 | \item{junctions.file}{BED file (as produced by regtools) or SJ.out.tab file (STAR aligner) containing  splice junction coordinates}
33 | 
34 | \item{min.jcutoff}{minimum number of spliced reads across a junction for it to be considered (default: 50).}
35 | 
36 | \item{min.jcutoff.prop}{minimum proportion of junction reads out of all junction reads for that gene (default: 0.05)}
37 | 
38 | \item{min.cov.cutoff}{minimum number of reads to consider a peak (default: 500)}
39 | 
40 | \item{min.cov.prop}{minimum proportion of reads to consider a peak (default: 0.05)}
41 | 
42 | \item{min.peak.cutoff}{minimum peak height (default: 200)}
43 | 
44 | \item{min.peak.prop}{minimum ratio of current peak height relative to maximum peak height for this gene (default: 0.05)}
45 | 
46 | \item{ncores}{number of cores to use}
47 | 
48 | \item{chr.names}{names of chromosomes}
49 | 
50 | \item{filter.chr}{names of chromosomes to filter}
51 | 
52 | \item{gene.symbol.ref}{field in the GTF file containing the gene symbol}
53 | }
54 | \value{
55 | NULL. Writes counts to file.
56 | }
57 | \description{
58 | Takes as input a BAM file produced from barcoded scRNA-seq experiment, the reference (GTF) file used during alignment and
59 | a BED file of junctions produced by regtools. For each gene in the reference file, the peak calling process first splits 
60 | the read coverage into 'across junction' and 'no junction' subsets. Within each subset, the site of maximum coverage 
61 | is identified and a peak called, by fitting a Gaussian to the read coverage, from a 600bp window around this region.
62 | After calling a peak, the local read coverage is removed and the next site of maximum coverage is identified. This process 
63 | runs iteratively until at least one of two stopping criteria is reached. The first criterion is defined on the maximum 
64 | read coverage, using a minimum cutoff (min.cov.cutoff) and proportion (min.cov.prop). The second criterion is the size of the peak,
65 | including an absolute threshold (min.peak.cutoff) and a relative threshold (min.peak.prop).
66 | }
67 | \examples{
68 | 
69 | extdata_path <- system.file("extdata",package = "Sierra")
70 | reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
71 | junctions.file <- paste0(extdata_path,"/Vignette_example_TIP_sham_junctions.bed")
72 | 
73 | bamfile <- c(paste0(extdata_path,"/Vignette_example_TIP_sham.bam"),
74 |              paste0(extdata_path,"/Vignette_example_TIP_mi.bam") )
75 |              
76 | peak.output.file <- c("Vignette_example_TIP_sham_peaks.txt",  "Vignette_example_TIP_MI_peaks.txt")
77 | 
78 | 
79 | FindPeaks(output.file=peak.output.file[1], gtf.file = reference.file, 
80 |              bamfile=bamfile[1], junctions.file=junctions.file)
81 | 
82 | }
83 | 


--------------------------------------------------------------------------------
/man/GetExpressedPeaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{GetExpressedPeaks}
 4 | \alias{GetExpressedPeaks}
 5 | \title{Identify peaks expressed within a certain percentage of cells}
 6 | \usage{
 7 | GetExpressedPeaks(
 8 |   peaks.object,
 9 |   population.1,
10 |   population.2 = NULL,
11 |   threshold = 0.05
12 | )
13 | }
14 | \arguments{
15 | \item{peaks.object}{the peaks object, of either Seurat or SingleCellExperiment class.}
16 | 
17 | \item{population.1}{target cluster}
18 | 
19 | \item{population.2}{background cluster. If NULL (default) all non-target cells}
20 | 
21 | \item{threshold}{percentage threshold of detected (non-zero) expression for including a peak}
22 | }
23 | \value{
24 | an array of peak (or gene) names
25 | }
26 | \description{
27 | Selects peaks that are considered expressed above some provided criteria within a target or
28 | background cluster. Considers peaks expressed in some x\% of cells to be highly expressed. Returns the
29 | union of peaks identified from the target and background cluster
30 | }
31 | \examples{
32 | 
33 | \dontrun{
34 |     GetExpressedPeaks(seurat.object, "1")
35 |     GetExpressedPeaks(seurat.object, population.1 = "1", population.2 = "2")
36 |  }
37 | }
38 | 


--------------------------------------------------------------------------------
/man/GetRelativeExpression.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{GetRelativeExpression}
 4 | \alias{GetRelativeExpression}
 5 | \title{Calculate relative expression between two or more peaks}
 6 | \usage{
 7 | GetRelativeExpression(
 8 |   peaks.object,
 9 |   peak.set = NULL,
10 |   gene.name = NULL,
11 |   feature.type = c("UTR3", "exon"),
12 |   p.count = 1
13 | )
14 | }
15 | \arguments{
16 | \item{peaks.object}{Seurat object}
17 | 
18 | \item{peak.set}{set of peaks}
19 | 
20 | \item{gene.name}{gene name for retrieving a set of peaks}
21 | 
22 | \item{feature.type}{features to consider. 3'UTR and exon by default.}
23 | 
24 | \item{p.count}{Pseudo count}
25 | }
26 | \value{
27 | a matrix of relative expression
28 | }
29 | \description{
30 | Calculate a relative expression between two or more peaks by dividing
31 | the expression of each peak by the mean of the peak expression for that gene -
32 | or set of provided peaks
33 | }
34 | \examples{
35 | 
36 | ## Load example data for two peaks from the Cxcl12 gene
37 | extdata_path <- system.file("extdata",package = "Sierra")
38 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
39 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
40 | 
41 | ## Create a Seurat object holding the peak data
42 |                         
43 |  peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
44 |                         annot.info = peak.annotations, 
45 |                         cell.idents = tip.populations, 
46 |                         tsne.coords = tip.tsne.coordinates,
47 |                         min.cells = 0, min.peaks = 0)
48 |                         
49 | ## Calculate relative expression for the example peaks
50 | relative.exp <- GetRelativeExpression(peaks.object = peaks.seurat, 
51 |                  peak.set = c("Cxcl12:6:117174603-117175050:1", "Cxcl12:6:117180974-117181367:1"))
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/man/MergePeakCoordinates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset_merging.R
 3 | \name{MergePeakCoordinates}
 4 | \alias{MergePeakCoordinates}
 5 | \title{Merge peaks across a list of data-sets}
 6 | \usage{
 7 | MergePeakCoordinates(
 8 |   peak.dataset.table,
 9 |   output.file,
10 |   sim.thresh = 0.75,
11 |   allow.match.var = 0.25,
12 |   ncores = 1
13 | )
14 | }
15 | \arguments{
16 | \item{peak.dataset.table}{a dataframe with two required columns: one called "Peak_file", which
17 | contains file names of the peak data-sets to be merged, and one called "Identifier", which contains a label for each file.}
18 | 
19 | \item{output.file}{file to write the set of merged peaks to}
20 | 
21 | \item{sim.thresh}{The required similarity threshold for merging (default: 0.75)}
22 | 
23 | \item{allow.match.var}{The allowance for deviation from the sim.thresh for comparison peaks (default: 0.25)}
24 | 
25 | \item{ncores}{number of cores to use (default 1)}
26 | }
27 | \value{
28 | NULL. writes out a set of merged peaks to output.file
29 | }
30 | \description{
31 | Takes as input a list of named peaks obtained from running peak calling on multiple data-sets.
32 | First goes through each peak set and checks which peaks within each set should be merged (self-merging).
33 | Merging is based on similarity criteria set by sim.thresh and allow.match.var.
34 | Then compares each peak set as a reference to the remaining sets to identify peaks that should be merged.
35 | Returns a list of peaks that have been merged, as well as the unique peaks from each data-set.
36 | }
37 | \examples{
38 |      
39 |      
40 | library(Sierra)
41 | extdata_path <- system.file("extdata",package = "Sierra")
42 | reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
43 | junctions.file <- paste0(extdata_path,"/Vignette_example_TIP_sham_junctions.bed")
44 | bamfile <- c(paste0(extdata_path,"/Vignette_example_TIP_sham.bam"),
45 |             paste0(extdata_path,"/Vignette_example_TIP_mi.bam") )
46 | whitelist.bc.file <- c(paste0(extdata_path,"/example_TIP_sham_whitelist_barcodes.tsv"),
47 |                       paste0(extdata_path,"/example_TIP_MI_whitelist_barcodes.tsv"))
48 | 
49 | ### Peak calling
50 | peak.output.file <- c("Vignette_example_TIP_sham_peaks.txt",
51 |                      "Vignette_example_TIP_MI_peaks.txt")
52 | FindPeaks(output.file = peak.output.file[1],   # output filename
53 |          gtf.file = reference.file,           # gene model as a GTF file
54 |          bamfile = bamfile[1],                # BAM alignment filename.
55 |          junctions.file = junctions.file,     # BED filename of splice junctions existing in BAM file.
56 |          ncores = 1)                          # number of cores to use
57 | 
58 | 
59 | FindPeaks(output.file = peak.output.file[2],   # output filename
60 |          gtf.file = reference.file,           # gene model as a GTF file
61 |          bamfile = bamfile[2],                # BAM alignment filename.
62 |          junctions.file = junctions.file,     # BED filename of splice junctions existing in BAM file.
63 |          ncores = 1)
64 | 
65 | #### Peak merging
66 | peak.dataset.table = data.frame(Peak_file = peak.output.file,
67 |                                Identifier = c("TIP-example-Sham", "TIP-example-MI"),
68 |                                stringsAsFactors = FALSE)
69 | 
70 | peak.merge.output.file = "TIP_merged_peaks.txt"
71 | MergePeakCoordinates(peak.dataset.table, output.file = peak.merge.output.file, ncores = 1)
72 |      
73 |      
74 |      
75 |  
76 | }
77 | 


--------------------------------------------------------------------------------
/man/NewPeakSCE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_util.R
 3 | \name{NewPeakSCE}
 4 | \alias{NewPeakSCE}
 5 | \title{Create a new peak-counts single-cell experiment object from the peak counts}
 6 | \usage{
 7 | NewPeakSCE(
 8 |   peak.data,
 9 |   annot.info,
10 |   cell.idents = NULL,
11 |   tsne.coords = NULL,
12 |   umap.coords = NULL,
13 |   min.cells = 10,
14 |   min.peaks = 200,
15 |   norm.scale.factor = 10000,
16 |   filter.gene.mismatch = TRUE,
17 |   verbose = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{peak.data}{matrix of peak counts}
22 | 
23 | \item{annot.info}{peak annotation information}
24 | 
25 | \item{cell.idents}{named list of cell identities to be used for DU analysis}
26 | 
27 | \item{tsne.coords}{data-frame of t-SNE coordinates. Rownames should correspond to cell names.}
28 | 
29 | \item{umap.coords}{data-frame of UMAP coordinates. Rownames should correspond to cell names.}
30 | 
31 | \item{min.cells}{minimum number of cells for retaining a peak}
32 | 
33 | \item{min.peaks}{minimum number of peaks for retaining a cell}
34 | 
35 | \item{norm.scale.factor}{scale factor for log normalisation  function}
36 | 
37 | \item{filter.gene.mismatch}{whether to filter out peaks with ambiguous gene mappings}
38 | 
39 | \item{verbose}{whether to print output}
40 | }
41 | \value{
42 | a new peak-level SCE object
43 | }
44 | \description{
45 | Creates a new peak-counts single-cell experiment object from the peak counts and annotation table
46 | }
47 | \examples{
48 | 
49 | 
50 |  ## Load example data for two peaks from the Cxcl12 gene
51 | extdata_path <- system.file("extdata",package = "Sierra")
52 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
53 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
54 | 
55 | ## Create an SCE object holding the peak data
56 | peaks.sce <- NewPeakSCE(peak.data = peak.counts, 
57 |                         annot.info = peak.annotations, 
58 |                         cell.idents = tip.populations, 
59 |                         tsne.coords = tip.tsne.coordinates,
60 |                         min.cells = 0, min.peaks = 0)
61 | }
62 | 


--------------------------------------------------------------------------------
/man/NewPeakSeurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_util.R
 3 | \name{NewPeakSeurat}
 4 | \alias{NewPeakSeurat}
 5 | \title{Create a new peak-level Seurat object from the peak counts}
 6 | \usage{
 7 | NewPeakSeurat(
 8 |   peak.data,
 9 |   annot.info,
10 |   project.name = "PolyA",
11 |   cell.idents = NULL,
12 |   tsne.coords = NULL,
13 |   umap.coords = NULL,
14 |   min.cells = 10,
15 |   min.peaks = 200,
16 |   norm.scale.factor = 10000,
17 |   filter.gene.mismatch = TRUE,
18 |   verbose = TRUE
19 | )
20 | }
21 | \arguments{
22 | \item{peak.data}{matrix of peak counts}
23 | 
24 | \item{annot.info}{peak annotation information}
25 | 
26 | \item{project.name}{project name passed to the Seurat object creation}
27 | 
28 | \item{cell.idents}{a list of cell identities (optional)}
29 | 
30 | \item{tsne.coords}{a data-frame of t-SNE coordinates (optional)}
31 | 
32 | \item{umap.coords}{a data-frame of UMAP coordinates (optional)}
33 | 
34 | \item{min.cells}{minimum number of cells for retaining a peak}
35 | 
36 | \item{min.peaks}{minimum number of peaks for retaining a cell}
37 | 
38 | \item{norm.scale.factor}{scale factor for Seurat NormalizeData function}
39 | 
40 | \item{filter.gene.mismatch}{whether to filter out peaks with ambiguous gene mappings}
41 | 
42 | \item{verbose}{whether to print output}
43 | }
44 | \value{
45 | a new peak-level Seurat object
46 | }
47 | \description{
48 | Creates a new peak-level Seurat object from the peak counts and annotation table
49 | }
50 | \examples{
51 |                              
52 | ## Load example data for two peaks from the Cxcl12 gene
53 | extdata_path <- system.file("extdata",package = "Sierra")
54 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
55 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
56 | 
57 | ## Create a Seurat object holding the peak data
58 | peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
59 |                         annot.info = peak.annotations, 
60 |                         cell.idents = tip.populations, 
61 |                         tsne.coords = tip.tsne.coordinates,
62 |                         min.cells = 0, min.peaks = 0)
63 |                              
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/man/PeakSeuratFromTransfer.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_util.R
 3 | \name{PeakSeuratFromTransfer}
 4 | \alias{PeakSeuratFromTransfer}
 5 | \title{Create a peak count Seurat object using a gene-level object}
 6 | \usage{
 7 | PeakSeuratFromTransfer(
 8 |   peak.data,
 9 |   genes.seurat,
10 |   annot.info,
11 |   project.name = "PolyA",
12 |   min.cells = 10,
13 |   min.peaks = 200,
14 |   norm.scale.factor = 10000,
15 |   filter.gene.mismatch = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{peak.data}{matrix of peak counts}
20 | 
21 | \item{genes.seurat}{a Seurat object}
22 | 
23 | \item{annot.info}{peak annotation information}
24 | 
25 | \item{project.name}{project name passed to the Seurat object creation}
26 | 
27 | \item{min.cells}{minimum number of cells for retaining a peak}
28 | 
29 | \item{min.peaks}{minimum number of peaks for retaining a cell}
30 | 
31 | \item{norm.scale.factor}{scale factor for Seurat NormalizeData function}
32 | 
33 | \item{filter.gene.mismatch}{whether to filter out peaks with ambiguous gene mappings}
34 | }
35 | \value{
36 | a new peak-level Seurat object
37 | }
38 | \description{
39 | Creates a new peak Seurat object, importing information on clustering and dimensionality reduction,
40 | such as t-SNE and UMAP coordinates, from a Seurat object that has been processed at the gene level.
41 | }
42 | \examples{
43 |      
44 | ## Load example data for two peaks from the Cxcl12 gene
45 | extdata_path <- system.file("extdata",package = "Sierra")
46 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
47 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
48 | 
49 | ## Create a Seurat object holding the peak data
50 | peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
51 |                         annot.info = peak.annotations, 
52 |                         cell.idents = tip.populations, 
53 |                         tsne.coords = tip.tsne.coordinates,
54 |                         min.cells = 0, min.peaks = 0)
55 |                         
56 | ##                         
57 | peaks.seurat.transfer <- PeakSeuratFromTransfer(peak.data = peak.counts, 
58 |                         genes.seurat = peaks.seurat, 
59 |                         annot.info = peak.annotations)
60 |      
61 | 
62 | }
63 | 


--------------------------------------------------------------------------------
/man/PlotCoverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{PlotCoverage}
 4 | \alias{PlotCoverage}
 5 | \title{PlotCoverage}
 6 | \usage{
 7 | PlotCoverage(
 8 |   genome_gr,
 9 |   geneSymbol = "",
10 |   wig_data = NULL,
11 |   bamfiles = NULL,
12 |   peaks.annot = NULL,
13 |   label.transcripts = FALSE,
14 |   wig_same_strand = TRUE,
15 |   genome = NULL,
16 |   pdf_output = FALSE,
17 |   wig_data.tracknames = NULL,
18 |   bamfile.tracknames = NULL,
19 |   output_file_name = "",
20 |   zoom_3UTR = FALSE,
21 |   annotation.fontsize = NULL,
22 |   axis.fontsize = NULL,
23 |   ylims = NULL
24 | )
25 | }
26 | \arguments{
27 | \item{genome_gr}{: genome granges object}
28 | 
29 | \item{geneSymbol}{: Name of gene symbol}
30 | 
31 | \item{wig_data}{can be a data frame or a genomic ranges object. Must be stranded.}
32 | 
33 | \item{bamfiles}{: BAM filenames that are to be displayed as data tracks}
34 | 
35 | \item{peaks.annot}{an optionally named vector of peaks to annotate on the plot.}
36 | 
37 | \item{label.transcripts}{if set to TRUE, adds transcript identifiers to the gene model}
38 | 
39 | \item{wig_same_strand}{Display same strand or opposing strand of wig data (compared to reference gene)}
40 | 
41 | \item{genome}{: genome object}
42 | 
43 | \item{pdf_output}{: If true will create output pdf files}
44 | 
45 | \item{wig_data.tracknames}{: WIG track display names. Assumed to be in same order as wig_data.}
46 | 
47 | \item{bamfile.tracknames}{: BAM track display names. Assumed to be in same order as bamfiles.}
48 | 
49 | \item{output_file_name}{: Used if pdf_output is true. Location of where files will be placed.}
50 | 
51 | \item{zoom_3UTR}{: If TRUE will create a second figure which will zoom in on 3'UTR.}
52 | 
53 | \item{annotation.fontsize}{font size for optional peak and transcript annotations}
54 | 
55 | \item{axis.fontsize}{font size for the axis labels}
56 | 
57 | \item{ylims}{manually set the y-axis scale}
58 | }
59 | \value{
60 | NULL by default.
61 | }
62 | \description{
63 | Plots read coverage across a gene for a set of BAM files and/or wig data.
64 | }
65 | \examples{
66 | 
67 | extdata_path <- system.file("extdata",package = "Sierra")
68 | reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
69 | gtf_gr <- rtracklayer::import(reference.file)
70 | bam.files <- c(paste0(extdata_path,"/Vignette_example_TIP_mi.bam"),
71 |                  paste0(extdata_path,"/Vignette_example_TIP_sham.bam"))
72 | 
73 | 
74 | PlotCoverage(genome_gr = gtf_gr, geneSymbol = "Lrrc58", genome = "mm10", 
75 |            bamfiles = bam.files, bamfile.tracknames=c("MI", "sham"))
76 |            
77 | ## Alternatively, plot with annotated peaks
78 | peaks.annot <- c("Lrrc58:16:37888444-37888858:1", "Lrrc58:16:37883336-37883588:1")
79 | names(peaks.annot) <- c("Peak 1", "Peak 2")
80 | 
81 | PlotCoverage(genome_gr = gtf_gr, geneSymbol = "Lrrc58", genome = "mm10", 
82 |           peaks.annot = peaks.annot, bamfiles = bam.files, 
83 |           bamfile.tracknames=c("MI", "sham"))
84 | 
85 | }
86 | 


--------------------------------------------------------------------------------
/man/PlotRelativeExpressionBox.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{PlotRelativeExpressionBox}
 4 | \alias{PlotRelativeExpressionBox}
 5 | \title{Generate a box plot using relative expression}
 6 | \usage{
 7 | PlotRelativeExpressionBox(
 8 |   peaks.object,
 9 |   peaks.to.plot,
10 |   do.plot = FALSE,
11 |   figure.title = NULL,
12 |   return.plot = TRUE,
13 |   pt.size = 0.5,
14 |   col.set = NULL,
15 |   txt.size = 14,
16 |   p.count = 1
17 | )
18 | }
19 | \arguments{
20 | \item{peaks.object}{Peak object of either Seurat or SCE class}
21 | 
22 | \item{peaks.to.plot}{Set of peaks to plot}
23 | 
24 | \item{do.plot}{Whether to plot to output (FALSE by default)}
25 | 
26 | \item{figure.title}{Optional figure title}
27 | 
28 | \item{return.plot}{Boolean (default True) identifying if plot should be returned.}
29 | 
30 | \item{pt.size}{Size of the points on the plot (default 0.5)}
31 | 
32 | \item{col.set}{col set (default NULL)}
33 | 
34 | \item{txt.size}{size of text (default 14)}
35 | 
36 | \item{p.count}{Pseudo count}
37 | }
38 | \value{
39 | a ggplot2 object
40 | }
41 | \description{
42 | Given two or more peaks to plot, calculate a relative expression score and
43 | generate a box plot according to cell identities
44 | }
45 | \examples{
46 | 
47 | ## Load example data for two peaks from the Cxcl12 gene
48 | extdata_path <- system.file("extdata",package = "Sierra")
49 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
50 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
51 | 
52 | ## Create an SCE object holding the peak data
53 | peaks.sce <- NewPeakSCE(peak.data = peak.counts, 
54 |                         annot.info = peak.annotations, 
55 |                         cell.idents = tip.populations, 
56 |                         tsne.coords = tip.tsne.coordinates,
57 |                         min.cells = 0, min.peaks = 0)
58 |                         
59 | ## Plot relative expression of example peaks as a box plot
60 | PlotRelativeExpressionBox(peaks.object = peaks.sce, 
61 |       peaks.to.plot = c("Cxcl12:6:117174603-117175050:1", "Cxcl12:6:117180974-117181367:1"))
62 | 
63 | 
64 | }
65 | 


--------------------------------------------------------------------------------
/man/PlotRelativeExpressionTSNE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{PlotRelativeExpressionTSNE}
 4 | \alias{PlotRelativeExpressionTSNE}
 5 | \title{Generate a t-SNE plot using relative expression}
 6 | \usage{
 7 | PlotRelativeExpressionTSNE(
 8 |   peaks.object,
 9 |   peaks.to.plot,
10 |   do.plot = FALSE,
11 |   figure.title = NULL,
12 |   return.plot = TRUE,
13 |   pt.size = 0.5,
14 |   txt.size = 14,
15 |   legend.position = "right",
16 |   use.facet = TRUE,
17 |   p.count = 1
18 | )
19 | }
20 | \arguments{
21 | \item{peaks.object}{peak object either Seurat or SingleCellExperiment class}
22 | 
23 | \item{peaks.to.plot}{Set of peaks to plot}
24 | 
25 | \item{do.plot}{Whether to plot to output (FALSE by default)}
26 | 
27 | \item{figure.title}{Optional figure title}
28 | 
29 | \item{return.plot}{Boolean of whether to return plot. Default is TRUE.}
30 | 
31 | \item{pt.size}{Size of the points on the t-SNE plot. Default 0.5}
32 | 
33 | \item{txt.size}{Size of text. Default 14}
34 | 
35 | \item{legend.position}{position of the legend (right, left, bottom or top)}
36 | 
37 | \item{use.facet}{Whether to plot peaks using ggplot facets. If set to FALSE will use cowplot to plot each peak}
38 | 
39 | \item{p.count}{Pseudo-count}
40 | }
41 | \value{
42 | a ggplot2 object
43 | }
44 | \description{
45 | Given two or more peaks to plot, calculate a relative expression score and
46 | plot on t-SNE coordinates
47 | }
48 | \examples{
49 | 
50 | ## Load example data for two peaks from the Cxcl12 gene
51 | extdata_path <- system.file("extdata",package = "Sierra")
52 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
53 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
54 | 
55 | ## Create an SCE object holding the peak data
56 | peaks.sce <- NewPeakSCE(peak.data = peak.counts, 
57 |                         annot.info = peak.annotations, 
58 |                         cell.idents = tip.populations, 
59 |                         tsne.coords = tip.tsne.coordinates,
60 |                         min.cells = 0, min.peaks = 0)
61 |                         
62 | ## Plot relative expression of example peaks on t-SNE coordinates
63 | PlotRelativeExpressionTSNE(peaks.object = peaks.sce, 
64 |       peaks.to.plot = c("Cxcl12:6:117174603-117175050:1", "Cxcl12:6:117180974-117181367:1"))
65 |  
66 | }
67 | 


--------------------------------------------------------------------------------
/man/PlotRelativeExpressionUMAP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{PlotRelativeExpressionUMAP}
 4 | \alias{PlotRelativeExpressionUMAP}
 5 | \title{Generate a UMAP plot using relative expression}
 6 | \usage{
 7 | PlotRelativeExpressionUMAP(
 8 |   peaks.object,
 9 |   peaks.to.plot,
10 |   do.plot = FALSE,
11 |   figure.title = NULL,
12 |   return.plot = TRUE,
13 |   pt.size = 0.5,
14 |   txt.size = 14,
15 |   legend.position = "right",
16 |   use.facet = TRUE,
17 |   p.count = 1
18 | )
19 | }
20 | \arguments{
21 | \item{peaks.object}{peak object either Seurat or SingleCellExperiment class}
22 | 
23 | \item{peaks.to.plot}{Set of peaks to plot}
24 | 
25 | \item{do.plot}{Whether to plot to output (FALSE by default)}
26 | 
27 | \item{figure.title}{Optional figure title}
28 | 
29 | \item{return.plot}{Boolean of whether to return plot (default TRUE)}
30 | 
31 | \item{pt.size}{size of the points on the UMAP plot. Default 0.5}
32 | 
33 | \item{txt.size}{size of text. Default 14}
34 | 
35 | \item{legend.position}{position of the legend (right, left, bottom or top)}
36 | 
37 | \item{use.facet}{Whether to plot peaks using ggplot facets. If set to FALSE will use cowplot to plot each peak}
38 | 
39 | \item{p.count}{Pseudo count}
40 | }
41 | \value{
42 | a ggplot2 object
43 | }
44 | \description{
45 | Given two or more peaks to plot, calculate a relative expression score and
46 | plot on UMAP coordinates
47 | }
48 | \examples{
49 | 
50 | ## Load example data for two peaks from the Cxcl12 gene
51 | extdata_path <- system.file("extdata",package = "Sierra")
52 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
53 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
54 | 
55 | ## Create an SCE object holding the peak data
56 | ## Note, for this example we are recycling t-SNE coordinates to demonstrate running of the function
57 | peaks.sce <- NewPeakSCE(peak.data = peak.counts, 
58 |                         annot.info = peak.annotations, 
59 |                         cell.idents = tip.populations, 
60 |                         umap.coords = tip.tsne.coordinates,
61 |                         min.cells = 0, min.peaks = 0)
62 |                         
63 | ## Plot relative expression of example peaks on UMAP coordinates
64 | PlotRelativeExpressionUMAP(peaks.object = peaks.sce, 
65 |       peaks.to.plot = c("Cxcl12:6:117174603-117175050:1", "Cxcl12:6:117180974-117181367:1"))
66 | 
67 | }
68 | 


--------------------------------------------------------------------------------
/man/PlotRelativeExpressionViolin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{PlotRelativeExpressionViolin}
 4 | \alias{PlotRelativeExpressionViolin}
 5 | \title{Generate a violin plot using relative expression}
 6 | \usage{
 7 | PlotRelativeExpressionViolin(
 8 |   peaks.object,
 9 |   peaks.to.plot,
10 |   do.plot = FALSE,
11 |   figure.title = NULL,
12 |   return.plot = TRUE,
13 |   pt.size = 0.5,
14 |   col.set = NULL,
15 |   txt.size = 14,
16 |   add.jitter = TRUE,
17 |   jitter.pt.size = 0.25,
18 |   p.count = 1
19 | )
20 | }
21 | \arguments{
22 | \item{peaks.object}{Peak object of either Seurat or SCE class}
23 | 
24 | \item{peaks.to.plot}{Set of peaks to plot}
25 | 
26 | \item{do.plot}{Whether to plot to output (FALSE by default)}
27 | 
28 | \item{figure.title}{Optional figure title}
29 | 
30 | \item{return.plot}{Boolean of whether to return plot (default TRUE)}
31 | 
32 | \item{pt.size}{size of the points on the plot}
33 | 
34 | \item{col.set}{default NULL}
35 | 
36 | \item{txt.size}{size of text. Default 14}
37 | 
38 | \item{add.jitter}{whether to add a geom_jitter to the plot (default: TRUE)}
39 | 
40 | \item{jitter.pt.size}{size of point for geom_jitter (default = 0.25)}
41 | 
42 | \item{p.count}{Pseudo count}
43 | }
44 | \value{
45 | a ggplot2 object
46 | }
47 | \description{
48 | Given two or more peaks to plot, calculate a relative expression score and
49 | generate a violin plot according to cell identities
50 | }
51 | \examples{
52 | 
53 | ## Load example data for two peaks from the Cxcl12 gene
54 | extdata_path <- system.file("extdata",package = "Sierra")
55 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
56 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
57 | 
58 | ## Create an SCE object holding the peak data
59 | peaks.sce <- NewPeakSCE(peak.data = peak.counts, 
60 |                         annot.info = peak.annotations, 
61 |                         cell.idents = tip.populations, 
62 |                         tsne.coords = tip.tsne.coordinates,
63 |                         min.cells = 0, min.peaks = 0)
64 |                         
65 | ## Plot relative expression of example peaks as a violin plot
66 | PlotRelativeExpressionViolin(peaks.object = peaks.sce, 
67 |       peaks.to.plot = c("Cxcl12:6:117174603-117175050:1", "Cxcl12:6:117180974-117181367:1"))
68 | 
69 | }
70 | 


--------------------------------------------------------------------------------
/man/PlotUTRLengthShift.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{PlotUTRLengthShift}
 4 | \alias{PlotUTRLengthShift}
 5 | \title{Plot global shifts in 3'UTR length}
 6 | \usage{
 7 | PlotUTRLengthShift(
 8 |   results.table,
 9 |   plot.title = "Global shift in 3'UTR length",
10 |   do.ranksum.test = TRUE,
11 |   return.plot = TRUE,
12 |   do.plot = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{results.table}{table produced by the DetectUTRLengthShift function}
17 | 
18 | \item{plot.title}{optional title}
19 | 
20 | \item{do.ranksum.test}{whether to perform a ranksum test on the shift in UTR usage}
21 | 
22 | \item{return.plot}{whether to return the ggplot2 object}
23 | 
24 | \item{do.plot}{whether to print the figure to output}
25 | }
26 | \description{
27 | Plot global shifts in 3'UTR lengths between cell populations.
28 | Input is a table of results from the DetectUTRLengthShift function.
29 | By default evaluates whether there is a significant shift in 3'UTR length
30 | between upregulated and downregulated peaks using the Wilcoxon Rank-sum test.
31 | }
32 | \examples{
33 | 
34 | extdata_path <- system.file("extdata",package = "Sierra")
35 | results.file <- paste0(extdata_path,"/Cycling_vs_resting_fibro_UTR_length_res.RData")
36 | load(results.file)
37 | 
38 | PlotUTRLengthShift(res.table)
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/man/ReadPeakCounts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_util.R
 3 | \name{ReadPeakCounts}
 4 | \alias{ReadPeakCounts}
 5 | \title{Read in peak data saved in MEX format}
 6 | \usage{
 7 | ReadPeakCounts(
 8 |   data.dir = NULL,
 9 |   mm.file = NULL,
10 |   barcodes.file = NULL,
11 |   sites.file = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{data.dir}{directory where output from CountPeaks is stored}
16 | 
17 | \item{mm.file}{count matrix in MEX format}
18 | 
19 | \item{barcodes.file}{file containing cell barcodes corresponding to columns in the matrix}
20 | 
21 | \item{sites.file}{file containing peak coordinate names corresponding to rows in the matrix}
22 | }
23 | \value{
24 | a sparseMatrix
25 | }
26 | \description{
27 | Read in peak data saved in MEX format. Files can be in a gzipped (.gz) format.
28 | }
29 | \examples{
30 | # Following commands can be used to generate a new random sample data set
31 | # barcode_seq <- stringi::stri_rand_strings(12,14,pattern="[ACTG]")
32 | # barcode_seq <- paste0(barcode_seq,"-1")
33 | # Below is hard coded example
34 | 
35 | barcode_seq <- c("TCCCAGTACTGGGC-1", "CCAGAGAAAAACTT-1", "CGATAGGGGTAACA-1", 
36 | "GGCGGATGGAGATT-1", "ATCAGTACATCTAT-1", "TTTCCCGTACCACA-1", "TTGTGTACGGGATG-1", 
37 | "CAGGGCATAGTCTA-1", "GCTCTTTGGCTGAG-1", "AGTCGTATCACTAA-1", "CGGTTGGCTGGTAT-1", 
38 | "TGACCTGGAGCTGC-1")
39 | 
40 | # Note: siteNames could be genes
41 | siteNames <- cbind( paste0("Gene_",letters[1:12]))
42 |                  
43 |  # For this working example set site_names to be peak coordinates                
44 | siteNames <- c("Sash1:10:8722219-8722812:-1", "Sash1:10:8813689-8814157:-1", 
45 |              "Lamp2:X:38419489-38419901:-1", "Lamp2:X:38405042-38405480:-1", 
46 |              "Lamp2:X:38455818-38456298:-1", "Pecam1:11:106654217-106654585:-1", 
47 |              "Ly6e:15:74958936-74959338:1", "Ly6e:15:74956076-74956512:1", 
48 |              "Pnkd:1:74285960-74287456:1", "Pdgfra:5:75197715-75198215:1", 
49 |              "Dlc1:8:36567751-36568049:-1", "Dlc1:8:36568379-36568865:-1")
50 | 
51 | # Randomly generate a matrix that contains a bunch of zeros.
52 | # Columns are cells, rows are peaks
53 | matrix_A <- matrix(round(rexp(144,rate = 1),digits = 0), nrow = 12,ncol = 12)
54 | matrix_B <- matrix(round(rexp(144,rate = 0.7),digits = 0), nrow = 12,ncol = 12)
55 | matrix_mtx <- matrix_A * matrix_B
56 | matrix_mtx <- Matrix::Matrix(matrix_mtx, sparse=TRUE)
57 | 
58 | # Save example to appropriate named files in temporary location
59 | data.dir <- tempdir()
60 | barcodes.file <- paste0(data.dir,"/barcodes.tsv")
61 | writeLines(barcode_seq, barcodes.file)
62 | mm.file <- paste0(data.dir,"/matrix.mtx")
63 | Matrix::writeMM(matrix_mtx, mm.file)
64 | sites.file <- paste0(data.dir,"/sitenames.tsv")
65 | writeLines(siteNames,sites.file)
66 | 
67 | # Now read in using Sierra ReadPeakCounts by passing just directory name
68 | count.matrix <- Sierra::ReadPeakCounts(data.dir=data.dir)  
69 | 
70 | # Or by passing full length file names
71 | count.matrix <- Sierra::ReadPeakCounts(barcodes.file=barcodes.file, mm.file=mm.file, sites.file=sites.file)   
72 |  
73 |  
74 | }
75 | 


--------------------------------------------------------------------------------
/man/SelectGenePeaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_util.R
 3 | \name{SelectGenePeaks}
 4 | \alias{SelectGenePeaks}
 5 | \title{Return peaks associated with a select gene.}
 6 | \usage{
 7 | SelectGenePeaks(
 8 |   peaks.object,
 9 |   gene,
10 |   feature.type = c("UTR3", "UTR5", "exon", "intron")
11 | )
12 | }
13 | \arguments{
14 | \item{peaks.object}{Peaks SCE or Seurat object.}
15 | 
16 | \item{gene}{Gene name}
17 | 
18 | \item{feature.type}{type of genomic features to use}
19 | }
20 | \value{
21 | a list of peak IDs
22 | }
23 | \description{
24 | Returns peaks associated with a select gene.
25 | }
26 | \examples{
27 | 
28 | 
29 | extdata_path <- system.file("extdata",package = "Sierra")
30 | load(paste0(extdata_path, "/Cxcl12_example.RData"))
31 | load(paste0(extdata_path, "/TIP_cell_info.RData"))
32 | 
33 | ## Create a Seurat object holding the peak data
34 | peaks.seurat <- NewPeakSeurat(peak.data = peak.counts, 
35 |                         annot.info = peak.annotations, 
36 |                         cell.idents = tip.populations, 
37 |                         tsne.coords = tip.tsne.coordinates,
38 |                         min.cells = 0, min.peaks = 0)
39 | 
40 | peak.list <- SelectGenePeaks(peaks.object =  peaks.seurat ,gene = "Cxcl12")
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/man/SplitBam.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split_bams.R
 3 | \name{SplitBam}
 4 | \alias{SplitBam}
 5 | \title{Utility to split a bam file into multiple bam files based on the barcode}
 6 | \usage{
 7 | SplitBam(
 8 |   bam,
 9 |   cellbc.df,
10 |   outdir = NULL,
11 |   yieldSize = 1e+06,
12 |   gtf_gr = NULL,
13 |   geneSymbol = NULL,
14 |   gi_ext = 50,
15 |   rle_output = FALSE,
16 |   exportFastqHeader = FALSE,
17 |   genomicRegion = NULL,
18 |   bamTags = c("CB", "UB"),
19 |   what = c("qname", "flag", "rname", "strand", "pos")
20 | )
21 | }
22 | \arguments{
23 | \item{bam}{CellRanger outputted bam file with the CB field}
24 | 
25 | \item{cellbc.df}{data frame of the cell barcode, needs to have the column names: "celltype" and "cellbc"}
26 | 
27 | \item{outdir}{directory to output the bam files. The bam files will be called [celltype].bam. If NULL no BAM file created.}
28 | 
29 | \item{yieldSize}{number of lines of bam files to load. Default: 1000000}
30 | 
31 | \item{gtf_gr}{gene model genomic ranges. Only used if geneSymbol is defined.}
32 | 
33 | \item{geneSymbol}{Gene symbol. Used to identify the genomic coordinates to extract reads from.}
34 | 
35 | \item{gi_ext}{The number of nucleotides to extend the genomic interval in extracting reads from (default 50).}
36 | 
37 | \item{rle_output}{If TRUE will generate and return rle_list object}
38 | 
39 | \item{exportFastqHeader}{If TRUE will generate a txt output file that has same prefix as bam file containing fastq header IDs}
40 | 
41 | \item{genomicRegion}{Granges object of genomic region to extract. Only used if geneSymbol not defined.}
42 | 
43 | \item{bamTags}{BAM field tag identifiers to extract. Default is c("CB", "UB").}
44 | 
45 | \item{what}{What BAM fields to copy into new file. Default is c('qname', 'flag', 'rname', 'strand', 'pos')}
46 | }
47 | \value{
48 | a rleList of coverage for each cell type
49 | }
50 | \description{
51 | Given a bam file that was processed by CellRanger, SplitBam splits the
52 | bam into multiple bam files, one per cell type, based on the cell barcodes.
53 | Bam file needs to have the barcode stored in the "CB" field.
54 | }
55 | \examples{
56 | library('Sierra')
57 | 
58 | # Example 1 split the entire BAM file for each cell type
59 | \dontrun{
60 | extdata_path <- system.file("extdata",package = "Sierra")
61 | load(paste(extdata_path,"TIP_vignette_gene_Seurat.RData",sep="/"))
62 | cellbc.df <- data.frame(celltype=genes.seurat@active.ident, 
63 |                         cellbc= names(genes.seurat@active.ident))
64 | bamfile <- paste0(extdata_path,"/Vignette_example_TIP_sham.bam")
65 | 
66 | SplitBam(bamfile, cellbc.df)
67 | }
68 | 
69 | # Example 2 extract reads that overlap a gene
70 | 
71 | extdata_path <- system.file("extdata",package = "Sierra")
72 | gtf.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
73 | gtf.gr <- rtracklayer::import(gtf.file)
74 | 
75 | load(paste(extdata_path,"TIP_vignette_gene_Seurat.RData",sep="/"))
76 | cellbc.df <- data.frame(celltype=genes.seurat@active.ident, 
77 |                        cellbc= names(genes.seurat@active.ident))
78 |   
79 | # Modify cellbc.df so that the barcodes match what is in the BAM file                     
80 | cellbc.df$cellbc <- sub("(.*)-.*", "\\\\1", cellbc.df$cellbc)
81 | cellbc.df$cellbc <- paste0(cellbc.df$cellbc, "-1")
82 |                        
83 |                        
84 | bam.file <- paste0(extdata_path,"/Vignette_example_TIP_mi.bam")
85 | outdir <-  tempdir()  # change this to a meaningful location
86 | SplitBam(bam.file, cellbc.df, outdir=outdir, gtf_gr=gtf.gr, geneSymbol="Dnajc19")
87 | 
88 | 
89 | }
90 | 


--------------------------------------------------------------------------------
/man/annotate_gr_from_gtf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Annotate.R
 3 | \name{annotate_gr_from_gtf}
 4 | \alias{annotate_gr_from_gtf}
 5 | \title{Annotates a granges object with overlapping genes from gtf file.}
 6 | \usage{
 7 | annotate_gr_from_gtf(
 8 |   gr,
 9 |   invert_strand = FALSE,
10 |   gtf_gr = NULL,
11 |   annotationType = "any",
12 |   transcriptDetails = FALSE,
13 |   gtf_TxDb,
14 |   annotation_correction = TRUE,
15 |   genome = NULL,
16 |   pA_motif_max_position = 50,
17 |   AAA_motif_min_position = 10,
18 |   polystretch_length = 13,
19 |   max_mismatch = 1
20 | )
21 | }
22 | \arguments{
23 | \item{gr}{a granges object of peaks to annotate}
24 | 
25 | \item{invert_strand}{Boolean to signify if strand of gr peaks should be inverted}
26 | 
27 | \item{gtf_gr}{granges gtf file that contains annotation information}
28 | 
29 | \item{annotationType}{can be assigned "any" or "within". Default is "any" which states that a peak within gr must overlap an annotation feature (eg exon)}
30 | 
31 | \item{transcriptDetails}{Boolean. If false will only return gene name. If true will return internal transcript position feature (eg exon/intron)}
32 | 
33 | \item{gtf_TxDb}{same as gtf_gr but as a TxDb object.}
34 | 
35 | \item{annotation_correction}{Boolean. When multiple overlapping genes are identified will
36 | prioritise gene based on annotation. 3'UTR annotation trumps all other annotation.}
37 | 
38 | \item{genome}{genome object. If NOT NULL then will perform pA motif analysis.}
39 | 
40 | \item{pA_motif_max_position}{Any AAUAAA after this position are not considered (default 50nt)}
41 | 
42 | \item{AAA_motif_min_position}{Any polyA/polyT stretches before this position are not considered (default 10)}
43 | 
44 | \item{polystretch_length}{: the length of A or T to search for (default 13)}
45 | 
46 | \item{max_mismatch}{: The number of mismatches tolerated in polystretch}
47 | }
48 | \value{
49 | a dataframe with appended columns containing annotation
50 | }
51 | \description{
52 | gr is the genomic ranges that need to be annotated. Ideally original input should be in the format:
53 | }
54 | \details{
55 | chr8:70331172-70331574:+   # chr:start-end:strand
56 | 
57 |   This could already exist within an R object or you can copy it in via readClipboard.
58 | 
59 |  gr <- GRanges(readClipboard())
60 | 
61 | You need to run the following code:
62 |         gtf_file <- "u:/Reference/hg38/hg38_gene.gtf.gz"
63 |         gtf_file <- "u:/Reference/mm10/mm10_gene.gtf.gz"
64 |         gtf_file <- "u:/Reference/mm10/cellranger_genes.gtf.gz"
65 |        gtf_gr <- rtracklayer::import(gtf_file)
66 |        gtf_TxDb <- GenomicFeatures::makeTxDbFromGFF(gtf_file, format="gtf")
67 | 
68 | annotationType can be c("any", "start", "end", "within", "equal").
69 | }
70 | \examples{
71 | library(Sierra)
72 | 
73 |  # Generate peaks for Cxcl12, Arhgap10, Mast4,  using mm10 coordinates:
74 |  gr_peaks <- GenomicRanges::GRanges(c("chr6:117174600-117175065:+",
75 |             "chr6:117180975-117181367:+",
76 |             "chr8:77250366-77250686:-",
77 |             "chr8:77426400-77517833:-",
78 |             "chr13:102905701-102906230:-",
79 |             "chr13:103139934-103171545:-"))
80 |             
81 |  # Load other files from vignette           
82 |  extdata_path <- system.file("extdata",package = "Sierra")
83 |  reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
84 |  
85 |  # convert gtf file to both granges and a TXDb object
86 |  gtf_gr <- rtracklayer::import(reference.file)
87 |  gtf_TxDb <- GenomicFeatures::makeTxDbFromGFF(reference.file, format="gtf")
88 |  
89 |  genome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10          
90 |   
91 |  annotate_gr_from_gtf(gr = gr_peaks, gtf_gr = gtf_gr,
92 |                       gtf_TxDb = gtf_TxDb, genome = genome)
93 | 
94 |  annotate_gr_from_gtf(gr = gr_peaks, gtf_gr = gtf_gr,
95 |                       gtf_TxDb = gtf_TxDb, genome = genome, transcriptDetails=TRUE)                                            
96 | }
97 | 


--------------------------------------------------------------------------------
/man/apply_DEXSeq_test_sce.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{apply_DEXSeq_test_sce}
 4 | \alias{apply_DEXSeq_test_sce}
 5 | \title{Apply DEXSeq to detect differential peak usage to a Single-Cell Experiment object}
 6 | \usage{
 7 | apply_DEXSeq_test_sce(
 8 |   peaks.sce.object,
 9 |   population.1 = NULL,
10 |   population.2 = NULL,
11 |   exp.thresh = 0.1,
12 |   fc.thresh = 0.25,
13 |   adj.pval.thresh = 0.05,
14 |   num.splits = 6,
15 |   seed.use = 1,
16 |   feature.type = c("UTR3", "UTR5", "exon", "intron"),
17 |   replicates.1 = NULL,
18 |   replicates.2 = NULL,
19 |   include.annotations = FALSE,
20 |   filter.pA.stretch = FALSE,
21 |   verbose = TRUE,
22 |   do.MAPlot = FALSE,
23 |   return.dexseq.res = FALSE,
24 |   ncores = 1
25 | )
26 | }
27 | \arguments{
28 | \item{peaks.sce.object}{SCE object of peaks}
29 | 
30 | \item{population.1}{a target population of cells (can be an ID/cluster label or a set of cell barcode IDs)}
31 | 
32 | \item{population.2}{comparison population of cells. If NULL (default), uses all non-population.1 cells}
33 | 
34 | \item{exp.thresh}{minimum percent expression threshold (for a population of cells) to include a peak}
35 | 
36 | \item{fc.thresh}{threshold for log2 fold-change difference for returned results}
37 | 
38 | \item{adj.pval.thresh}{threshold for adjusted P-value for returned results}
39 | 
40 | \item{num.splits}{the number of pseudo-bulk profiles to create per identity class (default: 6)}
41 | 
42 | \item{seed.use}{seed use}
43 | 
44 | \item{feature.type}{genomic feature types to run analysis on (default: all)}
45 | 
46 | \item{replicates.1}{an optional list to define the cells used as replicates for population.1. 
47 | Will override anything set for the population.1 parameter.}
48 | 
49 | \item{replicates.2}{an optional list to define the cells used as replicates for population.2. 
50 | Will override anything set for the population.2 parameter.}
51 | 
52 | \item{include.annotations}{whether to include junction, polyA motif and stretch annotations in output (default: FALSE)}
53 | 
54 | \item{filter.pA.stretch}{whether to filter out peaks annotated as proximal to an A-rich region (default: FALSE)}
55 | 
56 | \item{verbose}{whether to print outputs (TRUE by default)}
57 | 
58 | \item{do.MAPlot}{make an MA plot of results (FALSE by default)}
59 | 
60 | \item{return.dexseq.res}{return the raw and unfiltered DEXSeq results object (FALSE by default)}
61 | 
62 | \item{ncores}{Number of cores to use for multithreading}
63 | }
64 | \value{
65 | a data-frame of results.
66 | }
67 | \description{
68 | Apply DEXSeq to detect differential peak usage between select populations. Works by building
69 | a 'pseudo-bulk' profile of cell populations by aggregating counts from individual cells
70 | into a smaller number of profiles, defined by num.splits.
71 | }
72 | \examples{
73 | 
74 | \dontrun{
75 | apply_DEXSeq_test_sce(peaks.sce.object, population.1 = "1", population.2 = "2")
76 | }
77 | 
78 | }
79 | 


--------------------------------------------------------------------------------
/man/apply_DEXSeq_test_seurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{apply_DEXSeq_test_seurat}
 4 | \alias{apply_DEXSeq_test_seurat}
 5 | \title{Apply DEXSeq to detect differential peak usage}
 6 | \usage{
 7 | apply_DEXSeq_test_seurat(
 8 |   apa.seurat.object,
 9 |   population.1 = NULL,
10 |   population.2 = NULL,
11 |   exp.thresh = 0.1,
12 |   fc.thresh = 0.25,
13 |   adj.pval.thresh = 0.05,
14 |   num.splits = 6,
15 |   seed.use = 1,
16 |   feature.type = c("UTR3", "UTR5", "exon", "intron"),
17 |   replicates.1 = NULL,
18 |   replicates.2 = NULL,
19 |   include.annotations = FALSE,
20 |   filter.pA.stretch = FALSE,
21 |   verbose = TRUE,
22 |   do.MAPlot = FALSE,
23 |   return.dexseq.res = FALSE,
24 |   ncores = 1
25 | )
26 | }
27 | \arguments{
28 | \item{apa.seurat.object}{Seurat object of peaks}
29 | 
30 | \item{population.1}{a target population of cells (can be an ID/cluster label or a set of cell barcode IDs)}
31 | 
32 | \item{population.2}{comparison population of cells. If NULL (default), uses all non-population.1 cells}
33 | 
34 | \item{exp.thresh}{minimum percent expression threshold (for a population of cells) to include a peak}
35 | 
36 | \item{fc.thresh}{threshold for log2 fold-change difference for returned results}
37 | 
38 | \item{adj.pval.thresh}{threshold for adjusted P-value for returned results}
39 | 
40 | \item{num.splits}{the number of pseudo-bulk profiles to create per identity class (default: 6)}
41 | 
42 | \item{seed.use}{seed}
43 | 
44 | \item{feature.type}{genomic feature types to run analysis on (default: all)}
45 | 
46 | \item{replicates.1}{an optional list to define the cells used as replicates for population.1. 
47 | Will override anything set for the population.1 parameter.}
48 | 
49 | \item{replicates.2}{an optional list to define the cells used as replicates for population.2. 
50 | Will override anything set for the population.2 parameter.}
51 | 
52 | \item{include.annotations}{whether to include junction, polyA motif and stretch annotations in output (default: FALSE)}
53 | 
54 | \item{filter.pA.stretch}{whether to filter out peaks annotated as proximal to an A-rich region (default: FALSE)}
55 | 
56 | \item{verbose}{whether to print outputs (TRUE by default)}
57 | 
58 | \item{do.MAPlot}{make an MA plot of results (FALSE by default)}
59 | 
60 | \item{return.dexseq.res}{return the raw and unfiltered DEXSeq results object (FALSE by default)}
61 | 
62 | \item{ncores}{Number of cores to use for multithreading}
63 | }
64 | \value{
65 | a data-frame of results.
66 | }
67 | \description{
68 | Apply DEXSeq to detect differential peak usage between select populations. Works by building
69 | a 'pseudo-bulk' profile of cell populations by aggregating counts from individual cells
70 | into a smaller number of profiles, defined by num.splits.
71 | }
72 | \examples{
73 | 
74 | \dontrun{
75 |    apply_DEXSeq_test_seurat(apa.seurat.object, population.1 = "1", population.2 = "2")
76 |  }
77 | 
78 | }
79 | 


--------------------------------------------------------------------------------
/man/do_arrow_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{do_arrow_plot}
 4 | \alias{do_arrow_plot}
 5 | \title{Produce an arrow plot of peak expression}
 6 | \usage{
 7 | do_arrow_plot(
 8 |   peaks.seurat.object,
 9 |   gene_name,
10 |   peaks.use = NULL,
11 |   population.ids = NULL,
12 |   return.plot = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{peaks.seurat.object}{a Seurat object containing t-SNE coordinates and cluster ID's in @ident slot}
17 | 
18 | \item{gene_name}{name of the gene whose peaks are plotted}
19 | 
20 | \item{peaks.use}{optional set of peaks to include in the plot (default: NULL)}
21 | 
22 | \item{population.ids}{optional set of cell population IDs to include in the plot (default: NULL)}
23 | 
24 | \item{return.plot}{whether to return the ggplot object (default: FALSE)}
25 | }
26 | \value{
27 | NULL by default. Returns a ggplot2 object if return.plot = TRUE
28 | }
29 | \description{
30 | Produce an arrow plot of peak expression, utilising the gggenes package.
31 | }
32 | \examples{
33 | \dontrun{
34 | do_arrow_plot(peaks.seurat.object, gene_name = Favouritegene1)
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/man/fit_gaussian.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_polyA.R
 3 | \name{fit_gaussian}
 4 | \alias{fit_gaussian}
 5 | \title{Fit Gaussian curve to the coverage}
 6 | \usage{
 7 | fit_gaussian(fit.data, maxval, fit.method, mu = 300)
 8 | }
 9 | \arguments{
10 | \item{fit.data}{read coverage to fit}
11 | 
12 | \item{maxval}{maximum read coverage}
13 | 
14 | \item{fit.method}{Either NLS or MLE}
15 | 
16 | \item{mu}{initialised value for the centre of the peak (default: 300)}
17 | }
18 | \description{
19 | Given read coverage data, fit a Gaussian curve using either 
20 | NLS or MLE fits.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/geneToGR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split_bams.R
 3 | \name{geneToGR}
 4 | \alias{geneToGR}
 5 | \title{geneToGR converts a gene symbol to genomic ranges coordinate}
 6 | \usage{
 7 | geneToGR(geneSymbol, gtf_gr)
 8 | }
 9 | \arguments{
10 | \item{geneSymbol}{: Gene symbol}
11 | 
12 | \item{gtf_gr}{: Granges object of a gtf file}
13 | }
14 | \description{
15 | geneToGR converts a gene symbol to genomic ranges coordinate
16 | }
17 | \examples{
18 |     library('Sierra')
19 |     extdata_path <- system.file("extdata",package = "Sierra")
20 |     gtf.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
21 |     gtf.gr <- rtracklayer::import(gtf.file)
22 |     
23 |     geneGR  <- geneToGR(geneSymbol= "Dnajc19",gtf_gr=gtf.gr)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/gene_Labels.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Annotate.R
 3 | \name{gene_Labels}
 4 | \alias{gene_Labels}
 5 | \title{This function has been designed to be called from annotate_gr_from_gtf}
 6 | \usage{
 7 | gene_Labels(gr, reference_gr, annotationType)
 8 | }
 9 | \arguments{
10 | \item{gr}{a granges object of peaks to annotate}
11 | 
12 | \item{reference_gr}{a granges object of annotation info}
13 | 
14 | \item{annotationType}{type of overlap required between a peak and an annotation feature (e.g. "any" or "within")}
15 | }
16 | \description{
17 | This function has been designed to be called from annotate_gr_from_gtf
18 | }
19 | 


--------------------------------------------------------------------------------
/man/generate_merged_peak_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset_merging.R
 3 | \name{generate_merged_peak_table}
 4 | \alias{generate_merged_peak_table}
 5 | \title{Merge peaks across data-sets based on a reference}
 6 | \usage{
 7 | generate_merged_peak_table(
 8 |   dataset.1,
 9 |   peak.dataset.list,
10 |   self.merged.peaks.list,
11 |   sim.thresh = 0.75,
12 |   allow.match.var = 0.25,
13 |   ncores = 1
14 | )
15 | }
16 | \arguments{
17 | \item{dataset.1}{the reference peak data-set}
18 | 
19 | \item{peak.dataset.list}{a list of peak data-sets}
20 | 
21 | \item{self.merged.peaks.list}{the set of self-merged peaks from the reference data-set}
22 | 
23 | \item{sim.thresh}{The required similarity threshold for merging (default: 0.75)}
24 | 
25 | \item{allow.match.var}{The allowance for deviation from the sim.thresh for comparison peaks (default: 0.25)}
26 | 
27 | \item{ncores}{Number of cores for multithreading}
28 | }
29 | \value{
30 | a data-frame containing peaks, their class (merged or unique) and the original peak from the reference
31 | }
32 | \description{
33 | Given a reference data-set, a list of data-sets for merging and set of merged peaks from the reference,
34 | identify peaks that should be merged. Merged peaks are taken as the union of the peaks to be merged.
35 | For two given peaks, A and B, they will be merged if at least one has some x\% (75\% by default) or more
36 | overlap with the other, and the other has at least x-(y*x)\% overlap where y is a percentage of allowed
37 | variance (25\% by default).
38 | }
39 | \examples{
40 | \dontrun{
41 |      generate_merged_peak_table(dataset.1, peak.dataset.table, self.merged.peaks.list)
42 |      
43 |      
44 |      
45 |      
46 |      
47 |  }
48 | }
49 | 


--------------------------------------------------------------------------------
/man/generate_self_merged_peaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset_merging.R
 3 | \name{generate_self_merged_peaks}
 4 | \alias{generate_self_merged_peaks}
 5 | \title{Merge a set of peaks}
 6 | \usage{
 7 | generate_self_merged_peaks(
 8 |   apa.similarity.table,
 9 |   sim.thresh = 0.75,
10 |   allow.match.var = 0.25,
11 |   return.type = "peak_list"
12 | )
13 | }
14 | \arguments{
15 | \item{apa.similarity.table}{the set of peaks to merge}
16 | 
17 | \item{sim.thresh}{The required similarity threshold for merging (default: 0.75)}
18 | 
19 | \item{allow.match.var}{The allowance for deviation from the sim.thresh for comparison peaks (default: 0.25)}
20 | 
21 | \item{return.type}{Whether to return a full table of results or simply a vector of merged peaks}
22 | }
23 | \value{
24 | a table of merged peaks with original merged peaks or a vector of merged peaks
25 | }
26 | \description{
27 | Given a self-similarity table of peaks, identify peaks that should be merged. Merged peaks are
28 | taken as the union of the two peaks. For two given peaks, A and B, they will be merged if at least one has
29 | some x\% (75\% by default) or more overlap with the other, and the other has at least x-(y*x)\% overlap where
30 | y is a percentage of allowed variance (25\% by default)
31 | }
32 | \examples{
33 | \dontrun{
34 | generate_self_merged_peaks(apa.similarity.table)
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/man/generate_self_similarity_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset_merging.R
 3 | \name{generate_self_similarity_table}
 4 | \alias{generate_self_similarity_table}
 5 | \title{Generates a table of similarity measures within a set of peaks}
 6 | \usage{
 7 | generate_self_similarity_table(peaks.1, ncores = 1)
 8 | }
 9 | \arguments{
10 | \item{peaks.1}{the set of peaks to merge}
11 | 
12 | \item{ncores}{Number of cores for multithreading}
13 | }
14 | \value{
15 | a data-frame with peaks from peaks.1 mapped to the closest peak within itself
16 | }
17 | \description{
18 | In some rare cases, called peaks will show a high degree of overlap, and before
19 | merging two different sets of peaks, the similar peaks within a set first need
20 | to be merged. This function looks for the most similar peak (non-self) within a set of peaks
21 | and calculates the level of overlap.
22 | }
23 | \examples{
24 | \dontrun{
25 | generate_self_similarity_table(peaks.1)
26 | }
27 | }
28 | 


--------------------------------------------------------------------------------
/man/generate_similarity_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset_merging.R
 3 | \name{generate_similarity_table}
 4 | \alias{generate_similarity_table}
 5 | \title{Generates a table of similarity measures between two sets of peaks}
 6 | \usage{
 7 | generate_similarity_table(peaks.1, peaks.2, ncores = 1)
 8 | }
 9 | \arguments{
10 | \item{peaks.1}{first set of peaks - used as a reference point}
11 | 
12 | \item{peaks.2}{second set of peaks being compared}
13 | 
14 | \item{ncores}{number of cores for multithreading (default 1)}
15 | }
16 | \value{
17 | a data-frame with peaks from peaks.1 mapped to the closest corresponding peak in peaks.2.
18 | }
19 | \description{
20 | Goes through the set of genes contained in peaks.1. For each gene-specific peak,
21 | calculate the amount of overlapping nucleotides to the nearest peak in peaks.2. If the gene
22 | is not available in peaks.2, distance is set to -1e7.
23 | }
24 | \examples{
25 | \dontrun{
26 | extdata_path <- system.file("extdata",package = "Sierra")
27 | peak.sites.file <- paste0(extdata_path,"/TIP_merged_peaks.txt")
28 | peak.table <- read.table(peak.sites.file, sep="\t", header = TRUE, stringsAsFactors = FALSE)
29 | 
30 | generate_similarity_table(peak.table, peak.table)
31 | }
32 | }
33 | 


--------------------------------------------------------------------------------
/man/get_expressed_peaks_sce.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{get_expressed_peaks_sce}
 4 | \alias{get_expressed_peaks_sce}
 5 | \title{Identify highly expressed peaks}
 6 | \usage{
 7 | get_expressed_peaks_sce(
 8 |   peaks.sce.object,
 9 |   population.1,
10 |   population.2 = NULL,
11 |   threshold = 0.05
12 | )
13 | }
14 | \arguments{
15 | \item{peaks.sce.object}{the peak-count SCE object}
16 | 
17 | \item{population.1}{target population}
18 | 
19 | \item{population.2}{background population. If NULL (default), all non-population.1 cells are used}
20 | 
21 | \item{threshold}{percentage threshold of detected (non-zero) expression for including a peak}
22 | }
23 | \value{
24 | an array of peak (or gene) names
25 | }
26 | \description{
27 | Selects peaks that are considered expressed above some provided criteria within a target or
28 | background cluster. Considers peaks expressed in some x\% of cells to be highly expressed. Returns the
29 | union of peaks identified from the target and background cluster
30 | }
31 | \examples{
32 | \dontrun{
33 | get_expressed_peaks_sce(peak.sce, "1")
34 | get_expressed_peaks_sce(peak.sce, population.1 = "1", population.2 = "2")
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/man/get_expressed_peaks_seurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/differential_usage.R
 3 | \name{get_expressed_peaks_seurat}
 4 | \alias{get_expressed_peaks_seurat}
 5 | \title{Identify highly expressed peaks}
 6 | \usage{
 7 | get_expressed_peaks_seurat(
 8 |   peaks.seurat.object,
 9 |   population.1,
10 |   population.2 = NULL,
11 |   threshold = 0.05
12 | )
13 | }
14 | \arguments{
15 | \item{peaks.seurat.object}{the peak-count Seurat object}
16 | 
17 | \item{population.1}{target cluster}
18 | 
19 | \item{population.2}{background cluster. If NULL (default), all non-target cells are used}
20 | 
21 | \item{threshold}{percentage threshold of detected (non-zero) expression for including a peak}
22 | }
23 | \value{
24 | an array of peak (or gene) names
25 | }
26 | \description{
27 | Selects peaks that are considered expressed above some provided criteria within a target or
28 | background cluster. Considers peaks expressed in some x\% of cells to be highly expressed. Returns the
29 | union of peaks identified from the target and background cluster
30 | }
31 | \examples{
32 | \dontrun{
33 |   get_expressed_peaks_seurat(seurat.object, "1")
34 |   get_expressed_peaks_seurat(seurat.object, population.1 = "1", population.2 = "2")
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/man/get_relative_expression_sce.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{get_relative_expression_sce}
 4 | \alias{get_relative_expression_sce}
 5 | \title{Calculate relative expression between two or more peaks}
 6 | \usage{
 7 | get_relative_expression_sce(
 8 |   peaks.sce.object,
 9 |   peak.set = NULL,
10 |   gene.name = NULL,
11 |   feature.type = c("UTR3", "exon"),
12 |   p.count = 1
13 | )
14 | }
15 | \arguments{
16 | \item{peaks.sce.object}{SCE object}
17 | 
18 | \item{peak.set}{set of peaks}
19 | 
20 | \item{gene.name}{gene name for retrieving a set of peaks}
21 | 
22 | \item{feature.type}{features to consider. 3'UTR and exon by default.}
23 | 
24 | \item{p.count}{Pseudo-count}
25 | }
26 | \value{
27 | a matrix of relative expression
28 | }
29 | \description{
30 | Calculate a relative expression between two or more peaks by dividing
31 | the expression of each peak by the mean of the peak expression for that gene -
32 | or set of provided peaks
33 | }
34 | \examples{
35 | \dontrun{
36 | get_relative_expression_sce(peaks.sce.object, gene.name = "Cxcl12")
37 | }
38 | }
39 | 


--------------------------------------------------------------------------------
/man/get_relative_expression_seurat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{get_relative_expression_seurat}
 4 | \alias{get_relative_expression_seurat}
 5 | \title{Calculate relative expression between two or more peaks}
 6 | \usage{
 7 | get_relative_expression_seurat(
 8 |   peaks.seurat.object,
 9 |   peak.set = NULL,
10 |   gene.name = NULL,
11 |   feature.type = c("UTR3", "exon"),
12 |   p.count = 1
13 | )
14 | }
15 | \arguments{
16 | \item{peaks.seurat.object}{Seurat object}
17 | 
18 | \item{peak.set}{set of peaks}
19 | 
20 | \item{gene.name}{gene name for retrieving a set of peaks}
21 | 
22 | \item{feature.type}{features to consider. 3'UTR and exon by default.}
23 | 
24 | \item{p.count}{Pseudo count}
25 | }
26 | \value{
27 | a matrix of relative expression
28 | }
29 | \description{
30 | Calculate a relative expression between two or more peaks by dividing
31 | the expression of each peak by the mean of the peak expression for that gene -
32 | or set of provided peaks
33 | }
34 | \examples{
35 | \dontrun{
36 | get_relative_expression_seurat(peaks.seurat.object, gene.name = "Cxcl12")
37 | }
38 | }
39 | 


--------------------------------------------------------------------------------
/man/make_exons.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_polyA.R
 3 | \name{make_exons}
 4 | \alias{make_exons}
 5 | \title{Helper function}
 6 | \usage{
 7 | make_exons(x)
 8 | }
 9 | \arguments{
10 | \item{x}{x}
11 | }
12 | \value{
13 | to write
14 | }
15 | \description{
16 | Helper function
17 | }
18 | \examples{
19 | \dontrun{
20 | make_exons(x)
21 | }
22 | 
23 | 
24 | }
25 | 


--------------------------------------------------------------------------------
/man/make_reference.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_polyA.R
 3 | \name{make_reference}
 4 | \alias{make_reference}
 5 | \title{Build gene start-end reference from a gtf file}
 6 | \usage{
 7 | make_reference(
 8 |   gtf_file,
 9 |   chr.names = NULL,
10 |   filter.chr = FALSE,
11 |   gene.symbol.ref = "gene_name"
12 | )
13 | }
14 | \arguments{
15 | \item{gtf_file}{gtf file}
16 | 
17 | \item{chr.names}{a list of valid chromosome names to use}
18 | 
19 | \item{filter.chr}{whether to filter chromosomes in the GTF file}
20 | 
21 | \item{gene.symbol.ref}{field in the GTF file containing the gene symbol
22 | 
23 | Takes a GTF file as input and creates a table of chromosome start-end
24 | positions for each gene. Works with GTF files downloaded from 10x Genomics website.}
25 | }
26 | \description{
27 | Build gene start-end reference from a gtf file
28 | }
29 | 


--------------------------------------------------------------------------------
/man/merge_bam_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split_bams.R
 3 | \name{merge_bam_coverage}
 4 | \alias{merge_bam_coverage}
 5 | \title{merge_bam_coverage}
 6 | \usage{
 7 | merge_bam_coverage(bamfiles)
 8 | }
 9 | \arguments{
10 | \item{bamfiles}{A list of BAM files that are to be merged}
11 | }
12 | \description{
13 | merge_bam_coverage
14 | }
15 | 


--------------------------------------------------------------------------------
/man/relative_location.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting_functions.R
 3 | \name{relative_location}
 4 | \alias{relative_location}
 5 | \title{Given a peak position in a 3'UTR out of some n number of peaks,
 6 | relative to the terminating exon, calculate the relative position
 7 | of the query peak location on a scale of 0 to 1, where 0 indicates
 8 | the most proximal location and 1 indicates most distal.}
 9 | \usage{
10 | relative_location(location, n)
11 | }
12 | \arguments{
13 | \item{location}{location}
14 | 
15 | \item{n}{number of locations}
16 | }
17 | \description{
18 | Given a peak position in a 3'UTR out of some n number of peaks,
19 | relative to the terminating exon, calculate the relative position
20 | of the query peak location on a scale of 0 to 1, where 0 indicates
21 | the most proximal location and 1 indicates most distal.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/rle_to_WIG.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split_bams.R
 3 | \name{rle_to_WIG}
 4 | \alias{rle_to_WIG}
 5 | \title{load(file="c:/BAM/scRNA_polyA/FC.RData")
 6 | gtf_file <- "u:/Reference/mm10/cellranger_genes.gtf.gz"
 7 | gtf_gr <- rtracklayer::import(gtf_file)}
 8 | \usage{
 9 | rle_to_WIG(rle_input, gtf_gr = gtf_gr, geneSymbol = "Dnajc19")
10 | }
11 | \arguments{
12 | \item{rle_input}{rle input object}
13 | 
14 | \item{gtf_gr}{GTF file as a genomic ranges object}
15 | 
16 | \item{geneSymbol}{name of gene to interrogate}
17 | }
18 | \description{
19 | load(file="c:/BAM/scRNA_polyA/FC.RData")
20 | gtf_file <- "u:/Reference/mm10/cellranger_genes.gtf.gz"
21 | gtf_gr <- rtracklayer::import(gtf_file)
22 | }
23 | 


--------------------------------------------------------------------------------
/vignettes/Cxcl12_coverage_annotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/Cxcl12_coverage_annotated.png


--------------------------------------------------------------------------------
/vignettes/DimPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/DimPlot.png


--------------------------------------------------------------------------------
/vignettes/PlotCoverage_CXCL12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/PlotCoverage_CXCL12.png


--------------------------------------------------------------------------------
/vignettes/PlotRelativeExpressionBox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/PlotRelativeExpressionBox.png


--------------------------------------------------------------------------------
/vignettes/PlotRelativeExpressionTSNE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/PlotRelativeExpressionTSNE.png


--------------------------------------------------------------------------------
/vignettes/PlotRelativeExpressionViolin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/PlotRelativeExpressionViolin.png


--------------------------------------------------------------------------------
/vignettes/Seurat.FeaturePlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/Seurat.FeaturePlot.png


--------------------------------------------------------------------------------
/vignettes/UTRLengthPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VCCRI/Sierra/52e68072aa2b974238c018cd0b2e71d881503b5b/vignettes/UTRLengthPlot.png


--------------------------------------------------------------------------------