├── .DS_Store
├── .Rbuildignore
├── .Rhistory
├── .gitignore
├── CellTagR.Rproj
├── DESCRIPTION
├── Examples
    ├── .DS_Store
    ├── CellTagR CellTag Object V1 V2 V3.pdf
    ├── CellTag_UTR.fa
    ├── CloneHunterWhitelistTestRun.Rmd
    ├── CloneHunterWhitelistTestRun.nb.html
    ├── bar_Chart.png
    ├── clone.calling.permutation.test.R
    ├── hf1.d15.network.construction.html
    ├── jaccard example.png
    ├── jaccard wo collapsing.png
    ├── network construction and visualization.Rmd
    ├── network construction and visualization.nb.html
    ├── permutation_python.py
    ├── post_filtering.png
    ├── pre_filtering.png
    ├── sc analysis.Rmd
    ├── sc analysis.nb.html
    └── v1_v2_v3.html
├── NAMESPACE
├── R
    ├── .DS_Store
    ├── AuxiliaryFunctions.R
    ├── CellTagExtraction.R
    ├── CellTagForCollapsing.R
    ├── CellTagMatrixGeneration.R
    ├── CellTagNetworkContruction.R
    ├── CellTagNetworkVisualiztion.R
    ├── CellTagObjSet.R
    ├── CellTagWhitelistGeneration.R
    ├── CloneCalling.R
    ├── CreateCellTagObject.R
    ├── MetricBasedPlottingAndFiltering.R
    ├── ScCellTagMatrixProcess.R
    └── scripts.zip
├── README.md
├── V2-1_S2_L001_R1_001.fastq
├── inst
    └── extdata
    │   ├── .DS_Store
    │   ├── Demo_V1.Rds
    │   ├── V2-1_R1.zip
    │   ├── bam_v123_obj.Rds
    │   ├── barcodes.tsv
    │   ├── hf1.d28.prefiltered.Rds
    │   └── v1_whitelist.csv
└── man
    ├── AddCellTagFreqSort.Rd
    ├── Barcode.Aggregate.Rd
    ├── CellTagDataForCollapsing.Rd
    ├── CellTagDataPostCollapsing.Rd
    ├── CellTagExtraction.Rd
    ├── CellTagMatrixCount.Rd
    ├── CellTagObject.Rd
    ├── CellTagPatternCalling.Rd
    ├── CellTagWhitelistFiltering.Rd
    ├── CloneCalling.Rd
    ├── JaccardAnalysis.Rd
    ├── MetricBasedFiltering.Rd
    ├── MetricPlots.Rd
    ├── SingleCellDataBinatization.Rd
    ├── SingleCellDataWhitelist.Rd
    ├── addData2Nodes.Rd
    ├── bam.process.Rd
    ├── convertCellTagMatrix2LinkList.Rd
    ├── drawSubnet.Rd
    ├── fastq.process.Rd
    └── getNodesfromLinkList.Rd


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/.DS_Store


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^CellTagR\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
  1 | panel.grid.major = element_blank(),
  2 | panel.grid.minor = element_blank(),
  3 | panel.background = element_blank(),
  4 | axis.line = element_line(colour = "black"))
  5 | background.error <- background.mtx[,c(4,5)]
  6 | background.error$category <- col.path.sub[ref.meta.path[rownames(background.error), "cell.bc"], "label"]
  7 | ggplot(background.error, aes(x = Error, color = category)) +
  8 | geom_density() +
  9 | scale_color_viridis_d(begin = 0.15, end = 0.85, option = "A") +
 10 | theme(legend.position="right",
 11 | legend.text = element_text(size = 10),
 12 | axis.title = element_blank(),
 13 | panel.grid.major = element_blank(),
 14 | panel.grid.minor = element_blank(),
 15 | panel.background = element_blank(),
 16 | axis.line = element_line(colour = "black"))
 17 | ggplot(background.error, aes(x = Lagrangian, color = category)) +
 18 | geom_density() +
 19 | scale_color_viridis_d(begin = 0.15, end = 0.85, option = "A") +
 20 | theme(legend.position="right",
 21 | legend.text = element_text(size = 10),
 22 | axis.title = element_blank(),
 23 | panel.grid.major = element_blank(),
 24 | panel.grid.minor = element_blank(),
 25 | panel.background = element_blank(),
 26 | axis.line = element_line(colour = "black"))
 27 | background.error$category <- paste0("Background.", background.error$category)
 28 | error.ref.background <- rbind(error.path.all, background.error)
 29 | ggplot(error.ref.background, aes(x = Error, color = category)) +
 30 | geom_density() +
 31 | scale_color_viridis_d(begin = 0.15, end = 0.85, option = "A") +
 32 | theme(legend.position="right",
 33 | legend.text = element_text(size = 10),
 34 | axis.title = element_blank(),
 35 | panel.grid.major = element_blank(),
 36 | panel.grid.minor = element_blank(),
 37 | panel.background = element_blank(),
 38 | axis.line = element_line(colour = "black"))
 39 | ggplot(error.ref.background, aes(x = Lagrangian, color = category)) +
 40 | geom_density() +
 41 | scale_color_viridis_d(begin = 0.15, end = 0.85, option = "A") +
 42 | theme(legend.position="right",
 43 | legend.text = element_text(size = 10),
 44 | axis.title = element_blank(),
 45 | panel.grid.major = element_blank(),
 46 | panel.grid.minor = element_blank(),
 47 | panel.background = element_blank(),
 48 | axis.line = element_line(colour = "black"))
 49 | background.error$category <- paste0("Background.", background.error$category)
 50 | error.ref.background <- rbind(error.path.all, background.error)
 51 | ggplot(error.ref.background, aes(x = Error, color = category)) +
 52 | geom_density() +
 53 | scale_color_viridis_d(begin = 0.15, end = 0.85, option = "A") +
 54 | theme(legend.position="right",
 55 | legend.text = element_text(size = 10),
 56 | axis.title = element_blank(),
 57 | panel.grid.major = element_blank(),
 58 | panel.grid.minor = element_blank(),
 59 | panel.background = element_blank(),
 60 | axis.line = element_line(colour = "black"))
 61 | ggplot(error.ref.background, aes(x = Lagrangian, color = category)) +
 62 | geom_density() +
 63 | scale_color_viridis_d(begin = 0.15, end = 0.85, option = "A") +
 64 | theme(legend.position="right",
 65 | legend.text = element_text(size = 10),
 66 | axis.title = element_blank(),
 67 | panel.grid.major = element_blank(),
 68 | panel.grid.minor = element_blank(),
 69 | panel.background = element_blank(),
 70 | axis.line = element_line(colour = "black"))
 71 | ggplot(error.path.all, aes(x = Error)) +
 72 | geom_density()
 73 | ggplot(background.error, aes(x = Error)) +
 74 | geom_density()
 75 | ggplot(background.error, aes(x = Lagrangian)) +
 76 | geom_density()
 77 | rslt <- table(classification.path$new.classification, classification.path$actual)
 78 | rslt
 79 | rslt <- as.data.frame(apply(rslt, 2, function(x) round(x * 100/sum(x), digits = 3)))
 80 | rownames(rslt) <- paste0("Capy.", rownames(rslt))
 81 | colnames(rslt) <- paste0("Actual.", colnames(rslt))
 82 | rslt$capy <- rownames(rslt)
 83 | rslt.stk <- reshape2::melt(rslt)
 84 | ggplot(rslt.stk, aes(x = variable, y = capy, fill = value)) +
 85 | geom_tile() +
 86 | scale_fill_viridis_c(begin = 0.15, end = 0.85, option = "A") +
 87 | theme(legend.position="right",
 88 | legend.text = element_text(size = 10),
 89 | axis.text.x = element_text(angle = 45, size = 12, hjust = 1),
 90 | axis.text.y = element_text(size = 12),
 91 | axis.title = element_blank(),
 92 | panel.grid.major = element_blank(),
 93 | panel.grid.minor = element_blank(),
 94 | panel.background = element_blank(),
 95 | axis.line = element_line(colour = "black"))
 96 | library(ggpubr)
 97 | error.p1.p2.p3 <- error.path.other[which(error.path.other$category %in% c("Path1.Term", "Path2.Term", "Path3.Term")),]
 98 | ggqqplot(error.p1.p2.p3$Error)
 99 | shapiro.test(error.p1.p2.p3$Error)
100 | library(ggpubr)
101 | error.p1.p2.p3 <- error.path.other[which(error.path.other$category %in% c("Path1.Term", "Path2.Term", "Path3.Term")),]
102 | ggqqplot(error.p1.p2.p3$Error)
103 | shapiro.test(error.p1.p2.p3$Error)
104 | ggqqplot(error.p1.p2.p3$Lagrangian)
105 | shapiro.test(error.p1.p2.p3$Lagrangian)
106 | library(ggpubr)
107 | error.p1.p2.p3 <- error.path.other[which(error.path.other$category %in% c("Path1.Term", "Path2.Term", "Path3.Term")),]
108 | ggqqplot(error.p1.p2.p3$Error)
109 | shapiro.test(error.p1.p2.p3$Error)
110 | ggqqplot(error.p1.p2.p3$Lagrangian)
111 | shapiro.test(error.p1.p2.p3$Lagrangian)
112 | plot(density(error.p1.p2.p3$Error))
113 | plot(density(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path1.Term")]))
114 | ggqqplot(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path1.Term")])
115 | shapiro.test(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path1.Term")])
116 | ggqqplot(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path2.Term")])
117 | shapiro.test(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path2.Term")])
118 | shapiro.test(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
119 | ggqqplot(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
120 | shapiro.test(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
121 | ggqqplot(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
122 | shapiro.test(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
123 | plot(density(error.p1.p2.p3$Error))
124 | lines(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)), col = "red")
125 | plot(density(error.p1.p2.p3$Error))
126 | lines(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)), col = "red")
127 | plot(density(error.p1.p2.p3$Error)) + ylim(0,10)
128 | lines(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)), col = "red")
129 | plot(density(error.p1.p2.p3$Error)) + ylim(0,10)
130 | plot(density(error.p1.p2.p3$Error),ylim = c(0,10))
131 | lines(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)), col = "red")
132 | plot(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)))
133 | plot(density(error.p1.p2.p3$Error))
134 | lines(density(rnorm(1000, mean = major.norm.mean, sd = new.sigma)), col = "red")
135 | plot(density(error.p1.p2.p3$Error))
136 | lines(density(rnorm(1000, mean = major.norm.mean, sd = new.sigma)), col = "red")
137 | plot(density(error.p1.p2.p3$Error))
138 | lines(density(rnorm(2000, mean = major.norm.mean, sd = new.sigma)), col = "red")
139 | plot(density(error.p1.p2.p3$Error))
140 | lines(density(rnorm(2000, mean = major.norm.mean, sd = 150)), col = "red")
141 | plot(density(error.p1.p2.p3$Error))
142 | lines(density(rnorm(2000, mean = major.norm.mean, sd = new.sigma)), col = "red")
143 | ks.test(x = error.p1.p2.p3$Error, y = rnorm(2000, mean = major.norm.mean, sd = new.sigma))
144 | library(ggpubr)
145 | error.p1.p2.p3 <- error.path.other[which(error.path.other$category %in% c("Path1.Term", "Path2.Term", "Path3.Term")),]
146 | ggqqplot(error.p1.p2.p3$Error)
147 | shapiro.test(error.p1.p2.p3$Error)
148 | ggqqplot(error.p1.p2.p3$Lagrangian)
149 | shapiro.test(error.p1.p2.p3$Lagrangian)
150 | ggqqplot(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
151 | shapiro.test(error.p1.p2.p3$Error[which(error.p1.p2.p3$category == "Path3.Term")])
152 | ## Form a major normal distribution for the error
153 | major.norm.mean <- mean(fitpro$parameters$mu)
154 | new.sigma <- mean(fitpro$parameters$sigma)
155 | plot(fitpro, ylim = c(0,0.003))
156 | ## Form a major normal distribution for the error
157 | major.norm.mean <- mean(fitpro$parameters$mu)
158 | new.sigma <- mean(fitpro$parameters$sigma)
159 | plot(fitpro, ylim = 0.003)
160 | ## Form a major normal distribution for the error
161 | major.norm.mean <- mean(fitpro$parameters$mu)
162 | new.sigma <- mean(fitpro$parameters$sigma)
163 | plot(fitpro)
164 | lines(density(rnorm(1000, mean = major.norm.mean, sd = new.sigma)), col = "red")
165 | ## Form a major normal distribution for the lagrangian
166 | major.norm.mean.2 <- mean(fitpro.2$parameters$mu)
167 | new.sigma.2 <- mean(fitpro.2$parameters$sigma)
168 | plot(fitpro.2)
169 | lines(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)), col = "red")
170 | ## Form a major normal distribution for the error
171 | major.norm.mean <- sum(fitpro$parameters$mu * fitpro$parameters$pi)
172 | new.sigma <- sum(fitpro$parameters$sigma * fitpro$parameters$pi)
173 | plot(fitpro)
174 | lines(density(rnorm(1000, mean = major.norm.mean, sd = new.sigma)), col = "red")
175 | ## Form a major normal distribution for the lagrangian
176 | major.norm.mean.2 <- sum(fitpro.2$parameters$mu * fitpro.2$parameters$pi)
177 | new.sigma.2 <- sum(fitpro.2$parameters$sigma * fitpro.2$parameters$pi)
178 | plot(fitpro.2)
179 | lines(density(rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2)), col = "red")
180 | ### Get some p-values in the test qp errors
181 | error.path.all$p.values <- 1 - pnorm(error.path.all$Error, mean = major.norm.mean, sd = new.sigma)
182 | error.path.all$p.values.lm.lower <- pnorm(error.path.all$Lagrangian, mean = fitpro.2$parameters$mu[1], sd = fitpro.2$parameters$sigma[1])
183 | error.path.all$p.values.lm.mode <- pnorm(error.path.all$Lagrangian, mean = major.norm.mean.2, sd = new.sigma.2)
184 | classification.path$new.classification <- classification.path$call
185 | classification.path[rownames(error.path.all)[which(error.path.all$p.values <= 0.05 | (error.path.all$p.values.lm <= 0.005))],"new.classification"] <- "Unassigned"
186 | table(classification.path$new.classification, classification.path$actual)
187 | ### Get some p-values in the test qp errors
188 | error.path.all$p.values <- 1 - pnorm(error.path.all$Error, mean = major.norm.mean, sd = new.sigma)
189 | error.path.all$p.values.lm.lower <- pnorm(error.path.all$Lagrangian, mean = fitpro.2$parameters$mu[1], sd = fitpro.2$parameters$sigma[1])
190 | error.path.all$p.values.lm.mode <- pnorm(error.path.all$Lagrangian, mean = major.norm.mean.2, sd = new.sigma.2)
191 | classification.path$new.classification <- classification.path$call
192 | classification.path[rownames(error.path.all)[which(error.path.all$p.values <= 0.05 | (error.path.all$p.values.lm.lower <= 0.005))],"new.classification"] <- "Unassigned"
193 | table(classification.path$new.classification, classification.path$actual)
194 | ks.test(x = error.p1.p2.p3$Lagrangian, y = rnorm(2000, mean = major.norm.mean.2, sd = new.sigma.2))
195 | ks.test(x = error.p1.p2.p3$Lagrangian, y = rnorm(1000, mean = major.norm.mean.2, sd = new.sigma.2))
196 | ks.test(x = error.p1.p2.p3$Lagrangian, y = rnorm(nrow(error.p1.p2.p3), mean = major.norm.mean.2, sd = new.sigma.2))
197 | ks.test(x = error.p1.p2.p3$Error, y = rnorm(nrow(error.p1.p2.p3), mean = major.norm.mean, sd = new.sigma))
198 | rslt <- table(classification.path$call, classification.path$actual)
199 | rslt
200 | rslt <- as.data.frame(apply(rslt, 2, function(x) round(x * 100/sum(x), digits = 3)))
201 | rownames(rslt) <- paste0("Capy.", rownames(rslt))
202 | colnames(rslt) <- paste0("Actual.", colnames(rslt))
203 | rslt$capy <- rownames(rslt)
204 | rslt.stk <- reshape2::melt(rslt)
205 | ggplot(rslt.stk, aes(x = variable, y = capy, fill = value)) +
206 | geom_tile() +
207 | scale_fill_viridis_c(begin = 0.15, end = 0.85, option = "A") +
208 | theme(legend.position="right",
209 | legend.text = element_text(size = 10),
210 | axis.text.x = element_text(angle = 45, size = 12, hjust = 1),
211 | axis.text.y = element_text(size = 12),
212 | axis.title = element_blank(),
213 | panel.grid.major = element_blank(),
214 | panel.grid.minor = element_blank(),
215 | panel.background = element_blank(),
216 | axis.line = element_line(colour = "black"))
217 | ggplot(rslt.stk, aes(x = variable, y = capy, fill = value)) +
218 | geom_tile() +
219 | scale_fill_viridis_c(begin = 0.15, end = 0.85, option = "A") +
220 | theme(legend.position="right",
221 | legend.text = element_text(size = 10),
222 | axis.text.x = element_text(angle = 45, size = 12, hjust = 1),
223 | axis.text.y = element_text(size = 12),
224 | axis.title = element_blank(),
225 | panel.grid.major = element_blank(),
226 | panel.grid.minor = element_blank(),
227 | panel.background = element_blank(),
228 | axis.line = element_line(colour = "black"))
229 | save.image("~/Desktop/Morris Lab/Manuscripts/Capybara/error evaluation/Simulation Study Notebook/111920_final_simulation_intermed_multi_unknown_workspace.RData")
230 | ref.lsk <- readRDS("~/Desktop/Morris Lab/Manuscripts/Capybara/LARRY Dataset/in vitro/lsk_reference_wo_undifferentiated.Rds")
231 | ref.df.lsk <- ref.lsk[[3]]
232 | ref.sc.lsk <- ref.lsk[[1]]
233 | View(ref.df.lsk)
234 | ref.meta.lsk <- ref.lsk[[2]]
235 | View(ref.meta.lsk)
236 | library(Seurat)
237 | library(dplyr)
238 | library(patchwork)
239 | library(ggplot2)
240 | library(ggpubr)
241 | library(viridis)
242 | dox <- Read10X("~/Desktop/Morris Lab/Manuscripts/Capybara/MN revision/nova_seq_110920/Dox_RA/filtered_feature_bc_matrix/")
243 | dox.ra.sag <- Read10X("~/Desktop/Morris Lab/Manuscripts/Capybara/MN revision/nova_seq_110920/Dox_SAG/filtered_feature_bc_matrix/")
244 | dox.obj <- CreateSeuratObject(counts = dox, project = "mn.dox.ra", min.cells = 3, min.features = 200)
245 | dox.ra.sag.obj <- CreateSeuratObject(counts = dox.ra.sag, project = "mn.dox.sag", min.cells = 3, min.features = 200)
246 | dox.obj[["percent.mt"]] <- PercentageFeatureSet(dox.obj, pattern = "^mt-")
247 | dox.ra.sag.obj[["percent.mt"]] <- PercentageFeatureSet(dox.ra.sag.obj, pattern = "^mt-")
248 | VlnPlot(dox.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
249 | VlnPlot(dox.ra.sag.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
250 | # FeatureScatter is typically used to visualize feature-feature relationships, but can be used
251 | # for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
252 | plot1 <- FeatureScatter(dox.obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
253 | plot2 <- FeatureScatter(dox.ra.sag.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
254 | plot1 + plot2
255 | # FeatureScatter is typically used to visualize feature-feature relationships, but can be used
256 | # for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
257 | plot1 <- FeatureScatter(dox.ra.sag.obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
258 | plot2 <- FeatureScatter(dox.ra.sag.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
259 | plot1 + plot2
260 | dox.obj <- subset(dox.obj, subset = nFeature_RNA > 1000 & nFeature_RNA < 7000 & percent.mt <= 15)
261 | dox.ra.sag.obj <- subset(dox.ra.sag.obj, subset = nFeature_RNA > 1000 & nFeature_RNA < 7000 & percent.mt <= 15)
262 | dox.obj <- NormalizeData(dox.obj, normalization.method = "LogNormalize", scale.factor = 10000)
263 | dox.ra.sag.obj <- NormalizeData(dox.ra.sag.obj, normalization.method = "LogNormalize", scale.factor = 10000)
264 | dox.obj <- FindVariableFeatures(dox.obj, selection.method = "vst", nfeatures = 2000)
265 | # Identify the 10 most highly variable genes
266 | top10 <- head(VariableFeatures(dox.obj), 10)
267 | # plot variable features with and without labels
268 | plot1 <- VariableFeaturePlot(dox.obj)
269 | plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
270 | plot1 + plot2
271 | dox.ra.sag.obj <- FindVariableFeatures(dox.ra.sag.obj, selection.method = "vst", nfeatures = 2000)
272 | # Identify the 10 most highly variable genes
273 | top10 <- head(VariableFeatures(dox.ra.sag.obj), 10)
274 | # plot variable features with and without labels
275 | plot1 <- VariableFeaturePlot(dox.ra.sag.obj)
276 | plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
277 | plot1 + plot2
278 | all.genes <- rownames(dox.obj)
279 | dox.obj <- ScaleData(dox.obj, features = all.genes, vars.to.regress = c("nCount_RNA", "percent.mt"))
280 | all.genes <- rownames(dox.ra.sag.obj)
281 | dox.ra.sag.obj <- ScaleData(dox.ra.sag.obj, features = all.genes, vars.to.regress = c("nCount_RNA", "percent.mt"))
282 | dox.obj <- RunPCA(dox.obj, features = VariableFeatures(object = dox.obj))
283 | dox.ra.sag.obj <- RunPCA(dox.ra.sag.obj, features = VariableFeatures(object = dox.ra.sag.obj))
284 | VizDimLoadings(dox.obj, dims = 1:2, reduction = "pca")
285 | DimPlot(dox.obj, reduction = "pca")
286 | DimHeatmap(dox.obj, dims = 1:15, cells = 500, balanced = TRUE)
287 | # NOTE: This process can take a long time for big datasets, comment out for expediency. More
288 | # approximate techniques such as those implemented in ElbowPlot() can be used to reduce
289 | # computation time
290 | dox.obj <- JackStraw(dox.obj, num.replicate = 100)
291 | dox.obj <- ScoreJackStraw(dox.obj, dims = 1:20)
292 | JackStrawPlot(dox.obj, dims = 1:20)
293 | ElbowPlot(dox.obj)
294 | VizDimLoadings(dox.ra.sag.obj, dims = 1:2, reduction = "pca")
295 | DimHeatmap(dox.ra.sag.obj, dims = 1:15, cells = 500, balanced = TRUE)
296 | # NOTE: This process can take a long time for big datasets, comment out for expediency. More
297 | # approximate techniques such as those implemented in ElbowPlot() can be used to reduce
298 | # computation time
299 | dox.ra.sag.obj <- JackStraw(dox.ra.sag.obj, num.replicate = 100)
300 | dox.ra.sag.obj <- ScoreJackStraw(dox.ra.sag.obj, dims = 1:20)
301 | JackStrawPlot(dox.ra.sag.obj, dims = 1:20)
302 | ElbowPlot(dox.ra.sag.obj)
303 | dox.obj <- FindNeighbors(dox.obj, dims = 1:17)
304 | dox.ra.sag.obj <- FindNeighbors(dox.ra.sag.obj, dims = 1:17)
305 | dox.obj <- FindClusters(dox.obj, resolution = 0.8)
306 | dox.ra.sag.obj <- FindClusters(dox.ra.sag.obj, resolution = 0.8)
307 | dox.obj <- RunUMAP(dox.obj, dims = 1:17)
308 | dox.ra.sag.obj <- RunUMAP(dox.ra.sag.obj, dims = 1:17)
309 | DimPlot(dox.obj, reduction = "umap", label = T, label.size = 12)
310 | DimPlot(dox.ra.sag.obj, reduction = "umap", label = T, label.size = 12)
311 | FeaturePlot(dox.obj, features = c("Pou5f1", "Nanog", "Esrrb"), reduction = "umap")
312 | FeaturePlot(dox.ra.sag.obj, features = c("Pou5f1", "Nanog", "Esrrb"), reduction = "umap")
313 | FeaturePlot(dox.obj, features = c("Tubb3", "Map2", "Mnx1", "Isl1", "Lhx3", "Nefl", "Nefm", "Slit2","Onecut2"), reduction = "umap")
314 | FeaturePlot(dox.ra.sag.obj, features = c("Tubb3", "Map2", "Mnx1", "Isl1", "Lhx3", "Nefl", "Nefm", "Slit2", "Onecut2"), reduction = "umap")
315 | dox.marker <- FindAllMarkers(dox.obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
316 | dox.marker %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
317 | dox.ra.sag.marker <- FindAllMarkers(dox.ra.sag.obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
318 | dox.ra.sag.marker %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
319 | FeaturePlot(dox.obj, features = c("nCount_RNA", "percent.mito"), reduction = "umap")
320 | FeaturePlot(dox.ra.sag.obj, features = c("nCount_RNA", "percent.mito"), reduction = "umap")
321 | FeaturePlot(dox.obj, features = c("nCount_RNA", "percent.mt"), reduction = "umap")
322 | FeaturePlot(dox.ra.sag.obj, features = c("nCount_RNA", "percent.mt"), reduction = "umap")
323 | marker.read <- readLines("~/Desktop/Morris Lab/Manuscripts/Capybara/MN revision/TableS2.csv")
324 | marker.read.region <- unlist(lapply(strsplit(marker.read, ";"), function(x) x[1]))
325 | marker.read.gm_id <- unlist(lapply(strsplit(marker.read, ";"), function(x) x[2]))
326 | marker.read.gene <- unlist(lapply(strsplit(marker.read, ";"), function(x) x[3]))
327 | marker.df <- data.frame(marker.read.region, marker.read.gm_id, marker.read.gene, stringsAsFactors = F)
328 | cnms <- marker.df[1,]
329 | marker.df <- marker.df[-c(1), ]
330 | colnames(marker.df) <- cnms
331 | gene.list.construct <- list()
332 | unique.region <- unique(marker.df$domain)
333 | for (i in 1:length(unique.region)) {
334 | curr.region <- unique.region[i]
335 | curr.sub <- marker.df[which(marker.df$domain == curr.region), ]
336 | curr.gene.list <- curr.sub$Genes
337 | curr.gene.list <- unique(unlist(lapply(strsplit(curr.gene.list, ", "), function(x) x)))
338 | gene.list.construct[[curr.region]] <- curr.gene.list
339 | }
340 | dox.obj <- AddModuleScore(dox.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl1")
341 | dox.obj <- AddModuleScore(dox.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl2")
342 | dox.obj <- AddModuleScore(dox.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl3")
343 | dox.obj <- AddModuleScore(dox.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl4")
344 | dox.obj <- AddModuleScore(dox.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl5")
345 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$dl1, gene.list.construct$dl2,
346 | gene.list.construct$dl3, gene.list.construct$dl4,
347 | gene.list.construct$dl5)), ctrl = 5, name = "dorsal_features")
348 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$V0)), ctrl = 5, name = "V0")
349 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$V1)), ctrl = 5, name = "V1")
350 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$V2a)), ctrl = 5, name = "V2a")
351 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$V2b)), ctrl = 5, name = "V2b")
352 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$V3)), ctrl = 5, name = "V3")
353 | dox.obj <- AddModuleScore(dox.obj, features = list(c(gene.list.construct$V3)), ctrl = 5, name = "MN")
354 | FeaturePlot(dox.obj, features = c("dorsal_features1", "dl11", "dl21", "dl31", "dl41", "dl51"))
355 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl1")
356 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl2")
357 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl3")
358 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl4")
359 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(gene.list.construct$dl1), ctrl = 5, name = "dl5")
360 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$dl1, gene.list.construct$dl2,
361 | gene.list.construct$dl3, gene.list.construct$dl4,
362 | gene.list.construct$dl5)), ctrl = 5, name = "dorsal_features")
363 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$V0)), ctrl = 5, name = "V0")
364 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$V1)), ctrl = 5, name = "V1")
365 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$V2a)), ctrl = 5, name = "V2a")
366 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$V2b)), ctrl = 5, name = "V2b")
367 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$V3)), ctrl = 5, name = "V3")
368 | dox.ra.sag.obj <- AddModuleScore(dox.ra.sag.obj, features = list(c(gene.list.construct$V3)), ctrl = 5, name = "MN")
369 | FeaturePlot(dox.ra.sag.obj, features = c("dorsal_features1", "dl11", "dl21", "dl31", "dl41", "dl51"))
370 | dox.meta <- dox.obj@meta.data
371 | dox.ra.sag.meta <- dox.ra.sag.obj@meta.data
372 | dox.meta$category <- "Dox Only"
373 | dox.ra.sag.meta$category <- "Dox RA SAG"
374 | meta.all <- rbind(dox.meta, dox.ra.sag.meta)
375 | meta.all.sub <- meta.all[,c(8:20)]
376 | dox.meta <- dox.obj@meta.data
377 | dox.ra.sag.meta <- dox.ra.sag.obj@meta.data
378 | dox.meta$category <- "Dox + RA"
379 | dox.ra.sag.meta$category <- "Dox + SAG"
380 | meta.all <- rbind(dox.meta, dox.ra.sag.meta)
381 | meta.all.sub <- meta.all[,c(7:19)]
382 | meta.all.sub.melt <- reshape2::melt(meta.all.sub[,c(6:13)])
383 | ## Ref: https://stackoverflow.com/questions/17319487/median-and-quartile-on-violin-plots-in-ggplot2
384 | median.quartile <- function(x){ # Summarise x by its lower quartile, median and upper quartile; later in this log it is passed as fun.y to stat_summary()
385 | out <- quantile(x, probs = c(0.25,0.5,0.75)) # 25th, 50th and 75th percentiles of x
386 | names(out) <- c("ymin","y","ymax") # relabel the three quantiles; presumably these are the names stat_summary() looks for - confirm against ggplot2 docs
387 | return(out) # named vector holding the three quantiles
388 | }
389 | cs <- viridis(20)
390 | ggplot(meta.all.sub.melt, aes(x = variable, y = value, fill = category)) +
391 | geom_boxplot() +
392 | scale_fill_viridis_d(option = "A", begin = 0.5, end = 0.9)
393 | ggplot(meta.all.sub, aes(x = category, y = dorsal_features1, fill = category)) +
394 | geom_violin(trim = T) +
395 | scale_fill_viridis_d(option = "A", begin = 0.5, end = 0.9) +
396 | stat_summary(fun.y=median.quartile,geom='point', color = rep(cs[c(20,1)], each = 3)) +
397 | stat_summary(fun.y=median.quartile,geom='line', color = rep(cs[c(20,1)], each = 3)) +
398 | stat_compare_means(label =  "p.signif", label.x = 1.5)
399 | save.image("~/Desktop/Morris Lab/Manuscripts/Capybara/MN revision/nova_seq_110920/Reprogrammed_mapped/112220_Dox_with_ra_or_sag_workspace.RData")
400 | saveRDS(dox.obj, "~/Desktop/Morris Lab/Manuscripts/Capybara/MN revision/nova_seq_110920/dox_ra_seurat.Rds")
401 | saveRDS(dox.ra.sag.obj, "~/Desktop/Morris Lab/Manuscripts/Capybara/MN revision/nova_seq_110920/dox_sag_seurat.Rds")
402 | library(CellTagR)
403 | test.obj <- readRDS("~/Desktop/Morris Lab/CellTagR Edit/filtered_celltag_obj.Rds")
404 | CellTagDataForCollapsing(test.obj, "~/Desktop/collpasing.txt")
405 | remove.packages("CellTagR")
406 | devtools::install_github("morris-lab/CellTagR")
407 | library(CellTagR)
408 | setwd("~/Desktop/")
409 | devtools::install("CellTagR")
410 | library(CellTagR)
411 | test.obj <- readRDS("~/Desktop/Morris Lab/CellTagR Edit/filtered_celltag_obj.Rds")
412 | CellTagDataForCollapsing(test.obj, "~/Desktop/test.txt")
413 | CellTagDataForCollapsing()
414 | CellTagDataForCollapsing
415 | devtools::install("CellTagR")
416 | library(CellTagR)
417 | CellTagDataForCollapsing(test.obj, "~/Desktop/test.txt")
418 | devtools::install("CellTagR")
419 | library(CellTagR)
420 | CellTagDataForCollapsing(test.obj, "~/Desktop/test.txt")
421 | devtools::install("CellTagR")
422 | CellTagDataForCollapsing(test.obj, "~/Desktop/test.txt")
423 | devtools::install("CellTagR")
424 | CellTagDataForCollapsing(test.obj, "~/Desktop/test.txt")
425 | devtools::document()
426 | setwd("CellTagR/")
427 | devtools::document()
428 | GetCellTagCurrentVersionWorkingMatrix <- function(celltag.obj, slot.to.select) { # Return the columns of one slot of celltag.obj that belong to its current version, with the version prefix stripped from the column names and all-NA rows removed
429 | curr.mtx <- slot(celltag.obj, slot.to.select) # read the slot whose name is given by slot.to.select
430 | if (nrow(curr.mtx) <= 0) { # nothing stored in the slot: hand the empty matrix back unchanged
431 | return(curr.mtx)
432 | } else {
433 | curr.version <- celltag.obj@curr.version # version string stored on the object
434 | curr.mtx.sub <- curr.mtx[, which(startsWith(colnames(curr.mtx), curr.version))] # keep columns whose name begins with the version string; NOTE(review): no drop = FALSE, so a single matching column may collapse to a vector - confirm
435 | colnames(curr.mtx.sub) <- gsub(pattern = paste0(curr.version, "."), replacement = "", colnames(curr.mtx.sub)) # remove "<version>." from the names; NOTE(review): the pattern is a regex, so "." matches any character and the match is not anchored to the start of the name
436 | full.mtx.sub <- curr.mtx.sub[Matrix::rowSums(is.na(curr.mtx.sub)) != ncol(curr.mtx.sub),] # drop rows in which every remaining column is NA
437 | return(full.mtx.sub)
438 | }
439 | }
440 | test.obj <- readRDS("~/Desktop/Morris Lab/CellTagR Edit/post_collapsing_hf1_d15.Rds")
441 | test.obj <- SingleCellDataBinatization(test.obj, 2)
442 | celltag.obj <- test.obj
443 | dt.mtx.whitelist.path <- system.file("extdata", "v1_whitelist.csv", package = "CellTagR")
444 | celltag.obj <- SingleCellDataWhitelist(celltag.obj, dt.mtx.whitelist.path)
445 | celltag.obj <- MetricBasedFiltering(celltag.obj, 20, comparison = "less")
446 | celltag.obj <- MetricBasedFiltering(celltag.obj, 2, comparison = "greater")
447 | filtered.whitelised.data <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "metric.filtered.count")
448 | Jac <- simil(filtered.whitelised.data, method = "Jaccard")
449 | Jac <- proxy::simil(filtered.whitelised.data, method = "Jaccard")
450 | library(Matrix)
451 | install.packages("proxyC")
452 | library(proxyC)
453 | Jac <- proxy::simil(filtered.whitelised.data, method = "Jaccard")
454 | Jac <- proxyC::simil(filtered.whitelised.data, method = "Jaccard")
455 | Jac <- proxyC::simil(filtered.whitelised.data, method = "jaccard")
456 | Jac.2 <- proxy::simil(as.matrix(filtered.whitelised.data), method = "Jaccard")
457 | Jac.2 <- as.matrix(Jac.2)
458 | sum(abs(Jac - Jac.2))
459 | Jac.mtx <- as.matrix(Jac)
460 | sum(abs(Jac.mtx[rownames(Jac.2), colnames(Jac.2)] - Jac.2))
461 | View(Jac.2)
462 | View(Jac.mtx)
463 | diag(Jac.2) <- 1
464 | sum(abs(Jac.mtx[rownames(Jac.2), colnames(Jac.2)] - Jac.2))
465 | as(Jac, "dgCMatrix")
466 | as(Jac, "dgTMatrix")
467 | as(as(Jac, "dgTMatrix"), "dgCMatrix")
468 | devtools::install_github("morris-lab/CellTagR")
469 | JaccardAnalysis
470 | library(CellTagR)
471 | JaccardAnalysis
472 | library(CellTagR)
473 | library(CellTagR)
474 | JaccardAnalysis
475 | remove.packages("CellTagR")
476 | devtools::install_github("morris-lab/CellTagR")
477 | library(CellTagR)
478 | JaccardAnalysis
479 | devtools::document()
480 | rm(list = c("GetCellTagCurrentVersionWorkingMatrix"))
481 | devtools::document()
482 | library(CellTagR)
483 | JaccardAnalysis()
484 | JaccardAnalysis
485 | celltag.obj <- JaccardAnalysis(celltag.obj, fast = T)
486 | celltag.obj.2 <- JaccardAnalysis(celltag.obj)
487 | Jaccard.Matrix <- celltag.obj@jaccard.mtx
488 | # Using the igraph package to facilitate the identification of membership to each clone
489 | jac.summ <- Matrix::summary(Jaccard.Matrix)
490 | lower.tri.summ <- subset(jac.summ, i>=j)
491 | test <- sparseMatrix(i = lower.tri.summ$i,
492 | j = lower.tri.summ$j,
493 | x = lower.tri.summ$x,
494 | dims = dim(Jaccard.Matrix))
495 | test.df <- as.data.frame(Matrix::summary(test))
496 | test.2 <- Jaccard.Matrix * lower.tri(Jaccard.Matrix)
497 | test.df.2 <- as.data.frame(Matrix::summary(test.2))
498 | View(test.2)
499 | View(test.df)
500 | View(test.df.2)
501 | # Using the igraph package to facilitate the identification of membership to each clone
502 | jac.summ <- Matrix::summary(Jaccard.Matrix)
503 | lower.tri.summ <- subset(jac.summ, i>j)
504 | test <- sparseMatrix(i = lower.tri.summ$i,
505 | j = lower.tri.summ$j,
506 | x = lower.tri.summ$x,
507 | dims = dim(Jaccard.Matrix))
508 | test.df <- as.data.frame(Matrix::summary(test))
509 | View(test.df)
510 | View(test.df.2)
511 | sum(abs(test.df$x - test.df.2$x))
512 | devtools::document()
513 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | 


--------------------------------------------------------------------------------
/CellTagR.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | Encoding: UTF-8
 9 | 
10 | AutoAppendNewline: Yes
11 | StripTrailingWhitespace: Yes
12 | 
13 | BuildType: Package
14 | PackageUseDevtools: Yes
15 | PackageInstallArgs: --no-multiarch --with-keep.source
16 | PackageRoxygenize: rd,collate,namespace
17 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: CellTagR
 2 | Title: Identify Clonal Identity from ScRNA-Seq and CellTag Data
 3 | Version: 0.0.0.9000
 4 | Authors@R: person("Samantha", "Morris", email = "s.morris@wustl.edu", role = c("aut", "cre"))
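 5 | Description: Processes CellTag sequencing data from single-cell RNA-seq experiments: extracts CellTags from fastq/bam files, builds and filters CellTag count matrices, computes Jaccard similarity between cells and calls clones.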
 6 | Depends: R (>= 3.5.0),
 7 |     gridExtra,
 8 |     tools,
 9 |     proxy,
10 |     corrplot,
11 |     igraph,
12 |     data.table,
13 |     plyr,
14 |     reshape,
15 |     Matrix,
16 |     tidyverse,
17 |     foreach,
18 |     networkD3,
19 |     proxyC
20 | License: MIT License
21 | Encoding: UTF-8
22 | LazyData: true
23 | RoxygenNote: 7.1.1
24 | 


--------------------------------------------------------------------------------
/Examples/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/.DS_Store


--------------------------------------------------------------------------------
/Examples/CellTagR CellTag Object V1 V2 V3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/CellTagR CellTag Object V1 V2 V3.pdf


--------------------------------------------------------------------------------
/Examples/CellTag_UTR.fa:
--------------------------------------------------------------------------------
 1 | >CellTag.UTR
 2 | GAATTCGATGACAGGCGCAGCTTCCGAGGGATTTGAGATCCAGACATGATAAGATACATT
 3 | GATGAGTTTGGACAAACCAAAACTAGAATGCAGTGAAAAAAATGCCTTATTTGTGAAATT
 4 | TGTGATGCTATTGCCTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACA
 5 | 
 6 | >GFP.CDS
 7 | ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGAC
 8 | GGCGACGTAAACGGCCACAAGTTCAGCGTGTCTGGCGAGGGCGAGGGCGATGCCACCTAC
 9 | GGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACC
10 | CTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAG
11 | CAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTC
12 | TTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTG
13 | GTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCAC
14 | AAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAAC
15 | GGCATCAAGGCGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCC
16 | GACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCAC
17 | TACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTC
18 | CTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAA
19 | 
20 | 
21 | #GTF Entries:
22 | CellTag.UTR	custom	exon	1	175	.	+	.	gene_id "CellTag.UTR"; transcript_id "celltag.utr";
23 | GFP.CDS	custom	exon	1	720	.	+	.	gene_id "GFP.CDS"; transcript_id "gfp.cds";
24 | 


--------------------------------------------------------------------------------
/Examples/CloneHunterWhitelistTestRun.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Whitelist Regeneration Notebook"
 3 | output: html_notebook
 4 | ---
 5 | 
 6 | ### Load the Package
 7 | ```{r}
 8 | library(roxygen2)
 9 | library(devtools)
10 | setwd("~/Desktop/CloneHunterNew_5/")
11 | setwd("CloneHunterNew/")
12 | devtools::document()
13 | ```
14 | 
15 | ## V1
16 | ### Create CellTag Object
17 | ```{r}
18 | v1.whitelist <- CellTagObject("v1.whitelist", "~/Desktop/CloneHunterTest/V1_S1_L001_R1_001.fastq")
19 | ```
20 | 
21 | ### Extract the CellTag Reads
22 | ```{r}
23 | v1.whitelist <- CellTagExtraction(v1.whitelist, "v1")
24 | ```
25 | 
26 | ### Sort by CellTag Frequency
27 | ```{r}
28 | v1.whitelist <- AddCellTagFreqSort(v1.whitelist)
29 | ```
30 | 
31 | ### V1 Whitelist Generation
32 | ```{r}
33 | v1.whitelist <- CellTagWhitelistFiltering(v1.whitelist, 0.9)
34 | ```
35 | 
36 | ## V2
37 | ### Create CellTag Object
38 | ```{r}
39 | v2.whitelist <- CellTagObject("v2.whitelist", "~/Desktop/CloneHunterTest/V2-1_S2_L001_R1_001.fastq")  # object name matches the V2 library
40 | ```
41 | 
42 | ### Extract the CellTag Reads
43 | ```{r}
44 | v2.whitelist <- CellTagExtraction(v2.whitelist, "v2")
45 | ```
46 | 
47 | ### Sort by CellTag Frequency
48 | ```{r}
49 | v2.whitelist <- AddCellTagFreqSort(v2.whitelist)
50 | ```
51 | 
52 | ### V2 Whitelist Generation
53 | ```{r}
54 | v2.whitelist <- CellTagWhitelistFiltering(v2.whitelist, 0.9)
55 | ```
56 | 
57 | ## V3
58 | ### Create CellTag Object
59 | ```{r}
60 | v3.whitelist <- CellTagObject("v3.whitelist", "~/Desktop/CloneHunterTest/V2-2_S3_L001_R1_001.fastq")
61 | ```
62 | 
63 | ### Extract the CellTag Reads
64 | ```{r}
65 | v3.whitelist <- CellTagExtraction(v3.whitelist, "v3")
66 | ```
67 | 
68 | ### Sort by CellTag Frequency
69 | ```{r}
70 | v3.whitelist <- AddCellTagFreqSort(v3.whitelist)
71 | ```
72 | 
73 | ### V3 Whitelist Generation
74 | ```{r}
75 | v3.whitelist <- CellTagWhitelistFiltering(v3.whitelist, 0.9)
76 | ```
77 | 


--------------------------------------------------------------------------------
/Examples/bar_Chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/bar_Chart.png


--------------------------------------------------------------------------------
/Examples/clone.calling.permutation.test.R:
--------------------------------------------------------------------------------
  1 | library(data.table)
  2 | library(parallel)
  3 | 
  4 | meta.data <- read.table("/scratch/smlab/CellTag_paper_analysis/permutation_test/final.drop.seq.10x.meta.data.txt", sep = "\t", 
  5 |                         stringsAsFactors = F, header = T, row.names = 1)
  6 | 
  7 | meta.data.orig <- read.table("/scratch/smlab/CellTag_paper_analysis/permutation_test/qp.comb.clone.meta.data.drop.seq.10X.txt", sep = "\t", 
  8 |                              stringsAsFactors = F, header = T, row.names = 1)
  9 | 
 10 | clones <- meta.data.orig[,c("hf1.v1", "hf1.v2.1", "hf1.v2.2", "hf2.v1", "hf2.v2.1", "hf2.v2.2")]
 11 | 
 12 | hf1 <- clones[, c("hf1.v1", "hf1.v2.1", "hf1.v2.2")]
 13 | hf2 <- clones[, c("hf2.v1", "hf2.v2.1", "hf2.v2.2")]
 14 | 
 15 | colnames(hf1) <- c("v1.1", "v2.1", "v2.2")
 16 | colnames(hf2) <- c("v1.1", "v2.1", "v2.2")
 17 | 
 18 | hf2.not.na.v1.1 <- which(!is.na(hf2$v1.1))
 19 | hf2.not.na.v2.1 <- which(!is.na(hf2$v2.1))
 20 | hf2.not.na.v2.2 <- which(!is.na(hf2$v2.2))
 21 | index.v1 <- intersect(which(is.na(hf1$v1.1)), hf2.not.na.v1.1)
 22 | hf1$v1.1[index.v1] <- 2000 + hf2$v1.1[index.v1]
 23 | hf1$v2.1[hf2.not.na.v2.1] <- 2000 + hf2$v2.1[hf2.not.na.v2.1]
 24 | hf1$v2.2[hf2.not.na.v2.2] <- 2000 + hf2$v2.2[hf2.not.na.v2.2]
 25 | 
 26 | all.clones <- hf1
 27 | # v2.1.unique.clones <- unique(all.clones$v2.1)
 28 | # clone.2.1 <- all.clones[,2]
 29 | # clone.2.1.count <- as.data.frame(table(clone.2.1))
 30 | # over.10.freq <- clone.2.1.count[which(clone.2.1.count$Freq > 10), ]
 31 | 
 32 | v1.1.unique.clones <- unique(all.clones$v1.1)
 33 | clone.1.1 <- all.clones[,1]
 34 | clone.1.1.count <- as.data.frame(table(clone.1.1))
 35 | over.10.freq.1.1 <- clone.1.1.count[which(clone.1.1.count$Freq > 10), ]
 36 | 
 37 | 
 38 | # high.number.clone.2.1 <- hf1[which(hf1$v2.1 %in% over.10.freq$clone.2.1), ]
 39 | high.number.clone.1.1 <- hf1[which(hf1$v1.1 %in% over.10.freq.1.1$clone.1.1), ]
 40 | 
 41 | # Draw the permutation samples for one clone (fast sampling only).
 42 | # Returns list(perm, clone.perm, perm.subset); perm.subset is NULL when subset.df is NULL.
 43 | sampling <- function(clone.id, clone.info, over.threshold.df, subset.df=NULL) {
 44 |   curr.count <- over.threshold.df[as.character(clone.id), "Freq"]
 45 |   curr.cell.barcode <- rownames(clone.info)[which(clone.info$v1.1 == clone.id)]
 46 |   replicate.num <- ceiling(nrow(clone.info)/curr.count)
 47 |   barcode.names <- rownames(clone.info)[which(startsWith(rownames(clone.info), "_10X"))]
 48 |   perm.subset <- NULL  # defined up front so the return value is valid without subset.df
 49 |   if (!is.null(subset.df)) {
 50 |     perm.subset <- replicate(replicate.num, sample(rownames(subset.df), curr.count))
 51 |   }
 52 |   perm <- replicate(replicate.num, sample(barcode.names, curr.count))
 53 |   clone.perm <- replicate(replicate.num, sample(curr.cell.barcode, curr.count))
 54 |   return(list(perm, clone.perm, perm.subset))
 55 | }
 56 | 
 57 | # clones.id <- as.numeric(as.character(over.10.freq$clone.2.1))
 58 | # over.10.freq$clone.2.1 <- as.integer(as.character(over.10.freq$clone.2.1))
 59 | # rownames(over.10.freq) <- over.10.freq$clone.2.1
 60 | 
 61 | clones.id <- as.numeric(as.character(over.10.freq.1.1$clone.1.1))
 62 | over.10.freq.1.1$clone.1.1 <- as.integer(as.character(over.10.freq.1.1$clone.1.1))
 63 | rownames(over.10.freq.1.1) <- over.10.freq.1.1$clone.1.1
 64 | num.to.rep <- as.data.frame(seq(1, 50))
 65 | 
 66 | hf1.w.tp <- cbind(hf1, timepoints = unlist(lapply(strsplit(rownames(hf1), "-"), function(x) x[length(x)])))
 67 | hf1.w.tp.10x.only <- hf1.w.tp[which(startsWith(rownames(hf1.w.tp), "_10X")), ]
 68 | hf1.w.tp.10x.only$timepoints <- as.integer(as.character(hf1.w.tp.10x.only$timepoints))
 69 | hf1.aft.tp.3 <- hf1.w.tp.10x.only[which(hf1.w.tp.10x.only$timepoints >= 3), ]
 70 | rslt <- apply(num.to.rep,1,
 71 |               function(x) {
 72 |                 sampling.ls <- mclapply(over.10.freq.1.1$clone.1.1, sampling, 
 73 |                                         clone.info = hf1, over.threshold.df = over.10.freq.1.1, subset.df = hf1.w.tp.10x.only,
 74 |                                         mc.cores = 24)
 75 |                 return(sampling.ls)
 76 |               })
 77 | 
 78 | 
 79 | save(rslt, file = "/scratch/smlab/CellTag_paper_analysis/permutation_test/sampling_output_v1.RData")
 80 | 
 81 | # load("/scratch/smlab/CellTag_paper_analysis/permutation_test/sampling_output.RData")
 82 | # Calculate the percentages
 83 | percentage.perm.calc <- function(col.num, meta.data.original, x) {
 84 |   # Percentage of the given barcodes whose "res.0.8" value equals 5.
 85 |   pct.in.cluster.5 <- function(barcodes) {
 86 |     cluster.ids <- meta.data.original[barcodes, "res.0.8"]
 87 |     length(which(cluster.ids == 5)) * 100/length(cluster.ids)
 88 |   }
 89 | 
 90 |   # x is indexed as a list of three 2-D sampling results; take column `col.num`
 91 |   # of each and report the three percentages in the same order.
 92 |   all.pct <- pct.in.cluster.5(x[[1]][, col.num])
 93 |   clone.pct <- pct.in.cluster.5(x[[2]][, col.num])
 94 |   subset.pct <- pct.in.cluster.5(x[[3]][, col.num])
 95 | 
 96 |   return(c(all.pct, clone.pct, subset.pct))
 97 | }
 98 | 
 99 | percentage.ls <- lapply(rslt,
100 |                                 function(x) {
101 |                                   rep <-
102 |                                     lapply(x,
103 |                                       function(x) {
104 |                                          curr.cell.bar <- x[[2]][1,1]
105 |                                          clone.id <- hf1.w.tp.10x.only[curr.cell.bar, ]$v1.1
106 |                                          curr.cell.barcode <- rownames(hf1.w.tp.10x.only)[which(hf1.w.tp.10x.only$v1.1 == clone.id)]
107 |                                          clone.cluster.0.8 <- meta.data.orig[curr.cell.barcode, "res.0.8"]
108 |                                          percent.null <- length(which(clone.cluster.0.8 == 5)) * 100/length(clone.cluster.0.8)
109 |                                          perc.calc.rslt.ls <- mclapply(seq(1, ncol(x[[1]])), percentage.perm.calc,
110 |                                            meta.data.original = meta.data.orig, x = x,
111 |                                            mc.cores = 24)
112 |                                          percent.perm <- unlist(lapply(perc.calc.rslt.ls, function(x) x[1]))
113 |                                          percent.clone.perm <- unlist(lapply(perc.calc.rslt.ls, function(x) x[2]))
114 |                                          percent.subset.perm <- unlist(lapply(perc.calc.rslt.ls, function(x) x[3]))
115 |                                          return(list(clone.id, percent.null, percent.perm, percent.clone.perm, percent.subset.perm))
116 |                                       }
117 |                                     )
118 |                                   return(rep)
119 |                                 }
120 |                               )
121 | 
122 | save(percentage.ls, file = "/scratch/smlab/CellTag_paper_analysis/permutation_test/percentage_over_10_v1.RData")
123 | 
124 | perm.test.super.ls <- lapply(percentage.ls,
125 |                              function(x) {
126 |                                p.value.ls <- lapply(x, 
127 |                                                     function(y) {
128 |                                                       clone.id <- y[[1]]
129 |                                                       null.percent <- y[[2]]
130 |                                                       real.distribution <- y[[5]]
131 |                                                       curr.p <- sum(real.distribution > null.percent)/length(real.distribution)
132 |                                                       return(data.frame(clone.num = clone.id, p.val = curr.p))
133 |                                                     })
134 |                                p.value.df <- rbindlist(p.value.ls)
135 |                                return(p.value.df)
136 |                              })
137 | 
138 | perm.df <- data.frame()
139 | for (i in 1:length(perm.test.super.ls)) {
140 |   curr.df <- perm.test.super.ls[[i]]
141 |   if (ncol(perm.df) == 0) {
142 |     perm.df <- curr.df
143 |   } else {
144 |     perm.df <- cbind(perm.df, curr.df[,2])
145 |   }
146 | }
147 | 
148 | clone.vec <- perm.df$clone.num
149 | perm.df <- as.data.frame(perm.df[,-1])
150 | rownames(perm.df) <- clone.vec
151 | perm.df <- cbind(perm.df, avg = rowMeans(perm.df))
152 | 
153 | p.val.df <- data.frame(clone.id = rownames(perm.df), avg.p = perm.df[rownames(perm.df),]$avg)
154 | 
155 | save(p.val.df, file = "/scratch/smlab/CellTag_paper_analysis/permutation_test/p_value_v1.RData")


--------------------------------------------------------------------------------
/Examples/jaccard example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/jaccard example.png


--------------------------------------------------------------------------------
/Examples/jaccard wo collapsing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/jaccard wo collapsing.png


--------------------------------------------------------------------------------
/Examples/network construction and visualization.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Network Construction Test"
 3 | output: html_notebook
 4 | ---
 5 | 
 6 | ### Load the package
 7 | ```{r}
 8 | library(roxygen2)
 9 | library(devtools)
10 | setwd("~/Desktop/CloneHunterNew_5/CloneHunterNew/")
11 | devtools::document()
12 | 
13 | library(tidyverse)
14 | library(foreach)
15 | library(networkD3)
16 | ```
17 | 
18 | ### Read in the CellTag Object
19 | ```{r}
20 | bam.test.obj <- readRDS("~/Desktop/bam_v123_obj.Rds")
21 | ```
22 | 
23 | ### Calculate the Linked list
24 | ```{r}
25 | bam.test.obj <- convertCellTagMatrix2LinkList(bam.test.obj)
26 | ```
27 | 
28 | ### Get the nodes
29 | ```{r}
30 | bam.test.obj <- getNodesfromLinkList(bam.test.obj)
31 | ```
32 | 
33 | ### Add additional information
34 | ```{r}
35 | additional_data <- data.frame(sample(1:10, size = length(rownames(bam.test.obj@celltag.aggr.final)), replace = TRUE), row.names = rownames(bam.test.obj@celltag.aggr.final))
36 | colnames(additional_data) <- "Cluster"
37 | 
38 | bam.test.obj <- addData2Nodes(bam.test.obj, additional_data)
39 | ```
40 | 
41 | ### Network visualization and plot
42 | 
43 | ```{r, fig.width=10, fig.height=10}
44 | bam.test.obj <- drawSubnet(tag = "CellTagV1_2", overlay = "Cluster", celltag.obj = bam.test.obj)
45 | bam.test.obj@network
46 | saveNetwork(bam.test.obj@network, "~/Desktop/hf1.d15.network.construction.html")
47 | ```
48 | 
49 | ### Stacked bar charts
50 | ```{r}
51 | bar.data <- bam.test.obj@celltag.aggr.final
52 | bar.data$Cell.BC <- rownames(bar.data)
53 | 
54 | bar.data <- gather(bar.data, key = "CellTag", value = "Clone", 1:3, na.rm = FALSE)
55 | ```
56 | 
57 | ### ggplot
58 | ```{r}
59 | ggplot(data = bar.data) + 
60 |   geom_bar(mapping = aes(x = CellTag, fill = factor(Clone)), position = "fill", show.legend = FALSE) + 
61 |   scale_y_continuous(labels = scales::percent_format()) +
62 |   theme_bw()
63 | ```
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/Examples/permutation_python.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import collections
  3 | import numpy as np
  4 | import math as mt
  5 | import time
  6 | 
  7 | def calculate_proportion(sp, cluster_num_list):
  8 |     clusters_curr = sp.loc[:,"res.0.8"]
  9 |     unique_curr, counts_curr = np.unique(clusters_curr, return_counts=True)
 10 |     count_dict_curr = dict(zip(unique_curr, counts_curr))
 11 |     curr_total = sum(count_dict_curr.values())
 12 |     cluster_dict = {}
 13 |     for c_n in cluster_num_list:
 14 |         if c_n in count_dict_curr.keys():
 15 |             cluster_dict[c_n] = count_dict_curr[c_n] * 100/curr_total
 16 |         else:
 17 |             cluster_dict[c_n] = 0
 18 |     
 19 |     return cluster_dict
 20 | 
 21 | 
 22 | def randomized_test(clones, orig):
 23 |     clone_sample_randoms = {}
 24 |     for c_1 in clones:
 25 |         curr_clone_cells = orig.loc[df['v2.2'] == c_1]
 26 |         clone_size = len(curr_clone_cells.index)
 27 |         rep_num = int(mt.ceil(len(orig.index)/clone_size))
 28 |         curr_clone_random_percentage = {}
 29 |         for rep in range(rep_num):
 30 |             # Sample without replacement
 31 |             curr_sample = orig.sample(n=clone_size)
 32 |             curr_percentage_dict = calculate_proportion(curr_sample, cluster_ls)
 33 |             curr_clone_random_percentage[rep] = curr_percentage_dict
 34 | 
 35 |         clone_sample_randoms[c_1] = curr_clone_random_percentage
 36 |     
 37 |     return clone_sample_randoms
 38 | 
 39 | 
 40 | df = pd.read_table("meta.clone.clean.integrated.v1.v2.1.v2.2.txt", sep="\t")
 41 | v1 = df.loc[:,"v1.1"]
 42 | v2 = df.loc[:,"v2.1"]
 43 | v22 = df.loc[:, "v2.2"]
 44 | clusters = df.loc[:,"res.0.8"]
 45 | 
 46 | not_na_v1 = pd.notnull(v1)
 47 | not_na_v2 = pd.notnull(v2)
 48 | not_na_v22 = pd.notnull(v22)
 49 | 
 50 | v1_not_na = v1[not_na_v1]
 51 | v2_not_na = v2[not_na_v2]
 52 | v22_not_na = v22[not_na_v22]
 53 | 
 54 | unique, counts = np.unique(v22_not_na, return_counts=True)
 55 | count_dict = dict(zip(unique, counts))
 56 | grt_5_count_dict = {}
 57 | 
 58 | for key, value in count_dict.items():
 59 |     if value > 5:
 60 |         grt_5_count_dict[key] = value
 61 | 
 62 | clones = list(grt_5_count_dict.keys())
 63 | cluster_list, counts_cls = np.unique(clusters, return_counts=True)
 64 | cluster_dict = dict(zip(cluster_list, counts_cls))
 65 | cluster_ls = list(cluster_dict.keys())
 66 | 
 67 | replication_number = 50
 68 | time_vec = []
 69 | replication_dict = {}
 70 | for j in range(replication_number):
 71 |     print(j)
 72 |     start_time = time.time()
 73 |     curr_replicate_rslt = randomized_test(clones, df)
 74 |     end_time = time.time()
 75 |     replication_dict[j] = curr_replicate_rslt
 76 |     time_vec.append(end_time - start_time)
 77 |     print(end_time - start_time)
 78 | 
 79 | # Format: {clone id1: {0:[], 1:[], 3:[], 4:[], 6:[]}, clone id2: {0:[], 1:[], 3:[], 4:[], 6:[]}, ...}
 80 | rearrange_dict = {}
 81 | for k,v in replication_dict.items():
 82 |     for sk,sv in v.items():
 83 |         curr_c = sk
 84 |         curr_pct_dict = {}
 85 |         for ssk,ssv in sv.items():
 86 |             for sssk,sssv in ssv.items():
 87 |                 if sssk not in curr_pct_dict.keys():
 88 |                     curr_pct_dict[sssk] = [sssv]
 89 |                 else:
 90 |                     curr_pct_dict[sssk].append(sssv)
 91 |         #print(len(curr_pct_dict[0]))
 92 |         if curr_c not in rearrange_dict.keys():
 93 |             rearrange_dict[curr_c] = curr_pct_dict
 94 |         else:
 95 |             for pct_k,pct_v in curr_pct_dict.items():
 96 |                 rearrange_dict[curr_c][pct_k].extend(pct_v)
 97 | 
 98 | p_val_grt_overall = {}
 99 | p_val_less_overall = {}
100 | for key_1,val_1 in rearrange_dict.items():
101 |     curr_cl = key_1
102 |     grt_p = {}
103 |     less_p = {}
104 |     for ky,vl in val_1.items():
105 |         grt_p[ky] = sum(i > clone_null_pct[curr_cl][ky] for i in vl)/len(vl)
106 |         less_p[ky] = sum(j < clone_null_pct[curr_cl][ky] for j in vl)/len(vl)
107 |     p_val_grt_overall[curr_cl] = grt_p
108 |     p_val_less_overall[curr_cl] = less_p
109 | 
110 | grt_df = pd.DataFrame(p_val_grt_overall)
111 | less_df = pd.DataFrame(p_val_less_overall)
112 | 
113 | clusters_all = {}
114 | for k1,vl in rearrange_dict.items():
115 |     for k2,vl2 in vl.items():
116 |         if k2 in clusters_all.keys():
117 |             clusters_all[k2].extend(vl2)
118 |         else:
119 |             clusters_all[k2] = vl2         
120 | 
121 | clster_df = pd.DataFrame(clusters_all)
122 | null_df = pd.DataFrame(clone_null_pct)
123 | 
124 | null_df.to_csv("permutation_clean_null_v2_2_ca.txt", sep = "\t")
125 | clster_df.to_csv("percentages_all_clusters_v2_2_ca.txt", sep = "\t")
126 | grt_df.to_csv("p_value_hyper_v2_2_ca.txt", sep = "\t")
127 | less_df.to_csv("p_value_hypo_v2_2_ca.txt", sep = "\t")
128 | 
129 | 


--------------------------------------------------------------------------------
/Examples/post_filtering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/post_filtering.png


--------------------------------------------------------------------------------
/Examples/pre_filtering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/Examples/pre_filtering.png


--------------------------------------------------------------------------------
/Examples/sc analysis.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Single-Cell CellTag Data Analysis"
 3 | output: html_notebook
 4 | ---
 5 | 
 6 | # Load the CellTagR package (already installed if you carried out the whitelisting step)
 7 | ```{r}
 8 | library(CellTagR)
 9 | ```
10 | 
11 | ## The following two steps are skipped for time saving and demo purposes
12 | ### Create a CellTag Object
13 | ```{r, eval=FALSE}
14 | bam.test.obj <- CellTagObject(object.name = "", fastq.bam.directory = "")
15 | ```
16 | 
17 | ### Extract CellTag Information
18 | ```{r, eval=FALSE}
19 | bam.test.obj <- CellTagExtraction(celltag.obj = bam.test.obj, celltag.version = "v1")
20 | ```
21 | 
22 | ### Load the demo object
23 | ```{r}
24 | bam.test.obj <- readRDS("~/Desktop/presentation/Demo/demo_object.Rds")
25 | head(bam.test.obj@bam.parse.rslt[["v1"]])
26 | bam.test.obj@celltag.stats
27 | ```
28 | 
29 | ### Generate the count matrix
30 | ```{r}
31 | bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "~/Desktop/presentation/Demo/barcodes.tsv")
32 | dim(bam.test.obj@raw.count)
33 | ```
34 | 
35 | ### Generate file for collapsing
36 | ```{r}
37 | bam.test.obj <- CellTagDataForCollapsing(celltag.obj = bam.test.obj, output.file = "~/Desktop/collapsing.txt")
38 | ```
39 | 
40 | ### Process the collapsing result
41 | ```{r}
42 | bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/test_starcode_out_2.txt")
43 | dim(bam.test.obj@collapsed.count)
44 | ```
45 | 
46 | ### Generate the binary matrix
47 | ```{r}
48 | bam.test.obj <- SingleCellDataBinarization(bam.test.obj, 2)
49 | ```
50 | 
51 | ### Look at the metric plots
52 | ```{r, fig.width=10, fig.height=10}
53 | MetricPlots(bam.test.obj)
54 | ```
55 | 
56 | ### Whitelist based filtering
57 | ```{r}
58 | bam.test.obj <- SingleCellDataWhitelist(bam.test.obj, "~/Desktop/Morris Lab/CloneHunter/inst/extdata/v1_whitelist.csv")
59 | dim(bam.test.obj@whitelisted.count)
60 | ```
61 | 
62 | ### Metric Based Filtering
63 | ```{r}
64 | bam.test.obj <- MetricBasedFiltering(bam.test.obj, 20, comparison = "less")
65 | bam.test.obj <- MetricBasedFiltering(bam.test.obj, 2, comparison = "greater")
66 | dim(bam.test.obj@metric.filtered.count)
67 | ```
68 | 
69 | ### Metric Plots Again to Check for Additional Filtering
70 | ```{r, fig.width=10, fig.height=10}
71 | MetricPlots(bam.test.obj)
72 | ```
73 | 
74 | ### Jaccard Analysis
75 | ```{r}
76 | bam.test.obj <- JaccardAnalysis(bam.test.obj)
77 | ```
78 | 
79 | ### Clone Calling
80 | ```{r}
81 | bam.test.obj <- CloneCalling(celltag.obj = bam.test.obj, correlation.cutoff=0.7)
82 | bam.test.obj@clone.composition[["v1"]]
83 | bam.test.obj@clone.size.info[["v1"]]
84 | ```
85 | 
86 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(AddCellTagFreqSort)
 4 | export(Barcode.Aggregate)
 5 | export(CellTagDataForCollapsing)
 6 | export(CellTagDataPostCollapsing)
 7 | export(CellTagExtraction)
 8 | export(CellTagMatrixCount)
 9 | export(CellTagObject)
10 | export(CellTagPatternCalling)
11 | export(CellTagWhitelistFiltering)
12 | export(CloneCalling)
13 | export(JaccardAnalysis)
14 | export(MetricBasedFiltering)
15 | export(MetricPlots)
16 | export(SingleCellDataBinarization)
17 | export(SingleCellDataWhitelist)
18 | export(addData2Nodes)
19 | export(bam.process)
20 | export(convertCellTagMatrix2LinkList)
21 | export(drawSubnet)
22 | export(fastq.process)
23 | export(getNodesfromLinkList)
24 | exportClasses(CellTag)
25 | exportMethods(show)
26 | 


--------------------------------------------------------------------------------
/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/R/.DS_Store


--------------------------------------------------------------------------------
/R/AuxiliaryFunctions.R:
--------------------------------------------------------------------------------
  1 | #' Fastq Process Function
  2 | #'
  3 | #' This function extracts CellTags from the raw fastq sequencing file. Every read is scanned for the pattern and the matched sequences are returned in file order.
  4 | #' @param fastq.file Path to the input fastq file
  5 | #' @param pattern The pattern to seek for
  6 | #' @param short.nt.before.tag A short sequence before the 8nt tag to help more specific identification
  7 | #' @param short.nt.after.tag A short sequence after the 8nt tag to help more specific identification
  8 | #' @return A list of two character vectors: the full matched tag regions (flanking sequences included) and the corresponding 8nt CellTags, one entry per matching read.
  9 | #' @keywords single-cell RNA-seq data, CellTagging
 10 | #' @export
 11 | #' @examples
 12 | #' fastq.process("data.fastq", "CCGGT[ATCG]{8}GAATTC", "CCGGT", "GAATTC")
 13 | #' 
 14 | fastq.process <- function(fastq.file, pattern, short.nt.before.tag, short.nt.after.tag) {
 15 |   con <- file(fastq.file, "r")
 16 |   # Release the connection even if reading or matching fails part-way through
 17 |   on.exit(close(con), add = TRUE)
 18 | 
 19 |   # Only the matched tag sequences are accumulated (full tag region and 8nt tag);
 20 |   # the reads themselves are not retained
 21 |   full.tag.seq <- c()
 22 |   only.tag.seq <- c()
 23 |   print("Reading File......")
 24 |   # Estimate the number of chunks from the fastq file size (progress bar only);
 25 |   # at least 1 so that an empty file does not give the bar a zero-length range
 26 |   fq.size <- file.size(fastq.file)
 27 |   total <- max(fq.size/(1000000 * 101), 1)
 28 |   # Initialize the progress bar
 29 |   pb <- txtProgressBar(min = 0, max = total, style = 3)
 30 |   # Initialize the count
 31 |   count <- 0
 32 |   while(TRUE) {
 33 |     curr.lines <- readLines(con, 1000000)
 34 |     if (length(curr.lines) == 0) break
 35 |     # A fastq record is 4 lines and the chunk size is a multiple of 4, so the
 36 |     # sequences are lines 2, 6, 10, ... of whatever was actually read
 37 |     curr.seqs <- curr.lines[seq_along(curr.lines) %% 4 == 2]
 38 |     reg.rslt <- regexpr(pattern, curr.seqs, ignore.case = TRUE, perl = TRUE)
 39 |     contain.idx <- which(reg.rslt > 0)
 40 |     curr.f.seq <- curr.seqs[contain.idx]
 41 | 
 42 |     start.loc <- reg.rslt[contain.idx]
 43 |     end.loc <- start.loc + nchar(short.nt.before.tag) + 8 + nchar(short.nt.after.tag) - 1
 44 |     curr.full.tag <- substr(curr.f.seq, start = start.loc, stop = end.loc)
 45 |     only.tag <- substr(curr.full.tag, start = (nchar(short.nt.before.tag) + 1), stop = (nchar(short.nt.before.tag) + 8))
 46 |     full.tag.seq <- c(full.tag.seq, curr.full.tag)
 47 |     only.tag.seq <- c(only.tag.seq, only.tag)
 48 | 
 49 |     count <- count + 1
 50 |     if (count > total) {
 51 |       count <- total
 52 |     }
 53 |     setTxtProgressBar(pb, count)
 54 |   }
 55 |   close(pb)
 56 |   rslt <- list(full.tag.seq, only.tag.seq)
 57 |   return(rslt)
 58 | }
 59 | 
 60 | #' Bam File Process Function
 61 | #'
 62 | #' This function extracts CellTags from the bam sequencing file, provides cell barcode, umi and their corresponding celltag information.
 63 | #' @param bam.file The input bam data directory
 64 | #' @param pattern The pattern to seek for
 65 | #' @param short.nt.before.tag A short sequence before the 8nt tag to help more specific identification
 66 | #' @param short.nt.after.tag A short sequence after the 8nt tag to help more specific identification
 67 | #' @return A data table contains cell barcode, celltag and umi information
 68 | #' @keywords single-cell RNA-seq data, CellTagging
 69 | #' @export
 70 | #' @examples
 71 | #' bam.process("data.fastq", "CCGGT[ATCG]{8}GAATTC", "CCGGT", "GAATTC")
 72 | #' 
 73 | bam.process <- function(bam.file, pattern, short.nt.before.tag, short.nt.after.tag, technique) {
 74 |   # Install Rsamtools
 75 |   if (!requireNamespace("BiocManager", quietly = TRUE))
 76 |     install.packages("BiocManager")
 77 |   if (!requireNamespace("Rsamtools", quietly = TRUE)) {
 78 |     BiocManager::install("Rsamtools")
 79 |   }
 80 |   library(Rsamtools)
 81 |   # Get the bam file
 82 |   bamFile <- BamFile(bam.file)
 83 |   # Get the size of the bam file
 84 |   bam.size <- file.size(bam.file)
 85 |   total <- bam.size/(1000000 * 82.99)
 86 |   print(paste0("Reading ", bam.file, " ..."))
 87 |   # Initialize the progress bar
 88 |   pb <- txtProgressBar(min = 0, max = total, style = 3)
 89 |   # Initialize the number of lines to read at once
 90 |   yieldSize(bamFile) <- 1000000
 91 |   open(bamFile)
 92 |   if (tolower(technique) == "10x") {
 93 |       parameters <- ScanBamParam(what = scanBamWhat(), tag = c("CB", "GN", "UB", "CR"))
 94 |   } else {
 95 |       if (tolower(technique) == "dropseq") {
 96 |           parameters <- ScanBamParam(what = scanBamWhat(), tag = c("XC", "GN", "XM", "GE"))
 97 |       } else {
 98 |           if (tolower(technique) == "zumi") {
 99 |               parameters <- ScanBamParam(what = scanBamWhat(), tag = c("BC", "GN", "UB", "CR"))
100 |           } else {
101 |               stop("We don't support your current single-cell sequencing technology. Please contact us to add.")
102 |           }
103 |       }
104 |   }
105 |   bam.parsed.df <- data.table()
106 |   count <- 0
107 |   while(TRUE) {
108 |     curr.read <- scanBam(bamFile, param = parameters)[[1]]
109 | #    print(count)
110 |     if (length(curr.read$qname) <= 0) {
111 |       break
112 |     } else {
113 |       # Read in all information
114 |       curr.seqs <- as.character(curr.read$seq)
115 |       # Check if the sequences contain the celltag motif
116 |       reg.rslt <- regexpr(pattern, curr.seqs, ignore.case = TRUE, perl = TRUE)
117 |       contain.idx <- which(reg.rslt > 0)
118 |       if (length(contain.idx) > 0) {
119 |           if (tolower(technique) == "10x") {
120 |             curr.cell.bc <- curr.read$tag$CB
121 |             curr.umi <- curr.read$tag$UB
122 |           } else {
123 |             if (tolower(technique) == "dropseq") {
124 |                 curr.cell.bc <- curr.read$tag$XC
125 |                 curr.umi <- curr.read$tag$XM
126 |             } else if (tolower(technique) == "zumi") {
127 |                 curr.cell.bc <- curr.read$tag$BC
128 |                 curr.umi <- curr.read$tag$UB
129 |             }
130 |           }
131 |         curr.cell.tag <- rep(NA, length(curr.read$qname))
132 |         if (!(is.null(curr.cell.bc) | is.null(curr.umi))) {
133 |           # Initialize the current data table
134 |           curr.df <- data.table(Cell.BC = curr.cell.bc, UMI = curr.umi, Cell.Tag = curr.cell.tag)
135 |           curr.f.seq <- curr.seqs[contain.idx]
136 |           start.loc <- reg.rslt[contain.idx]
137 |           end.loc <- start.loc + nchar(short.nt.before.tag) + 8 + nchar(short.nt.after.tag) - 1
138 |           
139 |           curr.full.tag <- substr(curr.f.seq, start = start.loc, stop = end.loc)
140 |           only.tag <- substr(curr.full.tag, start = (nchar(short.nt.before.tag) + 1), stop = (nchar(short.nt.before.tag) + 8))
141 |           
142 |           curr.df$Cell.Tag[contain.idx] <- only.tag
143 |           # Add to the current data frame
144 |           if (nrow(bam.parsed.df) <= 0) {
145 |             bam.parsed.df <- curr.df[contain.idx,]
146 |           } else {
147 |             bam.parsed.df <- rbind(bam.parsed.df, curr.df[contain.idx, ])
148 |           }
149 |         }
150 |       }
151 |     }
152 |     count <- count + 1
153 |     setTxtProgressBar(pb, count)
154 |   }
155 |   close(bamFile)
156 |   close(pb)
157 |   return(bam.parsed.df)
158 | }
159 | 
#' CellTag Pattern Calling Function
#'
#' This function provides motif patterns corresponding to the input celltag version
#' @param celltag.version Which CellTag version are you investigating? One of "v1", "v2" or "v3".
#' @return A character vector of length 3: the regular-expression pattern, the nucleotides to look for before the 8nt tag, and the nucleotides to look for after it
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' CellTagPatternCalling("v1")
#' 
CellTagPatternCalling <- function(celltag.version) {
  # Nucleotides immediately upstream of the 8nt tag, per CellTag version.
  nt.before.tag <- c(v1 = "GGT", v2 = "GTGATG", v3 = "TGTACG")

  # An unknown version used to yield the pattern "NA[ATCG]{8}GAATTC" silently;
  # fail loudly instead.
  if (length(celltag.version) != 1 || is.na(celltag.version) ||
      !(celltag.version %in% names(nt.before.tag))) {
    stop("Unknown CellTag version: ", paste(celltag.version, collapse = ", "),
         ". Supported versions are: ", paste(names(nt.before.tag), collapse = ", "), ".")
  }

  short.nt.before.tag <- nt.before.tag[[celltag.version]]
  short.nt.after.tag <- "GAATTC"

  pattern <- paste0(short.nt.before.tag, "[ATCG]{8}", short.nt.after.tag)
  return(c(pattern, short.nt.before.tag, short.nt.after.tag))
}
181 | 
#' CellTag Barcode Aggregation function
#'
#' This function allows barcode aggregation of multiple-file processing. 
#' @param file.list files in a list to aggregate in order same as the BAM files
#' @param output.file where to save this aggregated output file. Should be a .tsv file.
#' @return Invisibly, the character vector of aggregated barcodes written to output.file. Each barcode is prefixed with "Sample-<i>_", where i is the position of its file in file.list.
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' Barcode.Aggregate(list("barcodes_1.tsv", "barcodes_2.tsv"), output.file = "barcode_aggr.tsv")
#' 
Barcode.Aggregate <- function(file.list, output.file) {
  # seq_along() keeps an empty file.list from iterating over c(1, 0).
  per.sample.bc <- lapply(seq_along(file.list), function(i) {
    curr.bc <- read.table(file.list[[i]], header = FALSE, stringsAsFactors = FALSE)
    # Only the first column of each file holds the barcodes.
    paste0("Sample-", i, "_", curr.bc[, 1])
  })
  final.bc <- as.character(unlist(per.sample.bc))

  write.table(as.data.frame(final.bc), output.file, sep = "\t",
              row.names = FALSE, col.names = FALSE, quote = FALSE)
  invisible(final.bc)
}
204 | 
205 | 
# Extract the part of a matrix slot that belongs to the object's current
# CellTag version.
#
# celltag.obj     object with a `curr.version` slot and the matrix slot to read
# slot.to.select  name of the matrix slot
#
# Columns are expected to be named "<version>.<tag>". Returns the columns of
# the current version with the "<version>." prefix removed, keeping only rows
# that are not NA in every one of those columns. A slot with no rows is
# returned unchanged.
GetCellTagCurrentVersionWorkingMatrix <- function(celltag.obj, slot.to.select) {
  curr.mtx <- slot(celltag.obj, slot.to.select)
  if (nrow(curr.mtx) <= 0) {
    return(curr.mtx)
  }

  # Match the literal "<version>." prefix: no regex, so "." is not a wildcard
  # and "v1" cannot pick up columns of a hypothetical "v10".
  version.prefix <- paste0(celltag.obj@curr.version, ".")
  is.curr.version <- startsWith(colnames(curr.mtx), version.prefix)

  # drop = FALSE keeps a matrix even when a single column or row is selected.
  curr.mtx.sub <- curr.mtx[, is.curr.version, drop = FALSE]
  colnames(curr.mtx.sub) <- substring(colnames(curr.mtx.sub), nchar(version.prefix) + 1)

  has.data <- Matrix::rowSums(is.na(curr.mtx.sub)) != ncol(curr.mtx.sub)
  return(curr.mtx.sub[has.data, , drop = FALSE])
}
219 | 
# Store a current-version matrix into a matrix slot of the CellTag object.
#
# celltag.obj   object with a `curr.version` slot and the matrix slot to write
# slot.to.set   name of the matrix slot
# final.to.set  matrix for the current version; its column names are prefixed
#               with "<version>." before storing
# replace       if TRUE, the whole slot is overwritten with the new matrix
#
# When the slot already holds data and replace is FALSE, columns of the current
# version are replaced by the new ones, columns of other versions are kept, and
# rows present on only one side are padded with NA. Returns the updated object.
SetCellTagCurrentVersionWorkingMatrix <- function(celltag.obj, slot.to.set, final.to.set, replace = FALSE) {
  version.prefix <- paste0(celltag.obj@curr.version, ".")
  cop.final <- final.to.set
  colnames(cop.final) <- paste0(version.prefix, colnames(cop.final))

  curr.existing.mtx <- slot(celltag.obj, slot.to.set)

  # Nothing to merge with: explicit overwrite, or an empty slot.
  if (replace || sum(dim(curr.existing.mtx)) <= 0) {
    slot(celltag.obj, slot.to.set) <- cop.final
    return(celltag.obj)
  }

  # Remove the columns previously stored for this version. The length guard
  # matters: x[, -integer(0)] would drop every column. drop = FALSE keeps a
  # matrix (and its row names) when a single column remains.
  existing.names <- colnames(curr.existing.mtx)
  stale.cols <- if (is.null(existing.names)) integer(0) else which(startsWith(existing.names, version.prefix))
  if (length(stale.cols) > 0) {
    curr.existing.mtx <- curr.existing.mtx[, -stale.cols, drop = FALSE]
  }

  new.rownames <- unique(c(rownames(curr.existing.mtx), rownames(cop.final)))

  # NA padding for rows missing from the new matrix.
  diff.rnms <- setdiff(new.rownames, rownames(cop.final))
  cop.comp.mtx <- matrix(NA, nrow = length(diff.rnms), ncol = ncol(cop.final))
  rownames(cop.comp.mtx) <- diff.rnms
  colnames(cop.comp.mtx) <- colnames(cop.final)

  # NA padding for rows missing from the retained existing columns.
  diff.rnms.2 <- setdiff(new.rownames, rownames(curr.existing.mtx))
  cem.comp.mtx <- matrix(NA, nrow = length(diff.rnms.2), ncol = ncol(curr.existing.mtx))
  rownames(cem.comp.mtx) <- diff.rnms.2
  colnames(cem.comp.mtx) <- colnames(curr.existing.mtx)

  to.merge.mtx.cop <- rbind(cop.final, cop.comp.mtx)
  to.merge.mtx.cem <- rbind(curr.existing.mtx, cem.comp.mtx)

  if (ncol(to.merge.mtx.cem) <= 0) {
    # No other version is stored: the padded new matrix is the result.
    new.mtx <- to.merge.mtx.cop[, colnames(cop.final), drop = FALSE]
  } else {
    new.mtx <- cbind(to.merge.mtx.cop[new.rownames, , drop = FALSE],
                     to.merge.mtx.cem[new.rownames, , drop = FALSE])
  }

  slot(celltag.obj, slot.to.set) <- new.mtx
  return(celltag.obj)
}
265 | 
266 | 
267 | 


--------------------------------------------------------------------------------
/R/CellTagExtraction.R:
--------------------------------------------------------------------------------
 1 | #' CellTag Extraction Function
 2 | #'
 3 | #' This function extracts CellTags from the raw fastq/bam sequencing file. If it is a fastq file, provides counts of each CellTag and sorts them in desending order. If it is a bam file, returns the barcode, umi, celltag information.
 4 | #' @param celltag.obj A CellTag object initialized with path to the fastq/bam file
 5 | #' @param celltag.version The CellTag version to extract
 6 | #' @param technique The technique used for scRNA-seq, Default to 10x. Currently enabled for 10x and dropseq.
 7 | #' @return A CellTag object with attribute (bam.parse.rslt) filled
 8 | #' @keywords single-cell RNA-seq data, CellTagging
 9 | #' @export
10 | #' @examples
11 | #' CellTagExtraction(bam.test.obj)
12 | #' 
13 | CellTagExtraction <- function(celltag.obj, celltag.version, technique = "10x") {
14 |   celltag.obj@curr.version <- celltag.version
15 |   if (file_test("-f", celltag.obj@fastq.bam.dir)) {
16 |     fastq.bam.input <- celltag.obj@fastq.bam.dir
17 |   } else {
18 |     fastq.bam.input <- list.files(celltag.obj@fastq.bam.dir, full.names = T)
19 |   }
20 |   file.extension.unique <- unique(file_ext(fastq.bam.input))
21 |   
22 |   if (length(celltag.obj@celltag.version) > 0) {
23 |     if (celltag.obj@curr.version %in% celltag.obj@celltag.version) {
24 |       print("This CellTag has already been processed!")
25 |     } else {
26 |       celltag.obj@celltag.version <- c(celltag.obj@celltag.version, celltag.obj@curr.version)
27 |     }
28 |   } else {
29 |     celltag.obj@celltag.version <- celltag.obj@curr.version
30 |   }
31 |   
32 |   p.calling <- CellTagPatternCalling(celltag.version)
33 |   
34 |   if (endsWith(file.extension.unique, "fastq") || endsWith(file.extension.unique, "fq")) {
35 |     if (length(fastq.bam.input) > 1) {
36 |       stop("Please process the whitelist files one at a time!")
37 |     }
38 |     rslt <- fastq.process(fastq.file = fastq.bam.input, pattern = p.calling[1], p.calling[2], p.calling[3])
39 |     celltag.obj@fastq.full.celltag[[celltag.version]] <- rslt[[1]]
40 |     celltag.obj@fastq.only.celltag[[celltag.version]] <- rslt[[2]]
41 |   }
42 |   if (endsWith(file.extension.unique, "bam")) {
43 |     rslt <- NULL
44 |     for (i in 1:length(fastq.bam.input)) {
45 |       curr.rslt <- bam.process(bam.file = fastq.bam.input[i], pattern = p.calling[1], p.calling[2], p.calling[3], technique)
46 |       if (length(fastq.bam.input) > 1) curr.rslt$Cell.BC <- paste0("Sample-", i, "_", curr.rslt$Cell.BC)
47 |       if (is.null(rslt)) {
48 |         rslt <- curr.rslt
49 |       } else {
50 |         rslt <- rbind(rslt, curr.rslt, fill = TRUE)
51 |       }
52 |     }
53 |     celltag.obj@bam.parse.rslt[[celltag.version]] <- rslt
54 |   }
55 |   
56 |   return(celltag.obj)
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/R/CellTagForCollapsing.R:
--------------------------------------------------------------------------------
  1 | #' CellTag Starcode Prior Collapsing
  2 | #'
  3 | #' This function generate the .txt file that will be fed into starcode - https://github.com/gui11aume/starcode - to collapse similar CellTags.
  4 | #' @param celltag.obj A CellTag object with the raw count matrix filled.
  5 | #' @param output.file The filepath and name to save the table for collapsing (usually a .txt file)
  6 | #' @return A CellTag object with collapsing mapping table stored in pre.starcode slot
  7 | #' @keywords single-cell RNA-seq data, CellTagging
  8 | #' @export
  9 | #' @examples
 10 | #' CellTagDataForCollapsing(bam.test.obj, "./collapsing.txt")
 11 | #' 
 12 | CellTagDataForCollapsing <- function(celltag.obj, output.file) {
 13 |   # Get the data out from the CellTag object
 14 |   umi.matrix <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "raw.count")
 15 |   
 16 |   for.collapse <- as.data.frame(Matrix::summary(umi.matrix))
 17 |   for.collapse$i <- rownames(umi.matrix)[for.collapse$i]
 18 |   for.collapse$j <- colnames(umi.matrix)[for.collapse$j]
 19 | 
 20 |   colnames(for.collapse) <- c("X2", "X1", "value")
 21 |   for.collapse$X1 <- as.character(for.collapse$X1)
 22 |   for.collapse$X2 <- as.character(for.collapse$X2)
 23 |   for.collapse <- for.collapse[which(for.collapse$value > 0), ]
 24 |   # Create the contatenation column
 25 |   if (length(list.files(celltag.obj@fastq.bam.dir)) > 1) {
 26 |     parts.to.paste <- unlist(lapply(strsplit(for.collapse$X2, "_"), function(x) x[2]))
 27 |     for.collapse$concat <- paste0(for.collapse$X1, unlist(lapply(strsplit(parts.to.paste, "-"), function(x) x[1])))
 28 |     sample.list.prefix <- unique(unlist(lapply(strsplit(for.collapse$X2, "_"), function(x) x[1])))
 29 |     r <- apply(as.data.frame(sample.list.prefix), 1, 
 30 |                function(x) {
 31 |                  for.collapse.sub <- for.collapse[which(startsWith(for.collapse$X2, paste0(x, "_"))), c("concat", "value")]
 32 |                  filename.to.save <- paste0(strsplit(output.file, "[.]")[[1]][1], "_", x, ".txt")
 33 |                  write.table(for.collapse.sub, filename.to.save, sep = "\t", row.names = F, quote = F, col.names = F)
 34 |                })
 35 |   } else {
 36 |     for.collapse$concat <- paste0(for.collapse$X1, unlist(lapply(strsplit(for.collapse$X2, "-"), function(x) x[1])))
 37 |     for.collapse.sub <- for.collapse[, c("concat", "value")]
 38 |     write.table(for.collapse.sub, output.file, sep = "\t", row.names = F, quote = F, col.names = F)
 39 |   }
 40 |   # Set CellTag object
 41 |   celltag.obj@pre.starcode[[celltag.obj@curr.version]] <- for.collapse
 42 |   # Print the path saved
 43 |   cat("The file for collapsing is stored at: ", output.file, "\n")
 44 |   return(celltag.obj)
 45 | }
 46 | 
#' CellTag Starcode Post Collapsing
#'
#' This function processes the result generated from starcode - https://github.com/gui11aume/starcode.
#' @param celltag.obj A CellTag object with the pre-starcode mapping matrix filled.
#' @param collapsed.rslt.file File path(s) to the collapsed result file(s). When more than one file is given, the sample prefix is taken from the part of each file name after its last "_" and before the first following ".".
#' @param replace.option Passed as `replace` to SetCellTagCurrentVersionWorkingMatrix: if TRUE the collapsed.count slot is overwritten with the new matrix. Defaults to FALSE.
#' @return A CellTag object with collapsed count matrix stored in collapsed.count slot
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' CellTagDataPostCollapsing(bam.test.obj, "./collapsing_result.txt")
#' 
CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file, replace.option = FALSE) {
  # Accumulates the collapsed rows of every result file.
  ultimate.collapsing.df <- data.frame()
  for (i in 1:length(collapsed.rslt.file)) {
    # Collapsed rows of the current file only.
    final.collapsing.df <- data.frame()
    # Process this one by one
    curr.file.dir <- collapsed.rslt.file[i]
    print(paste0("Processing ", curr.file.dir))
    # Read in the collapsed result. As used below: V1 = centroid, V2 = count,
    # V3 = comma-separated set of sequences collapsed into the centroid.
    collapsed <- read.table(curr.file.dir, sep = "\t", header = F, stringsAsFactors = F)
    # Read in the file for collapsing
    if (length(collapsed.rslt.file) > 1) {
      # Keep only the pre-starcode rows whose barcode carries this file's sample prefix.
      curr.sample.parts <- strsplit(basename(curr.file.dir), "_")[[1]]
      curr.sample <- strsplit(curr.sample.parts[length(curr.sample.parts)], "[.]")[[1]][1]
      collapsing <- celltag.obj@pre.starcode[[celltag.obj@curr.version]]
      collapsing <- collapsing[which(startsWith(collapsing$X2, paste0(curr.sample, "_"))), ]
    } else {
      collapsing <- celltag.obj@pre.starcode[[celltag.obj@curr.version]]
    }
    rownames(collapsing) <- collapsing$concat
    colnames(collapsing)[c(1:2)] <- c("Cell.Barcode", "CellTag")
    # Working copy that is rewritten as sequences are merged into centroids.
    new.collapsing.df <- collapsing
    
    # Each centroid is the 8-character CellTag followed by the barcode part.
    cell.bc <- substring(collapsed$V1, 9)
    cell.ct <- substring(collapsed$V1, 1, 8)
    # TRUE when every member of the collapsed set ends with the centroid's barcode part.
    cell.same <- apply(collapsed, 1, 
                       function(x) {
                         cell.bc <- substring(x[1], 9)
                         cell.subset <- strsplit(x[3], ",")[[1]]
                         return(all(endsWith(cell.subset, cell.bc)))
                       })
    
    cell.same.index <- which(cell.same)
    cell.diff.indx <- which(!cell.same)
    
    pb <- txtProgressBar(min = 0, max = length(cell.bc), style = 3)
    pb.count <- 0
    # Case 1: the whole set shares the centroid's barcode part - take the
    # centroid and the count reported in the collapsed result as they are.
    for (csi in cell.same.index) {
      pb.count <- pb.count + 1
      setTxtProgressBar(pb, pb.count)
      
      curr.row <- collapsed[csi,]
      curr.centroid <- curr.row$V1
      curr.count <- curr.row$V2
      curr.ct <- cell.ct[csi]
      
      curr.new.row <- data.frame(row.names = curr.centroid, concat = curr.centroid, CellTag = curr.ct, 
                                 value = curr.count, stringsAsFactors = F)
      
      if (nrow(final.collapsing.df) <= 0){
        final.collapsing.df <- curr.new.row
      } else {
        final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
      }
    }
    
    # Case 2: the set mixes barcode parts - merge only the members that share
    # the centroid's barcode part and keep the remaining members as their own rows.
    for (cdi in cell.diff.indx) {
      pb.count <- pb.count + 1
      setTxtProgressBar(pb, pb.count)
      
      curr.row <- collapsed[cdi,]
      curr.centroid <- curr.row$V1
      curr.count <- curr.row$V2
      curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]
      curr.ct <- cell.ct[cdi]
      curr.bc <- cell.bc[cdi]
      
      # Members ending with the centroid's barcode part, excluding the centroid itself.
      same.concat <- curr.collapse.set[which(endsWith(curr.collapse.set, curr.bc))]
      curr.to.collapse <- setdiff(same.concat, curr.centroid)
      
      if (length(curr.to.collapse) > 0) {
        for (j in 1:length(curr.to.collapse)) {
          curr.for.c <- curr.to.collapse[j]
          curr.for.c.ct <- substring(curr.for.c, 1, 8)
          if (curr.for.c.ct != curr.ct) {
            # Re-point this member at the centroid in the working copy.
            ind <- which(collapsing$concat == curr.to.collapse[j])
            ind.cent <- which(collapsing$concat == curr.centroid)
            new.collapsing.df[ind, "concat"] <- curr.centroid
            new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]
            new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
          }
        }
        # The centroid's count is the sum over every row now mapped to it.
        curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
        curr.count.new <- sum(curr.centroid.sub$value)
        curr.new.row <- data.frame(row.names = curr.centroid, concat = curr.centroid, CellTag = curr.ct,
                                   value = curr.count.new, stringsAsFactors = F)
      }else {
        curr.new.row <- new.collapsing.df[same.concat, c("concat", "CellTag", "value")]
      }
      # Members with a different barcode part are carried over from the working copy.
      curr.diff.rows <- new.collapsing.df[setdiff(curr.collapse.set, same.concat), c("concat", "CellTag", "value")]
      
      final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
      final.collapsing.df <- rbind(final.collapsing.df, curr.diff.rows)
    }

    # Row-name lookups that found nothing yield NA rows; drop them.
    if (length(which(is.na(final.collapsing.df$concat))) > 0) final.collapsing.df <- final.collapsing.df[-which(is.na(final.collapsing.df$concat)), ]
    rownames(final.collapsing.df) <- final.collapsing.df$concat
    # Attach the cell barcode of every collapsed row from the pre-starcode table.
    final.collapsing.df <- cbind(final.collapsing.df, collapsing[rownames(final.collapsing.df), c("Cell.Barcode", "concat")])
    
    if (nrow(ultimate.collapsing.df) <= 0) {
      ultimate.collapsing.df <- final.collapsing.df
    } else {
      ultimate.collapsing.df <- rbind(ultimate.collapsing.df, final.collapsing.df)
    }
    rownames(ultimate.collapsing.df) <- NULL
    close(pb)
  }

  # Factor codes serve as the row/column indices of the sparse matrix.
  df <- transform(ultimate.collapsing.df, Cell.Barcode = factor(Cell.Barcode), CellTag = factor(CellTag))

  celltag.count.sparse <- sparseMatrix(as.integer(df$Cell.Barcode), as.integer(df$CellTag), x = df$value)
  colnames(celltag.count.sparse) <- levels(df$CellTag)
  rownames(celltag.count.sparse) <- levels(df$Cell.Barcode)
  
  # Save the new matrix to the object
  new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(celltag.count.sparse, "dgCMatrix"), replace = replace.option)
  return(new.obj)
}
175 | 


--------------------------------------------------------------------------------
/R/CellTagMatrixGeneration.R:
--------------------------------------------------------------------------------
 1 | #' CellTag Matrix Generation Function
 2 | #'
 3 | #' This function uses the extract information from data processed before and generate a Cell Barcode x CellTag matrix
 4 | #' @param celltag.obj A CellTag object with bam file result filled
 5 | #' @param barcodes.file A .tsv output file from 10x CellRanger pipeline. It contains a list of all cell barcodes identified in the filtered dataset.
 6 | #' @return A CellTag object with the attribute (raw.count) filled
 7 | #' @keywords single-cell RNA-seq data, CellTagging
 8 | #' @export
 9 | #' @examples
10 | #' CellTagMatrixCount(bam.test.obj, "barcodes.tsv")
11 | #' 
12 | CellTagMatrixCount <- function(celltag.obj, barcodes.file, replace.option = FALSE) {
13 |   # Read in the cell barcodes identified during alignment
14 |   barcodeList <- fread(barcodes.file, header = FALSE)[[1]]
15 |   celltagData <- celltag.obj@bam.parse.rslt[[celltag.obj@curr.version]]
16 |   # Filter based on filtered barcodes
17 |   celltagData <- celltagData[which(celltagData$Cell.BC %in% barcodeList), ]
18 | 
19 |   #With the parsed CellTag reads loaded we can then easily filter the data and generate UMI Counts for each Cell Barcode/Cell Tag combination.
20 |   #-Groups the data.table by Cell Barcode/Cell Tag combination and creates a new column "UMI.Count" which has the number of unique UMI associated with each Cell Barcode/Cell.Tag combination. uniqueN is equivalent to length(unique(UMI))
21 |   celltagCounts <- celltagData[, .(UMI.Count = uniqueN(UMI)), .(Cell.BC, Cell.Tag)]          
22 |   # The data is now in a long format and needs to be reshaped. We will cast the long data into a wide format resembling a matrix.
23 |   # celltagCountsWide <- dcast(data = celltagCounts, formula = Cell.BC ~ Cell.Tag, value.var = "UMI.Count", fill = 0 )
24 | 
25 |   #Now we have the data we want in the correct format. Next we can add Cells from the barcode list that were not in the celltagData.
26 |   missingCells <- barcodeList[!(barcodeList %in% celltagCounts$Cell.BC)]
27 |   #Lets make a data.table with one column Cell.BC which will contain a list of the missing cells. This can then be merged with the UMI Count data table.
28 |   missingCells <- setDT(expand.grid(Cell.BC = missingCells, Cell.Tag = unique(celltagCounts$Cell.Tag)))
29 |   missingCells$UMI.Count <- 0
30 |   #Bind the missing cells to the data.table containing the Cell Tag UMI Counts.
31 |   alltagCounts <- rbind(celltagCounts, missingCells, fill = TRUE)
32 |   #Now we can filter out cells which are not in our barcode list.
33 |   alltagCounts <- alltagCounts[Cell.BC %in% barcodeList, ]
34 | 
35 |   #Generate dgCMatrix
36 |   ### Reference for code
37 |   ## https://datawookie.netlify.app/blog/2016/01/casting-a-wide-and-sparse-matrix-in-r/
38 | 
39 |   df <- as.data.frame(alltagCounts)
40 |   df <- transform(df, Cell.BC = factor(Cell.BC), Cell.Tag = factor(Cell.Tag))
41 | 
42 |   celltag.count.sparse <- sparseMatrix(as.integer(df$Cell.BC), as.integer(df$Cell.Tag), x = df$UMI.Count)
43 |   colnames(celltag.count.sparse) <- levels(df$Cell.Tag)
44 |   rownames(celltag.count.sparse) <- levels(df$Cell.BC)
45 | 
46 |   #Lets also filter Cell Tags in which no UMIs are counted.
47 |   celltagExpr <- Matrix::colSums(celltag.count.sparse)
48 |   tagsRemove <- names(celltagExpr)[celltagExpr == 0]
49 |   alltagCounts[, (tagsRemove):= NULL]
50 | 
51 |   ## Let's make the dgc matrix again with the tags removed
52 |   df <- as.data.frame(alltagCounts)
53 |   df <- transform(df, Cell.BC = factor(Cell.BC), Cell.Tag = factor(Cell.Tag))
54 | 
55 |   celltag.count.sparse <- sparseMatrix(as.integer(df$Cell.BC), as.integer(df$Cell.Tag), x = df$UMI.Count)
56 |   colnames(celltag.count.sparse) <- levels(df$Cell.Tag)
57 |   rownames(celltag.count.sparse) <- levels(df$Cell.BC)
58 | 
59 |   #We now have a final matrix. Next lets generate some stats about the Cell Tags. 
60 |   celltagExpr <- summary(Matrix::colSums(celltag.count.sparse))
61 |   cellsPerTag <- summary(Matrix::colSums(celltag.count.sparse > 0))
62 |   cellExpr <- summary(Matrix::rowSums(celltag.count.sparse))
63 |   
64 |   tagsPerCell <- Matrix::rowSums(celltag.count.sparse > 0)
65 |   tagsPerCellSum <- summary(tagsPerCell)
66 |   
67 |   stats.df <- rbind(celltagExpr, cellsPerTag, cellExpr, tagsPerCellSum)
68 |   rownames(stats.df) <- c("CellTag.UMI.Counts", "Cells.per.CellTag", "Cell.UMI.Counts", "CellTags.per.Cell")
69 |   stats.df <- as.data.frame(stats.df)
70 |   
71 | 
72 |   dgc.mtx.filter <- celltag.count.sparse
73 |   new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "raw.count", as(dgc.mtx.filter, "dgCMatrix"), replace = replace.option)
74 | 
75 |   return(new.obj)
76 | }
77 | 


--------------------------------------------------------------------------------
/R/CellTagNetworkContruction.R:
--------------------------------------------------------------------------------
  1 | #' Convert CellTag Matrix to Link List
  2 | #'
  3 | #' This function convert the CellTag Matrix to a link list, which is further used for network construction and visualizetion
  4 | #' @param celltag.obj A CellTag object with all clone information filled
  5 | #' @return A CellTag object with the attribute (network.link.list) filled
  6 | #' @keywords single-cell RNA-seq data, CellTagging
  7 | #' @export
  8 | #' @examples
  9 | #' convertCellTagMatrix2LinkList(bam.test.obj)
 10 | #' 
 11 | convertCellTagMatrix2LinkList <- function(celltag.obj){
 12 |   # celltag_data should be data frame (N x 3).
 13 |   # the columnname of this data frame should be c("CellTagV1", "CellTagV2", "CellTagV3")
 14 |   celltag.dt <- celltag.obj@clone.composition
 15 |   v1.df <- as.data.frame(celltag.dt$v1)
 16 |   v2.df <- as.data.frame(celltag.dt$v2)
 17 |   v3.df <- as.data.frame(celltag.dt$v3)
 18 |   rownames(v1.df) <- v1.df$cell.barcode
 19 |   rownames(v2.df) <- v2.df$cell.barcode
 20 |   rownames(v3.df) <- v3.df$cell.barcode
 21 | 
 22 |   all.cells <- unique(c(celltag.dt$v1$cell.barcode, celltag.dt$v2$cell.barcode, celltag.dt$v3$cell.barcode))
 23 |   celltag_data <- data.frame(row.names = all.cells)
 24 |   celltag_data[rownames(v1.df), "CellTagV1"] <- v1.df[rownames(v1.df),"clone.id"]
 25 |   celltag_data[rownames(v2.df), "CellTagV2"] <- v2.df[rownames(v2.df),"clone.id"]
 26 |   celltag_data[rownames(v3.df), "CellTagV3"] <- v3.df[rownames(v3.df),"clone.id"]
 27 |   
 28 |   celltag.obj@celltag.aggr.final <- celltag_data
 29 |   
 30 |   ### 1.Preprocessing celltag data #### 
 31 |   message("Preprocessing data..")
 32 |   # pick up cells that have one or more celltag, and remove cells that do not have any celltag.
 33 |   Cells_with_tag <- rownames(celltag_data)[!(is.na(celltag_data$CellTagV1) & 
 34 |                                                is.na(celltag_data$CellTagV2) & 
 35 |                                                is.na(celltag_data$CellTagV3))]
 36 |   
 37 |   
 38 |   message(paste0(" Cells that have CellTagV1: ", sum(!is.na(celltag_data$CellTagV1))))
 39 |   message(paste0(" Cells that have CellTagV2: ", sum(!is.na(celltag_data$CellTagV2))))
 40 |   message(paste0(" Cells that have CellTagV3: ", sum(!is.na(celltag_data$CellTagV3))))
 41 |   
 42 |   
 43 |   # remove non-tagged cells
 44 |   celltag_data <- celltag_data[Cells_with_tag, ]
 45 |   
 46 |   # convert NA to "e"
 47 |   tags <-  c("CellTagV1", "CellTagV2", "CellTagV3")
 48 |   for (i in tags) {
 49 |     celltag_data[is.na(celltag_data[ ,i]),i] <- "e"}
 50 |   
 51 |   ### 2. Constructing LinkList ###
 52 |   message("Constructing link list..")
 53 |   
 54 |   findRoot <- function(cell_id, tag) { # e.g, cell_id = "TGTTCCGGTGAGGCTA-8"; tag = "CellTagV1", "CellTagV2", "CellTagV3"
 55 |     tagid <- celltag_data[cell_id,tag]
 56 |     tmp <- as.data.frame(t(c(paste0(tag, "_", tagid), cell_id, tag)), stringsAsFactors = F)
 57 |     rownames(tmp) <- NULL
 58 |     colnames(tmp) <- c("source", "target", "tag")
 59 |     return(tmp)
 60 |   }
 61 |   
 62 |   
 63 |   ## first, clonal population that share the same celltag is combined to make subnetwork.
 64 |   ## then, subnewtorks will be combined further if they are originated from same mother. 
 65 |   
 66 |   all_cell_id <- rownames(celltag_data)
 67 |   remaining_cell_id <- all_cell_id
 68 |   tags <- c("CellTagV3", "CellTagV2", "CellTagV1")
 69 |   linkList <- data.frame()
 70 |   
 71 |   # 2.1 find connection between "celltag" -> "cells"
 72 |   for (tag in tags) {
 73 |     remaining_cells <- celltag_data[remaining_cell_id,]
 74 |     subcells <- remaining_cells[remaining_cells[,tag] != "e",]
 75 |     
 76 |     tmp <- foreach(i = rownames(subcells), .combine = rbind, .packages="foreach") %do% {
 77 |       findRoot(i, tag)
 78 |     }
 79 |     linkList <- rbind(linkList, tmp)
 80 |     done_id <- rownames(subcells)
 81 |     # remaining_cell_id <- remaining_cell_id[!(remaining_cell_id %in% done_id)]   !!!!algorithm was modified 20180830!!!! in new version, remaining_cell_id will now be updated.
 82 |   }
 83 |   
 84 |   
 85 |   # 2.2 hidden link ["CellTagV2" -> "CellTagV3"], or ["CellTagV1" -> "CellTagV3"]
 86 |   hiddenlink_D13 <- foreach(i = (unique(celltag_data$CellTagV3)[-1]), .combine = rbind, .packages="foreach") %do% {
 87 |     
 88 |     sub_cells <- celltag_data[celltag_data$CellTagV3 == i, ]
 89 |     
 90 |     prev_tag <- sub_cells$CellTagV2
 91 |     prev_tag <- prev_tag[prev_tag != "e"]
 92 |     prev_tag <- names(which.max(table(prev_tag)))
 93 |     
 94 |     if (class(prev_tag) != "NULL") {
 95 |       tmp <- as.data.frame(t(c(paste0("CellTagV2", "_", prev_tag),
 96 |                                paste0("CellTagV3", "_", i),
 97 |                                "CellTagV2")), stringsAsFactors = F)
 98 |       rownames(tmp) <- NULL
 99 |       colnames(tmp) <- c("source", "target", "tag")
100 |       return(tmp)
101 |     } 
102 |     
103 |     prev_tag <- sub_cells$CellTagV1
104 |     prev_tag <- prev_tag[prev_tag != "e"]
105 |     prev_tag <- names(which.max(table(prev_tag)))
106 |     
107 |     if (class(prev_tag) != "NULL") {
108 |       tmp <- as.data.frame(t(c(paste0("CellTagV1", "_", prev_tag),
109 |                                paste0("CellTagV3", "_", i),
110 |                                "CellTagV1")), stringsAsFactors = F)
111 |       rownames(tmp) <- NULL
112 |       colnames(tmp) <- c("source", "target", "tag")
113 |       return(tmp)
114 |     }   
115 |     
116 |   }
117 |   # 2.3 hidden link ["CellTagV1" -> "CellTagV2"]
118 |   hiddenlink_D3 <- foreach(i = (unique(celltag_data$CellTagV2)[-1]), .combine = rbind, .packages="foreach") %do% {
119 |     
120 |     sub_cells <- celltag_data[celltag_data$CellTagV2 == i, ]
121 |     
122 |     prev_tag <- sub_cells$CellTagV1
123 |     prev_tag <- prev_tag[prev_tag != "e"]
124 |     prev_tag <- names(which.max(table(prev_tag)))
125 |     
126 |     if (class(prev_tag) != "NULL") {
127 |       tmp <- as.data.frame(t(c(paste0("CellTagV1", "_", prev_tag),
128 |                                paste0("CellTagV2", "_", i),
129 |                                "CellTagV1")), stringsAsFactors = F)
130 |       rownames(tmp) <- NULL
131 |       colnames(tmp) <- c("source", "target", "tag")
132 |       return(tmp)
133 |     }   
134 |     
135 |   }
136 |   rm(remaining_cells, remaining_cell_id, sub_cells, subcells, all_cell_id, done_id, prev_tag, tag, tags)
137 |   
138 |   # 2.4 integrating all links
139 |   
140 |   modifyCellName <- function(linkList){
141 |     # this function change cell name,  like.. "TTCTCCTGTATCACCA-7"  -> "TTCTCCTGTATCACCA-7_D3"
142 |     # in the date processing algorithm v-0.20, cells that have multiple cell tag will show up mutiple times.
143 |     # thus we have make new name to avoid them being overrapped. 
144 |     
145 |     linkList$target_unmodified <- linkList$target
146 |     
147 |     node_cell <- grep("-", linkList$target)
148 |     
149 |     linkList[node_cell, "target"] <- paste0(linkList[node_cell, "target"], 
150 |                                             "_",
151 |                                             stringr::str_split_fixed(linkList[node_cell, "tag"], "g", 2)[,2])
152 |     return(linkList)
153 |   }
154 |   
155 |   # integrate
156 |   linkList <- rbind(linkList, hiddenlink_D3)
157 |   linkList <- rbind(linkList, hiddenlink_D13)
158 |   
159 |   # change cell name
160 |   linkList <- modifyCellName(linkList)
161 |   
162 |   message("finished")
163 |   
164 |   celltag.obj@network.link.list <- linkList
165 |   return(celltag.obj)
166 |   
167 | }
168 | 
#' Get Nodes from Link List
#'
#' This function builds the node table of the network from the link list stored in the CellTag object. Every distinct value of the target and source columns of the link list becomes one node.
#' @param celltag.obj A CellTag object with link list filled (run convertCellTagMatrix2LinkList first)
#' @return A CellTag object with the attribute (nodes) filled. The node table has the columns nodes, tag and node_name_unmodified.
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' getNodesfromLinkList(bam.test.obj)
#' 
getNodesfromLinkList <- function(celltag.obj) {
  link.table <- celltag.obj@network.link.list
  
  # Targets come first, followed by the sources that never appear as a target.
  # The variable must be called "nodes": data.frame() uses it as the column name.
  nodes <- union(link.table$target, link.table$source)
  Nodes <- data.frame(nodes, row.names = nodes, stringsAsFactors = FALSE)
  
  tag.labels <- c("CellTagV1", "CellTagV2", "CellTagV3")
  
  # A node is a cell when none of the "_"-separated pieces of its name
  # is one of the CellTag version labels.
  isCellNode <- function(node.name) {
    !any(tag.labels %in% strsplit(node.name, "_")[[1]])
  }
  
  # CellTag nodes carry their tag version as the first piece of their name;
  # cell nodes take the tag recorded on the link(s) that point at them.
  tagOfNode <- function(node.name) {
    if (!isCellNode(node.name)) {
      return(strsplit(node.name, "_")[[1]][1])
    }
    link.table[link.table$target == node.name, "tag"]
  }
  
  # CellTag nodes keep their own name; cell nodes are mapped back to the
  # name stored in the "target_unmodified" column of the link list.
  unmodifiedNameOfNode <- function(node.name) {
    if (!isCellNode(node.name)) {
      return(node.name)
    }
    link.table[link.table$target == node.name, "target_unmodified"]
  }
  
  Nodes$tag <- sapply(nodes, tagOfNode)
  Nodes$node_name_unmodified <- sapply(nodes, unmodifiedNameOfNode)
  
  celltag.obj@nodes <- Nodes
  return(celltag.obj)
}
221 | 
#' Add Additional Information to the Nodes
#'
#' This function adds auxiliary information to the nodes, for example cluster or cell type information. The information has to be passed in as a data frame whose row names use the same format as the node_name_unmodified column of the node table.
#' @param celltag.obj A CellTag object with nodes filled
#' @param additional_data A data frame with auxiliary information about the nodes (rownames = the unmodified node names)
#' @return A CellTag object with the attribute (nodes) modified.
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' addData2Nodes(bam.test.obj, cluster.info)
#' 
addData2Nodes <- function(celltag.obj, additional_data) {
  node.table <- celltag.obj@nodes
  
  # Look up one row of additional_data per node, by unmodified node name
  matched.rows <- additional_data[node.table$node_name_unmodified, ]
  merged <- cbind(node.table, matched.rows)
  
  # The appended columns are the last ncol(additional_data) columns;
  # give them the column names of additional_data.
  n.total <- ncol(merged)
  n.extra <- ncol(additional_data)
  appended <- (n.total - n.extra + 1):n.total
  colnames(merged)[appended] <- colnames(additional_data)
  
  celltag.obj@nodes <- merged
  return(celltag.obj)
}
248 | 
249 | 


--------------------------------------------------------------------------------
/R/CellTagNetworkVisualiztion.R:
--------------------------------------------------------------------------------
 1 | returnDirectlyConnectedNodes <- function(node, linkList){
 2 |   tmp_link <- linkList[linkList$source %in% node,]
 3 |   tmp_link2 <- linkList[linkList$target %in% node,]
 4 |   
 5 |   tmp_nodes <- union(tmp_link$target, tmp_link2$source)
 6 |   tmp_nodes <- union(tmp_nodes, node)
 7 |   return(tmp_nodes)
 8 | }
 9 | 
10 | 
11 | 
# Return the given node(s) plus every node that can be reached from them in at
# most `max.hops` links (links are followed in both directions).
#   node:     character vector of starting node names
#   linkList: data frame with the columns "source" and "target"
#   max.hops: number of expansion rounds. The default of 5 is the value that
#             used to be hard-coded; raise it when a component can be deeper
#             than that, e.g. when starting from a cell node rather than a
#             CellTag node.
returnAllConnectedNodes <- function(node, linkList, max.hops = 5) {
  # seq_len() makes max.hops = 0 a no-op instead of iterating over c(1, 0)
  for (hop in seq_len(max.hops)) {
    node <- returnDirectlyConnectedNodes(node, linkList)
  }
  return(node)
}
18 | 
19 | 
# Build a force-directed network widget from a link list and a node table.
#   linkList: data frame with the columns "source" and "target" (node names)
#   Nodes:    data frame with a "nodes" column holding the node names
#   overlay:  name of the Nodes column used to group (colour) the nodes
# Returns whatever forceNetwork() returns.
drawNetworkGraph <- function(linkList, Nodes, overlay) {
  
  node.index <- 1:nrow(Nodes)
  rownames(Nodes) <- node.index
  
  # Map every node name to its row position in Nodes
  name.to.index <- setNames(node.index, Nodes$nodes)
  
  # Links are passed as row positions minus one
  # (presumably because forceNetwork expects zero-based indices -- TODO confirm)
  linkList$source1 <- name.to.index[as.character(linkList$source)] - 1
  linkList$target1 <- name.to.index[as.character(linkList$target)] - 1
  
  # Every link gets the same weight
  linkList$Value <- 1
  
  network.widget <- forceNetwork(Links = linkList, Nodes = Nodes,
                                 Source = "source1", Target = "target1",
                                 Value = "Value", NodeID = "nodes",
                                 Group = overlay,
                                 arrows = TRUE, zoom = TRUE,
                                 opacity = 0.9, opacityNoHover = 0.5)
  
  return(network.widget)
}
40 | 
#' Draw the Network
#'
#' This function generates a force-directed network based on the link list and nodes information. Only the nodes connected to the requested tag, and the links between those nodes, are drawn.
#' @param celltag.obj A CellTag object with link list and nodes filled
#' @param tag Which tag would you like to plot? This should be the name of a node, e.g. "CellTagV1_2".
#' @param overlay What information would you like to overlay with the network? This should be one of the column names of the node information.
#' @return A CellTag object with the attribute (network) modified.
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' drawSubnet(bam.test.obj, "CellTagV1_2", "Cluster")
#' 
drawSubnet <- function(celltag.obj, tag, overlay){
  Nodes <- celltag.obj@nodes
  linkList <- celltag.obj@network.link.list
  
  # Fail early with a readable message instead of an obscure error further down
  if (!any(tag %in% Nodes$nodes)) {
    stop("The requested tag is not a node of this network: ", paste(tag, collapse = ", "))
  }
  if (!(overlay %in% colnames(Nodes))) {
    stop("The overlay '", overlay, "' is not a column of the node information.")
  }
  
  no <- returnAllConnectedNodes(tag, linkList)
  
  # Keep a link only when BOTH of its ends are part of the sub-network.
  # With "|" a link leaving the collected node set would be kept although its
  # other end is missing from sub_Nodes, which yields NA link indices when the
  # graph is drawn.
  sub_link <- linkList[(linkList$source %in% no) & (linkList$target %in% no),]
  sub_Nodes <- Nodes[Nodes$nodes %in% no ,]
  
  a <- drawNetworkGraph(sub_link, sub_Nodes, overlay)
  
  celltag.obj@network <- a
  return(celltag.obj)
}
68 | 
69 | 


--------------------------------------------------------------------------------
/R/CellTagObjSet.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | setClass("CellTag",
 3 |          slots = list(obj.name = "character",
 4 |                       fastq.bam.dir = "character",
 5 |                       curr.version = "character",
 6 |                       celltag.version = "character",
 7 |                       fastq.full.celltag = "ANY",
 8 |                       fastq.only.celltag = "ANY",
 9 |                       celltag.freq.stats = "ANY",
10 |                       whitelist = "ANY",
11 |                       bam.parse.rslt = "ANY",
12 |                       celltag.stats = "ANY",
13 |                       pre.starcode = "ANY",
14 |                       raw.count = "dgCMatrix",
15 |                       collapsed.count = "dgCMatrix",
16 |                       whitelisted.count = "dgCMatrix",
17 |                       metric.filtered.count = "dgCMatrix",
18 |                       binary.mtx = "dgCMatrix",
19 |                       jaccard.mtx = "dsTMatrix",
20 |                       clone.composition = "ANY",
21 |                       clone.size.info = "ANY",
22 |                       celltag.aggr.final = "data.frame",
23 |                       network.link.list = "ANY",
24 |                       nodes = "ANY",
25 |                       network = "ANY"))
#' @export
setMethod("show",
          "CellTag",
          function(object) {
            # Print one "label value" line; cat() puts a space between its arguments
            report <- function(label, value) cat(label, value, "\n")
            
            raw <- object@raw.count
            whitelisted <- object@whitelisted.count
            
            # Columns are reported as CellTags, rows as cells
            report("Object name: ", object@obj.name)
            report("Raw CellTag Counts = ", ncol(raw))
            report("Raw Number of Cells with CellTag = ", nrow(raw))
            report("Collapsed CellTag Counts = ", ncol(object@collapsed.count))
            report("Whitelisted CellTag Counts = ", ncol(whitelisted))
            report("Whitelisted Number of Cells with CellTag = ", nrow(whitelisted))
          })
37 | 
38 | 


--------------------------------------------------------------------------------
/R/CellTagWhitelistGeneration.R:
--------------------------------------------------------------------------------
 1 | #' CellTag Whitelist Filtering Function
 2 | #'
 3 | #' This function conducts whitelist filtering such that only CellTags with count number over their certain percentile would be considered for clone calling
 4 | #' @param celltag.obj A CellTag Object with CellTag frequency table counted and sorted
 5 | #' @param percentile A fraction cutoff percentile for filtering the CellTags e.g. 0.9 for 90th percentile
 6 | #' @param output.dir Which directory would you like to store these files? If NULL, save to the same directory as the fastq/bam file
 7 | #' @return A CellTag Object with attribute (whitelist) filled.
 8 | #' @keywords single-cell RNA-seq data, CellTagging
 9 | #' @export
10 | #' @examples
11 | #' CellTagWhitelistFiltering(bam.test.obj, 0.9)
12 | #' 
13 | CellTagWhitelistFiltering <- function(celltag.obj, percentile, output.dir = NULL) {
14 |   # Load table and calculate cutoff
15 |   count.sorted.table <- celltag.obj@celltag.freq.stats[[celltag.obj@curr.version]]
16 |   count.cutoff <- quantile(count.sorted.table$Count, probs = percentile)
17 |   count.true.cut <- floor(count.cutoff/10)
18 |   
19 |   # Plot
20 |   plot(count.sorted.table$Count, main="CellTag Whitelist",xlab="CellTag",ylab="Reads")
21 |   abline(v=sum(count.sorted.table$Count >= count.true.cut), col="red", lty=2)
22 |   cat(paste0("Abline Threshold: ", sum(count.sorted.table$Count >= count.true.cut)), "\n")
23 |   
24 |   # Subset the ones pass filtering
25 |   whitelist <- subset(count.sorted.table, Count>=count.true.cut)
26 |   
27 |   if (is.null(output.dir)) output.dir <- paste0(dirname(celltag.obj@fastq.bam.dir), "/", celltag.obj@curr.version, "_whitelist.csv")
28 |   write.csv(whitelist, output.dir, quote = F, row.names = F)
29 |   
30 |   cat("File is saved: ", output.dir, "\n")
31 |   
32 |   celltag.obj@whitelist[[celltag.obj@curr.version]] <- whitelist
33 |   return(celltag.obj)
34 | }
35 | 
#' CellTag Frequency Sort Table
#'
#' This function counts the CellTags extracted for the current CellTag version and sorts them in descending order of occurrence.
#' @param celltag.obj A CellTag Object with CellTags extracted
#' @return A CellTag Object with attribute (celltag.freq.stats) filled. The table stored for the current version has the columns CellTag and Count.
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' AddCellTagFreqSort(bam.test.obj)
#' 
AddCellTagFreqSort <- function(celltag.obj) {
  version <- celltag.obj@curr.version
  
  # Count the occurrence of each CellTag
  cell.tag.count <- as.data.table(table(celltag.obj@fastq.only.celltag[[version]]), stringsAsFactors = F)
  # Sort the CellTags in descending order of occurrence (column N holds the counts)
  cell.tag.count.sort <- cell.tag.count[order(-cell.tag.count$N), ]
  colnames(cell.tag.count.sort) <- c("CellTag", "Count")
  # Add to the slot in celltag object
  celltag.obj@celltag.freq.stats[[version]] <- cell.tag.count.sort
  return(celltag.obj)
}
56 | 


--------------------------------------------------------------------------------
/R/CloneCalling.R:
--------------------------------------------------------------------------------
 1 | #' Jaccard Analysis Function
 2 | #'
 3 | #' This function conducts Jaccard analysis to calculate the Jaccard similarity between cells.
 4 | #' @param celltag.obj A CellTag object with the counts filtered based on metrics
 5 | #' @param plot.corr Would you like to plot the correlation matrix?
 6 | #' @return A CellTag object with attribute (jaccard.mtx) filled
 7 | #' @keywords single-cell RNA-seq data, CellTagging
 8 | #' @export
 9 | #' @examples
10 | #' JaccardAnalysis(bam.test.obj)
11 | #'
12 | JaccardAnalysis <- function(celltag.obj, plot.corr = TRUE, fast = FALSE) {
13 |   filtered.whitelised.data <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "metric.filtered.count")
14 |   # Calculating the Jaccard matrix
15 |   if (fast) {
16 |     Jac <- proxyC::simil(filtered.whitelised.data, method = "jaccard")
17 |   } else {
18 |     Jac <- proxy::simil(as.matrix(filtered.whitelised.data), method = "Jaccard")
19 |     Jac <- as(Jac, "dsTMatrix")
20 |   }
21 |   
22 |   if ((!fast) & plot.corr) {
23 |     diag(Jac) <- 1
24 |     corrplot(Jac, method="color", order="hclust", hclust.method ="ward.D2", cl.lim=c(0,1), tl.cex=0.1)
25 |   }
26 |   
27 |   celltag.obj@jaccard.mtx <- Jac
28 |   return(celltag.obj)
29 | }
30 | 
#' Clone Calling Function
#'
#' This function conducts clone calling based on the Jaccard results. Cells whose pairwise Jaccard similarity is strictly greater than the cutoff are linked, and every connected group of linked cells is called a clone.
#' @param celltag.obj A CellTag object with the jaccard matrix generated
#' @param correlation.cutoff Correlation cutoff for clone membership
#' @return A CellTag object with attributes (clone.composition & clone.size.info) filled. If no pair of cells passes the cutoff, a warning is raised and empty tables are stored.
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' CloneCalling(bam.test.obj, 0.7)
#'
CloneCalling <- function(celltag.obj, correlation.cutoff) {
  Jaccard.Matrix <- celltag.obj@jaccard.mtx
  
  # Using the igraph package to facilitate the identification of membership to each clone
  # Swap the row/column indices of the stored entries and keep the strictly
  # lower triangle, so that every pair of cells is present once and the
  # diagonal is excluded.
  jac.summ <- Matrix::summary(Jaccard.Matrix)
  jac.lower.i <- jac.summ$j
  jac.summ$j <- jac.summ$i
  jac.summ$i <- jac.lower.i
  lower.tri.summ <- subset(jac.summ, i>j) # Exclude diagonal
  
  test <- sparseMatrix(i = lower.tri.summ$i,
                       j = lower.tri.summ$j,
                       x = lower.tri.summ$x,
                       dims = dim(Jaccard.Matrix),
                       dimnames = dimnames(Jaccard.Matrix))
  
  test.df <- as.data.frame(Matrix::summary(test))
  test.df.sub <- test.df[which(test.df$x > correlation.cutoff), ]
  
  # Nothing passes the cutoff: seq(1, 0) below would yield c(1, 0) and fail
  # with a subscript error, so store empty results instead.
  if (nrow(test.df.sub) == 0) {
    warning("No pair of cells has a Jaccard similarity above the cutoff; no clones were called.")
    celltag.obj@clone.composition[[celltag.obj@curr.version]] <- data.table(clone.id = integer(0),
                                                                            cell.barcode = character(0))
    celltag.obj@clone.size.info[[celltag.obj@curr.version]] <- data.frame(Clone.ID = factor(character(0)),
                                                                          Frequency = integer(0))
    return(celltag.obj)
  }
  
  check.corelation <- test.df.sub[,c(1,2)]
  colnames(check.corelation) <- c("row", "col")
  check.corelation <- as.matrix(check.corelation)

  # Connected components of the "similar enough" graph are the clones.
  # NOTE(review): the split relies on the graph's vertex order matching
  # unique(as.vector(check.corelation)) -- confirm against igraph.
  graph.cor <- graph.data.frame(check.corelation, directed = FALSE)
  groups.cor <- split(unique(as.vector(check.corelation)), clusters(graph.cor)$membership)
  conv.groups.cor <- lapply(groups.cor,
                            function(list.cor){
                              rownames(test)[list.cor]})
  
  # Put clones into tables
  df.conv <- lapply(seq_along(groups.cor),
                    function(x) {
                      data.table(clone.id = x, 
                                 cell.barcode = conv.groups.cor[[x]])
                    }
  )
  
  df.comb <- rbindlist(df.conv)
  
  # Calculate the size of each clone
  counts <- table(df.comb$clone.id)
  counts <- as.data.frame(counts)
  colnames(counts) <- c("Clone.ID", "Frequency")
  
  celltag.obj@clone.composition[[celltag.obj@curr.version]] <- df.comb
  celltag.obj@clone.size.info[[celltag.obj@curr.version]] <- counts
  return(celltag.obj)
}
91 | 
92 | 


--------------------------------------------------------------------------------
/R/CreateCellTagObject.R:
--------------------------------------------------------------------------------
 1 | #' Create a New CellTag Object 
 2 | #'
 3 | #' This function creates a CellTag object that contains the basic information required for the object
 4 | #' @param object.name The name of the object
 5 | #' @param fastq.bam.input The input fastq/bam data file path
 6 | #' @param celltag.version Which version of CellTags are you working with?
 7 | #' @return A CellTag Object with open attributes that can be filled as analysis moving along
 8 | #' @keywords single-cell RNA-seq data, CellTagging
 9 | #' @export
10 | #' @examples
11 | #' CellTagObejct("hf1.d15.test", "hf1.d15.bam", "v1")
12 | #'
13 | CellTagObject <- function(object.name, fastq.bam.directory) {
14 |   ct <- new("CellTag", obj.name = object.name, fastq.bam.dir = fastq.bam.directory)
15 |   return(ct)
16 | }
17 | 


--------------------------------------------------------------------------------
/R/MetricBasedPlottingAndFiltering.R:
--------------------------------------------------------------------------------
 1 | #' Metric-Base Filtering Function
 2 | #'
 3 | #' This function applies further filtering on scRNA-seq data with CellTags based on cutoff values identified from the metric plots.
 4 | #' @param celltag.obj A CellTag Object with count matrix generated
 5 | #' @param cutoff The cutoff decided from the metric plots
 6 | #' @param comparison Would you like to maintain the part less than/greater than the cutoff? Default to less. Choices can be greater or less.
 7 | #' @return A CellTag Object with attribute (metric.filtered.count) filled
 8 | #' @keywords single-cell RNA-seq data, CellTagging
 9 | #' @export
10 | #' @examples
11 | #' MetricBasedFiltering(bam.test.object, 20, "less")
12 | #'
13 | MetricBasedFiltering <- function(celltag.obj, cutoff, comparison = "less", replace.option = FALSE) {
14 |   whitelisted.ct.data <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "whitelisted.count")
15 |   metric.filter.ct.data <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "metric.filtered.count")
16 |   if (ncol(metric.filter.ct.data) <= 0) {
17 |     whitelisted.celltag.data <- as.matrix(whitelisted.ct.data)
18 |   } else {
19 |     whitelisted.celltag.data <- as.matrix(metric.filter.ct.data)
20 |   }
21 |   # Set up the filtering data frame
22 |   CellTags.per.cell.whitelisted.pf <- as.data.frame(Matrix::rowSums(whitelisted.celltag.data))
23 |   
24 |   # Set up the filtered celltag dataset object
25 |   if (comparison == "less") {
26 |     cell.filter <- subset(CellTags.per.cell.whitelisted.pf, CellTags.per.cell.whitelisted.pf <= (cutoff))
27 |   } else {
28 |     cell.filter <- subset(CellTags.per.cell.whitelisted.pf, CellTags.per.cell.whitelisted.pf >= (cutoff))
29 |   }
30 |   cell.bc.filter <- row.names(cell.filter)
31 |   # Filter celltag dataset
32 |   celltags.whitelisted.new <- whitelisted.celltag.data[cell.bc.filter, ]
33 | 
34 |   new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "metric.filtered.count", as(celltags.whitelisted.new, "dgCMatrix"), replace = replace.option)
35 | 
36 |   return(new.obj)
37 | }
38 | 
#' CellTag Metric Plotting Function
#'
#' This function provides some metric plots for further downstream celltag filtering in the scRNA-seq dataset. It plots the number of CellTags per cell and the occurrence frequency of each CellTag across cells (scatter plots and histograms) and prints the mean of both. The metric-filtered matrix is used if it is filled, otherwise the whitelisted matrix, otherwise the binary matrix.
#' @param celltag.obj A CellTag Object
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' MetricPlots(bam.test.obj)
#'
MetricPlots <- function(celltag.obj) {
  
  obj.metric.filtered.count <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "metric.filtered.count")
  obj.whitelisted.count <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "whitelisted.count")
  
  # Use the most processed matrix that is available
  if (ncol(obj.metric.filtered.count) <= 0) {
    if (ncol(obj.whitelisted.count) <= 0) {
      celltag.data <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "binary.mtx")
    } else {
      celltag.data <- obj.whitelisted.count
    }
  } else {
    celltag.data <- obj.metric.filtered.count
  }
  
  # Row sums: CellTags per cell. Column sums: number of cells per CellTag.
  CellTags.per.cell.whitelisted.pf <- Matrix::rowSums(celltag.data)
  CellTags.per.cell.avg <- mean(CellTags.per.cell.whitelisted.pf)
  CellTags.frequency.whitelisted.pf <- Matrix::colSums(celltag.data)
  CellTags.freq.avg <- mean(CellTags.frequency.whitelisted.pf)
  
  # 2 x 2 plot layout; the caller's graphical parameters are restored on exit
  old.par <- par(mfrow=c(2,2))
  on.exit(par(old.par), add = TRUE)
  
  plot(CellTags.per.cell.whitelisted.pf, main = "CellTag Counts of Individual Cells", xlab = "Cell Index", ylab = "CellTag Counts")
  # One point per CellTag (column sums), so the x axis is a CellTag index
  plot(CellTags.frequency.whitelisted.pf, main = "CellTag Occurrence Frequency Across All Cells", xlab = "CellTag Index", ylab = "CellTag Frequency")
  hist(CellTags.per.cell.whitelisted.pf, main = "Histogram of CellTag Counts of Individual Cells", xlab = "CellTag Counts", ylab = "Count")
  hist(CellTags.frequency.whitelisted.pf, main = "Histogram of CellTag Occurrence Frequency Across All Cells", xlab = "CellTag Occurrence Frequency", ylab = "Count")
  cat("Average: ", CellTags.per.cell.avg, "\n")
  cat("Frequency: ", CellTags.freq.avg, "\n")
}
75 | 
76 | 


--------------------------------------------------------------------------------
/R/ScCellTagMatrixProcess.R:
--------------------------------------------------------------------------------
 1 | #' Single-cell RNA-seq Binarization Function
 2 | #'
 3 | #' This function binarize the single-cell celltag data based on a given cutoff. It will generate a binary matrix, which will be stored as a slot in the CellTag Object. The binary matrix will be further used for future processing of the single-cell data.
 4 | #' @param celltag.obj A CellTag object with the raw count matrix generated
 5 | #' @param tag.cutoff How many tags would you like to be used as a cutoff to say that the cells are tagged?
 6 | #' @return A CellTag object with the attribute (binary.mtx) filled.
 7 | #' @keywords single-cell RNA-seq data, CellTagging
 8 | #' @export
 9 | #' @examples
10 | #' SingleCellDataBinarization(bam.test.obj, 2)
11 | #' 
12 | SingleCellDataBinarization <- function(celltag.obj, tag.cutoff, replace.option = FALSE) {
13 |   obj.collapsed.count <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count")
14 |   if (sum(dim(obj.collapsed.count)) <= 0) {
15 |     CellTags <- GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "raw.count")
16 |   } else {
17 |     CellTags <- obj.collapsed.count
18 |   }
19 |   CellTags[CellTags < tag.cutoff] <- 0
20 |   CellTags[CellTags > 0] <- 1
21 |   new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "binary.mtx", as(CellTags, "dgCMatrix"), replace = replace.option)
22 |   
23 |   return(new.obj)
24 | }
25 | 
#' Single-cell RNA-seq Whitelisting Function
#'
#' The whitelist is a list of CellTag generated based on assessment of CellTag library. It helps reduce the effect from sequencing error in CellTags. This function conducts whitelist filtering through the single-cell dataset. It will filter out CellTags that are not included in the whitelist.
#' @param celltag.obj A CellTag object with the binary matrix generated
#' @param whitels.cell.tag.file Path to the file with the whitelisted CellTags. The file needs a header line and the CellTags in its first column. Files ending in .csv are read as comma separated, .txt and .tsv as tab separated, anything else as space separated.
#' @param replace.option Passed on as the replace argument of SetCellTagCurrentVersionWorkingMatrix (presumably controls whether an existing matrix is overwritten - confirm against the setter). Default to FALSE.
#' @return A CellTag object with the attribute (whitelisted.count) filled
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' SingleCellDataWhitelist(bam.test.obj, "~/Desktop/My_Favourite_Whitelist.csv")
#' 
SingleCellDataWhitelist <- function(celltag.obj, whitels.cell.tag.file, replace.option = FALSE) {
  # Binary matrix: row - cells, col - celltags
  CellTags <- as.matrix(GetCellTagCurrentVersionWorkingMatrix(celltag.obj, "binary.mtx"))
  
  # Pick the field separator from the file extension
  if (endsWith(whitels.cell.tag.file, ".csv")) {
    separator <- ","
  } else if (endsWith(whitels.cell.tag.file, ".txt") | endsWith(whitels.cell.tag.file, ".tsv")) {
    separator <- "\t"
  } else {
    separator <- " "
  }
  whitelist <- read.delim(whitels.cell.tag.file, sep = separator, header = T, stringsAsFactors = F)
  whitelist.names <- whitelist[,1]
  
  # CellTags that are both whitelisted and present in the data, in whitelist order
  kept.celltags <- intersect(whitelist.names, colnames(CellTags))
  
  # Filter the matrix using whitelist. drop = FALSE keeps a matrix even when
  # only one CellTag is kept; without it the result collapses to a vector and
  # the cell names are lost.
  celltags.whitelisted <- CellTags[, kept.celltags, drop = FALSE]
  
  new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "whitelisted.count", as(celltags.whitelisted, "dgCMatrix"), replace = replace.option)
  return(new.obj)
}
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/R/scripts.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/R/scripts.zip


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # R Package - CellTagR
  2 | 
  3 | ## Important Notices
  4 | We recently fixed the Binarization function name from ```SingleCellDataBinatization``` to ```SingleCellDataBinarization```. Please update your code accordingly, if you are using the latest version of CellTagR.
  5 | 
  6 | We recently found that inside the setter function, the column names of the filtered count matrix are possibly shuffled around during the second round of filtering, thus some CellTags were associated with the wrong cell barcodes. This could lead to inaccurate clone-calling. We suggest users reinstall the package and empty the slot with the following line of code and restart the pipeline from this step: https://github.com/morris-lab/CellTagR#6-additional-filtering.
  7 | ```r
  8 | celltag.obj@metric.filtered.count <- as(matrix(NA, 0, 0), "dgCMatrix")
  9 | ```
 10 | 
 11 | ## Description
 12 | This is a wrapped R package of the workflow (https://github.com/morris-lab/CellTagWorkflow) with additional assessment of the complexity of the CellTag library sequences. Additionally, the previous version of this package can be found at https://github.com/morris-lab/PreviousCloneHunter. ***Note: This package has been changed and improved. Analyses performed with the previous version will not be compatible.*** This package depends on R (>= 3.5.0). This can be used as an alternative approach for this pipeline. For details regarding the development and usage of CellTag, please refer to the following papers - *Biddy et al., Nature, 2018*, https://www.nature.com/articles/s41586-018-0744-4, *Kong et al., Nature Protocols, 2020*, https://www.nature.com/articles/s41596-019-0247-2
 13 | 
 14 | Install devtools
 15 | ```r
 16 | install.packages("devtools")
 17 | ```
 18 | Install the package from GitHub.
 19 | ```r
 20 | library("devtools")
 21 | devtools::install_github("morris-lab/CellTagR")
 22 | ```
 23 | Load the package
 24 | ```r
 25 | library("CellTagR")
 26 | ```
 27 | 
 28 | ## Assessment of CellTag Library Complexity via Sequencing
 29 | In this first section, we evaluate the CellTag library complexity using sequencing. Following is an example using the sequencing data we generated in lab for pooled CellTag library V2. 
 30 | ### 1. Read in the fastq sequencing data and extract the CellTags
 31 | The extracted CellTags will be stored as an attribute (fastq.full.celltag & fastq.only.celltag) in the resulting object.
 32 | ```r
 33 | # Read in the data file that comes with the package
 34 | fpath <- system.file("extdata", "V2-1_R1.zip", package = "CellTagR")
 35 | extract.dir <- "."
 36 | # Extract the dataset
 37 | unzip(fpath, overwrite = FALSE, exdir = ".")
 38 | full.fpath <- paste0(extract.dir, "/", "V2-1_S2_L001_R1_001.fastq")
 39 | # Set up the CellTag Object
 40 | test.obj <- CellTagObject(object.name = "v2.whitelist.test", fastq.bam.directory = full.fpath)
 41 | # Extract the CellTags
 42 | test.obj <- CellTagExtraction(celltag.obj = test.obj, celltag.version = "v2")
 43 | ```
 44 | 
 45 | ### 2. Count the CellTags and sort based on the occurrence of each CellTag
 46 | ```r
 47 | # Count and Sort the CellTags in descending order of occurrence
 48 | test.obj <- AddCellTagFreqSort(test.obj)
 49 | # Check the stats
 50 | test.obj@celltag.freq.stats
 51 | ```
 52 | 
 53 | ### 3. Generation of a whitelist for the CellTag library
 54 | Here, we generate the whitelist for this CellTag library - CellTag V2. This will remove the CellTags with an occurrence number below the threshold. The threshold (using the 90th percentile as an example) is determined as floor[(90th quantile)/10]. The percentile can be changed when calling the function. A plot of CellTag reads will be drawn and can be used to further choose the percentile. If an output directory is provided, whitelist files will be stored in that directory. Otherwise, whitelist files will be saved under the same directory as the fastq files, named <CellTag Version Number>_whitelist.csv (Example: v2_whitelist.csv). 
 55 | 
 56 | ```r
 57 | # Generate the whitelist
 58 | test.obj <- CellTagWhitelistFiltering(celltag.obj = test.obj, percentile = 0.9, output.dir = NULL)
 59 | ```
 60 | The generated whitelist for each library can be used to filter and clean the single-cell CellTag UMI matrices.
 61 | 
 62 | ## Single-Cell CellTag Extraction and Quantification
 63 | In this section, we present an alternative approach that uses this package to carry out CellTag extraction, quantification, and generation of UMI count matrices. This can also be accomplished via the supplied workflow - https://github.com/morris-lab/CellTagWorkflow.
 64 | #### Note: Using the package could be slow for the extraction part. For reference, it took approximately an hour to extract from a 40Gb BAM file using a maximum of 8Gb of memory.
 65 | 
 66 | ### 1. Download the BAM file 
 67 | Here we follow the same step as in https://github.com/morris-lab/CellTagWorkflow to download a BAM file from the Sequence Read Archive (SRA) server. Again, this file is quite large, so it might take a while to download. The file can be downloaded using wget in a terminal or from within R.
 68 | ```bash
 69 | # bash
 70 | wget https://sra-pub-src-1.s3.amazonaws.com/SRR7347033/hf1.d15.possorted_genome_bam.bam.1
 71 | ```
 72 | OR
 73 | ```r
 74 | download.file("https://sra-pub-src-1.s3.amazonaws.com/SRR7347033/hf1.d15.possorted_genome_bam.bam.1", "./hf1.d15.bam")
 75 | ```
 76 | 
 77 | ### (RECOMMENDED) Optional Step: BAM File Filtering
 78 | ***NOTE:*** If BAM file filtering is **NOT** required (although, we strongly recommend this), skip this step and move to *Step 2 - Create a CellTag Object*, in which the entire BAM file will be used. Otherwise, before generating a CellTag object and extracting the CellTags, we will carry out the following BAM filtering step, from which a subset of reads in the BAM file will be searched during CellTag extraction.
 79 | 
 80 | In this step, we will filter the BAM file to reduce the possibility that false positive CellTags will be identified. Briefly, the 17-20 bp sequence that comprises the CellTag barcode may appear by chance in other regions of the transcriptome. These may be identified as CellTags and cells expressing these transcripts may be falsely called as clones. By filtering reads in the BAM file to only include those which are unmapped as well as those mapped to GFP or (optionally) the CellTag UTR, we reduce the chances of extracting false positive CellTags.
 81 | 
 82 | We recommend adding the CellTag UTR and GFP CDS as transgenes to the reference genome used during alignment. These sequences and corresponding GTF entries are stored [here](https://github.com/morris-lab/CellTagR/blob/master/Examples/CellTag_UTR.fa). More information on adding a marker gene to a reference can be found here: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_mr.
 83 | 
 84 | ### I. Filter unmapped reads
 85 | First, we will use samtools to efficiently filter unmapped reads.
 86 | 
 87 | ```bash
 88 | # bash
 89 | samtools view -b -f 4 ./hf1.d15.bam > ./hf1.d15.filtered.bam
 90 | ```
 91 | 
 92 | ### II. Filter transgene reads
 93 | Next, we will filter reads aligned to GFP or the CellTag UTR.
 94 | 
 95 | ```bash
 96 | # bash
 97 | samtools view -b  ./hf1.d15.bam GFP >> ./hf1.d15.filtered.bam
 98 | ```
 99 | 
100 | If the CellTag UTR was not included in the reference, the following line may be omitted.
101 | 
102 | ```bash
103 | # bash
104 | samtools view -b ./hf1.d15.bam CellTag.UTR >> ./hf1.d15.filtered.bam
105 | ```
106 | 
107 | ### 2. Create a CellTag Object
108 | In this step, we will initialize a CellTag object with an object name and the path to where the bam file is stored **if only one bam file is processed.**
109 | 
110 | ```r
111 | # Set up the CellTag Object
112 | bam.test.obj <- CellTagObject(object.name = "bam.cell.tag.obj", fastq.bam.directory = "./hf1.d15.filtered.bam")
113 | ```
114 | 
115 | **Update: CellTagR now enables read-in of multiple BAM files at a time.** When multiple BAM files need to be processed, ***please use a folder that contains ONLY BAM files and put the fastq.bam.directory as the path of the folder.*** For instance, two bam files need to be processed named as *bam1.bam* and *bam2.bam*. They will be put into a folder named as *beautiful_bams* in the *Desktop*. Then, the input will be *fastq.bam.directory="~/Desktop/beautiful_bams/"* as below.
116 | 
117 | ```r
118 | ## NOT RUN
119 | # Set up the CellTag Object
120 | # bam.test.obj <- CellTagObject(object.name = "bam.cell.tag.obj", fastq.bam.directory = "~/Desktop/beautiful_bams/")
121 | ```
122 | 
123 | ***Note: The following tutorials are only intended for processing ONE CellTag version. To obtain information for all three versions of CellTags, running the following pipeline is required for each CellTag version independently, i.e. finishing process for V1 and then repeating the procedure for V2, and so on. After running the pipeline for each CellTag version, the clonal information of each will be stored in the same object, which can be used to carry out network construction and visualization.***
124 | 
125 | ### 3. Extract the CellTags from the BAM file
126 | In this step, we will extract the CellTag information from the BAM file, which contains information including cell barcodes, CellTag and Unique Molecular Identifiers (UMI). The result generated from this extraction will be a data table containing the following information. The result will then be saved into the slot "bam.parse.rslt" in the object in the following format.
127 | 
128 | |Cell Barcode|Unique Molecular Identifier|CellTag Motif|
129 | |:----------:|:-:|:---------:|
130 | |Cell.BC|UMI|Cell.Tag|
131 | ```r
132 | # Extract the CellTag information
133 | bam.test.obj <- CellTagExtraction(bam.test.obj, celltag.version = "v1")
134 | # Check the bam file result
135 | head(bam.test.obj@bam.parse.rslt[["v1"]])
136 | ```
137 | **Update: CellTagR now enables read-in of multiple BAM files at a time.** Extraction with multiple samples will automatically add prefixes to different samples in the order of BAM file given, i.e. Sample-\<i\>_\<Cell Barcode\>. The order of BAM file processing will be printed as it processes along. Prefixes assignments from users will be coming soon!
138 | 
139 | ### 4. Quantify the CellTag UMI Counts and Generate UMI Count Matrices
140 | In this step, we will quantify the CellTag UMI counts and generate the UMI count matrices. This function takes two inputs: the barcode tsv file generated by 10X and the CellTag object processed in Step 3. The barcode tsv file can be either filtered or raw. **However, note that using the raw barcodes file could require a large amount of memory for using this function**. If filtered barcode files are used, **only cell barcodes that appear in the filtered barcode file** will be preserved. The result will also be saved as a *dgCMatrix* in a slot - "raw.count" - under the object. At the same time, initial CellTag statistics will be saved as another slot under the object. The matrix will be in the following format. ***If multiple BAM files were processed, please follow the update below.***
141 | 
142 | ||CellTag Motif 1|CellTag Motif 2|\<all tags detected\>|CellTag Motif N|
143 | |:----------:|:-:|:---------:|:--:|:--:|
144 | |Cell.BC|Motif 1|Motif 2|\<all tags detected\>|Motif N|
145 | 
146 | ```r
147 | # Generate the sparse count matrix
148 | bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "./barcodes.tsv")
149 | # Check the dimension of the raw count matrix
150 | dim(bam.test.obj@raw.count)
151 | ```
152 | 
153 | **Update: CellTagR now enables read-in of multiple BAM files at a time.** When multiple BAM files are processed, an aggregated barcode file with the proper prefixes needs to be generated. Please use the *Barcode.Aggregate* function to generate an aggregated barcode file. This function takes in an **ordered** list of barcodes files. The order should be the same as the BAM file order.
154 | 
155 | ```r
156 | Barcode.Aggregate(list("barcode_1.tsv", "barcode_2.tsv"), "./barcodes_all.tsv")
157 | # Generate the sparse count matrix
158 | bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "./barcodes_all.tsv")
159 | # Check the dimension of the raw count matrix
160 | dim(bam.test.obj@raw.count)
161 | ```
162 | 
163 | The generated CellTag UMI count matrices can then be used in the following steps for clone identification.
164 | 
165 | ## Single-cell CellTag UMI Count Matrix Processing
166 | In this section, we are presenting an alternative approach that utilizes this package we established to carry out clone calling with single-cell CellTag UMI count matrices. In this pipeline below, we are using a subset of dataset generated from the full data (Full data can be found here: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE99915). Briefly, in our lab, we reprogram mouse embryonic fibroblasts (MEFs) to induced endoderm progenitors (iEPs). This dataset is a single-cell dataset that contains cells collected from different time points during the process. This subset is a part of the first replicate of the data. It contains cells collected at Day 15 with three different CellTag libraries - V1, V2 & V3. 
167 | 
168 | ### 1. Read in the single-cell CellTag UMI count matrix
169 | We generated this object from the above steps, using BAM files. As above, BAM files take a long time to process. Hence, in this repository, we include a sample object saved as .Rds file from the previous steps, in which raw count matrix is included in the slot - "raw.count"
170 | ```r
171 | # Read the RDS file and get the object
172 | dt.mtx.path <- system.file("extdata", "Demo_V1.Rds", package = "CellTagR")
173 | bam.test.obj <- readRDS(dt.mtx.path)
174 | ```
175 | 
176 | ### (RECOMMENDED) Optional Step: CellTag Error Correction
177 | ***NOTE:*** If CellTag error correction is **NOT** required (although, we strongly recommend this), skip this step and move to *Step 2 - binarization*, in which the raw matrix will be used. Otherwise, before binarization and additional filtering, we will carry out the following error correction step via Starcode, from which a collapsed matrix will be used further for binarization.
178 | 
179 | In this step, we will identify CellTags with similar sequences and collapse similar CellTags to the centroid CellTag. For more information and installation, please refer to starcode software - https://github.com/gui11aume/starcode. Briefly, starcode clusters DNA sequences based on the Levenshtein distances between each pair of sequences, from which we collapse similar CellTag sequences to correct for potential errors that occurred during the single-cell RNA-sequencing process. The default maximum distance from starcode was used to cluster the CellTags.
180 | 
181 | ### I. Prepare for the data to be collapsed
182 | First, we will prepare the data to the format that is accepted by starcode. This function accepts two inputs including the CellTag object with raw count matrix generated and a path to where to save the output text file. The output will be a text file with each line containing one sequence to collapse with others. In this function, we concatenate the CellTag with cell barcode and use the combined sequences as input to execute Starcode. The file to be used for Starcode will be stored under the provided directory.
183 | ```r
184 | # Generating the collapsing file
185 | bam.test.obj <- CellTagDataForCollapsing(celltag.obj = bam.test.obj, output.file = "~/Desktop/collapsing.txt")
186 | ```
187 | 
188 | **Update: CellTagR now enables read-in of multiple BAM files at a time.** Multiple files with their prefixes used before will be incorporated into the output files. Hence multiple files will be generated for collapsing in the given directory. For instance, if there are 2 samples and to be saved on Desktop, they will be named as *collapsing_Sample-1.txt* and *collapsing_Sample-2.txt*.
189 | 
190 | ### II. Run Starcode to cluster CellTags
191 | Following the instruction for Starcode, we will run the following command to generate the result from starcode. **Make sure to run each file generated for each sample if multiple are processed**
192 | 
193 | ```bash
194 | ./starcode -s --print-clusters ~/Desktop/collapsing.txt > ~/Desktop/collapsing_result.txt
195 | ```
196 | 
197 | ***Please use a folder containing ONLY the collapsing results! And please name the collapsing results corresponding to their sample names if multiple samples are processed.*** For example, use the name *collapsing_result_Sample-1.txt* for *collapsing_Sample-1.txt*. 
198 | 
199 | ### III. Extract information from Starcode result and collapse similar CellTags
200 | With the collapsed results, we will regenerate the CellTag x Cell Barcode matrix. The collapsed matrix will be stored in a slot - "collapsed.count" - in the CellTag object. This function takes two inputs: the CellTag object to modify and the path to the result file from collapsing. ***If multiple BAM files generated the collapsing result, check the update below.***
201 | 
202 | ```r
203 | # Recount and generate collapsed matrix
204 | bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_result.txt")
205 | # Check the dimension of this collapsed count.
206 | head(bam.test.obj@collapsed.count)
207 | ```
208 | 
209 | ***Update: If the collapsing results were generated from multiple BAM files, run the following lines.*** Example: the result files are saved on the desktop in the folder named *star_collapse*.
210 | 
211 | ```r
212 | collapsed.rslt.dir <- "~/Desktop/star_collapse"
213 | # Recount and generate collapsed matrix
214 | bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = list.files(collapsed.rslt.dir, full.names = T))
215 | # Check the dimension of this collapsed count.
216 | head(bam.test.obj@collapsed.count)
217 | ```
218 | 
219 | Below is an example Jaccard Analysis result with Error Correction using Starcode collapsing (top - without collapsing, bottom - with collapsing):
220 | <p align="center">
221 |     <img src="/Examples/jaccard wo collapsing.png" height="480" width="720">
222 | </p>
223 | 
224 | <p align="center">
225 |     <img src="/Examples/jaccard example.png" height="480" width="720">
226 | </p>
227 | 
228 | ### 2. Binarize the single-cell CellTag UMI count matrix
229 | Here, we binarize the count matrix to contain 0 or 1, where 0 indicates no such CellTag found in a single cell and 1 reports CellTag expression. The suggested cutoff that marks presence or absence is at least 2 counts per CellTag per Cell. For details regarding cutoff choice, please refer to the paper - https://www.nature.com/articles/s41586-018-0744-4. The binary matrix will be stored in a slot - 'binary.mtx' - as a *dgCMatrix*. **Note: If collapsing was performed, binarization will be based on the collapsed count matrix. Otherwise, it will be based on the raw count matrix**
230 | ```r
231 | # Calling binarization
232 | bam.test.obj <- SingleCellDataBinarization(bam.test.obj, 2)
233 | ```
234 | 
235 | ### 3. Metric plots to facilitate for additional filtering
236 | We then generate scatter plots for the total number of CellTag counts in each cell and the number of occurrences of each CellTag across all cells. These plots assist filtering and cleaning of the data.
237 | ```r
238 | MetricPlots(bam.test.obj)
239 | ```
240 | Below is an example plot that you could obtain from this object
241 | <p align="center">
242 |   <img src="/Examples/pre_filtering.png" height="720" width="720">
243 | </p>
244 | 
245 | ### 4. Apply the whitelisted CellTags generated from assessment
246 | Based on the whitelist generated earlier, we filter the UMI count matrix to contain only whitelisted CellTags for the version currently being processed. The function takes two inputs: the CellTag object with binarization performed and the path to the whitelist csv file. The whitelist result will be saved in a slot - "whitelisted.count".
247 | ```r
248 | # Locate the whitelist csv file that comes with the package
249 | dt.mtx.whitelist.path <- system.file("extdata", "v1_whitelist.csv", package = "CellTagR")
250 | bam.test.obj <- SingleCellDataWhitelist(bam.test.obj, dt.mtx.whitelist.path)
251 | ```
252 | 
253 | ### 5. Check metric plots after whitelist filtering
254 | Recheck the metric similar to Step 3
255 | ```r
256 | MetricPlots(bam.test.obj)
257 | ```
258 | 
259 | ### 6. Additional filtering
260 | #### Filter out cells with more than 20 CellTags
261 | ```r
262 | bam.test.obj <- MetricBasedFiltering(bam.test.obj, 20, comparison = "less")
263 | ```
264 | #### Filter out cells with less than 2 CellTags
265 | ```r
266 | bam.test.obj <- MetricBasedFiltering(bam.test.obj, 2, comparison = "greater")
267 | ```
268 | ### 7. Last check of metric plots
269 | ```r
270 | MetricPlots(bam.test.obj)
271 | ```
272 | Example plot of last check!
273 | <p align="center">
274 |   <img src="/Examples/post_filtering.png" height="720" width="720">
275 | </p>
276 | If it looks good, proceed to the following steps to call the clones.
277 | 
278 | ### 8. Clone Calling
279 | #### I. Jaccard Analysis
280 | This calculates pairwise Jaccard similarities among cells using the filtered CellTag UMI count matrix. This function takes the CellTag object with metric filtering carried out. This will generate a Jaccard similarity matrix, which is saved as a part of the object in a slot - "jaccard.mtx". It also plots a correlation heatmap with cells ordered by hierarchical clustering. 
281 | 
282 | ```r
283 | bam.test.obj <- JaccardAnalysis(bam.test.obj)
284 | ```
285 | ##### Note: For large sparse matrix, a fast version can be chosen using the parameter *fast*.
286 | ```r
287 | bam.test.obj <- JaccardAnalysis(bam.test.obj, fast = T)
288 | ```
289 | #### II. Clone Calling
290 | Based on the Jaccard similarity matrix, we can call clones of cells. A clone will be selected if the correlations inside of the clones passes the cutoff given (here, 0.7 is used. It can be changed based on the heatmap/correlation matrix generated above). Using this part, a list containing the clonal identities of all cells and the count information for each clone will be stored in the object in slots - "clone.composition" and "clone.size.info". 
291 | 
292 | ##### Clonal Identity Table `clone.composition`
293 | 
294 | |clone.id|cell.barcode|
295 | |:-------:|:------:|
296 | |Clonal ID|Cell BC |
297 | 
298 | ##### Count Table `clone.size.info`
299 | |Clone.ID|Frequency|
300 | |:------:|:-------:|
301 | |Clonal ID|The cell number in the clone|
302 | 
303 | ```r
304 | # Call clones
305 | bam.test.obj <- CloneCalling(celltag.obj = bam.test.obj, correlation.cutoff=0.7)
306 | # Check them out!!
307 | bam.test.obj@clone.composition[["v1"]]
308 | bam.test.obj@clone.size.info[["v1"]]
309 | ```
310 | 
311 | ## Network Construction And Visualization
312 | Having all three CellTag versions analyzed and stored in one CellTag object, we will construct a network of each individual clone connected to its descendants. In addition to the connections between clones, the cells in each clone will be visualized on the network as leaf nodes. In the network, each center node denotes a clone. Connections between those nodes suggest a "parent-child" relationship between the clones. Each leaf node denotes a cell. Connections between leaf nodes and center nodes suggest a "belonging" relationship. Additionally, we allow users to construct a stacked bar chart to facilitate further analysis of the dynamics across different timepoints.
313 | 
314 | ***Note:*** Here, we provide a demo object in .Rds format that is generated with all three versions processed. The R notebook used to process all three versions is included in the Examples folder.
315 | 
316 | ### 1. Read in the object
317 | ```r
318 | # Read the RDS file and get the object
319 | dt.mtx.path <- system.file("extdata", "bam_v123_obj.Rds", package = "CellTagR")
320 | bam.test.obj <- readRDS(dt.mtx.path)
321 | ```
322 | 
323 | ### 2. Calculate the link list
324 | Here, we convert the CellTag Matrix into a form of link list, which will be further used to construct the linkages in the network
325 | ```r
326 | bam.test.obj <- convertCellTagMatrix2LinkList(bam.test.obj)
327 | ```
328 | The linked list is saved in the slot - "network.link.list", in the following format.
329 | 
330 | |source|target|tag|target_unmodified|
331 | |:-------:|:------:|:------:|:------:|
332 | |The Source Node|The Target Node|Associated CellTag|Original Target Name|
333 | 
334 | In the source node, the data is formatted as \<CellTag Version\>_\<Clone Number\>. These are the centroid nodes for the network. The clone number can be found in the previously filled slot - "clone.composition". For the target node, there are two possibilities. One kind of target is a cell that belongs to the centroid clone. The other kind is a clone that is related to the centroid clone, which suggests a "parent-child" relationship between clones. For example, in the table below, the first row describes the belonging relationship of the cell with barcode "AAGCCGCAGCTAGCCC-1" to Clone 3 from CellTag V1, while the second row indicates a "parent-child" relationship between Clone 1 from CellTag V1 and Clone 42 from CellTag V2.
335 | 
336 | |source|target|tag|target_unmodified|
337 | |:-------:|:------:|:------:|:------:|
338 | |CellTagV1_3|AAGCCGCAGCTAGCCC-1_V1|CellTagV1|AAGCCGCAGCTAGCCC-1|
339 | |CellTagV1_1|CellTagV2_42|CellTagV1|CellTagV2_42|
340 | 
341 | ### 3. Get nodes from the link list
342 | This will obtain all the nodes that are involved in this network.
343 | ```r
344 | bam.test.obj <- getNodesfromLinkList(bam.test.obj)
345 | ```
346 | 
347 | ### 4. Add additional information
348 | For each leaf node (each cell), other information, such as cluster/cell types, may be available from other analyses. In this step, we will add this information to each node so that it can be visualized on the network as well. In this scenario, for demo purposes, we use a simulated data frame to serve as mock cluster information for each node.
349 | ```r
350 | # Simulate some additional data
351 | additional_data <- data.frame(sample(1:10, size = length(rownames(bam.test.obj@celltag.aggr.final)), replace = TRUE), row.names = rownames(bam.test.obj@celltag.aggr.final))
352 | colnames(additional_data) <- "Cluster"
353 | # Add the data to the object
354 | bam.test.obj <- addData2Nodes(bam.test.obj, additional_data)
355 | ```
356 | 
357 | ### 5. Network visualization and plot
358 | Here, we will visualize the network!
359 | ```r
360 | # Network Visualization
361 | bam.test.obj <- drawSubnet(tag = "CellTagV1_2", overlay = "Cluster", celltag.obj = bam.test.obj)
362 | bam.test.obj@network
363 | ```
364 | 
365 | Additionally, the network can be saved to an html file, allowing better visualization and overview. Please make sure pandoc is installed to support markdown and output this network.
366 | ```r
367 | saveNetwork(bam.test.obj@network, "~/Desktop/presentation/Demo/hf1.d15.network.construction.html")
368 | ```
369 | 
370 | ### 6. Stack bar chart generation
371 | An important aspect of using CellTagging is to analyze the clonal dynamics of a population of cells. Here, we provide a stack bar chart option to provide some insights.
372 | ```r
373 | # Get the data for plotting
374 | bar.data <- bam.test.obj@celltag.aggr.final
375 | bar.data$Cell.BC <- rownames(bar.data)
376 | 
377 | bar.data <- gather(bar.data, key = "CellTag", value = "Clone", 1:3, na.rm = FALSE)
378 | 
379 | # Using ggplot to plot
380 | ggplot(data = bar.data) + 
381 |   geom_bar(mapping = aes(x = CellTag, fill = factor(Clone)), position = "fill", show.legend = FALSE) + 
382 |   scale_y_continuous(labels = scales::percent_format()) +
383 |   theme_bw()
384 | ```
385 | Below is a sample bar chart!
386 | <p align="center">
387 |   <img src="/Examples/bar_Chart.png" height="540" width="720">
388 | </p>
389 | 
390 | ## Contact Us
391 | 


--------------------------------------------------------------------------------
/inst/extdata/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/inst/extdata/.DS_Store


--------------------------------------------------------------------------------
/inst/extdata/Demo_V1.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/inst/extdata/Demo_V1.Rds


--------------------------------------------------------------------------------
/inst/extdata/V2-1_R1.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/inst/extdata/V2-1_R1.zip


--------------------------------------------------------------------------------
/inst/extdata/bam_v123_obj.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/inst/extdata/bam_v123_obj.Rds


--------------------------------------------------------------------------------
/inst/extdata/hf1.d28.prefiltered.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morris-lab/CellTagR/de6f6fb041e7c7b85915554c6eb675c087e4b2eb/inst/extdata/hf1.d28.prefiltered.Rds


--------------------------------------------------------------------------------
/man/AddCellTagFreqSort.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagWhitelistGeneration.R
 3 | \name{AddCellTagFreqSort}
 4 | \alias{AddCellTagFreqSort}
 5 | \title{CellTag Frequency Sort Table}
 6 | \usage{
 7 | AddCellTagFreqSort(celltag.obj)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag Object with CellTags extracted}
11 | }
12 | \value{
13 | A CellTag Object with attribute (celltag.freq.stats) filled.
14 | }
15 | \description{
16 | This function counts and sorts the identified CellTags from Fastq file
17 | }
18 | \examples{
19 | AddCellTagFreqSort(bam.test.obj)
20 | 
21 | }
22 | \keyword{CellTagging}
23 | \keyword{RNA-seq}
24 | \keyword{data,}
25 | \keyword{single-cell}
26 | 


--------------------------------------------------------------------------------
/man/Barcode.Aggregate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AuxiliaryFunctions.R
 3 | \name{Barcode.Aggregate}
 4 | \alias{Barcode.Aggregate}
 5 | \title{CellTag Barcode Aggregation function}
 6 | \usage{
 7 | Barcode.Aggregate(file.list, output.file)
 8 | }
 9 | \arguments{
10 | \item{file.list}{files in a list to aggregate in order same as the BAM files}
11 | 
12 | \item{output.file}{where to save this aggregated output file. Should be a .tsv file.}
13 | }
14 | \value{
15 | The aggregated barcode file, with sample prefixes added to the barcodes, is written to \code{output.file}.
16 | }
17 | \description{
18 | This function allows barcode aggregation of multiple-file processing.
19 | }
20 | \examples{
21 | Barcode.Aggregate(list("barcodes_1.tsv", "barcodes_2.tsv"), output.file = "barcode_aggr.tsv")
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data,}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/CellTagDataForCollapsing.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagForCollapsing.R
 3 | \name{CellTagDataForCollapsing}
 4 | \alias{CellTagDataForCollapsing}
 5 | \title{CellTag Starcode Prior Collapsing}
 6 | \usage{
 7 | CellTagDataForCollapsing(celltag.obj, output.file)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with the raw count matrix filled.}
11 | 
12 | \item{output.file}{The filepath and name to save the table for collapsing (usually a .txt file)}
13 | }
14 | \value{
15 | A CellTag object with collapsing mapping table stored in pre.starcode slot
16 | }
17 | \description{
18 | This function generates the .txt file that will be fed into starcode - https://github.com/gui11aume/starcode - to collapse similar CellTags.
19 | }
20 | \examples{
21 | CellTagDataForCollapsing(bam.test.obj, "./collapsing.txt")
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data,}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/CellTagDataPostCollapsing.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagForCollapsing.R
 3 | \name{CellTagDataPostCollapsing}
 4 | \alias{CellTagDataPostCollapsing}
 5 | \title{CellTag Starcode Post Collapsing}
 6 | \usage{
 7 | CellTagDataPostCollapsing(celltag.obj, collapsed.rslt.file)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with the pre-starcode mapping matrix filled.}
11 | 
12 | \item{collapsed.rslt.file}{File path to the collapsed result file}
13 | }
14 | \value{
15 | A CellTag object with collapsed count matrix stored in collapsed.count slot
16 | }
17 | \description{
18 | This function processes the result generated from starcode - https://github.com/gui11aume/starcode.
19 | }
20 | \examples{
21 | CellTagDataPostCollapsing(bam.test.obj, "./collapsing_result.txt")
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data,}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/CellTagExtraction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagExtraction.R
 3 | \name{CellTagExtraction}
 4 | \alias{CellTagExtraction}
 5 | \title{CellTag Extraction Function}
 6 | \usage{
 7 | CellTagExtraction(celltag.obj, celltag.version, technique = "10x")
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object initialized with path to the fastq/bam file}
11 | 
12 | \item{celltag.version}{The CellTag version to extract}
13 | 
14 | \item{technique}{The technique used for scRNA-seq, Default to 10x. Currently enabled for 10x and dropseq.}
15 | }
16 | \value{
17 | A CellTag object with attribute (bam.parse.rslt) filled
18 | }
19 | \description{
20 | This function extracts CellTags from the raw fastq/bam sequencing file. If it is a fastq file, it provides counts of each CellTag and sorts them in descending order. If it is a bam file, it returns the barcode, umi, celltag information.
21 | }
22 | \examples{
23 | CellTagExtraction(bam.test.obj, celltag.version = "v1")
24 | 
25 | }
26 | \keyword{CellTagging}
27 | \keyword{RNA-seq}
28 | \keyword{data,}
29 | \keyword{single-cell}
30 | 


--------------------------------------------------------------------------------
/man/CellTagMatrixCount.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagMatrixGeneration.R
 3 | \name{CellTagMatrixCount}
 4 | \alias{CellTagMatrixCount}
 5 | \title{CellTag Matrix Generation Function}
 6 | \usage{
 7 | CellTagMatrixCount(celltag.obj, barcodes.file)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with bam file result filled}
11 | 
12 | \item{barcodes.file}{A .tsv output file from 10x CellRanger pipeline. It contains a list of all cell barcodes identified in the filtered dataset.}
13 | }
14 | \value{
15 | A CellTag object with the attribute (raw.count) filled
16 | }
17 | \description{
18 | This function uses the information extracted from the previously processed data to generate a Cell Barcode x CellTag matrix.
19 | }
20 | \examples{
21 | CellTagMatrixCount(bam.test.obj, "barcodes.tsv")
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/CellTagObject.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CreateCellTagObject.R
 3 | \name{CellTagObject}
 4 | \alias{CellTagObject}
 5 | \title{Create a New CellTag Object}
 6 | \usage{
 7 | CellTagObject(object.name, fastq.bam.directory)
 8 | }
 9 | \arguments{
10 | \item{object.name}{The name of the object}
11 | 
12 | \item{fastq.bam.directory}{The input fastq/bam data file path}
13 | 
14 | \item{celltag.version}{Which version of CellTags are you working with?}
15 | }
16 | \value{
17 | A CellTag Object with open attributes that can be filled as the analysis moves along
18 | }
19 | \description{
20 | This function creates a CellTag object that contains the basic information required for the object
21 | }
22 | \examples{
23 | CellTagObject("hf1.d15.test", "hf1.d15.bam")
24 | 
25 | }
26 | \keyword{CellTagging}
27 | \keyword{RNA-seq}
28 | \keyword{data}
29 | \keyword{single-cell}
30 | 


--------------------------------------------------------------------------------
/man/CellTagPatternCalling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AuxiliaryFunctions.R
 3 | \name{CellTagPatternCalling}
 4 | \alias{CellTagPatternCalling}
 5 | \title{CellTag Pattern Calling Function}
 6 | \usage{
 7 | CellTagPatternCalling(celltag.version)
 8 | }
 9 | \arguments{
10 | \item{celltag.version}{Which CellTag version are you investigating?}
11 | }
12 | \value{
13 | A list containing the pattern, nucleotides to look for before/after the motif
14 | }
15 | \description{
16 | This function provides motif patterns corresponding to the input celltag version
17 | }
18 | \examples{
19 | CellTagPatternCalling("v1")
20 | 
21 | }
22 | \keyword{CellTagging}
23 | \keyword{RNA-seq}
24 | \keyword{data}
25 | \keyword{single-cell}
26 | 


--------------------------------------------------------------------------------
/man/CellTagWhitelistFiltering.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagWhitelistGeneration.R
 3 | \name{CellTagWhitelistFiltering}
 4 | \alias{CellTagWhitelistFiltering}
 5 | \title{CellTag Whitelist Filtering Function}
 6 | \usage{
 7 | CellTagWhitelistFiltering(celltag.obj, percentile, output.dir = NULL)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag Object with CellTag frequency table counted and sorted}
11 | 
12 | \item{percentile}{A fraction cutoff percentile for filtering the CellTags e.g. 0.9 for 90th percentile}
13 | 
14 | \item{output.dir}{Which directory would you like to store these files? If NULL, save to the same directory as the fastq/bam file}
15 | }
16 | \value{
17 | A CellTag Object with attribute (whitelist) filled.
18 | }
19 | \description{
20 | This function conducts whitelist filtering such that only CellTags whose counts exceed the given percentile cutoff are considered for clone calling.
21 | }
22 | \examples{
23 | CellTagWhitelistFiltering(bam.test.obj, 0.9)
24 | 
25 | }
26 | \keyword{CellTagging}
27 | \keyword{RNA-seq}
28 | \keyword{data}
29 | \keyword{single-cell}
30 | 


--------------------------------------------------------------------------------
/man/CloneCalling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CloneCalling.R
 3 | \name{CloneCalling}
 4 | \alias{CloneCalling}
 5 | \title{Clone Calling Function}
 6 | \usage{
 7 | CloneCalling(celltag.obj, correlation.cutoff)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with the jaccard matrix generated}
11 | 
12 | \item{correlation.cutoff}{Correlation cutoff for clone membership}
13 | }
14 | \value{
15 | A CellTag object with attributes (clone.composition & clone.size.info) filled.
16 | }
17 | \description{
18 | This function conducts clone calling based on the Jaccard results.
19 | }
20 | \examples{
21 | CloneCalling(bam.test.obj, 0.7)
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/JaccardAnalysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CloneCalling.R
 3 | \name{JaccardAnalysis}
 4 | \alias{JaccardAnalysis}
 5 | \title{Jaccard Analysis Function}
 6 | \usage{
 7 | JaccardAnalysis(celltag.obj, plot.corr = TRUE, fast = FALSE)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with the counts filtered based on metrics}
11 | 
12 | \item{plot.corr}{Would you like to plot the correlation matrix?}
13 | }
14 | \value{
15 | A CellTag object with attribute (jaccard.mtx) filled
16 | }
17 | \description{
18 | This function conducts Jaccard analysis to calculate the Jaccard similarity between cells.
19 | }
20 | \examples{
21 | JaccardAnalysis(bam.test.obj)
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/MetricBasedFiltering.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MetricBasedPlottingAndFiltering.R
 3 | \name{MetricBasedFiltering}
 4 | \alias{MetricBasedFiltering}
 5 | \title{Metric-Based Filtering Function}
 6 | \usage{
 7 | MetricBasedFiltering(celltag.obj, cutoff, comparison = "less")
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag Object with count matrix generated}
11 | 
12 | \item{cutoff}{The cutoff decided from the metric plots}
13 | 
14 | \item{comparison}{Would you like to maintain the part less than/greater than the cutoff? Default to less. Choices can be greater or less.}
15 | }
16 | \value{
17 | A CellTag Object with attribute (metric.filtered.count) filled
18 | }
19 | \description{
20 | This function applies further filtering on scRNA-seq data with CellTags based on cutoff values identified from the metric plots.
21 | }
22 | \examples{
23 | MetricBasedFiltering(bam.test.obj, 20, "less")
24 | 
25 | }
26 | \keyword{CellTagging}
27 | \keyword{RNA-seq}
28 | \keyword{data}
29 | \keyword{single-cell}
30 | 


--------------------------------------------------------------------------------
/man/MetricPlots.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MetricBasedPlottingAndFiltering.R
 3 | \name{MetricPlots}
 4 | \alias{MetricPlots}
 5 | \title{CellTag Metric Plotting Function}
 6 | \usage{
 7 | MetricPlots(celltag.obj)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag Object}
11 | }
12 | \description{
13 | This function provides some metric plots for further downstream celltag filtering in the scRNA-seq dataset.
14 | }
15 | \examples{
16 | MetricPlots(bam.test.obj)
17 | 
18 | }
19 | \keyword{CellTagging}
20 | \keyword{RNA-seq}
21 | \keyword{data}
22 | \keyword{single-cell}
23 | 


--------------------------------------------------------------------------------
/man/SingleCellDataBinatization.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ScCellTagMatrixProcess.R
 3 | \name{SingleCellDataBinatization}
 4 | \alias{SingleCellDataBinatization}
 5 | \title{Single-cell RNA-seq Binarization Function}
 6 | \usage{
 7 | SingleCellDataBinatization(celltag.obj, tag.cutoff)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with the raw count matrix generated}
11 | 
12 | \item{tag.cutoff}{How many tags would you like to be used as a cutoff to say that the cells are tagged?}
13 | }
14 | \value{
15 | A CellTag object with the attribute (binary.mtx) filled.
16 | }
17 | \description{
18 | This function binarizes the single-cell CellTag data based on a given cutoff. It generates a binary matrix, which is stored as a slot in the CellTag Object. The binary matrix is then used for downstream processing of the single-cell data.
19 | }
20 | \examples{
21 | SingleCellDataBinatization(bam.test.obj, 2)
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/SingleCellDataWhitelist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ScCellTagMatrixProcess.R
 3 | \name{SingleCellDataWhitelist}
 4 | \alias{SingleCellDataWhitelist}
 5 | \title{Single-cell RNA-seq Whitelisting Function}
 6 | \usage{
 7 | SingleCellDataWhitelist(celltag.obj, whitels.cell.tag.file)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with the binary matrix generated}
11 | 
12 | \item{whitels.cell.tag.file}{File path to the whitelisted CellTags}
13 | }
14 | \value{
15 | A CellTag object with the attribute (whitelisted.count) filled
16 | }
17 | \description{
18 | The whitelist is a list of CellTags generated based on an assessment of the CellTag library. It helps reduce the effect of sequencing errors in CellTags. This function conducts whitelist filtering on the single-cell dataset, filtering out CellTags that are not included in the whitelist.
19 | }
20 | \examples{
21 | SingleCellDataWhitelist(bam.test.obj, "~/Desktop/My_Favourite_Whitelist.csv")
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/addData2Nodes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagNetworkContruction.R
 3 | \name{addData2Nodes}
 4 | \alias{addData2Nodes}
 5 | \title{Add Additional Information to the Nodes}
 6 | \usage{
 7 | addData2Nodes(celltag.obj, additional_data)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with nodes filled}
11 | 
12 | \item{additional_data}{A data frame with auxiliary information about the nodes (rownames = the node names)}
13 | }
14 | \value{
15 | A CellTag object with the attribute (nodes) modified.
16 | }
17 | \description{
18 | This function adds auxiliary information to the nodes. Such information can include cluster information, cell type information and so on. The information should be stored as a data frame when passed in to the function.
19 | }
20 | \examples{
21 | addData2Nodes(bam.test.obj, cluster.info)
22 | 
23 | }
24 | \keyword{CellTagging}
25 | \keyword{RNA-seq}
26 | \keyword{data}
27 | \keyword{single-cell}
28 | 


--------------------------------------------------------------------------------
/man/bam.process.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AuxiliaryFunctions.R
 3 | \name{bam.process}
 4 | \alias{bam.process}
 5 | \title{Bam File Process Function}
 6 | \usage{
 7 | bam.process(
 8 |   bam.file,
 9 |   pattern,
10 |   short.nt.before.tag,
11 |   short.nt.after.tag,
12 |   technique
13 | )
14 | }
15 | \arguments{
16 | \item{bam.file}{The input bam data directory}
17 | 
18 | \item{pattern}{The pattern to seek for}
19 | 
20 | \item{short.nt.before.tag}{A short sequence before the 8nt tag to help more specific identification}
21 | 
22 | \item{short.nt.after.tag}{A short sequence after the 8nt tag to help more specific identification}
23 | }
24 | \value{
25 | A data table containing cell barcode, CellTag and UMI information
26 | }
27 | \description{
28 | This function extracts CellTags from the bam sequencing file, provides cell barcode, umi and their corresponding celltag information.
29 | }
30 | \examples{
31 | bam.process("data.bam", "CCGGT[ATCG]{8}GAATTC", "CCGGT", "GAATTC", "10x")
32 | 
33 | }
34 | \keyword{CellTagging}
35 | \keyword{RNA-seq}
36 | \keyword{data}
37 | \keyword{single-cell}
38 | 


--------------------------------------------------------------------------------
/man/convertCellTagMatrix2LinkList.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagNetworkContruction.R
 3 | \name{convertCellTagMatrix2LinkList}
 4 | \alias{convertCellTagMatrix2LinkList}
 5 | \title{Convert CellTag Matrix to Link List}
 6 | \usage{
 7 | convertCellTagMatrix2LinkList(celltag.obj)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with all clone information filled}
11 | }
12 | \value{
13 | A CellTag object with the attribute (network.link.list) filled
14 | }
15 | \description{
16 | This function converts the CellTag Matrix to a link list, which is further used for network construction and visualization.
17 | }
18 | \examples{
19 | convertCellTagMatrix2LinkList(bam.test.obj)
20 | 
21 | }
22 | \keyword{CellTagging}
23 | \keyword{RNA-seq}
24 | \keyword{data}
25 | \keyword{single-cell}
26 | 


--------------------------------------------------------------------------------
/man/drawSubnet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagNetworkVisualiztion.R
 3 | \name{drawSubnet}
 4 | \alias{drawSubnet}
 5 | \title{Draw the Network}
 6 | \usage{
 7 | drawSubnet(celltag.obj, tag, overlay)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with link list and nodes filled}
11 | 
12 | \item{tag}{Which tags would you like to plot?}
13 | 
14 | \item{overlay}{What information would you like to overlay with the network? This should be one of the column names of the node information.}
15 | }
16 | \value{
17 | A CellTag object with the attribute (network) modified.
18 | }
19 | \description{
20 | This function generates a force-directed network based on the link list and nodes information.
21 | }
22 | \examples{
23 | drawSubnet(bam.test.obj, "CellTagV1_2", "Cluster")
24 | 
25 | }
26 | \keyword{CellTagging}
27 | \keyword{RNA-seq}
28 | \keyword{data}
29 | \keyword{single-cell}
30 | 


--------------------------------------------------------------------------------
/man/fastq.process.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AuxiliaryFunctions.R
 3 | \name{fastq.process}
 4 | \alias{fastq.process}
 5 | \title{Fastq Process Function}
 6 | \usage{
 7 | fastq.process(fastq.file, pattern, short.nt.before.tag, short.nt.after.tag)
 8 | }
 9 | \arguments{
10 | \item{fastq.file}{The input fastq data file path}
11 | 
12 | \item{pattern}{The pattern to seek for}
13 | 
14 | \item{short.nt.before.tag}{A short sequence before the 8nt tag to help more specific identification}
15 | 
16 | \item{short.nt.after.tag}{A short sequence after the 8nt tag to help more specific identification}
17 | }
18 | \value{
19 | A list containing the count table of CellTags. If requested to save fullTag counts, i.e. save.fullTag.counts = TRUE, returns a list of both the 8nt tag and full sequence counts. Otherwise, returns a list of 8nt tag counts.
20 | }
21 | \description{
22 | This function extracts CellTags from the raw fastq sequencing file, provides counts of each CellTag and sorts them in descending order.
23 | }
24 | \examples{
25 | fastq.process("data.fastq", "CCGGT[ATCG]{8}GAATTC", "CCGGT", "GAATTC")
26 | 
27 | }
28 | \keyword{CellTagging}
29 | \keyword{RNA-seq}
30 | \keyword{data}
31 | \keyword{single-cell}
32 | 


--------------------------------------------------------------------------------
/man/getNodesfromLinkList.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CellTagNetworkContruction.R
 3 | \name{getNodesfromLinkList}
 4 | \alias{getNodesfromLinkList}
 5 | \title{Get Nodes from Link List}
 6 | \usage{
 7 | getNodesfromLinkList(celltag.obj)
 8 | }
 9 | \arguments{
10 | \item{celltag.obj}{A CellTag object with link list filled}
11 | }
12 | \value{
13 | A CellTag object with the attribute (nodes) filled
14 | }
15 | \description{
16 | This function extracts the node information from the generated link list.
17 | }
18 | \examples{
19 | getNodesfromLinkList(bam.test.obj)
20 | 
21 | }
22 | \keyword{CellTagging}
23 | \keyword{RNA-seq}
24 | \keyword{data}
25 | \keyword{single-cell}
26 | 


--------------------------------------------------------------------------------