├── ClusterMap-manual.pdf
├── ClusterMap.Rproj
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── circos_map.R
    ├── cluster_map.R
    ├── master.R
    ├── pre_analysis.R
    ├── recolor.R
    └── separability.R
├── README.md
└── man
    ├── add_perc.Rd
    ├── circos_map.Rd
    ├── cluster_map.Rd
    ├── cluster_map_by_marker.Rd
    ├── inna_dist.Rd
    ├── inter_dist.Rd
    ├── make_comb_obj.Rd
    ├── make_single_obj.Rd
    ├── purity_cut.Rd
    ├── recolor_comb.Rd
    ├── recolor_s.Rd
    ├── separability.Rd
    ├── separability_by_group.Rd
    └── separability_pairwise.Rd


/ClusterMap-manual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgaoo/ClusterMap/b968c6671d208ed94da1f3e28a6af5eddb7373e4/ClusterMap-manual.pdf


--------------------------------------------------------------------------------
/ClusterMap.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: ClusterMap
 2 | Type: Package
 3 | Title: What the Package Does (Title Case)
 4 | Version: 0.1.0
 5 | Author: Xin Gao
 6 | Maintainer: Xin Gao <xig@stowers.org>
 7 | Description: ClusterMap is designed to analyze and compare two or more single cell expression datasets.
 8 | License: MIT + file LICENSE
 9 | Encoding: UTF-8
10 | LazyData: true
11 | RoxygenNote: 6.0.1
12 | Depends: R(>= 3.4.3)
13 | Imports: Seurat(>= 2.2.1),pheatmap(>= 1.0.10),ape(>= 5.1),circlize(>= 0.4.3)
14 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2018
2 | COPYRIGHT HOLDER: Xin Gao


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(add_perc)
 4 | export(circos_map)
 5 | export(cluster_map)
 6 | export(cluster_map_by_marker)
 7 | export(make_comb_obj)
 8 | export(make_single_obj)
 9 | export(purity_cut)
10 | export(recolor_comb)
11 | export(recolor_s)
12 | export(separability)
13 | export(separability_by_group)
14 | export(separability_pairwise)
15 | import(Seurat)
16 | import(ggplot2)
17 | import(ape)
18 | import(circlize)
19 | import(pheatmap)
20 | importFrom(grDevices,col2rgb)
21 | importFrom(grDevices,dev.off)
22 | importFrom(grDevices,hcl)
23 | importFrom(grDevices,pdf)
24 | importFrom(grDevices,png)
25 | importFrom(grDevices,rgb)
26 | importFrom(graphics,plot)
27 | importFrom(stats,dist)
28 | importFrom(stats,hclust)
29 | importFrom(stats,median)
30 | importFrom(stats,setNames)
31 | importFrom(utils,combn)
32 | importFrom(utils,read.csv)
33 | importFrom(utils,read.table)
34 | importFrom(utils,write.csv)
35 | 


--------------------------------------------------------------------------------
/R/circos_map.R:
--------------------------------------------------------------------------------
  1 | ##################### Circos plot #####################
  2 | 
  3 | #' circos_map
  4 | #'
  5 | #' Plot Circos plot for the matching results.
  6 | #'
  7 | #' @import circlize
  8 | #' @param mapRes
  9 | #' A dataframe of the output of function cluster_map_by_marker.
 10 | #' @param cell_num_list
 11 | #' A list of vector of cell numbers for each group and each sample.
 12 | #' @param output
 13 | #' The output directory to save the plot.
 14 | #' @param color_cord
 15 | #' A vector of colors for the cord of circos plot. DEFAULT is NULL. Pre defined internal color will be used.
 16 | #' @param color_sample
 17 | #' A vector of colors for the sample sectors in the circos plot. DEFAULT is NULL. Pre defined internal color will be used.
 18 | #' @param width
 19 | #' The width of circos plot by inch.
 20 | #' @param height
 21 | #' The height of circos plot by inch.
 22 | #' @return circos plot will be save.
 23 | #' @export
 24 | 
 25 | 
 26 | circos_map <- function(mapRes, cell_num_list, output, color_cord = NULL, color_sample = NULL, width=7, height=7)
 27 | { ## circos_map will call function plot_circos, gg_color_hue and makeTransparent.
 28 | 	message("circos plot")
 29 | 
 30 | 	sample_name <- names(cell_num_list)
 31 | 	if (all(sample_name %in% colnames(mapRes)) == FALSE)
 32 | 		stop("names(marker_file_list) or samples in mapRes doesn't match names(cell_num_list).")
 33 | 	## pairwise link
 34 | 	combs <- combn(1:length(sample_name), 2)
 35 | 	mapRes_samples <- mapRes[, sample_name]
 36 | 	colnames(mapRes_samples) <- NA
 37 | 	pair <- do.call(rbind, apply(combs, 2, function(x) mapRes_samples[, x]))
 38 | 	pair <- cbind(pair, mapRes[, c('regroup', 'similarity')])
 39 | 	pair <- pair[pair[, 1] != '' & pair[, 2] != '' & !is.na(pair[, 1]) & !is.na(pair[, 2]), ]
 40 | 	colnames(pair)[1:2] <- c('v1', 'v2')
 41 | 	## split merged clusters with ;
 42 | 	pair_list <- lapply(1:nrow(pair), function(i)
 43 | 	{
 44 | 		y <- pair[i, ]
 45 | 		v1 <- unlist(strsplit(as.vector(y$v1), ';'))
 46 | 		v2 <- unlist(strsplit(as.vector(y$v2), ';'))
 47 | 		v <- c()
 48 | 		for (x in v1) v <- rbind(v, cbind(x, v2))
 49 | 		v <- as.data.frame(v, stringsAsFactors = FALSE)
 50 | 		v$similarity <- y$similarity
 51 | 		v$regroup <- y$regroup
 52 | 		return(v)
 53 | 	})
 54 | 	pair <- do.call(rbind, pair_list)
 55 | 	colnames(pair)[1:2] <- c('v1', 'v2')
 56 | 	pair$v1 <- as.vector(pair$v1)
 57 | 	pair$v2 <- as.vector(pair$v2)
 58 | 	## cell number percentage
 59 | 	cell_perc_list <- lapply(cell_num_list, function(x) round(x/sum(x), 2))
 60 | 	if (is.null(color_sample)) col_sample <- rep(c("#f9865c", "#84e281", "#74d2f7", "#e083fc", "#ffbf66", "#6682ff"), 10) else col_sample <- color_sample
 61 | 	if (is.null(color_cord)) col_cord <- gg_color_hue(nrow(mapRes)) else col_cord <- color_cord
 62 | 	## circos plot
 63 | 	png(paste0(output, '.circos.png'), width=width/7*480, height=height/7*480)
 64 | 		plot_circos(cell_perc_list, pair, mapRes, col_cord, col_sample)
 65 | 	dev.off()
 66 | 	pdf(paste0(output, '.circos.pdf'), width=width, height=height)
 67 | 		plot_circos(cell_perc_list, pair, mapRes, col_cord, col_sample)
 68 | 	dev.off()
 69 | }
 70 | 
 71 | 
 72 | plot_circos <- function(cell_perc_list, pair, mapRes, col_cord, col_sample)
 73 | {
 74 | 	temp=cell_perc_list
 75 | 	names(temp)=paste0(names(temp),"__")
 76 | 	cell_perc <- unlist(temp)
 77 | 	names(cell_perc) <- sub('__.', '__', names(cell_perc))
 78 | 	cell_perc[cell_perc < 0.01] <- 0.01 ## too small to plot
 79 | 	fa <- factor(names(cell_perc), levels = unique(names(cell_perc)))
 80 | 	## initialize
 81 | 	gaps <- lapply(cell_perc_list, function(x) c(rep(1, length(x)-1), 8))
 82 | 	circos.par(gap.after = unlist(gaps), start.degree = -3, cell.padding = c(0, 0, 0, 0))
 83 | 	circos.initialize(fa, xlim = cbind(rep(0, length(cell_perc)), cell_perc))
 84 | 	## plot sample sectors
 85 | 	circos.track(ylim = c(0, 1), track.height = uh(5, "mm"), bg.border = NA)
 86 | 	for (n in names(cell_perc_list)) highlight.sector(paste0(n, "__", names(cell_perc_list[[n]])), track.index = 1,
 87 | 		col = col_sample[match(n, names(cell_perc_list))], padding = c(0, 0, 0.3, 0), text = n, cex = 1.5, text.col = "black", niceFacing = TRUE)
 88 | 	## plot sub-group sectors
 89 | 	circos.track(fa, ylim = c(0, 1), panel.fun = function(x, y)
 90 | 		{
 91 | 		circos.text(CELL_META$xcenter, CELL_META$ylim[1], sub('.*__', '', CELL_META$sector.index),
 92 | 		adj = c(0.3, -2), niceFacing = TRUE)
 93 | 		}, bg.col = 'black', bg.border = NA, track.height = 0.05, track.margin = c(0, 0.1)
 94 | 	)
 95 | 	## plot cord
 96 | 	for(i in 1:nrow(pair))
 97 | 	{
 98 | 		x <- pair[i, ]
 99 | 		col <- makeTransparent(col_cord[x$regroup], round(x$similarity*100))
100 | 		circos.link(x$v1, c(0, cell_perc[x$v1]), x$v2, c(0, cell_perc[x$v2]), col = col, border = NA, h.ratio = 0.5)
101 | 	}
102 | 	circos.clear()
103 | }
104 | 
105 | 
106 | gg_color_hue <- function(n)
107 | {
108 | 	hues = seq(15, 375, length = n + 1)
109 | 	hcl(h = hues, l = 65, c = 100)[1:n]
110 | }
111 | 
112 | 
113 | makeTransparent <- function(someColor, alpha = 100)
114 | {
115 |     col <- col2rgb(someColor)
116 |     rgb(red = col[1], green = col[2], blue = col[3], alpha = alpha, maxColorValue = 255)
117 |   }
118 | 
119 | ########## Add cell percentage to result table ########
120 | 
121 | #' circos_map
122 | #'
123 | #' Plot Circos plot for the matching results.
124 | #'
125 | #' @import circlize
126 | #' @param mapRes
127 | #' A dataframe of the output of function cluster_map_by_marker.
128 | #' @param cell_num_list
129 | #' A list of vector of cell numbers for each group and each sample.
130 | #' @return A dataframe of the matching results with cell percentage column added.
131 | #' @export
132 | 
133 | add_perc <- function(mapRes, cell_num_list)
134 | {
135 | 	sample_name <- names(cell_num_list)
136 | 	if (all(sample_name %in% colnames(mapRes)) == FALSE)
137 | 		stop("names(marker_file_list) or samples in mapRes doesn't match names(cell_num_list).")
138 | 	cell_perc_list <- lapply(cell_num_list, function(x) round(x/sum(x), 2))
139 | 	temp=cell_perc_list
140 | 	names(temp)=paste0(names(temp),"__")
141 | 	cell_perc <- unlist(temp)
142 | 	names(cell_perc) <- sub('__.', '__', names(cell_perc))
143 | 	## add to mapRes
144 | 	res_sub <- mapRes[, sample_name]
145 | 	res_perc <- apply(res_sub, 1:2, function(x)
146 | 	{
147 | 		paste(cell_perc[unlist(strsplit(x, ';'))], collapse = ';')
148 | 	})
149 | 	colnames(res_perc) <- paste0(colnames(res_perc), '_cell_perc')
150 | 	res <- cbind(mapRes, res_perc)
151 | 	return(res)
152 | }
153 | 


--------------------------------------------------------------------------------
/R/cluster_map.R:
--------------------------------------------------------------------------------
  1 | ################# Clustering and tree cut #############
  2 | 
  3 | #' cluster_map_by_marker
  4 | #'
  5 | #' Match groups by marker genes and decompose into new groups by purity cut.
  6 | #'
  7 | #' @import pheatmap
  8 | #' @import ape
  9 | #' @param marker_file_list
 10 | #' A list of csv files. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required.
 11 | #' @param cutoff
 12 | #' The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.
 13 | #' @param output
 14 | #' The output directory to save the matching results.
 15 | #' @return A dataframe of the matching results. Heatmap of marker genes and the dendrogram will be saved into files.
 16 | #' @export
 17 | 
 18 | cluster_map_by_marker <- function(marker_file_list, cutoff = 0.1, output)
 19 | { ## cluster_map_by_marker will call function purity_cut.
 20 | 	message("match sub-clusters")
 21 | 	## get marker table
 22 | 	if (is.null(names(marker_file_list)))
 23 | 	{
 24 | 		names(marker_file_list) <- paste0('s', 1:length(marker_file_list))
 25 | 		warning("The names(marker_file_list) is empty. Sample names are assigned as '", paste(names(marker_file_list), collapse = ' '), "'" )
 26 | 	}
 27 | 	markerList <- lapply(marker_file_list, read.csv, as.is = T)
 28 | 	markerList <- lapply(names(markerList), function(n)
 29 | 	{
 30 | 		x <- markerList[[n]]
 31 | 		x$cluster <- paste0(n, '__', x$cluster)
 32 | 		return(x)
 33 | 	})
 34 | 	markers <- do.call(rbind, markerList)
 35 | 	## clustering
 36 | 	da <- table(markers[,c("cluster","gene")])
 37 | 	d <- dist(da, method = 'binary')
 38 | 	hc <- hclust(d, method = 'average')
 39 | 	png(paste0(output, '.hcluster.png'), width = 480*length(marker_file_list)) ## save dendrogram png
 40 | 		plot(hc)
 41 | 	dev.off()
 42 | 	pdf(paste0(output, '.hcluster.pdf'), width = 7*length(marker_file_list)) ## save dendrogram pdf
 43 | 		plot(hc)
 44 | 	dev.off()
 45 | 	## save heatmap png
 46 | 	png(paste0(output, '.heatmap.hcluster.png'), height = 480*length(marker_file_list))
 47 | 	ph <- pheatmap(da, scale = 'none', clustering_method = 'average', color = c('#e3f8f9', '#fc2807'),
 48 | 		show_rownames = T, show_colnames = F, clustering_distance_rows = 'binary',
 49 | 		legend_breaks = c(0, 1), legend_labels = c(0, 1))
 50 | 	dev.off()
 51 | 	ph <- pheatmap(da, scale = 'none', clustering_method = 'average', color = c('#e3f8f9', '#fc2807'),
 52 | 		show_rownames = T, show_colnames = F, clustering_distance_rows = 'binary',
 53 | 		legend_breaks = c(0, 1), legend_labels = c(0, 1),
 54 | 		filename = paste0(output, '.heatmap.hcluster.pdf'), height = 7*length(marker_file_list))
 55 | 	## tree cut
 56 | 	res <- purity_cut(hc, cutoff)
 57 | 	##write.csv(res, file = paste0(output, '.cluster.map.csv'))
 58 | 	return(res)
 59 | }
 60 | 
 61 | #' purity_cut
 62 | #'
 63 | #' Cut hierarchical clustering dendrogram by edge length and purity of the nodes.
 64 | #'
 65 | #' @import ape
 66 | #' @param hcluster
 67 | #' A hclust object.
 68 | #' @param cutoff
 69 | #' The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.
 70 | #' @return A dataframe of the matching results.
 71 | #' @export
 72 | 
 73 | purity_cut <- function(hcluster, cutoff = 0.1)
 74 | {
 75 | 	hcp <- as.phylo(hcluster)
 76 | 	png('ape.tree.png')
 77 | 		plot(hcp, edge.width = 2, label.offset = 0.1)
 78 | 		nodelabels()
 79 | 		tiplabels()
 80 | 	dev.off()
 81 | 	## get edge info
 82 | 	tree <- cbind(hcp$edge, round(hcp$edge.length, 3))
 83 | 	colnames(tree) <- c('high_node', 'low_node', 'edge_length')
 84 | 	tree <- as.data.frame(tree, as.is = T)
 85 | 	tree$too_long <- (tree$edge_length > cutoff/2) ## edge_length is height/2
 86 | 	## get all the offspring of each node
 87 | 	offs_nodeList <- lapply(tree$low_node, function(x) {
 88 |     if (x <= length(hcp$tip.label)) return(hcp$tip.label[x]) else
 89 | 		return(extract.clade(hcp, x)$tip.label)})
 90 | 	names(offs_nodeList) <- tree$low_node
 91 | 	## get sample list that the nodes belong to
 92 | 	sampleList <- lapply(offs_nodeList, function(x) unique(sub('__.*$', '', x)))
 93 | 	tree$low_node_offs_sample <-  unlist(lapply(sampleList, length))
 94 | 	## check if keep the node
 95 | 	nodeList <- split(tree, tree$high_node)
 96 | 	n_single <- unlist(lapply(nodeList, function(x) sum(x$low_node_offs_sample == 1)))
 97 | 	n_too_long <- unlist(lapply(nodeList, function(x) sum(x$too_long)))
 98 | 	no_include <- unlist(lapply(nodeList, function(x)
 99 | 	{
100 | 		low_node <- as.character(x$low_node)
101 | 		sampleList_sub <- sampleList[low_node]
102 | 		include <- (all(sampleList_sub[[1]] %in% sampleList_sub[[2]]) | all(sampleList_sub[[2]] %in% sampleList_sub[[1]]))
103 | 		return(!include)
104 | 	}))
105 | 	keep <- (n_single == 2 | (n_single == 1 & n_too_long < 2)| (n_single == 0 & n_too_long == 0 & no_include))
106 | 	names(keep) <- names(nodeList)
107 | 	## check if any offspring node is cut
108 | 	node_merge_order <- rev(unique(tree$high_node))
109 | 	for (nd in node_merge_order)
110 | 	{
111 | 		nd <- as.character(nd)
112 | 		offs_nodes <- nodeList[[nd]]$low_node
113 | 		keep_sub <- keep[as.character(offs_nodes)]
114 | 		keep_sub[is.na(keep_sub)] <- TRUE
115 | 		tmp <- (sum(keep_sub) == 2 & keep[nd])
116 | 		keep[nd] <- tmp
117 | 	}
118 | 	if (all(keep == TRUE))
119 |         {
120 |                 message("No matched groups.")
121 |                 return(NULL)
122 |         }
123 | 
124 | 	## get rid of lower duplicated nodes
125 | 	res <- offs_nodeList[names(keep)[keep == T]]
126 | 	for (x in rev(names(res)))
127 | 	{
128 | 		if (any(res[[x]] %in% unlist(res[names(res) != x]))) res = res[names(res) != x]
129 | 	}
130 | 	## get singletons
131 | 	singles <- setdiff(hcp$tip.label, unlist(res))
132 | 	res <- c(res, as.list(singles))
133 | 	## reform output
134 | 	lev <- sort(unique(sub('__.*$', '', hcp$tip.label)))
135 | 	res_reform <- do.call(rbind, lapply(res, function(x)
136 | 	{
137 | 		tmp <- split(x, f = factor(sub('__.*$', '', x), levels = lev))
138 | 		unlist(lapply(tmp, paste, collapse = ';'))
139 | 	}))
140 | 	rownames(res_reform) <- 1:nrow(res_reform)
141 | 	res_reform[res_reform == ''] <- NA
142 | 	res_reform <- as.data.frame(res_reform, stringsAsFactors = FALSE)
143 | 	## add similarity
144 | 	res_reform$similarity <- round(1 - branching.times(hcp)[names(res)]*2, 2)
145 | 	## add group
146 | 	res_reform <- res_reform[order(-res_reform$similarity), ]
147 | 	res_reform$regroup <- 1:nrow(res_reform)
148 | 	rownames(res_reform) <-  res_reform$regroup
149 | 	return(res_reform)
150 | }
151 | 


--------------------------------------------------------------------------------
/R/master.R:
--------------------------------------------------------------------------------
  1 | #######################################################
  2 | ####################  ClusterMap  #####################
  3 | #######################################################
  4 | 
  5 | #################### Master function ##################
  6 | 
  7 | #' cluster_map
  8 | #'
  9 | #' A master function to perform the full workflow of ClusterMap.
 10 | #'
 11 | #' @import ggplot2
 12 | #' @import pheatmap
 13 | #' @import ape
 14 | #' @import Seurat
 15 | #' @import circlize
 16 | #'
 17 | #' @importFrom grDevices col2rgb dev.off hcl dev.off pdf png rgb
 18 | #' @importFrom graphics plot
 19 | #' @importFrom stats dist hclust median setNames
 20 | #' @importFrom utils combn read.csv read.table write.csv
 21 | #'
 22 | #' @param marker_file_list
 23 | #' A list of csv files with names. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required.
 24 | #' @param edge_cutoff
 25 | #' The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.
 26 | #' @param output
 27 | #' The output directory to save the matching results.
 28 | #' @param cell_num_list
 29 | #' A list of vector of cell numbers for each group and each sample.
 30 | #' @param single_obj_list
 31 | #' A list of Seurat object for each sample, with the same list names as the list names of marker_file_list.
 32 | #' @param comb_obj
 33 | #' A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim. The sample names should be the same as the list names of marker_file_list.
 34 | #' @param comb_delim
 35 | #' The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.
 36 | #' @param k
 37 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5.
 38 | #' @param reduction
 39 | #' Select the reduction of "tsne", "umap", or "pca" that used for the recolor image.
 40 | #' @return A dataframe of the matching results. Heatmap of marker genes, the corresponding dendrogram, circos plot and recolored t-SNE plots will be saved into files.
 41 | #' @export
 42 | 
 43 | 
 44 | cluster_map <- function(marker_file_list, edge_cutoff = 0.1, output, cell_num_list = NULL, single_obj_list = NULL, comb_obj = NULL, comb_delim = '-', k = 5, seurat_version = 3, reduction="tsne")
 45 | {
 46 | 	circos.clear()
 47 | 	## Version check for comb delim
 48 | 	if(seurat_version == 3){
 49 | 		comb_delim = '_'
 50 | 	}
 51 | 	## match sub groups
 52 | 	mapRes <- cluster_map_by_marker(marker_file_list, cutoff = edge_cutoff, output = output)
 53 | 
 54 | 	## pull out cell_num_list if single Seurat object list is provided.
 55 | 	if (!is.null(single_obj_list))
 56 | 	{
 57 | 		if (all(names(marker_file_list) == names(single_obj_list)) == FALSE | is.null(names(marker_file_list)) | is.null(names(single_obj_list)))
 58 | 			stop("names(marker_file_list) doesn't match names(single_obj_list).")
 59 | 
 60 |         if (single_obj_list[[1]]@version > 3){
 61 | 		cell_num_list <- lapply(single_obj_list,
 62 |                             function(obj){
 63 |                                     summary(Idents(obj))
 64 |                                          })
 65 |         }
 66 | 
 67 |         else if(single_obj_list[[1]]@version < 3){
 68 | 		cell_num_list <- lapply(single_obj_list, function(obj){
 69 |                                     summary(obj@ident)})
 70 | 	    }
 71 |     }
 72 | 	## make circos plot and add cell percentage if cell_num_list is provided or single Seurat object list is provided.
 73 | 	if (!is.null(cell_num_list))
 74 | 	{
 75 | 		if (all(names(marker_file_list) == names(cell_num_list)) == FALSE | is.null(names(marker_file_list)) | is.null(names(cell_num_list)))
 76 | 			stop("names(marker_file_list) doesn't match names(cell_num_list).")
 77 | 		circos_map(mapRes, cell_num_list, output)
 78 | 		mapRes <- add_perc(mapRes, cell_num_list)
 79 | 	}
 80 | 
 81 | 	## Recolor reduction plot for each sample if single Seurat object list is provided.
 82 | 	if (!is.null(single_obj_list))
 83 | 	{
 84 | 		sample_names <- names(single_obj_list)
 85 | 		new_group_list <- lapply(sample_names, function(n){
 86 | 			da <- structure(as.vector(mapRes[, n]), names = mapRes$regroup)
 87 | 			recolor_s(da, single_obj_list[[n]], n, reduction=reduction)
 88 | 		})
 89 | 		names(new_group_list) <- names(single_obj_list)
 90 | 
 91 | 		## Recolor reduction plot for combined sample and calculate separability if combined Seurat object is provided.
 92 | 		if (!is.null(comb_obj))
 93 | 		{
 94 | 			sample_label <- as.factor(sub(paste0(comb_delim, '.*'), '', rownames(comb_obj@meta.data)))
 95 | 			if (all(levels(sample_label) == names(new_group_list)) == FALSE)
 96 | 				{stop("Sample label in comb_obj doesn't match names(new_group_list) or names(single_obj_list).")
 97 | 				 message("Sample label in comb_obj: ")
 98 | 				 print(levels(sample_label))
 99 | 				 message("names(single_obj_list): ")
100 | 				 print(names(single_obj_list))
101 | 				 }
102 | 
103 | 			new_group_list$comb <- recolor_comb(comb_obj, new_group_list, output, comb_delim)
104 | 
105 | 			coord <- as.data.frame(comb_obj@reductions[[reduction]]@cell.embeddings)
106 | 			sepa <- separability_pairwise(coord, group = new_group_list$comb, sample_label, k = k)
107 | 			colnames(sepa) <- paste0(colnames(sepa), '_separability')
108 | 
109 | 			mapRes <- cbind(mapRes, sepa)
110 | 		}
111 | 		saveRDS(new_group_list, file = paste0(output, '.new.group.list.RDS'))
112 | 	}
113 | 	write.csv(mapRes, file = paste0(output, '.results.csv'))
114 | 	return(mapRes)
115 | }
116 | 


--------------------------------------------------------------------------------
/R/pre_analysis.R:
--------------------------------------------------------------------------------
 1 | ##################### Pre-analysis ####################
 2 | 
 3 | #' make_single_obj
 4 | #'
 5 | #' A warper of Seurat function to generate Seurat object and marker genes for single sample.
 6 | #' @import Seurat
 7 | #' @param data_dir
 8 | #' Directory with 10X genomics single cell data or full path of expression table.
 9 | #' @param is.10X
10 | #' The input data is 10X genomics format or not. DEFAULT is TRUE.
11 | #' @param output
12 | #' The output directory to save the marker genes.
13 | #' @return A Seurat object.
14 | #' @export
15 | 
16 | make_single_obj <- function(data_dir, is.10X = TRUE, output)
17 | {
18 | 
19 | 	if (is.10X) da <- Read10X(data_dir) else da <- read.table(data_dir, sep = "\t")
20 | 	obj <- CreateSeuratObject(raw.data = da, min.cells = 3, min.genes = 200)
21 | 	obj <- NormalizeData(obj)
22 | 	obj <- FindVariableGenes(obj)
23 | 	obj <- ScaleData(obj, vars.to.regress = c("nUMI"))
24 | 	obj <- RunPCA(obj, do.print = FALSE)
25 | 	obj <- RunTSNE(obj, dims.use = 1:10)
26 | 	obj <- FindClusters(obj, dims.use = 1:10, resolution = 0.6)
27 | 	markers <- FindAllMarkers(obj, only.pos = TRUE)
28 | 	write.csv(markers, file = paste0(output, '.markers.csv'))
29 | 	return(obj)
30 | }
31 | 
32 | #' make_comb_obj
33 | #'
34 | #' A warper of Seurat function to generate Seurat object for combined sample from single sample.
35 | #' @import Seurat
36 | #' @param data_dirList
37 | #' A list of directory with 10X genomics single cell data or full path of expression table.
38 | #' @param is.10X
39 | #' The input data is 10X genomics format or not. DEFAULT is TRUE.
40 | #' @param comb_delim
41 | #' The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.
42 | #' @return A Seurat object.
43 | #' @export
44 | 
45 | make_comb_obj <- function(data_dirList, is.10X = TRUE, comb_delim = '-')
46 | {
47 | 
48 | 	## rename cell names in each dataset and then combine
49 | 	n <- names(data_dirList)
50 | 	if (is.null(n)) {
51 | 		names(data_dirList) <- paste0('s', 1:length(data_dirList))
52 | 		warning("The names(data_dirList) is NULL. Sample names are assigned as '", paste(names(data_dirList), collapse = ' '), "'")
53 | 		}
54 | 	da_list <- lapply(n, function(x){
55 | 		if (is.10X) da <- Read10X(data_dirList[[x]]) else da <- read.table(data_dirList[[x]], sep = "\t")
56 | 		colnames(da) <- paste0(x, comb_delim, colnames(da))
57 | 		return(da)
58 | 		})
59 | 	comb_da <- do.call(cbind, da_list)
60 | 
61 | 	obj <- CreateSeuratObject(raw.data = comb_da, min.cells = 3, min.genes = 200)
62 | 	obj <- NormalizeData(obj)
63 | 	obj <- FindVariableGenes(obj)
64 | 	obj <- ScaleData(obj, vars.to.regress = c("nUMI"))
65 | 	obj <- RunPCA(obj, do.print = FALSE)
66 | 	obj <- RunTSNE(obj, dims.use = 1:10)
67 | 	return(obj)
68 | }
69 | 


--------------------------------------------------------------------------------
/R/recolor.R:
--------------------------------------------------------------------------------
  1 | ####################### Recolor #######################
  2 | 
  3 | #' recolor_s
  4 | #'
  5 | #' Recolor a single sample based on the matching results from cluster_map_by_marker.
  6 | #'
  7 | #' @import Seurat
  8 | #'
  9 | #' @param mapRes_sub
 10 | #' A vector of the column named by the sample in the output of cluster_map_by_marker, with the regroup column as the vector name.
 11 | #' @param obj
 12 | #' A Seurat object for the sample.
 13 | #' @param output
 14 | #' The output directory to save the plot.
 15 | #' @param color
 16 | #' A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used.
 17 | #' @param reduction
 18 | #' Select the reduction of "tsne", "umap", or "pca" that used for the recolor image.
 19 | #' @return A vector of new group labels with the cell name as the vector name.
 20 | #' @export
 21 | 
 22 | 
 23 | recolor_s <- function(mapRes_sub, obj, output, color = NULL, reduction="tsne")
 24 | {
 25 | 	## recolor_s will call function gg_colr_hue.
 26 | 	message(paste0("recolor ", output))
 27 | 
 28 | 	if (is.null(names(mapRes_sub))) stop("There is no name of mapRes_sub.")
 29 |     l <- lapply(strsplit(mapRes_sub, ';'), sub, pattern = '.*_', replacement = '')
 30 |     new_match <- setNames(unlist(l, use.names = F), rep(names(l), lengths(l)))
 31 |     if(obj@version > 3){
 32 | 	print("Using Seurat v3")
 33 |         new_group <- Idents(object = obj)
 34 |         levels(new_group) <- names(new_match)[match(levels(Idents(object=obj)), new_match)]
 35 |     }
 36 |     else{
 37 |         new_group <- obj@ident
 38 |         levels(new_group) <- names(new_match)[match(levels(obj@ident), new_match)]
 39 |     }
 40 |     new_group <- factor(new_group, levels = names(mapRes_sub))
 41 | 	## t-SNE plot
 42 |     obj@meta.data$regroup <- new_group
 43 | 	
 44 | 	if(obj@version < 3){	
 45 | 	if (is.null(color)) color <- gg_color_hue(length(levels(new_group)))
 46 |     png(paste0(output, '.recolor.tsne.png'))
 47 | 		TSNEPlot(obj, do.label = T, label.size = 8, group.by = 'regroup',
 48 | 			colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = toupper(output))
 49 |     dev.off()
 50 | 	pdf(paste0(output, '.recolor.tsne.pdf'))
 51 | 		TSNEPlot(obj, do.label = T, label.size = 8, group.by = 'regroup',
 52 | 			colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = toupper(output))
 53 |     dev.off()
 54 |     return(new_group)
 55 | }
 56 | 
 57 |    else{	
 58 | 	   print("Using Seurat v3")
 59 | 	   if (is.null(color)) color <- gg_color_hue(length(levels(new_group)))
 60 | 
 61 | 	p3 <- DimPlot(obj, label = T, label.size = 8, group.by = 'regroup',
 62 | 			reduction = reduction,
 63 | 			cols = color[sort(as.numeric(unique(new_group)))])
 64 |     		ggtitle(paste(toupper(output))) 
 65 | 		ggsave(plot = p3, filename = paste0(output, '.recolor.', reduction, '.png'))
 66 | 		ggsave(plot = p3, filename = paste0(output, '.recolor.', reduction, '.pdf'))
 67 | 
 68 |     return(new_group)
 69 | 	}	   
 70 | }
 71 | #' recolor_comb
 72 | #'
 73 | #' Recolor the combined sample based on the matching results from recolor_s.
 74 | #'
 75 | #' @import Seurat
 76 | #'
 77 | #' @param comb_obj
 78 | #' A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim.
 79 | #' @param new_group_list
 80 | #' A list of vectors of new group assignment outputted from recolor_s.
 81 | #' @param output
 82 | #' The output directory to save the plot.
 83 | #' @param comb_delim
 84 | #' The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.
 85 | #' @param color
 86 | #' A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used.
 87 | #' @param reduction
 88 | #' Select the reduction of "tsne", "umap", or "pca" that used for the recolor image.
 89 | #' @return A vector of new group labels with the cell name as the vector name.
 90 | #' @export
 91 | 
 92 | 
 93 | recolor_comb <- function(comb_obj, new_group_list, output, comb_delim = '-', color = NULL, reduction="tsne")
 94 | {
 95 | 	## Change comb_delim if v3 Seurat
 96 | 	if(comb_obj@version > 3){
 97 | 		comb_delim = '_'
 98 | 		print("Changed comb_delim to '_'")
 99 | 	}
100 | 
101 | 	## recolor_comb will call function gg_color_hue.
102 | 	message(paste0("recolor ", output))
103 | 
104 |     sample_label <- as.factor(sub(paste0(comb_delim, '.*'), '', colnames(GetAssayData(object = comb_obj))))
105 | 	message("levels(sample_label):")
106 | 	print(levels(sample_label))
107 | 	message("names(new_group_list):")
108 | 	print(names(new_group_list))
109 |     if (all(levels(sample_label) == names(new_group_list)) == FALSE)
110 | 		stop("Sample label in comb_obj doesn't match names(new_group_list) or names(single_obj_list).")
111 | 	names(sample_label) <- colnames(GetAssayData(object = comb_obj))
112 | 	## color by samples
113 | 	comb_obj$samples <- sample_label
114 | 
115 | 	if (comb_obj@version <3) {
116 | 		print("Seurat v2")
117 | 	png(paste0(output, '.color.by.sample.tsne.png'))
118 | 		TSNEPlot(comb_obj, do.label = F, label.size = 8, group.by = 'samples', plot.title = 'Colored by sample')
119 |     dev.off()
120 | 	pdf(paste0(output, '.color.by.sample.tsne.pdf'))
121 | 		TSNEPlot(comb_obj, do.label = F, label.size = 8, group.by = 'samples', plot.title = 'Colored by sample')
122 |     dev.off()
123 | 	## assign new group
124 |     new_group <- unlist(new_group_list)
125 | 	names(new_group) <- sub('\\.', comb_delim, names(new_group))
126 |     new_group <- factor(new_group, levels = levels(new_group_list[[1]]))
127 |     new_group <- new_group[match(colnames(GetAssayData(object = comb_obj)), as.vector(names(new_group)))] ## some cells may be filtered out in combined sample.
128 | 	if (is.na(new_group[1]))
129 | 		stop("Cell names in comb_obj don't match cell names in new_group_list or single_obj_list. Cell names in comb_obj should be sample name and cell name in individual sample connected by comb_delim.")
130 | 	names(new_group) <- colnames(GetAssayData(object = comb_obj))
131 | 	## color by new group
132 |     comb_obj <- AddMetaData(object = comb_obj, metadata = new_group, col.name = "regroup")
133 | 	if (is.null(color)) color  <-  gg_color_hue(length(levels(new_group)))
134 |     png(paste0(output, '.recolor.tsne.png'))
135 | 		TSNEPlot(comb_obj, do.label = T, label.size = 8, group.by = 'regroup',
136 | 			colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = 'Combined')
137 |     dev.off()
138 |     pdf(paste0(output, '.recolor.tsne.pdf'))
139 | 		TSNEPlot(comb_obj, do.label = T, label.size = 8, group.by = 'regroup',
140 | 			colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = 'Combined')
141 |     dev.off()
142 |     return(new_group)
143 | 		}
144 | else if(comb_obj@version > 3){
145 | 	print("Seurat v3 comb_obj")
146 | 		p5 <- DimPlot(comb_obj, label = F, label.size = 8, group.by = 'samples', 
147 | 			reduction = reduction) + 
148 | 			ggtitle('Colored by sample') 
149 |     			ggsave(plot = p5, filename = paste0(output, '.color.by.sample.', reduction, '.png'))
150 | 		p4 <- DimPlot(comb_obj, label = F, label.size = 8, group.by = 'samples',
151 | 			reduction = reduction) +
152 | 			ggtitle('Colored by sample')  
153 |     			ggsave(plot = p4, filename = paste0(output, '.color.by.sample.', reduction, '.pdf'))
154 | 	## assign new group
155 |     new_group <- unlist(new_group_list)
156 | 	names(new_group) <- sub('\\.', comb_delim, names(new_group))
157 |     new_group <- factor(new_group, levels = levels(new_group_list[[1]]))
158 |     new_group <- new_group[match(colnames(GetAssayData(object = comb_obj)), 
159 | 				 as.vector(names(new_group)))] ## some cells may be filtered out in combined sample.
160 | 	if (is.na(new_group[1]))
161 | 		stop("Cell names in comb_obj don't match cell names in new_group_list or single_obj_list. Cell names in comb_obj should be sample name and cell name in individual sample connected by comb_delim.")
162 | 	names(new_group) <- colnames(GetAssayData(object = comb_obj))
163 | 	## color by new group
164 |     comb_obj <- AddMetaData(object = comb_obj, metadata = new_group, col.name = "regroup")
165 | 	if (is.null(color)) color  <-  gg_color_hue(length(levels(new_group)))
166 | 		plot1 <- DimPlot(comb_obj, label = T, label.size = 8, 
167 | 			reduction = reduction, group.by = 'regroup',
168 | 			cols = color[sort(as.numeric(unique(new_group)))]) +
169 | 			ggtitle('Combined') 
170 |     			ggsave(plot = plot1, filename = paste0(output, '.recolor.', reduction, '.png'))
171 | 		plot2 <- DimPlot(comb_obj, label = T, label.size = 8,
172 | 			reduction = reduction, group.by = 'regroup',
173 | 			cols = color[sort(as.numeric(unique(new_group)))]) + 
174 | 			ggtitle('Combined') 
175 |     			ggsave(plot = plot2, filename = paste0(output, '.recolor.', reduction, '.pdf'))
176 |     return(new_group)
177 | }
178 | 		
179 | }
180 | 
181 | 


--------------------------------------------------------------------------------
/R/separability.R:
--------------------------------------------------------------------------------
  1 | ##################### Separability ####################
  2 | 
  3 | #' separability_pairwise
  4 | #'
  5 | #' Calculate separability for every sample pair. The higher the more separable.
  6 | #'
  7 | #' @param tsne_coord
  8 | #' A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.
  9 | #' @param group
 10 | #' A vector of group assignment for each cell, with the same order as the tsne_coord.
 11 | #' @param sample_label
 12 | #' A vector of sample labels for each cell, with the same order as the tsne_coord.
 13 | #' @param k
 14 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5.
 15 | #' @return A matrix of separability for each sample pair (column) and each group (row).
 16 | #' @export
 17 | 
 18 | 
 19 | separability_pairwise <- function(tsne_coord, group, sample_label, k = 5)
 20 | {## separability_pairwise will call function separability_by_group.
 21 | 	message("calculate separability for each sample pair")
 22 | 	sample_pair <- combn(levels(sample_label), 2)
 23 | 	colnames(sample_pair) <- paste0(sample_pair[1, ], '.vs.', sample_pair[2, ])
 24 | 	res <- apply(sample_pair, 2, function(x){
 25 | 		print(x)
 26 | 		ind <- sample_label %in% x
 27 | 		tsne_coord_sub <- tsne_coord[ind, ]
 28 | 		group_sub <- group[ind]
 29 | 		sample_label_sub <- sample_label[ind]
 30 | 		sepa <- separability_by_group(tsne_coord_sub, group_sub, sample_label_sub, k = k)
 31 | 		return(sepa)
 32 | 	})
 33 | 	return(res)
 34 | }
 35 | 
 36 | #' separability_by_group
 37 | #'
 38 | #' Calculate separability for each group in a pair of samples. Internal function called by separability_pairwise.
 39 | #'
 40 | #' @param tsne_coord
 41 | #' A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.
 42 | #' @param group
 43 | #' A vector of group assignment for each cell, with the same order as the tsne_coord.
 44 | #' @param sample_label
 45 | #' A vector of sample labels for each cell, with the same order as the tsne_coord.
 46 | #' @param k
 47 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5.
 48 | #' @return A vector of separability for each group.
 49 | #' @export
 50 | 
 51 | separability_by_group <- function(tsne_coord, group, sample_label, k = 5)
 52 | { ## separability_by_group will call function separability.
 53 |     p <- sapply(levels(group), function(i){
 54 |         m <- tsne_coord[group == i, ]
 55 | 		if (nrow(m) == 0) avg_diff <- NA else
 56 | 		{
 57 | 			class_label <- sample_label[group == i]
 58 | 			avg_diff <- separability(m, class_label, k = k)
 59 | 		}
 60 |     })
 61 |     p <- p/(max(tsne_coord[, 1]) - min(tsne_coord[, 1]))*100
 62 |     round(p, 2)
 63 | }
 64 | 
 65 | #' separability
 66 | #'
 67 | #' Calculate separability for labeled data. Internal function called by separability_by_group.
 68 | #'
 69 | #' @param coord
 70 | #' A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.
 71 | #' @param class_label
 72 | #' A vector of sample labels for each cell, with the same order as the tsne_coord.
 73 | #' @param k
 74 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5.
 75 | #' @return A single value of separability.
 76 | #' @export
 77 | 
 78 | separability <- function(coord, class_label, k = 5)
 79 | { ## separability will call function inter_dist and inna_dist.
 80 |     if(length(unique(class_label)) > 1)
 81 |     {
 82 |         d <- dist(coord)
 83 |         dls <- split(as.data.frame(as.matrix(d)), class_label)
 84 |         sample1_dls <- split(as.data.frame(t(dls[[1]])), class_label)
 85 |         d1 <- sample1_dls[[1]] ## distance matrix between cells in sample1
 86 |         d21 <- sample1_dls[[2]] ## distance matrix from cells in sample2 to cells in sample1
 87 |         sample2_dls <- split(as.data.frame(t(dls[[2]])), class_label)
 88 |         d2 <- sample2_dls[[2]] ## distance matrix between cells in sample2
 89 |         d12 <- sample2_dls[[1]] ## distance matrix from cells in sample1 to cells in sample2
 90 |         diff1 <- inter_dist(d12, k)-inna_dist(d1, k)
 91 |         diff2 <- inter_dist(d21, k)-inna_dist(d2, k)
 92 |         avg_diff <- mean(c(diff1, diff2)) ## average two samples
 93 |     } else avg_diff <- Inf
 94 |     return(avg_diff)
 95 | }
 96 | 
 97 | #' inter_dist
 98 | #'
 99 | #' Calculate the inter-sample distance. Internal function called by separability.
100 | #'
101 | #' @param x
102 | #' A distance matrix of cells in one sample to the cells in another sample.
103 | #' @param k
104 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5.
105 | #' @return A single value of distance.
106 | 
107 | inter_dist <- function(x, k)
108 | {
109 |     ## knn_mean <- function(x, k) {mean(sort(x)[1:k])}
110 |     ## mean(apply(x, 1, knn_mean, k))
111 |     knn_median <- function(x, k){ median(sort(x)[1:k]) } ## take median of K distance of a cell
112 |     median(apply(x, 1, knn_median, k)) ## take median of distance of all cells
113 | }
114 | 
115 | #' inna_dist
116 | #'
117 | #' Calculate the inna-sample distance. Internal function called by separability.
118 | #'
119 | #' @param x
120 | #' A distance matrix of cells in one sample to the cells in the same sample.
121 | #' @param k
122 | #' K-nearest neighbours used to calculate distance.
123 | #' @return A single value of distance.
124 | 
125 | inna_dist <- function(x, k)
126 | {
127 |     ## knn_mean <- function(x, k) {mean(sort(x)[2:k+1])}
128 |     ## mean(apply(x, 1, knn_mean, k))
129 |     knn_median <- function(x, k){ median(sort(x)[2:k+1]) } ## take median of K distance of a cell
130 |     median(apply(x, 1, knn_median, k)) ## take median of distance of all cells
131 | }
132 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ClusterMap
 2 | 
 3 | ClusterMap is an R package designed to analyze and compare two or more single cell expression datasets. 
 4 | 
 5 | Please cite:
 6 | Gao X, Hu D, Gogol M, Li H. ClusterMap: Compare multiple Single Cell RNA-Seq datasets across different experimental conditions. Bioinformatics. 2019. doi: 10.1093/bioinformatics/btz024.
 7 | 
 8 | ## Installation
 9 | R(>=3.4.3), Seurat(>= 2.2.1),pheatmap(>= 1.0.10),ape(>= 5.1),circlize(>= 0.4.3)
10 | ```r
11 | install_github('devtools')  
12 | library('devtools')  
13 | install_github("xgaoo/ClusterMap")
14 | library('ClusterMap')  
15 | ```
16 | 
17 | ## Tutorial
18 | 
19 | https://xgaoo.github.io/ClusterMap/ClusterMap.html
20 | 


--------------------------------------------------------------------------------
/man/add_perc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/circos_map.R
 3 | \name{add_perc}
 4 | \alias{add_perc}
 5 | \title{circos_map}
 6 | \usage{
 7 | add_perc(mapRes, cell_num_list)
 8 | }
 9 | \arguments{
10 | \item{mapRes}{A dataframe of the output of function cluster_map_by_marker.}
11 | 
12 | \item{cell_num_list}{A list of vector of cell numbers for each group and each sample.}
13 | }
14 | \value{
15 | A dataframe of the matching results with cell percentage column added.
16 | }
17 | \description{
18 | Plot Circos plot for the matching results.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/circos_map.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/circos_map.R
 3 | \name{circos_map}
 4 | \alias{circos_map}
 5 | \title{circos_map}
 6 | \usage{
 7 | circos_map(mapRes, cell_num_list, output, color_cord = NULL,
 8 |   color_sample = NULL)
 9 | }
10 | \arguments{
11 | \item{mapRes}{A dataframe of the output of function cluster_map_by_marker.}
12 | 
13 | \item{cell_num_list}{A list of vector of cell numbers for each group and each sample.}
14 | 
15 | \item{output}{The output directory to save the plot.}
16 | 
17 | \item{color_cord}{A vector of colors for the cord of circos plot. DEFAULT is NULL. Pre defined internal color will be used.}
18 | 
19 | \item{color_sample}{A vector of colors for the sample sectors in the circos plot. DEFAULT is NULL. Pre defined internal color will be used.}
20 | }
21 | \value{
22 | circos plot will be save.
23 | }
24 | \description{
25 | Plot Circos plot for the matching results.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/cluster_map.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/master.R
 3 | \name{cluster_map}
 4 | \alias{cluster_map}
 5 | \title{cluster_map}
 6 | \usage{
 7 | cluster_map(marker_file_list, edge_cutoff = 0.1, output,
 8 |   cell_num_list = NULL, single_obj_list = NULL, comb_obj = NULL,
 9 |   comb_delim = "-", k = 5)
10 | }
11 | \arguments{
12 | \item{marker_file_list}{A list of csv files with names. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required.}
13 | 
14 | \item{edge_cutoff}{The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.}
15 | 
16 | \item{output}{The output directory to save the matching results.}
17 | 
18 | \item{cell_num_list}{A list of vector of cell numbers for each group and each sample.}
19 | 
20 | \item{single_obj_list}{A list of Seurat object for each sample, with the same list names as the list names of marker_file_list.}
21 | 
22 | \item{comb_obj}{A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim. The sample names should be the same as the list names of marker_file_list.}
23 | 
24 | \item{comb_delim}{The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.}
25 | 
26 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.}
27 | }
28 | \value{
29 | A dataframe of the matching results. Heatmap of marker genes, the corresponding dendrogram, circos plot and recolored t-SNE plots will be saved into files.
30 | }
31 | \description{
32 | A master function to perform the full workflow of ClusterMap.
33 | }
34 | 


--------------------------------------------------------------------------------
/man/cluster_map_by_marker.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cluster_map.R
 3 | \name{cluster_map_by_marker}
 4 | \alias{cluster_map_by_marker}
 5 | \title{cluster_map_by_marker}
 6 | \usage{
 7 | cluster_map_by_marker(marker_file_list, cutoff = 0.1, output)
 8 | }
 9 | \arguments{
10 | \item{marker_file_list}{A list of csv files. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required.}
11 | 
12 | \item{cutoff}{The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.}
13 | 
14 | \item{output}{The output directory to save the matching results.}
15 | }
16 | \value{
17 | A dataframe of the matching results. Heatmap of marker genes and the dendrogram will be saved into files.
18 | }
19 | \description{
20 | Match groups by marker genes and decompose into new groups by purity cut.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/inna_dist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/separability.R
 3 | \name{inna_dist}
 4 | \alias{inna_dist}
 5 | \title{inna_dist}
 6 | \usage{
 7 | inna_dist(x, k)
 8 | }
 9 | \arguments{
10 | \item{x}{A distance matrix of cells in one sample to the cells in the same sample.}
11 | 
12 | \item{k}{K-nearest neighbours used to calculate distance.}
13 | }
14 | \value{
15 | A single value of distance.
16 | }
17 | \description{
18 | Calculate the inna-sample distance. Internal function called by separability.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/inter_dist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/separability.R
 3 | \name{inter_dist}
 4 | \alias{inter_dist}
 5 | \title{inter_dist}
 6 | \usage{
 7 | inter_dist(x, k)
 8 | }
 9 | \arguments{
10 | \item{x}{A distance matrix of cells in one sample to the cells in another sample.}
11 | 
12 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.}
13 | }
14 | \value{
15 | A single value of distance.
16 | }
17 | \description{
18 | Calculate the inter-sample distance. Internal function called by separability.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/make_comb_obj.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pre_analysis.R
 3 | \name{make_comb_obj}
 4 | \alias{make_comb_obj}
 5 | \title{make_comb_obj}
 6 | \usage{
 7 | make_comb_obj(data_dirList, is.10X = TRUE, comb_delim = "-")
 8 | }
 9 | \arguments{
10 | \item{data_dirList}{A list of directory with 10X genomics single cell data or full path of expression table.}
11 | 
12 | \item{is.10X}{The input data is 10X genomics format or not. DEFAULT is TRUE.}
13 | 
14 | \item{comb_delim}{The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.}
15 | }
16 | \value{
17 | A Seurat object.
18 | }
19 | \description{
20 | A warper of Seurat function to generate Seurat object for combined sample from single sample.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/make_single_obj.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pre_analysis.R
 3 | \name{make_single_obj}
 4 | \alias{make_single_obj}
 5 | \title{make_single_obj}
 6 | \usage{
 7 | make_single_obj(data_dir, is.10X = TRUE, output)
 8 | }
 9 | \arguments{
10 | \item{data_dir}{Directory with 10X genomics single cell data or full path of expression table.}
11 | 
12 | \item{is.10X}{The input data is 10X genomics format or not. DEFAULT is TRUE.}
13 | 
14 | \item{output}{The output directory to save the marker genes.}
15 | }
16 | \value{
17 | A Seurat object.
18 | }
19 | \description{
20 | A warper of Seurat function to generate Seurat object and marker genes for single sample.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/purity_cut.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cluster_map.R
 3 | \name{purity_cut}
 4 | \alias{purity_cut}
 5 | \title{purity_cut}
 6 | \usage{
 7 | purity_cut(hcluster, cutoff = 0.1)
 8 | }
 9 | \arguments{
10 | \item{hcluster}{A hclust object.}
11 | 
12 | \item{cutoff}{The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.}
13 | }
14 | \value{
15 | A dataframe of the matching results.
16 | }
17 | \description{
18 | Cut hierarchical clustering dendrogram by edge length and purity of the nodes.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/recolor_comb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/recolor.R
 3 | \name{recolor_comb}
 4 | \alias{recolor_comb}
 5 | \title{recolor_comb}
 6 | \usage{
 7 | recolor_comb(comb_obj, new_group_list, output, comb_delim = "-",
 8 |   color = NULL)
 9 | }
10 | \arguments{
11 | \item{comb_obj}{A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim.}
12 | 
13 | \item{new_group_list}{A list of vectors of new group assignment outputted from recolor_s.}
14 | 
15 | \item{output}{The output directory to save the plot.}
16 | 
17 | \item{comb_delim}{The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.}
18 | 
19 | \item{color}{A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used.}
20 | }
21 | \value{
22 | A vector of new group labels with the cell name as the vector name.
23 | }
24 | \description{
25 | Recolor the combined sample based on the matching results from recolor_s.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/recolor_s.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/recolor.R
 3 | \name{recolor_s}
 4 | \alias{recolor_s}
 5 | \title{recolor_s}
 6 | \usage{
 7 | recolor_s(mapRes_sub, obj, output, color = NULL)
 8 | }
 9 | \arguments{
10 | \item{mapRes_sub}{A vector of the column named by the sample in the output of cluster_map_by_marker, with the regroup column as the vector name.}
11 | 
12 | \item{obj}{A Seurat object for the sample.}
13 | 
14 | \item{output}{The output directory to save the plot.}
15 | 
16 | \item{color}{A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used.}
17 | }
18 | \value{
19 | A vector of new group labels with the cell name as the vector name.
20 | }
21 | \description{
22 | Recolor a single sample based on the matching results from cluster_map_by_marker.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/separability.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/separability.R
 3 | \name{separability}
 4 | \alias{separability}
 5 | \title{separability}
 6 | \usage{
 7 | separability(coord, class_label, k = 5)
 8 | }
 9 | \arguments{
10 | \item{coord}{A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.}
11 | 
12 | \item{class_label}{A vector of sample labels for each cell, with the same order as the tsne_coord.}
13 | 
14 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.}
15 | }
16 | \value{
17 | A single value of separability.
18 | }
19 | \description{
20 | Calculate separability for labeled data. Internal function called by separability_by_group.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/separability_by_group.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/separability.R
 3 | \name{separability_by_group}
 4 | \alias{separability_by_group}
 5 | \title{separability_by_group}
 6 | \usage{
 7 | separability_by_group(tsne_coord, group, sample_label, k = 5)
 8 | }
 9 | \arguments{
10 | \item{tsne_coord}{A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.}
11 | 
12 | \item{group}{A vector of group assignment for each cell, with the same order as the tsne_coord.}
13 | 
14 | \item{sample_label}{A vector of sample labels for each cell, with the same order as the tsne_coord.}
15 | 
16 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.}
17 | }
18 | \value{
19 | A vector of separability for each group.
20 | }
21 | \description{
22 | Calculate separability for each group in a pair of samples. Internal function called by separability_pairwise.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/separability_pairwise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/separability.R
 3 | \name{separability_pairwise}
 4 | \alias{separability_pairwise}
 5 | \title{separability_pairwise}
 6 | \usage{
 7 | separability_pairwise(tsne_coord, group, sample_label, k = 5)
 8 | }
 9 | \arguments{
10 | \item{tsne_coord}{A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.}
11 | 
12 | \item{group}{A vector of group assignment for each cell, with the same order as the tsne_coord.}
13 | 
14 | \item{sample_label}{A vector of sample labels for each cell, with the same order as the tsne_coord.}
15 | 
16 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.}
17 | }
18 | \value{
19 | A matrix of separability for each sample pair (column) and each group (row).
20 | }
21 | \description{
22 | Calculate separability for every sample pair. The higher the more separable.
23 | }
24 | 


--------------------------------------------------------------------------------