├── ClusterMap-manual.pdf ├── ClusterMap.Rproj ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── circos_map.R ├── cluster_map.R ├── master.R ├── pre_analysis.R ├── recolor.R └── separability.R ├── README.md └── man ├── add_perc.Rd ├── circos_map.Rd ├── cluster_map.Rd ├── cluster_map_by_marker.Rd ├── inna_dist.Rd ├── inter_dist.Rd ├── make_comb_obj.Rd ├── make_single_obj.Rd ├── purity_cut.Rd ├── recolor_comb.Rd ├── recolor_s.Rd ├── separability.Rd ├── separability_by_group.Rd └── separability_pairwise.Rd /ClusterMap-manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xgaoo/ClusterMap/b968c6671d208ed94da1f3e28a6af5eddb7373e4/ClusterMap-manual.pdf -------------------------------------------------------------------------------- /ClusterMap.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ClusterMap 2 | Type: Package 3 | Title: What the Package Does (Title Case) 4 | Version: 0.1.0 5 | Author: Xin Gao 6 | Maintainer: Xin Gao 7 | Description: ClusterMap is designed to analyze and compare two or more single cell expression datasets. 8 | License: MIT + file LICENSE 9 | Encoding: UTF-8 10 | LazyData: true 11 | RoxygenNote: 6.0.1 12 | Depends: R(>= 3.4.3) 13 | Imports: Seurat(>= 2.2.1),pheatmap(>= 1.0.10),ape(>= 5.1),circlize(>= 0.4.3) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Xin Gao -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(add_perc) 4 | export(circos_map) 5 | export(cluster_map) 6 | export(cluster_map_by_marker) 7 | export(make_comb_obj) 8 | export(make_single_obj) 9 | export(purity_cut) 10 | export(recolor_comb) 11 | export(recolor_s) 12 | export(separability) 13 | export(separability_by_group) 14 | export(separability_pairwise) 15 | import(Seurat) 16 | import(ggplot2) 17 | import(ape) 18 | import(circlize) 19 | import(pheatmap) 20 | importFrom(grDevices,col2rgb) 21 | importFrom(grDevices,dev.off) 22 | importFrom(grDevices,hcl) 23 | importFrom(grDevices,pdf) 24 | importFrom(grDevices,png) 25 | importFrom(grDevices,rgb) 26 | importFrom(graphics,plot) 27 | importFrom(stats,dist) 28 | importFrom(stats,hclust) 29 | importFrom(stats,median) 30 | importFrom(stats,setNames) 31 | importFrom(utils,combn) 32 | importFrom(utils,read.csv) 33 | importFrom(utils,read.table) 34 | importFrom(utils,write.csv) 35 | -------------------------------------------------------------------------------- /R/circos_map.R: -------------------------------------------------------------------------------- 1 | ##################### Circos plot ##################### 2 | 3 | #' circos_map 4 | #' 5 | #' Plot Circos plot for the matching results. 6 | #' 7 | #' @import circlize 8 | #' @param mapRes 9 | #' A dataframe of the output of function cluster_map_by_marker. 10 | #' @param cell_num_list 11 | #' A list of vector of cell numbers for each group and each sample. 12 | #' @param output 13 | #' The output directory to save the plot. 14 | #' @param color_cord 15 | #' A vector of colors for the cord of circos plot. DEFAULT is NULL. Pre defined internal color will be used. 16 | #' @param color_sample 17 | #' A vector of colors for the sample sectors in the circos plot. DEFAULT is NULL. Pre defined internal color will be used. 18 | #' @param width 19 | #' The width of circos plot by inch. 20 | #' @param height 21 | #' The height of circos plot by inch. 22 | #' @return circos plot will be save. 23 | #' @export 24 | 25 | 26 | circos_map <- function(mapRes, cell_num_list, output, color_cord = NULL, color_sample = NULL, width=7, height=7) 27 | { ## circos_map will call function plot_circos, gg_color_hue and makeTransparent. 28 | message("circos plot") 29 | 30 | sample_name <- names(cell_num_list) 31 | if (all(sample_name %in% colnames(mapRes)) == FALSE) 32 | stop("names(marker_file_list) or samples in mapRes doesn't match names(cell_num_list).") 33 | ## pairwise link 34 | combs <- combn(1:length(sample_name), 2) 35 | mapRes_samples <- mapRes[, sample_name] 36 | colnames(mapRes_samples) <- NA 37 | pair <- do.call(rbind, apply(combs, 2, function(x) mapRes_samples[, x])) 38 | pair <- cbind(pair, mapRes[, c('regroup', 'similarity')]) 39 | pair <- pair[pair[, 1] != '' & pair[, 2] != '' & !is.na(pair[, 1]) & !is.na(pair[, 2]), ] 40 | colnames(pair)[1:2] <- c('v1', 'v2') 41 | ## split merged clusters with ; 42 | pair_list <- lapply(1:nrow(pair), function(i) 43 | { 44 | y <- pair[i, ] 45 | v1 <- unlist(strsplit(as.vector(y$v1), ';')) 46 | v2 <- unlist(strsplit(as.vector(y$v2), ';')) 47 | v <- c() 48 | for (x in v1) v <- rbind(v, cbind(x, v2)) 49 | v <- as.data.frame(v, stringsAsFactors = FALSE) 50 | v$similarity <- y$similarity 51 | v$regroup <- y$regroup 52 | return(v) 53 | }) 54 | pair <- do.call(rbind, pair_list) 55 | colnames(pair)[1:2] <- c('v1', 'v2') 56 | pair$v1 <- as.vector(pair$v1) 57 | pair$v2 <- as.vector(pair$v2) 58 | ## cell number percentage 59 | cell_perc_list <- lapply(cell_num_list, function(x) round(x/sum(x), 2)) 60 | if (is.null(color_sample)) col_sample <- rep(c("#f9865c", "#84e281", "#74d2f7", "#e083fc", "#ffbf66", "#6682ff"), 10) else col_sample <- color_sample 61 | if (is.null(color_cord)) col_cord <- gg_color_hue(nrow(mapRes)) else col_cord <- color_cord 62 | ## circos plot 63 | png(paste0(output, '.circos.png'), width=width/7*480, height=height/7*480) 64 | plot_circos(cell_perc_list, pair, mapRes, col_cord, col_sample) 65 | dev.off() 66 | pdf(paste0(output, '.circos.pdf'), width=width, height=height) 67 | plot_circos(cell_perc_list, pair, mapRes, col_cord, col_sample) 68 | dev.off() 69 | } 70 | 71 | 72 | plot_circos <- function(cell_perc_list, pair, mapRes, col_cord, col_sample) 73 | { 74 | temp=cell_perc_list 75 | names(temp)=paste0(names(temp),"__") 76 | cell_perc <- unlist(temp) 77 | names(cell_perc) <- sub('__.', '__', names(cell_perc)) 78 | cell_perc[cell_perc < 0.01] <- 0.01 ## too small to plot 79 | fa <- factor(names(cell_perc), levels = unique(names(cell_perc))) 80 | ## initialize 81 | gaps <- lapply(cell_perc_list, function(x) c(rep(1, length(x)-1), 8)) 82 | circos.par(gap.after = unlist(gaps), start.degree = -3, cell.padding = c(0, 0, 0, 0)) 83 | circos.initialize(fa, xlim = cbind(rep(0, length(cell_perc)), cell_perc)) 84 | ## plot sample sectors 85 | circos.track(ylim = c(0, 1), track.height = uh(5, "mm"), bg.border = NA) 86 | for (n in names(cell_perc_list)) highlight.sector(paste0(n, "__", names(cell_perc_list[[n]])), track.index = 1, 87 | col = col_sample[match(n, names(cell_perc_list))], padding = c(0, 0, 0.3, 0), text = n, cex = 1.5, text.col = "black", niceFacing = TRUE) 88 | ## plot sub-group sectors 89 | circos.track(fa, ylim = c(0, 1), panel.fun = function(x, y) 90 | { 91 | circos.text(CELL_META$xcenter, CELL_META$ylim[1], sub('.*__', '', CELL_META$sector.index), 92 | adj = c(0.3, -2), niceFacing = TRUE) 93 | }, bg.col = 'black', bg.border = NA, track.height = 0.05, track.margin = c(0, 0.1) 94 | ) 95 | ## plot cord 96 | for(i in 1:nrow(pair)) 97 | { 98 | x <- pair[i, ] 99 | col <- makeTransparent(col_cord[x$regroup], round(x$similarity*100)) 100 | circos.link(x$v1, c(0, cell_perc[x$v1]), x$v2, c(0, cell_perc[x$v2]), col = col, border = NA, h.ratio = 0.5) 101 | } 102 | circos.clear() 103 | } 104 | 105 | 106 | gg_color_hue <- function(n) 107 | { 108 | hues = seq(15, 375, length = n + 1) 109 | hcl(h = hues, l = 65, c = 100)[1:n] 110 | } 111 | 112 | 113 | makeTransparent <- function(someColor, alpha = 100) 114 | { 115 | col <- col2rgb(someColor) 116 | rgb(red = col[1], green = col[2], blue = col[3], alpha = alpha, maxColorValue = 255) 117 | } 118 | 119 | ########## Add cell percentage to result table ######## 120 | 121 | #' circos_map 122 | #' 123 | #' Plot Circos plot for the matching results. 124 | #' 125 | #' @import circlize 126 | #' @param mapRes 127 | #' A dataframe of the output of function cluster_map_by_marker. 128 | #' @param cell_num_list 129 | #' A list of vector of cell numbers for each group and each sample. 130 | #' @return A dataframe of the matching results with cell percentage column added. 131 | #' @export 132 | 133 | add_perc <- function(mapRes, cell_num_list) 134 | { 135 | sample_name <- names(cell_num_list) 136 | if (all(sample_name %in% colnames(mapRes)) == FALSE) 137 | stop("names(marker_file_list) or samples in mapRes doesn't match names(cell_num_list).") 138 | cell_perc_list <- lapply(cell_num_list, function(x) round(x/sum(x), 2)) 139 | temp=cell_perc_list 140 | names(temp)=paste0(names(temp),"__") 141 | cell_perc <- unlist(temp) 142 | names(cell_perc) <- sub('__.', '__', names(cell_perc)) 143 | ## add to mapRes 144 | res_sub <- mapRes[, sample_name] 145 | res_perc <- apply(res_sub, 1:2, function(x) 146 | { 147 | paste(cell_perc[unlist(strsplit(x, ';'))], collapse = ';') 148 | }) 149 | colnames(res_perc) <- paste0(colnames(res_perc), '_cell_perc') 150 | res <- cbind(mapRes, res_perc) 151 | return(res) 152 | } 153 | -------------------------------------------------------------------------------- /R/cluster_map.R: -------------------------------------------------------------------------------- 1 | ################# Clustering and tree cut ############# 2 | 3 | #' cluster_map_by_marker 4 | #' 5 | #' Match groups by marker genes and decompose into new groups by purity cut. 6 | #' 7 | #' @import pheatmap 8 | #' @import ape 9 | #' @param marker_file_list 10 | #' A list of csv files. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required. 11 | #' @param cutoff 12 | #' The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1. 13 | #' @param output 14 | #' The output directory to save the matching results. 15 | #' @return A dataframe of the matching results. Heatmap of marker genes and the dendrogram will be saved into files. 16 | #' @export 17 | 18 | cluster_map_by_marker <- function(marker_file_list, cutoff = 0.1, output) 19 | { ## cluster_map_by_marker will call function purity_cut. 20 | message("match sub-clusters") 21 | ## get marker table 22 | if (is.null(names(marker_file_list))) 23 | { 24 | names(marker_file_list) <- paste0('s', 1:length(marker_file_list)) 25 | warning("The names(marker_file_list) is empty. Sample names are assigned as '", paste(names(marker_file_list), collapse = ' '), "'" ) 26 | } 27 | markerList <- lapply(marker_file_list, read.csv, as.is = T) 28 | markerList <- lapply(names(markerList), function(n) 29 | { 30 | x <- markerList[[n]] 31 | x$cluster <- paste0(n, '__', x$cluster) 32 | return(x) 33 | }) 34 | markers <- do.call(rbind, markerList) 35 | ## clustering 36 | da <- table(markers[,c("cluster","gene")]) 37 | d <- dist(da, method = 'binary') 38 | hc <- hclust(d, method = 'average') 39 | png(paste0(output, '.hcluster.png'), width = 480*length(marker_file_list)) ## save dendrogram png 40 | plot(hc) 41 | dev.off() 42 | pdf(paste0(output, '.hcluster.pdf'), width = 7*length(marker_file_list)) ## save dendrogram pdf 43 | plot(hc) 44 | dev.off() 45 | ## save heatmap png 46 | png(paste0(output, '.heatmap.hcluster.png'), height = 480*length(marker_file_list)) 47 | ph <- pheatmap(da, scale = 'none', clustering_method = 'average', color = c('#e3f8f9', '#fc2807'), 48 | show_rownames = T, show_colnames = F, clustering_distance_rows = 'binary', 49 | legend_breaks = c(0, 1), legend_labels = c(0, 1)) 50 | dev.off() 51 | ph <- pheatmap(da, scale = 'none', clustering_method = 'average', color = c('#e3f8f9', '#fc2807'), 52 | show_rownames = T, show_colnames = F, clustering_distance_rows = 'binary', 53 | legend_breaks = c(0, 1), legend_labels = c(0, 1), 54 | filename = paste0(output, '.heatmap.hcluster.pdf'), height = 7*length(marker_file_list)) 55 | ## tree cut 56 | res <- purity_cut(hc, cutoff) 57 | ##write.csv(res, file = paste0(output, '.cluster.map.csv')) 58 | return(res) 59 | } 60 | 61 | #' purity_cut 62 | #' 63 | #' Cut hierarchical clustering dendrogram by edge length and purity of the nodes. 64 | #' 65 | #' @import ape 66 | #' @param hcluster 67 | #' A hclust object. 68 | #' @param cutoff 69 | #' The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1. 70 | #' @return A dataframe of the matching results. 71 | #' @export 72 | 73 | purity_cut <- function(hcluster, cutoff = 0.1) 74 | { 75 | hcp <- as.phylo(hcluster) 76 | png('ape.tree.png') 77 | plot(hcp, edge.width = 2, label.offset = 0.1) 78 | nodelabels() 79 | tiplabels() 80 | dev.off() 81 | ## get edge info 82 | tree <- cbind(hcp$edge, round(hcp$edge.length, 3)) 83 | colnames(tree) <- c('high_node', 'low_node', 'edge_length') 84 | tree <- as.data.frame(tree, as.is = T) 85 | tree$too_long <- (tree$edge_length > cutoff/2) ## edge_length is height/2 86 | ## get all the offspring of each node 87 | offs_nodeList <- lapply(tree$low_node, function(x) { 88 | if (x <= length(hcp$tip.label)) return(hcp$tip.label[x]) else 89 | return(extract.clade(hcp, x)$tip.label)}) 90 | names(offs_nodeList) <- tree$low_node 91 | ## get sample list that the nodes belong to 92 | sampleList <- lapply(offs_nodeList, function(x) unique(sub('__.*$', '', x))) 93 | tree$low_node_offs_sample <- unlist(lapply(sampleList, length)) 94 | ## check if keep the node 95 | nodeList <- split(tree, tree$high_node) 96 | n_single <- unlist(lapply(nodeList, function(x) sum(x$low_node_offs_sample == 1))) 97 | n_too_long <- unlist(lapply(nodeList, function(x) sum(x$too_long))) 98 | no_include <- unlist(lapply(nodeList, function(x) 99 | { 100 | low_node <- as.character(x$low_node) 101 | sampleList_sub <- sampleList[low_node] 102 | include <- (all(sampleList_sub[[1]] %in% sampleList_sub[[2]]) | all(sampleList_sub[[2]] %in% sampleList_sub[[1]])) 103 | return(!include) 104 | })) 105 | keep <- (n_single == 2 | (n_single == 1 & n_too_long < 2)| (n_single == 0 & n_too_long == 0 & no_include)) 106 | names(keep) <- names(nodeList) 107 | ## check if any offspring node is cut 108 | node_merge_order <- rev(unique(tree$high_node)) 109 | for (nd in node_merge_order) 110 | { 111 | nd <- as.character(nd) 112 | offs_nodes <- nodeList[[nd]]$low_node 113 | keep_sub <- keep[as.character(offs_nodes)] 114 | keep_sub[is.na(keep_sub)] <- TRUE 115 | tmp <- (sum(keep_sub) == 2 & keep[nd]) 116 | keep[nd] <- tmp 117 | } 118 | if (all(keep == TRUE)) 119 | { 120 | message("No matched groups.") 121 | return(NULL) 122 | } 123 | 124 | ## get rid of lower duplicated nodes 125 | res <- offs_nodeList[names(keep)[keep == T]] 126 | for (x in rev(names(res))) 127 | { 128 | if (any(res[[x]] %in% unlist(res[names(res) != x]))) res = res[names(res) != x] 129 | } 130 | ## get singletons 131 | singles <- setdiff(hcp$tip.label, unlist(res)) 132 | res <- c(res, as.list(singles)) 133 | ## reform output 134 | lev <- sort(unique(sub('__.*$', '', hcp$tip.label))) 135 | res_reform <- do.call(rbind, lapply(res, function(x) 136 | { 137 | tmp <- split(x, f = factor(sub('__.*$', '', x), levels = lev)) 138 | unlist(lapply(tmp, paste, collapse = ';')) 139 | })) 140 | rownames(res_reform) <- 1:nrow(res_reform) 141 | res_reform[res_reform == ''] <- NA 142 | res_reform <- as.data.frame(res_reform, stringsAsFactors = FALSE) 143 | ## add similarity 144 | res_reform$similarity <- round(1 - branching.times(hcp)[names(res)]*2, 2) 145 | ## add group 146 | res_reform <- res_reform[order(-res_reform$similarity), ] 147 | res_reform$regroup <- 1:nrow(res_reform) 148 | rownames(res_reform) <- res_reform$regroup 149 | return(res_reform) 150 | } 151 | -------------------------------------------------------------------------------- /R/master.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | #################### ClusterMap ##################### 3 | ####################################################### 4 | 5 | #################### Master function ################## 6 | 7 | #' cluster_map 8 | #' 9 | #' A master function to perform the full workflow of ClusterMap. 10 | #' 11 | #' @import ggplot2 12 | #' @import pheatmap 13 | #' @import ape 14 | #' @import Seurat 15 | #' @import circlize 16 | #' 17 | #' @importFrom grDevices col2rgb dev.off hcl dev.off pdf png rgb 18 | #' @importFrom graphics plot 19 | #' @importFrom stats dist hclust median setNames 20 | #' @importFrom utils combn read.csv read.table write.csv 21 | #' 22 | #' @param marker_file_list 23 | #' A list of csv files with names. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required. 24 | #' @param edge_cutoff 25 | #' The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1. 26 | #' @param output 27 | #' The output directory to save the matching results. 28 | #' @param cell_num_list 29 | #' A list of vector of cell numbers for each group and each sample. 30 | #' @param single_obj_list 31 | #' A list of Seurat object for each sample, with the same list names as the list names of marker_file_list. 32 | #' @param comb_obj 33 | #' A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim. The sample names should be the same as the list names of marker_file_list. 34 | #' @param comb_delim 35 | #' The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'. 36 | #' @param k 37 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5. 38 | #' @param reduction 39 | #' Select the reduction of "tsne", "umap", or "pca" that used for the recolor image. 40 | #' @return A dataframe of the matching results. Heatmap of marker genes, the corresponding dendrogram, circos plot and recolored t-SNE plots will be saved into files. 41 | #' @export 42 | 43 | 44 | cluster_map <- function(marker_file_list, edge_cutoff = 0.1, output, cell_num_list = NULL, single_obj_list = NULL, comb_obj = NULL, comb_delim = '-', k = 5, seurat_version = 3, reduction="tsne") 45 | { 46 | circos.clear() 47 | ## Version check for comb delim 48 | if(seurat_version == 3){ 49 | comb_delim = '_' 50 | } 51 | ## match sub groups 52 | mapRes <- cluster_map_by_marker(marker_file_list, cutoff = edge_cutoff, output = output) 53 | 54 | ## pull out cell_num_list if single Seurat object list is provided. 55 | if (!is.null(single_obj_list)) 56 | { 57 | if (all(names(marker_file_list) == names(single_obj_list)) == FALSE | is.null(names(marker_file_list)) | is.null(names(single_obj_list))) 58 | stop("names(marker_file_list) doesn't match names(single_obj_list).") 59 | 60 | if (single_obj_list[[1]]@version > 3){ 61 | cell_num_list <- lapply(single_obj_list, 62 | function(obj){ 63 | summary(Idents(obj)) 64 | }) 65 | } 66 | 67 | else if(single_obj_list[[1]]@version < 3){ 68 | cell_num_list <- lapply(single_obj_list, function(obj){ 69 | summary(obj@ident)}) 70 | } 71 | } 72 | ## make circos plot and add cell percentage if cell_num_list is provided or single Seurat object list is provided. 73 | if (!is.null(cell_num_list)) 74 | { 75 | if (all(names(marker_file_list) == names(cell_num_list)) == FALSE | is.null(names(marker_file_list)) | is.null(names(cell_num_list))) 76 | stop("names(marker_file_list) doesn't match names(cell_num_list).") 77 | circos_map(mapRes, cell_num_list, output) 78 | mapRes <- add_perc(mapRes, cell_num_list) 79 | } 80 | 81 | ## Recolor reduction plot for each sample if single Seurat object list is provided. 82 | if (!is.null(single_obj_list)) 83 | { 84 | sample_names <- names(single_obj_list) 85 | new_group_list <- lapply(sample_names, function(n){ 86 | da <- structure(as.vector(mapRes[, n]), names = mapRes$regroup) 87 | recolor_s(da, single_obj_list[[n]], n, reduction=reduction) 88 | }) 89 | names(new_group_list) <- names(single_obj_list) 90 | 91 | ## Recolor reduction plot for combined sample and calculate separability if combined Seurat object is provided. 92 | if (!is.null(comb_obj)) 93 | { 94 | sample_label <- as.factor(sub(paste0(comb_delim, '.*'), '', rownames(comb_obj@meta.data))) 95 | if (all(levels(sample_label) == names(new_group_list)) == FALSE) 96 | {stop("Sample label in comb_obj doesn't match names(new_group_list) or names(single_obj_list).") 97 | message("Sample label in comb_obj: ") 98 | print(levels(sample_label)) 99 | message("names(single_obj_list): ") 100 | print(names(single_obj_list)) 101 | } 102 | 103 | new_group_list$comb <- recolor_comb(comb_obj, new_group_list, output, comb_delim) 104 | 105 | coord <- as.data.frame(comb_obj@reductions[[reduction]]@cell.embeddings) 106 | sepa <- separability_pairwise(coord, group = new_group_list$comb, sample_label, k = k) 107 | colnames(sepa) <- paste0(colnames(sepa), '_separability') 108 | 109 | mapRes <- cbind(mapRes, sepa) 110 | } 111 | saveRDS(new_group_list, file = paste0(output, '.new.group.list.RDS')) 112 | } 113 | write.csv(mapRes, file = paste0(output, '.results.csv')) 114 | return(mapRes) 115 | } 116 | -------------------------------------------------------------------------------- /R/pre_analysis.R: -------------------------------------------------------------------------------- 1 | ##################### Pre-analysis #################### 2 | 3 | #' make_single_obj 4 | #' 5 | #' A warper of Seurat function to generate Seurat object and marker genes for single sample. 6 | #' @import Seurat 7 | #' @param data_dir 8 | #' Directory with 10X genomics single cell data or full path of expression table. 9 | #' @param is.10X 10 | #' The input data is 10X genomics format or not. DEFAULT is TRUE. 11 | #' @param output 12 | #' The output directory to save the marker genes. 13 | #' @return A Seurat object. 14 | #' @export 15 | 16 | make_single_obj <- function(data_dir, is.10X = TRUE, output) 17 | { 18 | 19 | if (is.10X) da <- Read10X(data_dir) else da <- read.table(data_dir, sep = "\t") 20 | obj <- CreateSeuratObject(raw.data = da, min.cells = 3, min.genes = 200) 21 | obj <- NormalizeData(obj) 22 | obj <- FindVariableGenes(obj) 23 | obj <- ScaleData(obj, vars.to.regress = c("nUMI")) 24 | obj <- RunPCA(obj, do.print = FALSE) 25 | obj <- RunTSNE(obj, dims.use = 1:10) 26 | obj <- FindClusters(obj, dims.use = 1:10, resolution = 0.6) 27 | markers <- FindAllMarkers(obj, only.pos = TRUE) 28 | write.csv(markers, file = paste0(output, '.markers.csv')) 29 | return(obj) 30 | } 31 | 32 | #' make_comb_obj 33 | #' 34 | #' A warper of Seurat function to generate Seurat object for combined sample from single sample. 35 | #' @import Seurat 36 | #' @param data_dirList 37 | #' A list of directory with 10X genomics single cell data or full path of expression table. 38 | #' @param is.10X 39 | #' The input data is 10X genomics format or not. DEFAULT is TRUE. 40 | #' @param comb_delim 41 | #' The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'. 42 | #' @return A Seurat object. 43 | #' @export 44 | 45 | make_comb_obj <- function(data_dirList, is.10X = TRUE, comb_delim = '-') 46 | { 47 | 48 | ## rename cell names in each dataset and then combine 49 | n <- names(data_dirList) 50 | if (is.null(n)) { 51 | names(data_dirList) <- paste0('s', 1:length(data_dirList)) 52 | warning("The names(data_dirList) is NULL. Sample names are assigned as '", paste(names(data_dirList), collapse = ' '), "'") 53 | } 54 | da_list <- lapply(n, function(x){ 55 | if (is.10X) da <- Read10X(data_dirList[[x]]) else da <- read.table(data_dirList[[x]], sep = "\t") 56 | colnames(da) <- paste0(x, comb_delim, colnames(da)) 57 | return(da) 58 | }) 59 | comb_da <- do.call(cbind, da_list) 60 | 61 | obj <- CreateSeuratObject(raw.data = comb_da, min.cells = 3, min.genes = 200) 62 | obj <- NormalizeData(obj) 63 | obj <- FindVariableGenes(obj) 64 | obj <- ScaleData(obj, vars.to.regress = c("nUMI")) 65 | obj <- RunPCA(obj, do.print = FALSE) 66 | obj <- RunTSNE(obj, dims.use = 1:10) 67 | return(obj) 68 | } 69 | -------------------------------------------------------------------------------- /R/recolor.R: -------------------------------------------------------------------------------- 1 | ####################### Recolor ####################### 2 | 3 | #' recolor_s 4 | #' 5 | #' Recolor a single sample based on the matching results from cluster_map_by_marker. 6 | #' 7 | #' @import Seurat 8 | #' 9 | #' @param mapRes_sub 10 | #' A vector of the column named by the sample in the output of cluster_map_by_marker, with the regroup column as the vector name. 11 | #' @param obj 12 | #' A Seurat object for the sample. 13 | #' @param output 14 | #' The output directory to save the plot. 15 | #' @param color 16 | #' A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used. 17 | #' @param reduction 18 | #' Select the reduction of "tsne", "umap", or "pca" that used for the recolor image. 19 | #' @return A vector of new group labels with the cell name as the vector name. 20 | #' @export 21 | 22 | 23 | recolor_s <- function(mapRes_sub, obj, output, color = NULL, reduction="tsne") 24 | { 25 | ## recolor_s will call function gg_colr_hue. 26 | message(paste0("recolor ", output)) 27 | 28 | if (is.null(names(mapRes_sub))) stop("There is no name of mapRes_sub.") 29 | l <- lapply(strsplit(mapRes_sub, ';'), sub, pattern = '.*_', replacement = '') 30 | new_match <- setNames(unlist(l, use.names = F), rep(names(l), lengths(l))) 31 | if(obj@version > 3){ 32 | print("Using Seurat v3") 33 | new_group <- Idents(object = obj) 34 | levels(new_group) <- names(new_match)[match(levels(Idents(object=obj)), new_match)] 35 | } 36 | else{ 37 | new_group <- obj@ident 38 | levels(new_group) <- names(new_match)[match(levels(obj@ident), new_match)] 39 | } 40 | new_group <- factor(new_group, levels = names(mapRes_sub)) 41 | ## t-SNE plot 42 | obj@meta.data$regroup <- new_group 43 | 44 | if(obj@version < 3){ 45 | if (is.null(color)) color <- gg_color_hue(length(levels(new_group))) 46 | png(paste0(output, '.recolor.tsne.png')) 47 | TSNEPlot(obj, do.label = T, label.size = 8, group.by = 'regroup', 48 | colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = toupper(output)) 49 | dev.off() 50 | pdf(paste0(output, '.recolor.tsne.pdf')) 51 | TSNEPlot(obj, do.label = T, label.size = 8, group.by = 'regroup', 52 | colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = toupper(output)) 53 | dev.off() 54 | return(new_group) 55 | } 56 | 57 | else{ 58 | print("Using Seurat v3") 59 | if (is.null(color)) color <- gg_color_hue(length(levels(new_group))) 60 | 61 | p3 <- DimPlot(obj, label = T, label.size = 8, group.by = 'regroup', 62 | reduction = reduction, 63 | cols = color[sort(as.numeric(unique(new_group)))]) 64 | ggtitle(paste(toupper(output))) 65 | ggsave(plot = p3, filename = paste0(output, '.recolor.', reduction, '.png')) 66 | ggsave(plot = p3, filename = paste0(output, '.recolor.', reduction, '.pdf')) 67 | 68 | return(new_group) 69 | } 70 | } 71 | #' recolor_comb 72 | #' 73 | #' Recolor the combined sample based on the matching results from recolor_s. 74 | #' 75 | #' @import Seurat 76 | #' 77 | #' @param comb_obj 78 | #' A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim. 79 | #' @param new_group_list 80 | #' A list of vectors of new group assignment outputted from recolor_s. 81 | #' @param output 82 | #' The output directory to save the plot. 83 | #' @param comb_delim 84 | #' The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'. 85 | #' @param color 86 | #' A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used. 87 | #' @param reduction 88 | #' Select the reduction of "tsne", "umap", or "pca" that used for the recolor image. 89 | #' @return A vector of new group labels with the cell name as the vector name. 90 | #' @export 91 | 92 | 93 | recolor_comb <- function(comb_obj, new_group_list, output, comb_delim = '-', color = NULL, reduction="tsne") 94 | { 95 | ## Change comb_delim if v3 Seurat 96 | if(comb_obj@version > 3){ 97 | comb_delim = '_' 98 | print("Changed comb_delim to '_'") 99 | } 100 | 101 | ## recolor_comb will call function gg_color_hue. 102 | message(paste0("recolor ", output)) 103 | 104 | sample_label <- as.factor(sub(paste0(comb_delim, '.*'), '', colnames(GetAssayData(object = comb_obj)))) 105 | message("levels(sample_label):") 106 | print(levels(sample_label)) 107 | message("names(new_group_list):") 108 | print(names(new_group_list)) 109 | if (all(levels(sample_label) == names(new_group_list)) == FALSE) 110 | stop("Sample label in comb_obj doesn't match names(new_group_list) or names(single_obj_list).") 111 | names(sample_label) <- colnames(GetAssayData(object = comb_obj)) 112 | ## color by samples 113 | comb_obj$samples <- sample_label 114 | 115 | if (comb_obj@version <3) { 116 | print("Seurat v2") 117 | png(paste0(output, '.color.by.sample.tsne.png')) 118 | TSNEPlot(comb_obj, do.label = F, label.size = 8, group.by = 'samples', plot.title = 'Colored by sample') 119 | dev.off() 120 | pdf(paste0(output, '.color.by.sample.tsne.pdf')) 121 | TSNEPlot(comb_obj, do.label = F, label.size = 8, group.by = 'samples', plot.title = 'Colored by sample') 122 | dev.off() 123 | ## assign new group 124 | new_group <- unlist(new_group_list) 125 | names(new_group) <- sub('\\.', comb_delim, names(new_group)) 126 | new_group <- factor(new_group, levels = levels(new_group_list[[1]])) 127 | new_group <- new_group[match(colnames(GetAssayData(object = comb_obj)), as.vector(names(new_group)))] ## some cells may be filtered out in combined sample. 128 | if (is.na(new_group[1])) 129 | stop("Cell names in comb_obj don't match cell names in new_group_list or single_obj_list. Cell names in comb_obj should be sample name and cell name in individual sample connected by comb_delim.") 130 | names(new_group) <- colnames(GetAssayData(object = comb_obj)) 131 | ## color by new group 132 | comb_obj <- AddMetaData(object = comb_obj, metadata = new_group, col.name = "regroup") 133 | if (is.null(color)) color <- gg_color_hue(length(levels(new_group))) 134 | png(paste0(output, '.recolor.tsne.png')) 135 | TSNEPlot(comb_obj, do.label = T, label.size = 8, group.by = 'regroup', 136 | colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = 'Combined') 137 | dev.off() 138 | pdf(paste0(output, '.recolor.tsne.pdf')) 139 | TSNEPlot(comb_obj, do.label = T, label.size = 8, group.by = 'regroup', 140 | colors.use = color[sort(as.numeric(unique(new_group)))], plot.title = 'Combined') 141 | dev.off() 142 | return(new_group) 143 | } 144 | else if(comb_obj@version > 3){ 145 | print("Seurat v3 comb_obj") 146 | p5 <- DimPlot(comb_obj, label = F, label.size = 8, group.by = 'samples', 147 | reduction = reduction) + 148 | ggtitle('Colored by sample') 149 | ggsave(plot = p5, filename = paste0(output, '.color.by.sample.', reduction, '.png')) 150 | p4 <- DimPlot(comb_obj, label = F, label.size = 8, group.by = 'samples', 151 | reduction = reduction) + 152 | ggtitle('Colored by sample') 153 | ggsave(plot = p4, filename = paste0(output, '.color.by.sample.', reduction, '.pdf')) 154 | ## assign new group 155 | new_group <- unlist(new_group_list) 156 | names(new_group) <- sub('\\.', comb_delim, names(new_group)) 157 | new_group <- factor(new_group, levels = levels(new_group_list[[1]])) 158 | new_group <- new_group[match(colnames(GetAssayData(object = comb_obj)), 159 | as.vector(names(new_group)))] ## some cells may be filtered out in combined sample. 160 | if (is.na(new_group[1])) 161 | stop("Cell names in comb_obj don't match cell names in new_group_list or single_obj_list. Cell names in comb_obj should be sample name and cell name in individual sample connected by comb_delim.") 162 | names(new_group) <- colnames(GetAssayData(object = comb_obj)) 163 | ## color by new group 164 | comb_obj <- AddMetaData(object = comb_obj, metadata = new_group, col.name = "regroup") 165 | if (is.null(color)) color <- gg_color_hue(length(levels(new_group))) 166 | plot1 <- DimPlot(comb_obj, label = T, label.size = 8, 167 | reduction = reduction, group.by = 'regroup', 168 | cols = color[sort(as.numeric(unique(new_group)))]) + 169 | ggtitle('Combined') 170 | ggsave(plot = plot1, filename = paste0(output, '.recolor.', reduction, '.png')) 171 | plot2 <- DimPlot(comb_obj, label = T, label.size = 8, 172 | reduction = reduction, group.by = 'regroup', 173 | cols = color[sort(as.numeric(unique(new_group)))]) + 174 | ggtitle('Combined') 175 | ggsave(plot = plot2, filename = paste0(output, '.recolor.', reduction, '.pdf')) 176 | return(new_group) 177 | } 178 | 179 | } 180 | 181 | -------------------------------------------------------------------------------- /R/separability.R: -------------------------------------------------------------------------------- 1 | ##################### Separability #################### 2 | 3 | #' separability_pairwise 4 | #' 5 | #' Calculate separability for every sample pair. The higher the more separable. 6 | #' 7 | #' @param tsne_coord 8 | #' A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample. 9 | #' @param group 10 | #' A vector of group assignment for each cell, with the same order as the tsne_coord. 11 | #' @param sample_label 12 | #' A vector of sample labels for each cell, with the same order as the tsne_coord. 13 | #' @param k 14 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5. 15 | #' @return A matrix of separability for each sample pair (column) and each group (row). 16 | #' @export 17 | 18 | 19 | separability_pairwise <- function(tsne_coord, group, sample_label, k = 5) 20 | {## separability_pairwise will call function separability_by_group. 21 | message("calculate separability for each sample pair") 22 | sample_pair <- combn(levels(sample_label), 2) 23 | colnames(sample_pair) <- paste0(sample_pair[1, ], '.vs.', sample_pair[2, ]) 24 | res <- apply(sample_pair, 2, function(x){ 25 | print(x) 26 | ind <- sample_label %in% x 27 | tsne_coord_sub <- tsne_coord[ind, ] 28 | group_sub <- group[ind] 29 | sample_label_sub <- sample_label[ind] 30 | sepa <- separability_by_group(tsne_coord_sub, group_sub, sample_label_sub, k = k) 31 | return(sepa) 32 | }) 33 | return(res) 34 | } 35 | 36 | #' separability_by_group 37 | #' 38 | #' Calculate separability for each group in a pair of samples. Internal function called by separability_pairwise. 39 | #' 40 | #' @param tsne_coord 41 | #' A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample. 42 | #' @param group 43 | #' A vector of group assignment for each cell, with the same order as the tsne_coord. 44 | #' @param sample_label 45 | #' A vector of sample labels for each cell, with the same order as the tsne_coord. 46 | #' @param k 47 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5. 48 | #' @return A vector of separability for each group. 49 | #' @export 50 | 51 | separability_by_group <- function(tsne_coord, group, sample_label, k = 5) 52 | { ## separability_by_group will call function separability. 53 | p <- sapply(levels(group), function(i){ 54 | m <- tsne_coord[group == i, ] 55 | if (nrow(m) == 0) avg_diff <- NA else 56 | { 57 | class_label <- sample_label[group == i] 58 | avg_diff <- separability(m, class_label, k = k) 59 | } 60 | }) 61 | p <- p/(max(tsne_coord[, 1]) - min(tsne_coord[, 1]))*100 62 | round(p, 2) 63 | } 64 | 65 | #' separability 66 | #' 67 | #' Calculate separability for labeled data. Internal function called by separability_by_group. 68 | #' 69 | #' @param coord 70 | #' A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample. 71 | #' @param class_label 72 | #' A vector of sample labels for each cell, with the same order as the tsne_coord. 73 | #' @param k 74 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5. 75 | #' @return A single value of separability. 76 | #' @export 77 | 78 | separability <- function(coord, class_label, k = 5) 79 | { ## separability will call function inter_dist and inna_dist. 80 | if(length(unique(class_label)) > 1) 81 | { 82 | d <- dist(coord) 83 | dls <- split(as.data.frame(as.matrix(d)), class_label) 84 | sample1_dls <- split(as.data.frame(t(dls[[1]])), class_label) 85 | d1 <- sample1_dls[[1]] ## distance matrix between cells in sample1 86 | d21 <- sample1_dls[[2]] ## distance matrix from cells in sample2 to cells in sample1 87 | sample2_dls <- split(as.data.frame(t(dls[[2]])), class_label) 88 | d2 <- sample2_dls[[2]] ## distance matrix between cells in sample2 89 | d12 <- sample2_dls[[1]] ## distance matrix from cells in sample1 to cells in sample2 90 | diff1 <- inter_dist(d12, k)-inna_dist(d1, k) 91 | diff2 <- inter_dist(d21, k)-inna_dist(d2, k) 92 | avg_diff <- mean(c(diff1, diff2)) ## average two samples 93 | } else avg_diff <- Inf 94 | return(avg_diff) 95 | } 96 | 97 | #' inter_dist 98 | #' 99 | #' Calculate the inter-sample distance. Internal function called by separability. 100 | #' 101 | #' @param x 102 | #' A distance matrix of cells in one sample to the cells in another sample. 103 | #' @param k 104 | #' K-nearest neighbours used to calculate distance. DEFAULT is 5. 105 | #' @return A single value of distance. 106 | 107 | inter_dist <- function(x, k) 108 | { 109 | ## knn_mean <- function(x, k) {mean(sort(x)[1:k])} 110 | ## mean(apply(x, 1, knn_mean, k)) 111 | knn_median <- function(x, k){ median(sort(x)[1:k]) } ## take median of K distance of a cell 112 | median(apply(x, 1, knn_median, k)) ## take median of distance of all cells 113 | } 114 | 115 | #' inna_dist 116 | #' 117 | #' Calculate the inna-sample distance. Internal function called by separability. 118 | #' 119 | #' @param x 120 | #' A distance matrix of cells in one sample to the cells in the same sample. 121 | #' @param k 122 | #' K-nearest neighbours used to calculate distance. 123 | #' @return A single value of distance. 124 | 125 | inna_dist <- function(x, k) 126 | { 127 | ## knn_mean <- function(x, k) {mean(sort(x)[2:k+1])} 128 | ## mean(apply(x, 1, knn_mean, k)) 129 | knn_median <- function(x, k){ median(sort(x)[2:k+1]) } ## take median of K distance of a cell 130 | median(apply(x, 1, knn_median, k)) ## take median of distance of all cells 131 | } 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ClusterMap 2 | 3 | ClusterMap is an R package designed to analyze and compare two or more single cell expression datasets. 4 | 5 | Please cite: 6 | Gao X, Hu D, Gogol M, Li H. ClusterMap: Compare multiple Single Cell RNA-Seq datasets across different experimental conditions. Bioinformatics. 2019. doi: 10.1093/bioinformatics/btz024. 7 | 8 | ## Installation 9 | R(>=3.4.3), Seurat(>= 2.2.1),pheatmap(>= 1.0.10),ape(>= 5.1),circlize(>= 0.4.3) 10 | ```r 11 | install_github('devtools') 12 | library('devtools') 13 | install_github("xgaoo/ClusterMap") 14 | library('ClusterMap') 15 | ``` 16 | 17 | ## Tutorial 18 | 19 | https://xgaoo.github.io/ClusterMap/ClusterMap.html 20 | -------------------------------------------------------------------------------- /man/add_perc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/circos_map.R 3 | \name{add_perc} 4 | \alias{add_perc} 5 | \title{circos_map} 6 | \usage{ 7 | add_perc(mapRes, cell_num_list) 8 | } 9 | \arguments{ 10 | \item{mapRes}{A dataframe of the output of function cluster_map_by_marker.} 11 | 12 | \item{cell_num_list}{A list of vector of cell numbers for each group and each sample.} 13 | } 14 | \value{ 15 | A dataframe of the matching results with cell percentage column added. 16 | } 17 | \description{ 18 | Plot Circos plot for the matching results. 19 | } 20 | -------------------------------------------------------------------------------- /man/circos_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/circos_map.R 3 | \name{circos_map} 4 | \alias{circos_map} 5 | \title{circos_map} 6 | \usage{ 7 | circos_map(mapRes, cell_num_list, output, color_cord = NULL, 8 | color_sample = NULL) 9 | } 10 | \arguments{ 11 | \item{mapRes}{A dataframe of the output of function cluster_map_by_marker.} 12 | 13 | \item{cell_num_list}{A list of vector of cell numbers for each group and each sample.} 14 | 15 | \item{output}{The output directory to save the plot.} 16 | 17 | \item{color_cord}{A vector of colors for the cord of circos plot. DEFAULT is NULL. Pre defined internal color will be used.} 18 | 19 | \item{color_sample}{A vector of colors for the sample sectors in the circos plot. DEFAULT is NULL. Pre defined internal color will be used.} 20 | } 21 | \value{ 22 | circos plot will be save. 23 | } 24 | \description{ 25 | Plot Circos plot for the matching results. 26 | } 27 | -------------------------------------------------------------------------------- /man/cluster_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/master.R 3 | \name{cluster_map} 4 | \alias{cluster_map} 5 | \title{cluster_map} 6 | \usage{ 7 | cluster_map(marker_file_list, edge_cutoff = 0.1, output, 8 | cell_num_list = NULL, single_obj_list = NULL, comb_obj = NULL, 9 | comb_delim = "-", k = 5) 10 | } 11 | \arguments{ 12 | \item{marker_file_list}{A list of csv files with names. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required.} 13 | 14 | \item{edge_cutoff}{The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.} 15 | 16 | \item{output}{The output directory to save the matching results.} 17 | 18 | \item{cell_num_list}{A list of vector of cell numbers for each group and each sample.} 19 | 20 | \item{single_obj_list}{A list of Seurat object for each sample, with the same list names as the list names of marker_file_list.} 21 | 22 | \item{comb_obj}{A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim. The sample names should be the same as the list names of marker_file_list.} 23 | 24 | \item{comb_delim}{The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.} 25 | 26 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.} 27 | } 28 | \value{ 29 | A dataframe of the matching results. Heatmap of marker genes, the corresponding dendrogram, circos plot and recolored t-SNE plots will be saved into files. 30 | } 31 | \description{ 32 | A master function to perform the full workflow of ClusterMap. 33 | } 34 | -------------------------------------------------------------------------------- /man/cluster_map_by_marker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cluster_map.R 3 | \name{cluster_map_by_marker} 4 | \alias{cluster_map_by_marker} 5 | \title{cluster_map_by_marker} 6 | \usage{ 7 | cluster_map_by_marker(marker_file_list, cutoff = 0.1, output) 8 | } 9 | \arguments{ 10 | \item{marker_file_list}{A list of csv files. Each file is a marker gene table for a sample. The columns named as 'cluster' and 'gene' are required.} 11 | 12 | \item{cutoff}{The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.} 13 | 14 | \item{output}{The output directory to save the matching results.} 15 | } 16 | \value{ 17 | A dataframe of the matching results. Heatmap of marker genes and the dendrogram will be saved into files. 18 | } 19 | \description{ 20 | Match groups by marker genes and decompose into new groups by purity cut. 21 | } 22 | -------------------------------------------------------------------------------- /man/inna_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/separability.R 3 | \name{inna_dist} 4 | \alias{inna_dist} 5 | \title{inna_dist} 6 | \usage{ 7 | inna_dist(x, k) 8 | } 9 | \arguments{ 10 | \item{x}{A distance matrix of cells in one sample to the cells in the same sample.} 11 | 12 | \item{k}{K-nearest neighbours used to calculate distance.} 13 | } 14 | \value{ 15 | A single value of distance. 16 | } 17 | \description{ 18 | Calculate the inna-sample distance. Internal function called by separability. 19 | } 20 | -------------------------------------------------------------------------------- /man/inter_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/separability.R 3 | \name{inter_dist} 4 | \alias{inter_dist} 5 | \title{inter_dist} 6 | \usage{ 7 | inter_dist(x, k) 8 | } 9 | \arguments{ 10 | \item{x}{A distance matrix of cells in one sample to the cells in another sample.} 11 | 12 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.} 13 | } 14 | \value{ 15 | A single value of distance. 16 | } 17 | \description{ 18 | Calculate the inter-sample distance. Internal function called by separability. 19 | } 20 | -------------------------------------------------------------------------------- /man/make_comb_obj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pre_analysis.R 3 | \name{make_comb_obj} 4 | \alias{make_comb_obj} 5 | \title{make_comb_obj} 6 | \usage{ 7 | make_comb_obj(data_dirList, is.10X = TRUE, comb_delim = "-") 8 | } 9 | \arguments{ 10 | \item{data_dirList}{A list of directory with 10X genomics single cell data or full path of expression table.} 11 | 12 | \item{is.10X}{The input data is 10X genomics format or not. DEFAULT is TRUE.} 13 | 14 | \item{comb_delim}{The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.} 15 | } 16 | \value{ 17 | A Seurat object. 18 | } 19 | \description{ 20 | A warper of Seurat function to generate Seurat object for combined sample from single sample. 21 | } 22 | -------------------------------------------------------------------------------- /man/make_single_obj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pre_analysis.R 3 | \name{make_single_obj} 4 | \alias{make_single_obj} 5 | \title{make_single_obj} 6 | \usage{ 7 | make_single_obj(data_dir, is.10X = TRUE, output) 8 | } 9 | \arguments{ 10 | \item{data_dir}{Directory with 10X genomics single cell data or full path of expression table.} 11 | 12 | \item{is.10X}{The input data is 10X genomics format or not. DEFAULT is TRUE.} 13 | 14 | \item{output}{The output directory to save the marker genes.} 15 | } 16 | \value{ 17 | A Seurat object. 18 | } 19 | \description{ 20 | A warper of Seurat function to generate Seurat object and marker genes for single sample. 21 | } 22 | -------------------------------------------------------------------------------- /man/purity_cut.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cluster_map.R 3 | \name{purity_cut} 4 | \alias{purity_cut} 5 | \title{purity_cut} 6 | \usage{ 7 | purity_cut(hcluster, cutoff = 0.1) 8 | } 9 | \arguments{ 10 | \item{hcluster}{A hclust object.} 11 | 12 | \item{cutoff}{The edge length cutoff to decide the sub-nodes to merge or not. DEFAULT is 0.1.} 13 | } 14 | \value{ 15 | A dataframe of the matching results. 16 | } 17 | \description{ 18 | Cut hierarchical clustering dendrogram by edge length and purity of the nodes. 19 | } 20 | -------------------------------------------------------------------------------- /man/recolor_comb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recolor.R 3 | \name{recolor_comb} 4 | \alias{recolor_comb} 5 | \title{recolor_comb} 6 | \usage{ 7 | recolor_comb(comb_obj, new_group_list, output, comb_delim = "-", 8 | color = NULL) 9 | } 10 | \arguments{ 11 | \item{comb_obj}{A Seurat object for the combined sample. Cells in different samples are labelled by the sample names with the comb_delim.} 12 | 13 | \item{new_group_list}{A list of vectors of new group assignment outputted from recolor_s.} 14 | 15 | \item{output}{The output directory to save the plot.} 16 | 17 | \item{comb_delim}{The delimiter used in the cell names in the combined object to connect sample name and cell name in individual sample. DEFAULT is '-'.} 18 | 19 | \item{color}{A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used.} 20 | } 21 | \value{ 22 | A vector of new group labels with the cell name as the vector name. 23 | } 24 | \description{ 25 | Recolor the combined sample based on the matching results from recolor_s. 26 | } 27 | -------------------------------------------------------------------------------- /man/recolor_s.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recolor.R 3 | \name{recolor_s} 4 | \alias{recolor_s} 5 | \title{recolor_s} 6 | \usage{ 7 | recolor_s(mapRes_sub, obj, output, color = NULL) 8 | } 9 | \arguments{ 10 | \item{mapRes_sub}{A vector of the column named by the sample in the output of cluster_map_by_marker, with the regroup column as the vector name.} 11 | 12 | \item{obj}{A Seurat object for the sample.} 13 | 14 | \item{output}{The output directory to save the plot.} 15 | 16 | \item{color}{A vector of colors used to recolor the new groups. DEFAULT is NULL. Pre defined internal color will be used.} 17 | } 18 | \value{ 19 | A vector of new group labels with the cell name as the vector name. 20 | } 21 | \description{ 22 | Recolor a single sample based on the matching results from cluster_map_by_marker. 23 | } 24 | -------------------------------------------------------------------------------- /man/separability.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/separability.R 3 | \name{separability} 4 | \alias{separability} 5 | \title{separability} 6 | \usage{ 7 | separability(coord, class_label, k = 5) 8 | } 9 | \arguments{ 10 | \item{coord}{A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.} 11 | 12 | \item{class_label}{A vector of sample labels for each cell, with the same order as the tsne_coord.} 13 | 14 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.} 15 | } 16 | \value{ 17 | A single value of separability. 18 | } 19 | \description{ 20 | Calculate separability for labeled data. Internal function called by separability_by_group. 21 | } 22 | -------------------------------------------------------------------------------- /man/separability_by_group.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/separability.R 3 | \name{separability_by_group} 4 | \alias{separability_by_group} 5 | \title{separability_by_group} 6 | \usage{ 7 | separability_by_group(tsne_coord, group, sample_label, k = 5) 8 | } 9 | \arguments{ 10 | \item{tsne_coord}{A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.} 11 | 12 | \item{group}{A vector of group assignment for each cell, with the same order as the tsne_coord.} 13 | 14 | \item{sample_label}{A vector of sample labels for each cell, with the same order as the tsne_coord.} 15 | 16 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.} 17 | } 18 | \value{ 19 | A vector of separability for each group. 20 | } 21 | \description{ 22 | Calculate separability for each group in a pair of samples. Internal function called by separability_pairwise. 23 | } 24 | -------------------------------------------------------------------------------- /man/separability_pairwise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/separability.R 3 | \name{separability_pairwise} 4 | \alias{separability_pairwise} 5 | \title{separability_pairwise} 6 | \usage{ 7 | separability_pairwise(tsne_coord, group, sample_label, k = 5) 8 | } 9 | \arguments{ 10 | \item{tsne_coord}{A dataframe of the two dimension t-SNE coordinates of each cell in the combined sample.} 11 | 12 | \item{group}{A vector of group assignment for each cell, with the same order as the tsne_coord.} 13 | 14 | \item{sample_label}{A vector of sample labels for each cell, with the same order as the tsne_coord.} 15 | 16 | \item{k}{K-nearest neighbours used to calculate distance. DEFAULT is 5.} 17 | } 18 | \value{ 19 | A matrix of separability for each sample pair (column) and each group (row). 20 | } 21 | \description{ 22 | Calculate separability for every sample pair. The higher the more separable. 23 | } 24 | --------------------------------------------------------------------------------