├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── plot_cell_fraction.R
    ├── plot_circlize.R
    ├── plot_dot.R
    ├── plot_extra.R
    ├── plot_heatmap.R
    ├── plot_umap.R
    ├── plot_upset.R
    ├── plot_violin.R
    └── utils.R
├── README.html
├── README.md
├── data
    ├── cell_fraction.png
    ├── circlize_plot.png
    ├── dotplot_more_groups.png
    ├── dotplot_more_groups_split.png
    ├── dotplot_multiple.png
    ├── dotplot_single.png
    ├── dotplot_single_split.png
    ├── geneplot_umap.png
    ├── heatmap_group.png
    ├── upset_plot.png
    ├── vlnplot_multiple.png
    ├── vlnplot_multiple_genes.png
    ├── vlnplot_multiple_split.png
    ├── vlnplot_single.png
    └── vlnplot_single_split.png
├── man
    ├── Install.example.Rd
    ├── add_track.Rd
    ├── cell_order.Rd
    ├── change_strip_background.Rd
    ├── complex_dotplot_multiple.Rd
    ├── complex_dotplot_single.Rd
    ├── complex_featureplot.Rd
    ├── complex_heatmap_unique.Rd
    ├── complex_upset_plot.Rd
    ├── complex_vlnplot_multiple.Rd
    ├── complex_vlnplot_single.Rd
    ├── convert_geneid.Rd
    ├── creat_cellphonedb_file.Rd
    ├── create_pyscenic_file.Rd
    ├── data_processing.Rd
    ├── extract_gene_count.Rd
    ├── firstup.Rd
    ├── get_metadata.Rd
    ├── get_segment.Rd
    ├── mk_color_table.Rd
    ├── mk_marker_ct.Rd
    ├── order_gene_down.Rd
    ├── order_gene_up.Rd
    ├── plot_cell_fraction.Rd
    ├── plot_circlize.Rd
    ├── plot_qpcr.Rd
    ├── prepare_circlize_data.Rd
    ├── run_correlation.Rd
    └── transform_coordinates.Rd
├── plot1cell.Rproj
└── vignettes
    └── .gitignore


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: plot1cell
 2 | Title: A package for single cell data visualization
 3 | Version: 0.0.0.9000
 4 | Authors@R: 
 5 |     person(given = "Haojia",
 6 |            family = "Wu",
 7 |            role = c("aut", "cre"),
 8 |            email = "haojiawu@wustl.edu",
 9 |            comment = c(ORCID = "0000-0002-7866-2544"))
10 | Description: This package allows users to visualize the single cell data on the R objects or output files generated by the popular tools such as Seurat, SCENIC, monocle, CellPhoneDB, CellChat etc.  
11 | Imports:
12 |     Seurat,
13 |     plotly,
14 |     circlize,
15 |     dplyr,
16 |     ggplot2,
17 |     ggh4x,
18 |     MASS,
19 |     scales,
20 |     progress,
21 |     RColorBrewer,
22 |     grid,
23 |     grDevices,
24 |     biomaRt,
25 |     reshape2,
26 |     ggbeeswarm,
27 |     purrr,
28 |     ComplexUpset,
29 |     matrixStats,
30 |     DoubletFinder,
31 |     methods,
32 |     data.table,
33 |     Matrix,
34 |     hdf5r,
35 |     loomR,
36 |     GenomeInfoDb,
37 |     EnsDb.Hsapiens.v86,
38 |     cowplot,
39 |     rlang,
40 |     GEOquery,
41 |     simplifyEnrichment,
42 |     wordcloud,
43 |     ComplexHeatmap
44 | Depends:
45 |     Seurat,
46 |     plotly,
47 |     circlize,
48 |     dplyr,
49 |     ggplot2,
50 |     ggh4x,
51 |     MASS,
52 |     scales,
53 |     progress,
54 |     RColorBrewer,
55 |     grid,
56 |     grDevices,
57 |     biomaRt,
58 |     reshape2,
59 |     ggbeeswarm,
60 |     purrr,
61 |     ComplexUpset,
62 |     matrixStats,
63 |     DoubletFinder,
64 |     methods,
65 |     data.table,
66 |     Matrix,
67 |     hdf5r,
68 |     loomR,
69 |     GenomeInfoDb,
70 |     EnsDb.Hsapiens.v86,
71 |     cowplot,
72 |     rlang,
73 |     GEOquery,
74 |     simplifyEnrichment,
75 |     wordcloud,
76 |     ComplexHeatmap
77 | License: MIT 
78 | Encoding: UTF-8
79 | LazyData: true
80 | Roxygen: list(markdown = TRUE)
81 | RoxygenNote: 7.1.2
82 | Suggests: 
83 |     rmarkdown,
84 |     knitr
85 | VignetteBuilder: knitr
86 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Haojia Wu@TheHumphreysLab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(Install.example)
 4 | export(add_track)
 5 | export(cell_order)
 6 | export(change_strip_background)
 7 | export(complex_dotplot_multiple)
 8 | export(complex_dotplot_single)
 9 | export(complex_featureplot)
10 | export(complex_heatmap_unique)
11 | export(complex_upset_plot)
12 | export(complex_vlnplot_multiple)
13 | export(complex_vlnplot_single)
14 | export(convert_geneid)
15 | export(creat_cellphonedb_file)
16 | export(create_pyscenic_file)
17 | export(data_processing)
18 | export(extract_gene_count)
19 | export(firstup)
20 | export(get_metadata)
21 | export(get_segment)
22 | export(mk_color_table)
23 | export(mk_marker_ct)
24 | export(order_gene_down)
25 | export(order_gene_up)
26 | export(plot_cell_fraction)
27 | export(plot_circlize)
28 | export(plot_qpcr)
29 | export(prepare_circlize_data)
30 | export(run_correlation)
31 | export(transform_coordinates)
32 | 


--------------------------------------------------------------------------------
/R/plot_cell_fraction.R:
--------------------------------------------------------------------------------
 1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | # Functions
 3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 4 | #' Plot cell fractions across groups
 5 | #'
 6 | #' This function is to show the cell fraction changes across groups.
 7 | #'
 8 | #' @param seu_obj A complete Seurat object
 9 | #' @param celltypes Cell types to be included in the plot. Default: all cell types.
10 | #' @param groupby The group to show on x axis. One of the column names in meta.data.
11 | #' @param show_replicate Whether or not to show the individual replicate on the graph. If TRUE, the replicate column name needs to specify in the argument rep_colname.
12 | #' @param rep_colname The column name for biological replicates in the meta data.
13 | #' @param strip.color Colors for the strip background
14 | #' @return A ggplot object
15 | #' @export
16 | 
17 | plot_cell_fraction<-function(
18 |   seu_obj, 
19 |   celltypes=NULL, 
20 |   groupby, 
21 |   show_replicate = FALSE, 
22 |   rep_colname = NULL,
23 |   strip.color = NULL
24 |   ){
25 |   meta_data<-seu_obj@meta.data
26 |   meta_data$celltype<-as.character(Idents(seu_obj))
27 |   groupby_level<-levels(seu_obj@meta.data[,groupby])
28 |   if (is.null(groupby_level)){
29 |     seu_obj@meta.data[,groupby] <-factor(seu_obj@meta.data[,groupby], levels = names(table(seu_obj@meta.data[,groupby])))
30 |     groupby_level<-levels(seu_obj@meta.data[,groupby])
31 |   } 
32 |   if(!show_replicate){
33 |     freq_df<-prop.table(table(meta_data[,"celltype"], meta_data[, groupby]), margin = 2)
34 |     freq_df<-data.frame(freq_df)
35 |     colnames(freq_df)[1:2]<-c("celltype","group")
36 |     freq_df$Freq<-freq_df$Freq*100
37 |   } else {
38 |     if(is.null(rep_colname)){
39 |       stop("Please specify the replicate colname in your meta data!")
40 |     } else {
41 |       meta_data$new_group<-paste(meta_data[,"celltype"], meta_data[,groupby], meta_data[, rep_colname], sep = "___")
42 |       freq_df<-data.frame(table(meta_data$new_group))
43 |       freq_df$Var1<-as.character(freq_df$Var1)
44 |       freq_df$celltype <- as.character(lapply(X=strsplit(freq_df$Var1, split = "___"),FUN = function(x){x[[1]]}))
45 |       freq_df$group <- as.character(lapply(X=strsplit(freq_df$Var1, split = "___"),FUN = function(x){x[[2]]}))
46 |       freq_df$replicate <- as.character(lapply(X=strsplit(freq_df$Var1, split = "___"),FUN = function(x){x[[3]]}))
47 |       total_cell<-aggregate(Freq ~ replicate, freq_df, sum)
48 |       colnames(total_cell)[2]<-"Total"
49 |       freq_df <- merge(freq_df, total_cell, by="replicate")
50 |       freq_df$Freq<-freq_df$Freq*100/freq_df$Total
51 |     }
52 |   }
53 |   freq_df$group <- factor(freq_df$group, levels = groupby_level)
54 |   if(is.null(celltypes)){
55 |     celltypes <- levels(seu_obj)
56 |   }
57 |   freq_df <- freq_df[freq_df$celltype %in% celltypes, ]
58 |   freq_df$celltype <- factor(freq_df$celltype, levels = celltypes)
59 |  p <- ggplot(freq_df, aes(group, Freq, fill=group))+
60 |     geom_bar(position = "dodge",  stat = "summary", fun='mean', width = 0.6)+
61 |     stat_summary(fun.data = mean_se, geom = "errorbar", width=0.2, size=1, color='midnightblue', alpha=0.8)+
62 |     ylab('Percentage of cells')+xlab('')+
63 |     scale_fill_manual(values = brewer.pal(8, 'Set2'))+
64 |     theme(panel.background = element_rect(fill = "white", colour = "grey50"),
65 |           strip.text = element_text(size = 12),axis.title = element_text(size = 14),
66 |           axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 12),
67 |           legend.position = "none", axis.text.y = element_text(size = 12),
68 |           legend.text = element_text(size = 12), legend.title = element_text(size = 12, face = 'bold'), 
69 |           plot.title = element_text(hjust =0.5))
70 |  
71 |  if(show_replicate){
72 |    p <- p +   geom_quasirandom(size=1,width = 0.2, color='midnightblue', alpha=0.8, groupOnX = F)+facet_wrap(~celltype, scales = 'free_y', ncol = length(celltypes))
73 |    g <- change_strip_background(p, type = 'top', strip.color = strip.color)
74 |    print(grid.draw(g))
75 |  } else {
76 |    p
77 |  }
78 | }
79 | 
80 | 


--------------------------------------------------------------------------------
/R/plot_circlize.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' Convert coordinates
  5 | #'
  6 | #' This function converts the Cartesian coordinates to Polar coordinates. 
  7 | #' Input data can be the coordinates from tSNE or UMAP. It outputs a matrix with
  8 | #' polar coordinates.
  9 | #'
 10 | #' @param coord_data Cartesian coordinates from tSNE, UMAP, etc.
 11 | #' @param zoom Value from c(0,1) to adjust the coordinates.
 12 | #' @return A matrix with polar coordinates
 13 | #' @export
 14 | transform_coordinates <- function(
 15 |   coord_data, 
 16 |   zoom
 17 |   ){
 18 |   center_data<-coord_data-mean(c(min(coord_data),max(coord_data)))
 19 |   max_data<-max(center_data)
 20 |   new_data<-center_data*zoom/max_data
 21 |   new_data
 22 | }
 23 | 
 24 | #' Get metadata from a Seurat object
 25 | #'
 26 | #' This function extracts the metadata from a Seurat object and transforms the
 27 | #' UMAP/tSNE coordinates.
 28 | #'
 29 | #' @param seu_obj SeuratObject
 30 | #' @param reductions reductions methods, e.g."umap" or "tsne".
 31 | #' @param color Colors assigned to the cell clusters
 32 | #' @param coord_scale value from c(0,1) to adjust the UMAP/tSNE coordinates.
 33 | #' @return A metadata dataframe
 34 | #' @export
 35 | get_metadata <- function(
 36 |   seu_obj, 
 37 |   reductions = "umap", 
 38 |   coord_scale = 0.8, 
 39 |   color
 40 |   ){
 41 |   metadata<-seu_obj@meta.data
 42 |   metadata$Cluster<-seu_obj@active.ident
 43 |   metadata$dim1<-as.numeric(seu_obj[[reductions]]@cell.embeddings[,1])
 44 |   metadata$dim2<-as.numeric(seu_obj[[reductions]]@cell.embeddings[,2])
 45 |   metadata$x<-transform_coordinates(metadata$dim1, zoom = coord_scale)
 46 |   metadata$y<-transform_coordinates(metadata$dim2, zoom = coord_scale)
 47 |   color_df<-data.frame(Cluster=levels(seu_obj), Colors=color)
 48 |   cellnames<-rownames(metadata)
 49 |   metadata$cells<-rownames(metadata)
 50 |   metadata<-merge(metadata, color_df, by='Cluster')
 51 |   rownames(metadata)<-metadata$cells
 52 |   metadata<-metadata[cellnames,]
 53 |   metadata
 54 | }
 55 | 
 56 | #' Make count matrix for the selected markers
 57 | #'
 58 | #' This function labels the cells based on their expression levels of the selected 
 59 | #' marker genes.
 60 | #'
 61 | #' @param seu_obj SeuratObject
 62 | #' @param features Selected marker genes
 63 | #' @return A dataframe with cells labeled by marker genes
 64 | #' @export
 65 | mk_marker_ct <- function(
 66 |   seu_obj, 
 67 |   features
 68 |   ){
 69 |   dat <- Seurat::FetchData(seu_obj, vars = features)
 70 |   ori_names <- rownames(dat)
 71 |   zero_ct <- dat[rowSums(dat)==0,]
 72 |   non_zero <- dat[rowSums(dat)!=0,]
 73 |   max_genes <- colnames(non_zero)[max.col(non_zero,ties.method="first")]
 74 |   non_zero <- data.frame(cells=rownames(non_zero), genes=max_genes)
 75 |   zero_ct <- data.frame(cells=rownames(zero_ct), genes='No_expr')
 76 |   all_cells <- rbind(non_zero, zero_ct)
 77 |   rownames(all_cells) <- all_cells$cells
 78 |   all_cells <- all_cells[ori_names,]
 79 |   all_cells
 80 | }
 81 | 
 82 | #' Create a dataframe for color mapping
 83 | #'
 84 | #' This function assigns a color for each value in a vector
 85 | #'
 86 | #' @param group Group to be assigned color
 87 | #' @return A dataframe with colors assigned to groups
 88 | #' @export
 89 | mk_color_table <- function(group){
 90 |   n=length(group)
 91 |   colors=scales::hue_pal()(n)
 92 |   color_table <- data.frame(group, colors)
 93 |   color_table
 94 | }
 95 | 
 96 | #' Order the cells from each cluster
 97 | #'
 98 | #' This function orders the cells from each cluster by giving a value from
 99 | #' 1 to max
100 | #' @param dat Data input. 
101 | #' @return An vector with ordered cells
102 | #' @export
103 | cell_order <- function(dat){
104 |   celltypes <- names(table(dat$Cluster))
105 |   new_dat <- list()
106 |   for (i in 1:length(celltypes)){
107 |     dat$Cluster<-as.character(dat$Cluster)
108 |     dat1<-dat[dat$Cluster==celltypes[i],]
109 |     dat1$x_polar<-1:nrow(dat1)
110 |     new_dat[[i]]<-dat1
111 |   }
112 |   new_dat<-do.call('rbind', new_dat)
113 |   new_dat
114 | }
115 | 
116 | #' Create a segment for each element in a group
117 | #'
118 | #' This function creates a segment for each element within a group
119 | #' @param dat Data input. 
120 | #' @param group The group name
121 | #' @return An vector with ordered cells
122 | #' @export
123 | get_segment <- function(
124 |   dat, 
125 |   group
126 |   ){
127 |   dat<-dat[order(dat[,group],decreasing = F), ]
128 |   rownames(dat)<-1:nrow(dat)
129 |   dat<-dat[!duplicated(dat[,group]),]
130 |   dat_seg<-as.integer(rownames(dat))
131 |   dat_seg
132 | }
133 | 
134 | #' Prepare circlize data for plotting
135 | #'
136 | #' This function creates a segment for each element within a group
137 | #' @param seu_obj Seurat object 
138 | #' @param scale Scale factor to zoom in our zoom out the tSNE/UMAP proportionally
139 | #' @return A data frame for plotting
140 | #' @export
141 | prepare_circlize_data <- function(
142 |   seu_obj, 
143 |   scale =0.8
144 |   ){
145 |   celltypes<-levels(seu_obj)
146 |   cell_colors <- scales::hue_pal()(length(celltypes))
147 |   data_plot <- get_metadata(seu_obj, color = cell_colors, coord_scale = scale)
148 |   data_plot <- cell_order(data_plot)
149 |   data_plot$x_polar2 <- log10(data_plot$x_polar)
150 |   data_plot
151 | }
152 | 
153 | #' Generate a circlize plot outside the tSNE/UMAP
154 | #'
155 | #' This function generates a circlize plot outside the tSNE/UMAP
156 | #'
157 | #' @param data_plot Data frame prepared by the prepare_circlize_data function
158 | #' @param do.label Whether to label the clusters
159 | #' @param contour.levels Which contour line to be drawn on the plot. Value: 0-1
160 | #' @param bg.color Canvas background color
161 | #' @param label.cex Label font size
162 | #' @param pt.size Point size of the graph
163 | #' @param kde2d.n Number of grid points in each direction. A kde2d parameter
164 | #' @param contour.nlevels Total number of levels in contour
165 | #' @param col.use Colors used to label the cell type
166 | #' @param repel Whether or not to repel the cell type names on umap
167 | #' @return Return a circlize plot
168 | #' @export
169 | plot_circlize <- function(
170 |   data_plot,
171 |   do.label = T,
172 |   contour.levels = c(0.2,0.3),
173 |   pt.size = 0.5,
174 |   kde2d.n = 1000,
175 |   contour.nlevels = 100,
176 |   bg.color='#F9F2E4',
177 |   col.use=NULL,
178 |   label.cex = 0.5,
179 |   repel=FALSE
180 |   ) {
181 |   data_plot %>%
182 |     dplyr::group_by(Cluster) %>%
183 |     summarise(x = median(x = x), y = median(x = y)) -> centers
184 |   z <- MASS::kde2d(data_plot$x, data_plot$y, n=kde2d.n)
185 |   celltypes<-names(table(data_plot$Cluster))
186 |   cell_colors <- scales::hue_pal()(length(celltypes))
187 |   if(!is.null(col.use)){
188 |     cell_colors=col.use
189 |     col_df<-data.frame(Cluster=celltypes, color2=col.use)
190 |     cells_order<-rownames(data_plot)
191 |     data_plot<-merge(data_plot, col_df, by="Cluster")
192 |     rownames(data_plot)<-data_plot$cells
193 |     data_plot<-data_plot[cells_order,]
194 |     data_plot$Colors<-data_plot$color2
195 |   }
196 |   circos.clear()
197 |   par(bg = bg.color)
198 |   circos.par(cell.padding=c(0,0,0,0), track.margin=c(0.01,0),"track.height" = 0.01, gap.degree =c(rep(2, (length(celltypes)-1)),12),points.overflow.warning=FALSE)
199 |   circos.initialize(sectors =  data_plot$Cluster, x = data_plot$x_polar2)
200 |   circos.track(data_plot$Cluster, data_plot$x_polar2, y=data_plot$dim2, bg.border=NA,panel.fun = function(x, y) {
201 |     circos.text(CELL_META$xcenter,
202 |                 CELL_META$cell.ylim[2]+ mm_y(4),
203 |                 CELL_META$sector.index,
204 |                 cex=0.5, col = 'black', facing = "bending.inside", niceFacing = T)
205 |     circos.axis(labels.cex = 0.3, col = 'black', labels.col =  'black')
206 |   })
207 |   for(i in 1:length(celltypes)){
208 |     dd<-data_plot[data_plot$Cluster==celltypes[i],]
209 |     circos.segments(x0 = min(dd$x_polar2), y0 = 0, x1 = max(dd$x_polar2), y1 = 0, col = cell_colors[i],  lwd=3, sector.index = celltypes[i])
210 |   }
211 |   text(x = 1, y=0.1, labels = "Cluster", cex = 0.4, col = 'black',srt=-90)
212 |   points(data_plot$x,data_plot$y, pch = 19, col = alpha(data_plot$Colors,0.2), cex = pt.size);
213 |   contour(z, drawlabels=F, nlevels= 100, levels = contour.levels,col = '#ae9c76', add=TRUE)
214 |   if(do.label){
215 |     if(repel){
216 |       textplot(x=centers$x, y=centers$y, words =  centers$Cluster,cex = label.cex, new = F,show.lines=F)
217 |     } else {
218 |       text(centers$x,centers$y, labels=centers$Cluster, cex = label.cex, col = 'black')
219 |     }
220 |   } 
221 | }
222 | 
223 | #' Add tracks to the circlize plot
224 | #'
225 | #' This function allows users to add more tracks onto the circlize plot
226 | #' @param data_plot Data for circlize plot 
227 | #' @param group The group to be shown on the new track
228 | #' @param colors Color palette to color the group
229 | #' @param track_lwd The width of the track. Default:3
230 | #' @param track_num Which number this track is? Value is integer and starts with 2 for the 2nd track, track_num=3 for the 3rd track, etc...
231 | #' @return A new circlize track adding to the current circlize plot
232 | #' @export
233 | add_track <- function(
234 |   data_plot, 
235 |   group, 
236 |   track_num,
237 |   track_lwd = 3,
238 |   colors = NULL
239 |   ){
240 |   if(track_num<2){
241 |     stop("The first track is the cluster track. Please change the track_num to a value greater than 1")
242 |   }
243 |   circos.track(data_plot$Cluster, data_plot$x_polar2, y=data_plot$dim2, bg.border=NA)
244 |   celltypes<-names(table(data_plot$Cluster))
245 |   group_names<-names(table(data_plot[,group]))
246 |   if(is.null(colors)){
247 |     col_group = scales::hue_pal()(length(group_names))
248 |   } else {
249 |     col_group = colors
250 |   }
251 |   names(col_group) <- group_names
252 |   for(i in 1:length(celltypes)) {
253 |     data_plot_cl <- data_plot[data_plot$Cluster==celltypes[i],]
254 |     group_names_cl <- names(table(data_plot_cl[,group]))
255 |     col_group_cl <- as.character(col_group[group_names_cl])
256 |     dat_seg <- get_segment(data_plot_cl, group = group)
257 |     dat_seg2 <- c(dat_seg[-1]-1, nrow(data_plot_cl))
258 |     scale_factor<-max(data_plot_cl$x_polar2)/nrow(data_plot_cl)
259 |     dat_seg<-scale_factor*dat_seg
260 |     dat_seg2<-scale_factor*dat_seg2
261 |     circos.segments(x0 = dat_seg, y0 = 0, x1 = dat_seg2, y1 = 0, col = col_group_cl, sector.index = celltypes[i], lwd=track_lwd)
262 |   }
263 |   text(x = (1-0.03*(track_num-1)), y=0.1, labels = group, cex = 0.4, col = 'black',srt=-90)
264 | }
265 | 


--------------------------------------------------------------------------------
/R/plot_dot.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' Plot single gene across groups
  5 | #'
  6 | #' This function can be used for plotting a single gene expression across 
  7 | #' different groups in a study with complex group design.
  8 | #'
  9 | #' @param seu_obj A complete Seurat object.
 10 | #' @param feature Gene name. Only one gene is allowed.
 11 | #' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
 12 | #' @param groups The group to show on x axis. One of the column names in meta.data.
 13 | #' @param splitby The group to separate the gene expression. One of the column names in meta.data.
 14 | #' @param scale.by Methods to scale the dot size. "radius" or "size".
 15 | #' @param color.palette Color for gene expression.
 16 | #' @param strip.color Colors for the strip background.
 17 | #' @param font.size Font size for the labels.
 18 | #' @param do.scale Whether or not to scale the dot when percentage expression of the gene is less than 20.
 19 | #' @return A ggplot object
 20 | #' @export
 21 | 
 22 | complex_dotplot_single <- function(
 23 |   seu_obj, 
 24 |   feature, 
 25 |   celltypes=NULL,
 26 |   groups,
 27 |   splitby=NULL,
 28 |   color.palette = NULL,
 29 |   font.size = 12,
 30 |   strip.color=NULL,
 31 |   do.scale=T,
 32 |   scale.by='radius'
 33 | ){
 34 |   if(is.null(color.palette)){
 35 |     color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
 36 |   }
 37 |   scale.func <- switch(
 38 |     EXPR = scale.by,
 39 |     'size' = scale_size,
 40 |     'radius' = scale_radius,
 41 |     stop("'scale.by' must be either 'size' or 'radius'")
 42 |   ) ### This function is from Seurat https://github.com/satijalab/seurat
 43 |   if(is.null(celltypes)){
 44 |     celltypes<-levels(seu_obj)
 45 |   }
 46 |   if(length(groups)==1){
 47 |     groups_level<-levels(seu_obj@meta.data[,groups])
 48 |     if (is.null(groups_level)){
 49 |       seu_obj@meta.data[,groups] <-factor(seu_obj@meta.data[,groups], levels = names(table(seu_obj@meta.data[,groups])))
 50 |       groups_level<-levels(seu_obj@meta.data[,groups])
 51 |     } 
 52 |     
 53 |     if(!is.null(splitby)){
 54 |       if (is.null(levels(seu_obj@meta.data[,splitby]))){
 55 |         seu_obj@meta.data[,splitby] <-factor(seu_obj@meta.data[,splitby], levels = names(table(seu_obj@meta.data[,splitby])))
 56 |       }
 57 |       splitby_level<-levels(seu_obj@meta.data[,splitby])
 58 |       count_df<-extract_gene_count(seu_obj, features = feature, cell.types = celltypes, meta.groups = c(groups,splitby))
 59 |       count_df$new_group<-paste(count_df[,groups], count_df[,"celltype"], count_df[,splitby],sep = "___")
 60 |       exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))}) 
 61 |       pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)}) #This is the same data processing as Seurat
 62 |       colnames(exp_df)[2]<-"avg.exp"
 63 |       colnames(pct_df)[2]<-"pct.exp"
 64 |       data_plot<-merge(exp_df, pct_df, by='new_group')
 65 |       data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
 66 |       data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
 67 |       data_plot$splitby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[3]]}))
 68 |       data_plot$groups <- factor(data_plot$groups, levels = groups_level)
 69 |       data_plot$splitby <- factor(data_plot$splitby, levels = splitby_level)
 70 |       data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
 71 |     } else {
 72 |       count_df<-extract_gene_count(seu_obj, features = feature, cell.types = celltypes, meta.groups = groups)
 73 |       count_df$new_group<-paste(count_df[,groups], count_df[,"celltype"],sep = "___")
 74 |       exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
 75 |       pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
 76 |       colnames(exp_df)[2]<-"avg.exp"
 77 |       colnames(pct_df)[2]<-"pct.exp"
 78 |       data_plot<-merge(exp_df, pct_df, by='new_group')
 79 |       data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
 80 |       data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
 81 |       data_plot$groups <- factor(data_plot$groups, levels = groups_level)
 82 |       data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
 83 |     }
 84 |     data_plot$pct.exp <- round(100 * data_plot$pct.exp, 2)
 85 |     data_plot$avg.exp <- scale(data_plot$avg.exp)
 86 |     p<-ggplot(data_plot, aes(y = celltype, x = groups)) +  
 87 |       geom_tile(fill="white", color="white") +
 88 |       geom_point(aes( colour=avg.exp, size =pct.exp))  +  
 89 |       scale_color_gradientn(colours  =  color.palette )+ 
 90 |       theme(panel.background = element_rect(fill = "white", colour = "black"),
 91 |             axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
 92 |             plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
 93 |             axis.text = element_text(size = font.size),
 94 |             legend.text=element_text(size=(font.size-2)),
 95 |             legend.title = element_text(size = (font.size)),
 96 |             strip.text = element_text( size = font.size),
 97 |             legend.position="right")+
 98 |       ylab("")+xlab("")+ggtitle(feature)
 99 |     if(do.scale){
100 |       p = p + scale_size(range = c(0, 10))
101 |     } else {
102 |       if(max(data_plot$pct.exp)>=20){
103 |         p = p + scale_size(range = c(0, 10))
104 |       } else {
105 |         p = p + scale.func(range = c(0, 10), limits = c(0, 20))
106 |       }
107 |     }
108 |     if(!is.null(splitby)){
109 |       p <- p +facet_wrap(~splitby, scales = 'free_x')
110 |       g <- change_strip_background(p, type = 'top', strip.color = strip.color)
111 |       print(grid.draw(g))
112 |     } else {
113 |       p
114 |     }
115 |   } else {  ### group number greater than 1
116 |     gene_count<-extract_gene_count(seu_obj=seu_obj, features = feature, cell.types = celltypes, meta.groups = c(groups, splitby))
117 |     allgroups<-c(groups,splitby )
118 |     for(i in 1:length(allgroups)){
119 |       if (is.null(levels(seu_obj@meta.data[,allgroups[i]]))){
120 |         seu_obj@meta.data[,allgroups[i]] <-factor(seu_obj@meta.data[,allgroups[i]], levels = names(table(seu_obj@meta.data[,allgroups[i]])))
121 |       }
122 |       group_level<-levels(seu_obj@meta.data[,allgroups[i]])
123 |       gene_count[,allgroups[i]]<-factor(gene_count[,allgroups[i]],
124 |                                         levels = group_level)
125 |     }
126 |     gene_count$celltype<-factor(gene_count$celltype, levels = celltypes)
127 |     all_levels<-list()
128 |     for(i in 1:length(groups)){
129 |       if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
130 |         seu_obj@meta.data[,groups[i]] <-factor(seu_obj@meta.data[,groups[i]], levels = names(table(seu_obj@meta.data[,groups[i]])))
131 |       }
132 |       group_level<-levels(seu_obj@meta.data[,groups[i]])
133 |       all_levels[[i]]<-group_level
134 |     }
135 |     all_levels<-as.character(unlist(all_levels))
136 |     data_plot<-list()
137 |     for(i in 1:length(groups)){
138 |       count_df <- gene_count
139 |       count_df$new_group<-paste(gene_count[,groups[i]], gene_count[,"celltype"],sep = "___")
140 |       exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
141 |       pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
142 |       colnames(exp_df)[2]<-"avg.exp"
143 |       colnames(pct_df)[2]<-"pct.exp"
144 |       df1<-merge(exp_df, pct_df, by='new_group')
145 |       df1$groupID <- groups[i]
146 |       data_plot[[i]] <- df1
147 |     }
148 |     data_plot <- do.call("rbind", data_plot)
149 |     data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
150 |     data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
151 |     data_plot$groups <- factor(data_plot$groups, levels = all_levels)
152 |     data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
153 |     data_plot$groupID <- factor(data_plot$groupID, levels = groups)
154 |     data_plot$pct.exp <- round(100 * data_plot$pct.exp, 2)
155 |     data_plot$avg.exp <- scale(data_plot$avg.exp)
156 |     if(is.null(splitby)){
157 |       p<-ggplot(data_plot, aes(y = celltype, x = groups)) +  
158 |         geom_tile(fill="white", color="white") +
159 |         geom_point(aes( colour=avg.exp, size =pct.exp))  +  
160 |         scale_color_gradientn(colours  =  color.palette )+ 
161 |         theme(panel.background = element_rect(fill = "white", colour = "black"),
162 |               axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
163 |               plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
164 |               axis.text = element_text(size = font.size),
165 |               legend.text=element_text(size=(font.size-2)),
166 |               legend.title = element_text(size = (font.size)),
167 |               strip.text = element_text( size = font.size),
168 |               legend.position="right")+
169 |         ylab("")+xlab("")+ggtitle(feature)+facet_wrap(~groupID, scales = 'free_x')
170 |       if(do.scale){
171 |         p = p + scale_size(range = c(0, 10))
172 |       } else {
173 |         if(max(data_plot$pct.exp)>=20){
174 |           p = p + scale_size(range = c(0, 10))
175 |         } else {
176 |           p = p + scale.func(range = c(0, 10), limits = c(0, 20))
177 |         }
178 |       }
179 |       g <- change_strip_background(p, type = 'top',  strip.color = strip.color)
180 |       print(grid::grid.draw(g))
181 |     } else {
182 |       df2<-reshape2::melt(gene_count[,c(groups, splitby)], measure.vars  = groups)
183 |       df2<-df2[!duplicated(df2$value),]
184 |       colnames(df2)[colnames(df2) == "value"]<-"groups"
185 |       data_plot2<-list()
186 |       for(i in 1:length(groups)){
187 |         df3<-data_plot[data_plot$groupID==groups[i],]
188 |         df4<-df2[df2$variable==groups[i],c('groups', splitby[i])]
189 |         colnames(df4)[2]<-"split"
190 |         df5<-merge(df3, df4, by='groups')
191 |         data_plot2[[i]]<-df5
192 |       }
193 |       data_plot2<-do.call("rbind", data_plot2)
194 |       fill_x1<-grDevices::rainbow(length(groups), alpha = 0.5)
195 |       fill_x2<-list()
196 |       for(i in 1:length(splitby)){
197 |         n_col<-unique(gene_count[, splitby[i]])
198 |         fill_x2[[i]]<-scales::hue_pal(l=90)(length(n_col))
199 |       }
200 |       fill_x2<-as.character(unlist(fill_x2))
201 |       fill_x <- c(fill_x1, fill_x2)
202 |       p<-ggplot(data_plot2, aes(y = celltype, x = groups)) +  
203 |         geom_tile(fill="white", color="white") +
204 |         geom_point(aes( colour=avg.exp, size =pct.exp))  +  
205 |         scale_color_gradientn(colours  =  color.palette )+ 
206 |         theme(panel.background = element_rect(fill = "white", colour = "black"),
207 |               axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
208 |               plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
209 |               axis.text = element_text(size = font.size),
210 |               legend.text=element_text(size=(font.size-2)),
211 |               legend.title = element_text(size = (font.size)),
212 |               strip.text = element_text( size = font.size),
213 |               legend.position="right")+
214 |         ylab("")+xlab("")+ggtitle(feature)+
215 |         facet_nested(~ groupID + split, scales = "free_x", 
216 |                      strip = strip_nested( background_x = elem_list_rect(fill = fill_x)))
217 |       if(do.scale){
218 |         p = p + scale_size(range = c(0, 10))
219 |       } else {
220 |         if(max(data_plot$pct.exp)>=20){
221 |           p = p + scale_size(range = c(0, 10))
222 |         } else {
223 |           p = p + scale.func(range = c(0, 10), limits = c(0, 20))
224 |         }
225 |       }
226 |       p
227 |     }
228 |   }
229 | }
230 | 
231 | #' Plot multiple genes across groups
232 | #'
233 | #' This function allows for visualization of multiple genes in multiple groups. 
234 | #' It takes the single gene expression data generated by PlotSingleGeneGroup,
235 | #' concatenate all data, and produces a dotplot graph where the group ID are in
236 | #' x axis, wrapped by cell types, genes are on the y axis.
237 | #'
238 | #' @param seu_obj A complete Seurat object
239 | #' @param features A vector of gene names.
240 | #' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
241 | #' @param groups Group ID must be one of the column names in the meta.data slot of the Seurat object.
242 | #' @param color.palette Color for gene expression.
243 | #' @param strip.color Colors for the strip background
244 | #' @return A ggplot object
245 | #' @export
246 | 
247 | complex_dotplot_multiple <- function(
248 |   seu_obj, 
249 |   features, 
250 |   celltypes=NULL,
251 |   groups, 
252 |   color.palette = NULL,
253 |   strip.color = NULL
254 |   ){
255 |  pb <- progress_bar$new(
256 |    format = "  Ploting [:bar] :percent eta: :eta",
257 |    clear = FALSE, total = length(features), width = 100)
258 |  plot_list<-list()
259 |  for(i in 1:length(features)){
260 |   pp<-invisible(
261 |     complex_dotplot_single(seu_obj = seu_obj, feature = features[i], groups = groups, celltypes = celltypes)
262 |   )
263 |   pp<-pp$data
264 |   pp$gene <- features[i]
265 |   plot_list[[i]]<-pp
266 |   pb$tick()
267 |   Sys.sleep(1 / length(features))
268 |   }
269 |   all_data<-do.call('rbind', plot_list)
270 |   all_data$gene<-factor(all_data$gene, levels = rev(features)) 
271 |   all_data$celltype <- factor(all_data$celltype, levels = levels(seu_obj))
272 |   if(is.null(color.palette)){
273 |     color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
274 |   }
275 |   p <- invisible(
276 |     ggplot(all_data, aes(x = groups, y = gene)) +  
277 |     geom_tile(fill="white", color="white") +
278 |     geom_point(aes( colour=avg.exp, size =pct.exp), alpha=0.9)  +  
279 |     scale_color_gradientn(colours  =  color.palette)+ 
280 |     scale_size(range = c(0, 10))+
281 |     theme(panel.background = element_rect(fill = "white", colour = "black"),
282 |           axis.text.x = element_text(angle = 45, hjust = 1),
283 |           plot.title = element_text(size = 16,hjust = 0.5, face = 'bold'),
284 |           axis.text = element_text(size = 12),
285 |           axis.title=element_text(size=8),
286 |           legend.text=element_text(size=8),
287 |           legend.title = element_text(size = 12),
288 |           legend.position="right",
289 |           strip.text = element_text(size = 14,colour = 'black',face = 'bold'))+
290 |     ylab("")+xlab("")+ggtitle('')+
291 |     facet_wrap(~celltype, ncol = length(levels(seu_obj)))
292 |   )
293 |   g <- change_strip_background(p, type = 'top',  strip.color = strip.color)
294 |   print(grid.draw(g))
295 | }
296 | 
297 | 
298 | 


--------------------------------------------------------------------------------
/R/plot_extra.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' Plot the qPCR results
  5 | #'
  6 | #' This function is to process the Cq file generated by the qPCR machine 
  7 | #' in our lab. 
  8 | #' @param qPCR_file Path to the Cq file from qPCR machine. Must be in csv format.
  9 | #' @param metadata_file Path to the metadata file to assign your samples into groups. Must be in csv format.
 10 | #' @param ref_gene The gene used for normalization. e.g. GAPDH. 
 11 | #' @param ref_sample The sample used as reference sample. e.g. sample from the control group.
 12 | #' @param file_name The output file name
 13 | #' @return There are three output files produced from this script. The csv file contains the average quantitative value (2^-ΔΔCt) for each sample (and each gene) after normalized by the reference gene (e.g. GAPDH) and the reference sample (e.g. sample from the control group). This file can be input into Graphpad Prism. The txt file includes all statistics from comparisons of any two given groups. If the run has two groups only, Welch's t test will be performed. Otherwise, one-way ANOVA with post-hoc Tukey's test will be performed. Finally, the tiff file is a boxplot graph to visualize the gene expression across groups. 
 14 | #' @export
 15 | plot_qpcr<-function(
 16 |   qPCR_file, 
 17 |   metadata_file, 
 18 |   ref_gene, 
 19 |   ref_sample, 
 20 |   file_name
 21 |   ){
 22 |   qPCR_data<-read.csv(qPCR_file)
 23 |   meta.data<-read.csv(metadata_file)
 24 |   meta.data[,'Sample']<-as.character(meta.data[,'Sample'])
 25 |   qPCR_data<-qPCR_data[,c('Target','Sample','Cq')]
 26 |   qPCR_data[,'Sample']<-as.character(qPCR_data[,'Sample'])
 27 |   all.genes<-unique(qPCR_data$Target)
 28 |   gene.exist<-ref_gene %in% all.genes
 29 |   if (!gene.exist)
 30 |     stop("Reference gene ID is not correct! Please double check!")
 31 |   all.samples<- unique(qPCR_data$Sample)
 32 |   sample.exist<-ref_sample %in% all.samples
 33 |   if (!sample.exist)
 34 |     stop("The calibrator sample ID is not correct! Please double check!")
 35 |   qPCR_data<-qPCR_data %>% na_if("") %>% na.omit
 36 |   qPCR_data<-qPCR_data %>% group_by(Sample, Target) %>% summarize(Mean = mean(Cq, na.rm=TRUE))
 37 |   genes<-setdiff(all.genes,ref_gene)
 38 |   data_rebuild<-dcast(qPCR_data, Sample ~ Target)
 39 |   data_rebuild<-data_rebuild[data_rebuild$Sample %in% meta.data$Sample,]
 40 |   new.data<-matrix(data = NA, ncol = length(genes), nrow = nrow(data_rebuild))
 41 |   for (i in 1: length(genes)){
 42 |     new.data[,i]<-data_rebuild[,genes[i]]-data_rebuild[,ref_gene]
 43 |   }
 44 |   new.data<-data.frame(new.data)
 45 |   colnames(new.data)<-genes
 46 |   new.data[,'Sample']<-data_rebuild[,'Sample']
 47 |   ref.ct<-new.data[new.data[,'Sample']==ref_sample,][-ncol(new.data)]
 48 |   ref.ct<-as.numeric(ref.ct)
 49 |   new.data2<-new.data[,-ncol(new.data)]
 50 |   new.data2<-data.frame(new.data2)
 51 |   for (i in 1: length(genes)){
 52 |     new.data2[,i]<-new.data[,genes[i]]-ref.ct[i]
 53 |   }
 54 |   new.data2<-2^(-new.data2)
 55 |   colnames(new.data2)<-genes
 56 |   new.data2[,'Sample']<-data_rebuild[,'Sample']
 57 |   new.data2<-merge(new.data2, meta.data, by = 'Sample')
 58 |   write.csv(new.data2, paste0(file_name, '_processed.csv'))
 59 |   groups<-unique(as.character(new.data2$Group))
 60 |   if (length(groups) != 1) {
 61 |     if (length(groups)<=2) {
 62 |       test_stat<-lapply(new.data2[c(-1, -ncol(new.data2))], function(x) t.test(x ~ new.data2$Group))
 63 |       sink(paste0(file_name, '_stats.txt'))
 64 |       print(test_stat)
 65 |       sink() 
 66 |     } else {
 67 |       new.data2$Group <- factor(new.data2$Group, levels= groups)
 68 |       formulae <- lapply(colnames(new.data2)[2:(ncol(new.data2)-1)], function(x) as.formula(paste0(x, " ~ Group")))
 69 |       res1 <- lapply(formulae, function(x) summary(aov(x, data = new.data2)))
 70 |       names(res1) <- format(formulae)
 71 |       res2 <- lapply(formulae, function(x) TukeyHSD(aov(x, data = new.data2)))
 72 |       names(res2) <- format(formulae)
 73 |       res<-c(res1, res2)
 74 |       sink(paste0(file_name, '_stats.txt'))
 75 |       print(res)
 76 |       sink() 
 77 |     }
 78 |   }
 79 |   new.data2<-melt(new.data2)
 80 |   group<-unique(as.character(meta.data$Group))
 81 |   new.data2[,'Group']<-factor(new.data2[,'Group'], levels = group)
 82 |   p1<-ggplot(new.data2, aes(x=Group, y=value))+
 83 |     stat_boxplot(geom ='errorbar', width = 0.2) +
 84 |     geom_boxplot( aes(Group, value, color=Group), width=0.5) +    
 85 |     geom_quasirandom()+
 86 |     ylab("Fold change")+xlab("")+ 
 87 |     theme(panel.background = element_rect(fill = "white", colour = "grey50"),
 88 |           strip.text = element_text(size = 12),axis.title = element_text(size = 12),
 89 |           axis.text.x = element_blank(),legend.position = "bottom", legend.text = element_text(size = 12), 
 90 |           legend.title = element_text(size = 12, face = 'bold'))+
 91 |     facet_wrap(~variable, scales = 'free_y', ncol = 2)
 92 |   num=length(genes)
 93 |   if (num ==1){
 94 |     tiff(paste0(file_name,'.tiff'), units="in", width=3, height=3, res=300)
 95 |     print(p1)
 96 |     invisible(dev.off())
 97 |   } else if (num ==2){
 98 |     tiff(paste0(file_name,'.tiff'), units="in", width=6, height=3, res=300)
 99 |     print(p1)
100 |     invisible(dev.off())
101 |   } else if (num>2 & (num %% 2) == 0) {
102 |     tiff(paste0(file_name,'.tiff'), units="in", width=6, height=3*(num%/%2), res=300)
103 |     print(p1)
104 |     invisible(dev.off())
105 |   } else {
106 |     tiff(paste0(file_name,'.tiff'), units="in", width=6, height=3*(1+num%/%2), res=300)
107 |     print(p1)
108 |     invisible(dev.off())
109 |   }
110 | }
111 | 


--------------------------------------------------------------------------------
/R/plot_heatmap.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' Plot gene expression across groups using ComplexHeatmap
  5 | #'
  6 | #' This function is for identifying the group-specific genes in a selected celltype
  7 | #' and plot the expression of those genes in heatmap.
  8 | #'
  9 | #' @param seu_obj A complete Seurat object
 10 | #' @param celltype Cell types selected for gene plot.
 11 | #' @param group The group to show on x axis. One of the column names in meta.data.
 12 | #' @param gene_highlight Gene names showing on the rows. Default: all genes
 13 | #' @param logfc Fold change to select the genes
 14 | #' @param return_marker If TRUE, a list of specific gene will be returned.
 15 | #' @param col_fun Heatmap color key.
 16 | #' @return A ComplexHeatmap object or/and a gene list
 17 | #' @export
 18 | 
 19 | complex_heatmap_unique<-function(
 20 |   seu_obj,
 21 |   celltype,
 22 |   group,
 23 |   gene_highlight=NULL,
 24 |   logfc=0.5,
 25 |   return_marker=FALSE,
 26 |   col_fun = colorRamp2(c(-2, -1, 0, 1, 2), rev(c("#BF0080", "#CE6EAE", "#dddddd", "#6EAE6E", "#008000")))
 27 | ){
 28 | cell1<-subset(seu_obj, idents=celltype)
 29 | cell1<-SetIdent(cell1, value = group)
 30 | group_levels<-levels(seu_obj@meta.data[,group])
 31 | if (is.null(group_levels)){
 32 |   seu_obj@meta.data[,group] <-factor(seu_obj@meta.data[,group], levels = names(table(seu_obj@meta.data[,group])))
 33 |   group_levels<-levels(seu_obj@meta.data[,group])
 34 |   }
 35 | levels(cell1)<-group_levels
 36 | cell1_avg<-AverageExpression(cell1, verbose = F, return.seurat = F, assays = "RNA")
 37 | cell1_avg<-cell1_avg$RNA
 38 | cell1_avg<-data.frame(cell1_avg)
 39 | all_markers<-FindAllMarkers(cell1, min.pct = 0.1, logfc.threshold = logfc,verbose = F)
 40 | all_markers1<-all_markers[all_markers$avg_log2FC>0,]
 41 | all_markers2<-all_markers[all_markers$avg_log2FC<0,]
 42 | unique_list1<-list()
 43 | for(i in 1:length(group_levels)){
 44 |   group1<-all_markers1[all_markers1$cluster==group_levels[i],]
 45 |   group2<-all_markers1[all_markers1$cluster!=group_levels[i],]
 46 |   group1_unique<-setdiff(group1$gene, intersect(group1$gene, group2$gene))
 47 |   unique_list1[[i]]<-group1_unique
 48 | }
 49 | unique_list2<-list()
 50 | for(i in 1:length(group_levels)){
 51 |   group1<-all_markers2[all_markers2$cluster==group_levels[i],]
 52 |   group2<-all_markers2[all_markers2$cluster!=group_levels[i],]
 53 |   group1_unique<-setdiff(group1$gene, intersect(group1$gene, group2$gene))
 54 |   unique_list2[[i]]<-group1_unique
 55 | }
 56 | unique_list<-c(unique_list1,unique_list2)
 57 | unique_genes<-as.character(unlist(unique_list))
 58 | data_plot<-cell1_avg[unique_genes,]
 59 | gene_num<-c()
 60 | for(i in 1:length(unique_list)){
 61 |   gene_num[i]<-length(unique_list[[i]])
 62 | }
 63 | unique_list<-unique_list[which(gene_num!=0)]
 64 | col_split<-group_levels
 65 | col_split<-factor(col_split, levels = group_levels)
 66 | gene_groups<-c(paste0(group_levels, "_up"),paste0(group_levels, "_down"))
 67 | gene_groups<-gene_groups[which(gene_num!=0)]
 68 | row_split<-list()
 69 | for(i in 1:length(gene_groups)){
 70 |   row_split[[i]]<-rep(gene_groups[i], length(unique_list[[i]]))
 71 | }
 72 | row_split<-as.character(unlist(row_split))
 73 | row_split<-factor(row_split, levels = gene_groups)
 74 | term = list()
 75 | for(i in 1:length(gene_groups)){
 76 |   txt1<-as.character(unlist(strsplit(gene_groups[i], split = "_")))
 77 |   term[[i]]<-data.frame(txt=txt1, index=c(1,2))
 78 | }
 79 | names(term) = gene_groups
 80 | data_plot<-t(scale(t(data_plot)))
 81 | label1<-gene_highlight
 82 | if(is.null(label1)){
 83 |   label1=rownames(data_plot)
 84 | }
 85 | ht_opt$message = FALSE
 86 | ht<- Heatmap(data_plot,name = "mat", cluster_rows = F, cluster_columns = F, 
 87 |              column_title = NULL,col = col_fun, row_title = NULL,
 88 |              cluster_row_slices = FALSE, cluster_column_slices = FALSE,
 89 |              column_split  = col_split, row_split = row_split,
 90 |              column_gap = unit(0, "mm"), row_gap = unit(0, "mm"), 
 91 |              heatmap_legend_param = list(direction = "horizontal",title = celltype)
 92 | ) +
 93 |   rowAnnotation(link = anno_mark(at = match(label1,unique_genes), which = 'row',
 94 |                                  labels = label1, 
 95 |                                  labels_gp = gpar(fontsize = 10), padding = unit(1, "mm"))) +
 96 |   rowAnnotation(wc = anno_word_cloud(row_split, term, add_new_line = TRUE,
 97 |                                      value_range = c(1, 2), fontsize_range = c(12, 12)) ) 
 98 | ht=draw(ht,heatmap_legend_side = "top")
 99 | down_groups<-sum(grepl("down", gene_groups))
100 | gaps<-c(1:length(group_levels), 1:length(group_levels))
101 | gaps<-gaps[which(gene_num!=0)]
102 | for(i in 1:length(gene_groups)){
103 |   if(i<=down_groups){
104 |     decorate_heatmap_body("mat", row_slice = i, column_slice = 1, code = {
105 |       grid.rect(unit(gaps[i]-1, "npc"), unit(1, "npc"), 
106 |                 width = 1 * unit(1, "npc"),
107 |                 height = 1 * unit(1, "npc"),
108 |                 gp = gpar(lwd = 2, lty = 1, fill=NA, col='black'), just = c("left", "top") 
109 |       )
110 |     }
111 |     )
112 |   } else {
113 |     decorate_heatmap_body("mat", row_slice = i, column_slice = 1, code = {
114 |       grid.rect(unit(gaps[i]-1, "npc"), unit(1, "npc"), 
115 |                 width = 1 * unit(1, "npc"),
116 |                 height = 1 * unit(1, "npc"),
117 |                 gp = gpar(lwd = 2, lty = 1, fill=NA, col='blue'), just = c("left", "top") 
118 |       )
119 |     }
120 |     )
121 |   }
122 | }
123 | if(return_marker){
124 |   names(unique_list)<-gene_groups
125 |   return(unique_list)
126 | }
127 | }
128 | 


--------------------------------------------------------------------------------
/R/plot_umap.R:
--------------------------------------------------------------------------------
 1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | # Functions
 3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 4 | #' Plot gene expression on umap
 5 | #'
 6 | #' This function can be used for plotting a single gene or multiple genes expression across 
 7 | #' different groups in a seurat featureplot format. 
 8 | #'
 9 | #' @param seu_obj A complete Seurat object.
10 | #' @param features Gene names to be plotted.
11 | #' @param group The group to show on y axis. One of the column names in meta.data.
12 | #' @param select Select the elements within the group to show.
13 | #' @param cols Change the color legend.
14 | #' @param label.size Change the label size.
15 | #' @param strip.color Colors for the strip background.
16 | #' @param pt.size Point size for each cell.
17 | #' @return A ggplot object
18 | #' @export
19 | 
20 | complex_featureplot<-function(
21 |   seu_obj, 
22 |   features, 
23 |   group, 
24 |   select = NULL, 
25 |   cols = NULL, 
26 |   label.size = 12, 
27 |   order = F, 
28 |   strip.color = NULL,
29 |   pt.size = 0.01
30 | ){
31 |   gene_count<-extract_gene_count(seu_obj,features = features, meta.groups = group)
32 |   if (is.null(levels(seu_obj@meta.data[,group]))){
33 |     seu_obj@meta.data[,group] <-factor(seu_obj@meta.data[,group], levels = names(table(seu_obj@meta.data[,group])))
34 |   }
35 |   group_level<-levels(seu_obj@meta.data[,group])
36 |   gene_count[,group]<-factor(gene_count[,group],
37 |                                     levels = group_level)
38 |   if(!is.null(select)){
39 |     gene_count<-gene_count[gene_count[, group] %in% select,]
40 |   }
41 |   colnames(gene_count)[which(colnames(gene_count)==group)]<-"group"
42 |   all_col<-setdiff(colnames(gene_count), features)
43 |   df_list<-list()
44 |   for(i in 1:length(features)){
45 |     df<-gene_count[, c(features[i], all_col)]
46 |     df$gene<-features[i]
47 |     colnames(df)[1]<-"Exp"
48 |     df$Exp<-rescale(df$Exp, to = c(0,5))
49 |     df_list[[i]]<-df
50 |   }
51 |   df_all<-do.call("rbind", df_list)
52 |   if(is.null(cols)){
53 |     cols=colorRampPalette(c('grey90','lemonchiffon1','indianred1','darkred'))(255)
54 |   }
55 |   df_all$gene<-factor(df_all$gene, levels=features)
56 |   if(order){
57 |     df_all$isExpr<-ifelse(df_all$Exp>0, "Yes", "NO")
58 |     p<-ggplot(df_all, aes(UMAP1, UMAP2))+geom_point(color="gray80",size=pt.size)+
59 |       geom_point(data = df_all[df_all$isExpr=="Yes",], aes(UMAP1, UMAP2, color=Exp), size=pt.size)
60 |   } else {
61 |     p<-ggplot(df_all, aes(UMAP1, UMAP2, color=Exp))+geom_point(size=pt.size)
62 |   }
63 |   p<-p +
64 |     scale_color_gradientn(colours  =  cols,
65 |                           na.value = "white", limits=c(quantile(df_all$Exp, 0,na.rm= T), quantile(df_all$Exp, 1,na.rm= T)),
66 |                           breaks = c(quantile(df_all$Exp, 0,na.rm= T), quantile(df_all$Exp, 1,na.rm= T)), labels = c("min","max"))+ 
67 |     theme(panel.background = element_rect(fill = "white", colour = "white"),
68 |           axis.ticks = element_blank(),
69 |           axis.line = element_blank(),
70 |           axis.text = element_blank(),
71 |           axis.title=element_blank(),
72 |           strip.text = element_text(size=label.size),
73 |           legend.title = element_blank(),
74 |           legend.key.size = unit(0.5, 'cm'),
75 |           legend.position="bottom")+
76 |     facet_grid(group ~ gene)
77 |   g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
78 |   print(grid::grid.draw(g))
79 | }


--------------------------------------------------------------------------------
/R/plot_upset.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' UpSet plot to visualize the number of unique and shared DEGs across group 
  5 | #'
  6 | #' This function takes Seurat object as input and visualize the genes that 
  7 | #' are unique to a particular group or shared by multiple groups. 
  8 | #' @param seu_obj A complete Seurat object.
  9 | #' @param celltype The cell type to analyze.
 10 | #' @param group Group factor in meta data.
 11 | #' @param logfc Log fold change to select the DEGs
 12 | #' @param min_size Minimal number of observations in an intersection for it to be included
 13 | #' @return An UpSet plot
 14 | #' @export
 15 | 
 16 | complex_upset_plot<-function(
 17 |   seu_obj, 
 18 |   celltype, 
 19 |   group,
 20 |   logfc = 0.5,
 21 |   min_size = 1
 22 | ){
 23 |   cell1<-subset(seu_obj, idents=celltype)
 24 |   cell1<-SetIdent(cell1, value = group)
 25 |   group_levels<-levels(seu_obj@meta.data[,group])
 26 |   if (is.null(group_levels)){
 27 |     seu_obj@meta.data[,group] <-factor(seu_obj@meta.data[,group], levels = names(table(seu_obj@meta.data[,group])))
 28 |     group_levels<-levels(seu_obj@meta.data[,group])
 29 |   }
 30 |   levels(cell1)<-group_levels
 31 |   all_markers<-FindAllMarkers(cell1, min.pct = 0.1, logfc.threshold = logfc,verbose = F)
 32 |   all_markers1<-all_markers[all_markers$avg_log2FC>0,]
 33 |   all_markers2<-all_markers[all_markers$avg_log2FC<0,]
 34 |   gene_list1<-list()
 35 |   for(i in 1:length(group_levels)){
 36 |     cluster_marker <- all_markers1[all_markers1$cluster == group_levels[i],]$gene
 37 |     cluster_marker <- data.frame("gene" = cluster_marker)
 38 |     cluster_marker$cell1 <- 1
 39 |     colnames(cluster_marker)[2] <- group_levels[i]
 40 |     gene_list1[[i]] <- cluster_marker
 41 |   }
 42 |   combined_data1 <- purrr::reduce(gene_list1, full_join)
 43 |   combined_data1[is.na(combined_data1)] <- 0
 44 |   gene_list2<-list()
 45 |   for(i in 1:length(group_levels)){
 46 |     cluster_marker <- all_markers2[all_markers2$cluster == group_levels[i],]$gene
 47 |     cluster_marker <- data.frame("gene" = cluster_marker)
 48 |     cluster_marker$cell1 <- 1
 49 |     colnames(cluster_marker)[2] <- group_levels[i]
 50 |     gene_list2[[i]] <- cluster_marker
 51 |   }
 52 |   combined_data2 <- purrr::reduce(gene_list2, full_join)
 53 |   combined_data2[is.na(combined_data2)] <- 0
 54 |   combined_data1$Direction<-"Upregulated"
 55 |   combined_data2$Direction<-"Downregulated"
 56 |   
 57 |   gene_count1<-data.frame(table(all_markers1$gene))
 58 |   colnames(gene_count1)[1]<-"gene"
 59 |   combined_data1<-merge(combined_data1, gene_count1, by='gene')
 60 |   combined_data1$Freq<-as.integer(combined_data1$Freq)
 61 |   combined_data1$type<-ifelse(combined_data1$Freq==1, "Unique","Shared")
 62 |   
 63 |   gene_count2<-data.frame(table(all_markers2$gene))
 64 |   colnames(gene_count2)[1]<-"gene"
 65 |   combined_data2<-merge(combined_data2, gene_count2, by='gene')
 66 |   combined_data2$Freq<-as.integer(combined_data2$Freq)
 67 |   combined_data2$type<-ifelse(combined_data2$Freq==1, "Unique","Shared")
 68 |   
 69 |   combined_data<-rbind(combined_data1, combined_data2)
 70 | 
 71 | 
 72 |   metadata<-data.frame(set=group_levels)
 73 |   metadata$color_col<-metadata$set
 74 |   upset(combined_data, group_levels, 
 75 |         base_annotations=list(
 76 |           "Unique or shared DEG"=intersection_size(
 77 |             counts=T,
 78 |             mapping=aes(fill=Direction),
 79 |             width=0.7,
 80 |             alpha=0.4
 81 |           ) + scale_fill_manual(values= c("blue", "orange"))+
 82 |             theme_void()+
 83 |             theme(legend.position = "top", legend.title = element_blank())
 84 |         ),
 85 |         set_sizes=(
 86 |           upset_set_size(
 87 |             geom=geom_bar(
 88 |               aes(fill=type, x=group),
 89 |               width=0.7
 90 |             ),
 91 |             position='right'
 92 |           )+ scale_fill_manual(values= c("hotpink",'green'))+theme_void()+
 93 |             theme(axis.line.x = element_line(colour = "black"),
 94 |                   axis.ticks.x =element_line(size = 0.5, color="black") ,
 95 |                   axis.ticks.length = unit(.05, "cm"),
 96 |                   axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 8))
 97 |         ),
 98 |         width_ratio=0.1,
 99 |         stripes=upset_stripes(
100 |           geom = geom_point(size=0.1),
101 |           mapping=aes(color=color_col),
102 |           data=metadata
103 |         ),
104 |         name = celltype,
105 |         min_size = min_size
106 |   )
107 | }
108 | 


--------------------------------------------------------------------------------
/R/plot_violin.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' Violin plot for a single gene across groups
  5 | #'
  6 | #' This function generates violin plot(s) to compare the expression of a single gene across 
  7 | #' different groups or cell types. It is designed for visualizing a complicated scenario: 
  8 | #' Gene expression on multiple cell types and multiple conditions.
  9 | #'
 10 | #' @param seu_obj A complete Seurat object.
 11 | #' @param feature Gene name. Only one gene is allowed.
 12 | #' @param celltypes Cell types of interest. By default, all cell types are included.
 13 | #' @param groups Groups selected for plotting. Support multiple groups.
 14 | #' @param add.dot Whether or not to add points on the violins.
 15 | #' @param font.size Font size for the labels.
 16 | #' @param pt.size Point size for the data points on the violin.
 17 | #' @param splitby Group to split the gene expression. Only works when length(groups)==1.
 18 | #' @param alpha Point transparency. value from 0 to 1.
 19 | #' @param strip.color Colors for the strip background.
 20 | #' @return A ggplot object
 21 | #' @export
 22 | 
 23 | 
 24 | complex_vlnplot_single <- function(
 25 |   seu_obj,
 26 |   feature,
 27 |   celltypes=NULL,
 28 |   groups,
 29 |   add.dot = T,
 30 |   font.size=14,
 31 |   pt.size=0.1,
 32 |   splitby=NULL,
 33 |   alpha=0.5,
 34 |   strip.color=NULL
 35 | ){
 36 |   if(length(feature)>1){
 37 |     stop("Only one gene is allowed in this method. Please use complex_vlnplot_multiple if you want to plot multiple genes.")
 38 |   }
 39 |   if(is.null(celltypes)){
 40 |     celltypes = levels(seu_obj)
 41 |   } 
 42 |   gene_count<-extract_gene_count(seu_obj=seu_obj, features = feature, cell.types = celltypes, meta.groups = c(groups, splitby))
 43 |   allgroups<-c(groups,splitby )
 44 |   for(i in 1:length(allgroups)){
 45 |     if (is.null(levels(seu_obj@meta.data[,allgroups[i]]))){
 46 |       seu_obj@meta.data[,allgroups[i]] <-factor(seu_obj@meta.data[,allgroups[i]], levels = names(table(seu_obj@meta.data[,allgroups[i]])))
 47 |     }
 48 |     group_level<-levels(seu_obj@meta.data[,allgroups[i]])
 49 |     gene_count[,allgroups[i]]<-factor(gene_count[,allgroups[i]],
 50 |                                       levels = group_level)
 51 |   }
 52 |   set.seed(seed = 42)
 53 |   noise <- rnorm(n = length(x = gene_count[,feature])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
 54 |   gene_count[, feature]<-gene_count[,feature]+noise
 55 |   gene_count$celltype<-factor(gene_count$celltype, levels = celltypes)
 56 |   if (length(groups)==1) {
 57 |     p<-ggplot(gene_count, aes_string(x = groups, y = feature, fill = groups)) +
 58 |       geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
 59 |       xlab("") +
 60 |       ylab("") +
 61 |       ggtitle(feature)+
 62 |       theme(panel.background = element_rect(fill = "white",colour = "black"),
 63 |             axis.title = element_text(size = font.size), 
 64 |             axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
 65 |             axis.text.y = element_text(size=(font.size-2)),
 66 |             strip.text = element_text( size = font.size),
 67 |             legend.title = element_blank(),
 68 |             legend.position = 'none',
 69 |             plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5))
 70 |     if(add.dot){
 71 |       p = p + geom_quasirandom(size=pt.size, alpha=0.2)
 72 |     }
 73 |     if(is.null(splitby)){
 74 |       p <- p + facet_wrap(~celltype, ncol = 1, strip.position = "right")
 75 |       g <- change_strip_background(p, type = 'right',  strip.color = strip.color)
 76 |       print(grid::grid.draw(g))
 77 |     } else {
 78 |       p<-p + facet_grid(as.formula(paste("celltype","~", splitby)), scales = "free_x") 
 79 |       g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
 80 |       print(grid::grid.draw(g))
 81 |     }
 82 |     
 83 |   } else {
 84 |     if(is.null(splitby)){
 85 |     all_levels<-list()
 86 |     for(i in 1:length(groups)){
 87 |       if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
 88 |         seu_obj@meta.data[,groups[i]] <-factor(seu_obj@meta.data[,groups[i]], levels = names(table(seu_obj@meta.data[,groups[i]])))
 89 |       }
 90 |       group_level<-levels(seu_obj@meta.data[,groups[i]])
 91 |       all_levels[[i]]<-group_level
 92 |     }
 93 |     all_levels<-as.character(unlist(all_levels))
 94 |     gene_count<-reshape2::melt(gene_count[,c(feature, groups, "celltype")], measure.vars  = groups)
 95 |     gene_count$value<-factor(gene_count$value, levels = all_levels)
 96 |     gene_count$celltype<-factor(gene_count$celltype, levels = celltypes)
 97 |     p<-ggplot(gene_count, aes_string(x="value", y=feature, fill="value"))+
 98 |       geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
 99 |       xlab("") + ylab("") + ggtitle(feature) +
100 |       theme(panel.background = element_rect(fill = "white",colour = "black"),
101 |             axis.title = element_text(size = font.size), 
102 |             axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
103 |             axis.text.y = element_text(size=(font.size-2)),
104 |             strip.text = element_text( size = font.size),
105 |             legend.title = element_blank(),
106 |             legend.position = 'none',
107 |             plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5))+
108 |       facet_grid(celltype~variable, scales = 'free_x')
109 |     if(add.dot){
110 |       p = p + geom_quasirandom(size=pt.size, alpha=0.2)
111 |     }
112 |     g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
113 |     print(grid::grid.draw(g))
114 |     } else {
115 |       count_list<-list()
116 |       for(i in 1:length(groups)){
117 |         df1<-gene_count[, c(groups[i],splitby[i],feature, "celltype")]
118 |         colnames(df1)[1:2]<-c("group","split")
119 |         df1$new_group<-groups[i]
120 |         count_list[[i]]<-df1
121 |       }
122 |       new_count<-do.call("rbind", count_list)
123 |       new_count$celltype<-factor(new_count$celltype, levels = celltypes)
124 |       group_level<-list()
125 |       for(i in 1:length(groups)){
126 |         if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
127 |           seu_obj@meta.data[,groups[i]] <-factor(seu_obj@meta.data[,groups[i]], levels = names(table(seu_obj@meta.data[,groups[i]])))
128 |         }
129 |         group_level[[i]]<-levels(seu_obj@meta.data[,groups[i]])
130 |       }
131 |       group_level<-as.character(unlist(group_level))
132 |       new_count$group<-factor(new_count$group, levels=group_level)
133 |       fill_x1<-grDevices::rainbow(length(groups), alpha = 0.5)
134 |       fill_x2<-list()
135 |       for(i in 1:length(splitby)){
136 |         n_col<-unique(gene_count[, splitby[i]])
137 |         fill_x2[[i]]<-scales::hue_pal(l=90)(length(n_col))
138 |       }
139 |       fill_x2<-as.character(unlist(fill_x2))
140 |       fill_x <- c(fill_x1, fill_x2)
141 |       fill_y <- scales::hue_pal(l=90)(length(celltypes))
142 |       p<- ggplot(new_count, aes_string(x="group", y=feature, fill="group"))+
143 |         geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
144 |         xlab("") + ylab("") + ggtitle(feature) +
145 |         theme(panel.background = element_rect(fill = "white",colour = "black"),
146 |               axis.title = element_text(size = font.size), 
147 |               axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
148 |               axis.text.y = element_text(size=(font.size-2)),
149 |               strip.text = element_text( size = font.size),
150 |               legend.title = element_blank(),
151 |               legend.position = 'none',
152 |               plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5)) +
153 |         facet_nested(celltype ~ new_group + split, scales = "free_x", 
154 |                      strip = strip_nested( background_x = elem_list_rect(fill = fill_x),
155 |                                            background_y = elem_list_rect(fill = fill_y)))
156 |       if(add.dot){
157 |         p = p + geom_quasirandom(size=pt.size, alpha=0.2)
158 |         print(p)
159 |       } else {
160 |         p
161 |       }
162 |     }
163 |   }
164 | }
165 | 
166 | #' Violin plot for multiple genes across groups
167 | #'
168 | #' This function generates violin plot(s) to compare the expression of multiple genes across 
169 | #' different groups or cell types. It is designed for visualizing a complicated scenario: 
170 | #' Gene expression of multiple genes on multiple cell types across groups.
171 | #'
172 | #' @param seu_obj A complete Seurat object
173 | #' @param features Gene name. Only one gene is allowed.
174 | #' @param celltypes Cell types of interest. By default, all cell types are included.
175 | #' @param group Only one groupID is allowed.
176 | #' @param add.dot Whether or not to add points on the violins.
177 | #' @param font.size Font size for the labels.
178 | #' @param pt.size Point size for the data points on the violin
179 | #' @param alpha Point transparency. value from 0 to 1.
180 | #' @param strip.color Colors for the strip background
181 | #' @return A ggplot object
182 | #' @export
183 | 
184 | complex_vlnplot_multiple <- function(
185 |   seu_obj,
186 |   features,
187 |   celltypes=NULL,
188 |   group,
189 |   add.dot = T,
190 |   font.size=12,
191 |   pt.size=0.1,
192 |   alpha=0.01,
193 |   strip.color = NULL
194 | ){
195 |   if(length(features)<2){
196 |     stop("At least two genes are required. For single gene violin plot, please use complex_vlnplot_single instead.")
197 |   }
198 |   if(length(group)>1){
199 |     stop("Use violin plot to show multiple genes in multiple group categories across multiple cell types will look too messy. Please use one group ID only.")
200 |   }
201 |   if(is.null(celltypes)){
202 |     celltypes = levels(seu_obj)
203 |   } 
204 |   gene_count<-extract_gene_count(seu_obj=seu_obj, features = features, cell.types = celltypes, meta.groups = group)
205 |   if (is.null(levels(seu_obj@meta.data[,group]))){
206 |     seu_obj@meta.data[,group] <-factor(seu_obj@meta.data[,group], levels = names(table(seu_obj@meta.data[,group])))
207 |   }
208 |   group_level<-levels(seu_obj@meta.data[,group])
209 |   gene_count[,group]<-factor(gene_count[,group],levels = group_level)
210 |   for(i in 1:length(features)){
211 |     set.seed(seed = 42)
212 |     noise <- rnorm(n = length(x = gene_count[,features[i]])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
213 |     gene_count[, features[i]]<-gene_count[,features[i]]+noise
214 |   }
215 |   gene_count$Cell<-rownames(gene_count)
216 |   gene_count <- reshape2::melt(gene_count, id.vars = c("Cell","celltype",group), measure.vars = features,
217 |                                variable.name = "Genes", value.name = "Expr")
218 |   gene_count[, group]<-factor(gene_count[, group], levels = group_level)
219 |   gene_count[, "celltype"]<-factor(gene_count[, "celltype"], levels = celltypes)
220 |   p<-ggplot(gene_count, aes_string(group, "Expr", fill = group)) +
221 |     geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink") +
222 |     scale_y_continuous(expand = c(0, 0), position="left", labels = function(x)
223 |       c(rep(x = "", times = length(x)-2), x[length(x) - 1], "")) +
224 |     theme(panel.background = element_rect(fill = "white",colour = "black"),
225 |           axis.title = element_text(size = font.size), 
226 |           axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
227 |           axis.text.y = element_text(size=(font.size)),
228 |           strip.text = element_text( size = font.size),
229 |           legend.title = element_blank(),
230 |           legend.position = 'none') +
231 |     facet_grid(celltype~Genes, scales =  'free_x') +
232 |     xlab("") + ylab("")
233 |   if(add.dot){
234 |     p = p + geom_quasirandom(size=pt.size, alpha=alpha)
235 |   }
236 |   g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
237 |   print(grid::grid.draw(g))
238 | }
239 | 
240 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | # Functions
  3 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | #' Generate the files required by CellPhoneDB
  5 | #'
  6 | #' This function can generate the files used as input into CellPhoneDB analysis. 
  7 | #' Two files will be created: normalized count matrix with the designated group
  8 | #' and the metadata to descript each cell.
  9 | #'
 10 | #' @param seu_obj A complete Seurat object
 11 | #' @param group The group selected for analysis
 12 | #' @return Output two txt files for CellPhoneDB
 13 | #' @export
 14 | creat_cellphonedb_file <- function(seu_obj, group){
 15 |   counts <- as.data.frame(
 16 |     as.matrix(
 17 |       seu_obj@assays$RNA@data)
 18 |   )
 19 |   mouse_genes<-rownames(counts)
 20 |   human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") 
 21 |   mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
 22 |   genesV2 = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", 
 23 |                    values = mouse_genes , mart = mouse, attributesL = c("hgnc_symbol",'ensembl_gene_id'), 
 24 |                    martL = human, uniqueRows=F)
 25 |   genesV2$Gene.stable.ID<-as.character(genesV2$Gene.stable.ID)
 26 |   genesV2$MGI.symbol<-as.character(genesV2$MGI.symbol)
 27 |   genesV2<-genesV2[!duplicated(genesV2$Gene.stable.ID),]
 28 |   genesV2<-genesV2[!duplicated(genesV2$MGI.symbol),]
 29 |   counts<-counts[genesV2$MGI.symbol,]
 30 |   rownames(counts)<-genesV2$Gene.stable.ID
 31 |   metadata <- data.frame(Cell = colnames(counts),
 32 |                          cell_type = as.character(seu_obj@active.ident)
 33 |   )
 34 |   metadata$Cell<-gsub('\\-','\\.',metadata$Cell)
 35 |   counts<-na.omit(counts)
 36 |   counts$Gene<-rownames(counts)
 37 |   counts<-counts[,c(ncol(counts), 1:(ncol(counts)-1))]
 38 |   colnames(counts)<-gsub('\\-','\\.',colnames(counts))
 39 |   
 40 |   write.table(counts,
 41 |               file =paste0(group, '_counts.txt'),
 42 |               quote = F,
 43 |               col.names = T,
 44 |               row.names = F,
 45 |               sep = '\t')
 46 |   
 47 |   write.table(metadata,
 48 |               file = paste0(group, '_metadata.txt'),
 49 |               quote = F,
 50 |               col.names = T,
 51 |               row.names = F,
 52 |               sep = '\t')
 53 |   
 54 | }
 55 | 
 56 | #' Generate the files required by pySCENIC
 57 | #'
 58 | #' This function can generate the file used as input into pySCENIC analysis. 
 59 | #' A loom file will be created.
 60 | #'
 61 | #' @param seu_obj A complete Seurat object
 62 | #' @param celltypes The cell types being selected for analysis
 63 | #' @param min.gene Cutoff to filter the low expression genes
 64 | #' @return A loom file
 65 | #' @export
 66 | create_pyscenic_file <- function (seu_obj, celltypes, min.gene = 1) {
 67 |   seu_sub<-subset(seu_obj, idents=celltypes)
 68 |   sub_count<-seu_sub@assays$RNA@counts
 69 |   sub_count<-sub_count[rowSums(sub_count) > min.gene,]
 70 |   seu_sub<-subset(seu_sub, features=rownames(sub_count))
 71 |   SaveH5Seurat(seu_sub, filename = "seu_sub.h5Seurat")
 72 |   Convert("seu_sub.h5Seurat", dest = "h5ad")
 73 |   cat("Please run the following lines in python.\n
 74 |       import scanpy as sc \n
 75 |       import numpy as np \n
 76 |       import loompy as lp \n
 77 |       adata = sc.read(\"seu_sub.h5ad\") \n
 78 |       row_attrs = { \n
 79 |       \"Gene\": np.array(adata.var_names), \n
 80 |       } \n
 81 |       col_attrs = { \n
 82 |      \"CellID\": np.array(adata.obs_names), \n
 83 |      \"nGene\": np.array(np.sum(adata.X.transpose()>0, axis=0)).flatten(), \n
 84 |       \"nUMI\": np.array(np.sum(adata.X.transpose(),axis=0)).flatten(), \n
 85 |       } \n
 86 |       lp.create(\"seu_sub_filtered.loom\",adata.X.transpose(),row_attrs, \n
 87 |           col_attrs) \n
 88 |   ")
 89 | }
 90 | 
 91 | 
 92 | #' Compute and plot correlations between two datasets
 93 | #'
 94 | #' This function can compute the correlations on the samples from two count matrix.
 95 | #' It will use Pearson method as default.
 96 | #'
 97 | #' @param data1 The first data frame with genes in rows and samples in columns
 98 | #' @param data2 The second data frame with genes in rows and samples in columns
 99 | #' @param ngenes Number of top variable genes used for the computation
100 | #' @param method.use Default is "pearson". Other methods include kendall" or "spearman". See ?cor.
101 | #' @param color.use Color palette for the heatmap plot.
102 | #' @return A heatmap plot
103 | #' @export
104 | run_correlation<-function(
105 |   data1, 
106 |   data2, 
107 |   ngenes=2000, 
108 |   method.use='pearson', 
109 |   color.use=NULL
110 |   ){
111 |   genes1<-rownames(data1)
112 |   genes2<-rownames(data2)
113 |   comm_genes<-intersect(genes1, genes1)
114 |   data1<-data1[comm_genes,]
115 |   data2<-data2[comm_genes,]
116 |   merged<-cbind(data1, data2)
117 |   datExpr<-as.matrix(merged)
118 |   Pvars <- rowVars(datExpr)
119 |   select <- order(Pvars, decreasing = TRUE)[seq_len(min(ngenes, length(Pvars)))]
120 |   clust_select<-datExpr[select,]
121 |   pearsonplot<-cor(clust_select, use="complete.obs", method=method.use)
122 |   cell1<-length(colnames(data1))
123 |   cell2<-length(colnames(data2))
124 |   pearsonplot<-pearsonplot[1:cell1, (1+cell1):ncol(pearsonplot)]
125 |   pearsonplot<-round (pearsonplot, 2)
126 |   pearsonplot<-as.matrix(pearsonplot)
127 |   colors = color.use
128 |   if(is.null(colors)){
129 |     colors<-colorRampPalette(c("lemonchiffon1","white","darkred"))(255)
130 |   }
131 |   pearsonplot[pearsonplot<0]<-0
132 |   pheatmap(pearsonplot, col=colors, display_numbers = F, cluster_cols = F, cluster_rows = F, fontsize = 8)
133 | }
134 | 
135 | #' A function to convert ensembl ID to gene name
136 | #'
137 | #' This function is for replace the ensembl ID with actual gene names
138 | #'
139 | #' @param count Count data input
140 | #' @param species value can be "mouse" or "human"
141 | #' @return A new count data with external gene names in the rows
142 | #' @export
143 | 
144 | convert_geneid <- function(
145 |   count, 
146 |   species
147 |   ){
148 |   if (species == "mouse") {
149 |     mart = useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") 
150 |   }
151 |   else if (species == "human") {
152 |     mart = useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/") 
153 |   }
154 |   else {
155 |     print("Please specify a species!")
156 |   }
157 |   results <- getBM(attributes = c("ensembl_gene_id", "external_gene_name"), 
158 |                    mart = mart)
159 |   results$external_gene_name[results$external_gene_name==""]<-NA
160 |   results<-na.omit(results)
161 |   all1 <- data.frame(as.matrix(count))
162 |   all1$gene <- rownames(count)
163 |   annotate2 <- results[which(results$ensembl_gene_id %in% all1$gene), 
164 |   ]
165 |   all1 <- all1[annotate2$ensembl_gene_id, ]
166 |   all1 <- cbind(all1, annotate2)
167 |   all1 <- all1[!duplicated(all1$external_gene_name), ]
168 |   rownames(all1) <- all1$external_gene_name
169 |   new_count <- all1[, -c((ncol(all1) - 2):ncol(all1))]
170 |   new_count <- new_count[-nrow(new_count), ]
171 |   new_count
172 | }
173 | 
174 | #' A function to process the h5 file from CellBender and generate QC plots
175 | #'
176 | #' This function works only with the h5 file output from CellBender. It takes the 
177 | #' count table, clusters the cells, removes the doublets (DoubletFinder), and 
178 | #' recluster the cells, and finally output a Seurat object
179 | #'
180 | #' @param cellbender_h5 The h5 file from CellBender output
181 | #' @param sampleID ID given to the run
182 | #' @param out_dir Output directory
183 | #' @param species It can be "human" or "mouse"
184 | #' @param type It can be "cell" or "nucleus". If your data is from snRNA-seq, use "type = nucleus". Otherwise, use "type = cell".
185 | #' @return QC plots and seurat objects before and after QC
186 | #' @export
187 | 
188 | data_processing <- function(
189 |   cellbender_h5, 
190 |   sampleID, 
191 |   out_dir, 
192 |   species,
193 |   type
194 |   ){
195 |   new_df<-Read10X_h5(cellbender_h5)
196 |   colnames(new_df)<-paste(sampleID, colnames(new_df), sep="_")
197 |   ##### 1.initial QC and clustering ####
198 |   Sample4<- CreateSeuratObject(counts = new_df, project = sampleID, min.cells = 3, min.features = 200)
199 |   metadata<-Sample4@meta.data
200 |   png(paste0(out_dir,sampleID, "_counts_histogram_before_clean.png"), units="in", width=6, height=4, res=300)
201 |   hist(
202 |     metadata$nFeature_RNA,
203 |     breaks = 1000
204 |   )
205 |   dev.off()
206 |   if(species=="human"){
207 |     Sample4[["percent.mt"]] <- PercentageFeatureSet(Sample4, pattern = "^MT-")
208 |   } else {
209 |     Sample4[["percent.mt"]] <- PercentageFeatureSet(Sample4, pattern = "^mt-")
210 |   }
211 |   png(paste0(out_dir,sampleID, "_counts_QC_before_clean.png"), units="in", width=8, height=4, res=300)
212 |   print(VlnPlot(Sample4, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
213 |   dev.off()
214 |   if(type=="nucleus"){
215 |     Sample4<- subset(Sample4, subset = nFeature_RNA > 500 & nFeature_RNA < 6000 & percent.mt < 5)
216 |   } else {
217 |     Sample4<- subset(Sample4, subset = nFeature_RNA > 500 & nFeature_RNA < 6000 & percent.mt < 50)
218 |   }
219 |   Sample4<- NormalizeData(Sample4)
220 |   Sample4<- FindVariableFeatures(Sample4, selection.method = "vst", nfeatures = 2000)
221 |   all.genes <- rownames(Sample4)
222 |   Sample4<- ScaleData(Sample4, features = all.genes)
223 |   Sample4<- RunPCA(Sample4, features = VariableFeatures(object = Sample4), npcs = 50)
224 |   png(paste0(out_dir,sampleID, "_PCA_elbowplot_before_clean.png"), units="in", width=6, height=4, res=300)
225 |   print(ElbowPlot(Sample4, ndims = 50))
226 |   dev.off()
227 |   Sample4<- FindNeighbors(Sample4, dims = 1:30)
228 |   Sample4<- FindClusters(Sample4, resolution = 0.5)
229 |   Sample4<- RunUMAP(Sample4, dims = 1:30)
230 |   Sample4<- CreateSeuratObject(counts = Sample4@assays$RNA@counts, project = sampleID, min.cells = 3, min.features = 200)
231 |   metadata<-Sample4@meta.data
232 |   png(paste0(out_dir,sampleID, "_counts_histogram_before_clean.png"), units="in", width=6, height=4, res=300)
233 |   hist(
234 |     metadata$nFeature_RNA,
235 |     breaks = 1000
236 |   )
237 |   dev.off()
238 |   if(species=="human"){
239 |     Sample4[["percent.mt"]] <- PercentageFeatureSet(Sample4, pattern = "^MT-")
240 |   } else {
241 |     Sample4[["percent.mt"]] <- PercentageFeatureSet(Sample4, pattern = "^mt-")
242 |   }
243 |   png(paste0(out_dir,sampleID, "_counts_QC_before_clean.png"), units="in", width=8, height=4, res=300)
244 |   print(VlnPlot(Sample4, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
245 |   dev.off()
246 |   Sample4<- NormalizeData(Sample4)
247 |   Sample4<- FindVariableFeatures(Sample4, selection.method = "vst", nfeatures = 2000)
248 |   all.genes <- rownames(Sample4)
249 |   Sample4<- ScaleData(Sample4, features = all.genes)
250 |   Sample4<- RunPCA(Sample4, features = VariableFeatures(object = Sample4), npcs = 50)
251 |   png(paste0(out_dir,sampleID, "_PCA_elbowplot_before_clean.png"), units="in", width=6, height=4, res=300)
252 |   print(ElbowPlot(Sample4, ndims = 50))
253 |   dev.off()
254 |   Sample4<- FindNeighbors(Sample4, dims = 1:30)
255 |   Sample4<- FindClusters(Sample4, resolution = 0.5)
256 |   Sample4<- RunUMAP(Sample4, dims = 1:30)
257 |   Sample4<-RunTSNE(Sample4, dims = 1:30, perplexity=50)
258 |   png(paste0(out_dir,sampleID, "_UMAP_before_clean.png"), units="in", width=6, height=4, res=300)
259 |   print(DimPlot(Sample4, reduction = "umap"))
260 |   dev.off()
261 |   png(paste0(out_dir,sampleID, "_tSNE_before_clean.png"), units="in", width=6, height=4, res=300)
262 |   print(DimPlot(Sample4, reduction = "tsne"))
263 |   dev.off()
264 |   genes_select<-c('Nphs1','Slc5a12',"Slc7a12",'Slc12a1',"Slc12a3","Emcn",'Fhl2',"Tnc","Aqp2","Ptprc","Slc26a4",'Kit')
265 |   if(species=="human"){
266 |   genes_select<-toupper(genes_select)
267 |   }
268 |   png(paste0(out_dir,sampleID, "_markers_before_clean.png"), units="in", width=15, height=15, res=300)
269 |   print(FeaturePlot(Sample4, features = genes_select, reduction = 'tsne'))
270 |   dev.off()
271 |   png(paste0(out_dir,sampleID, "_nGene_tSNE_before_clean.png"), units="in", width=6, height=4, res=300)
272 |   print(FeaturePlot(Sample4, features = "nFeature_RNA", reduction = 'tsne'))
273 |   dev.off()
274 |   saveRDS(Sample4, file = paste0(out_dir,sampleID,'_seurat.rds'))
275 |   Sys.time()
276 |   print(paste(sampleID,"initial clustering done!", sep = ":"))
277 |   
278 |   ### 2.doubleFinder to remove doublets ###
279 |   doublet_rate<-0.018+8.4e-06*ncol(Sample4)  ##This function is based on the data from the Demuxlet paper##
280 |   ndoublets<-doublet_rate*ncol(Sample4)
281 |   Sample4<-doubletFinder_v3(Sample4,PCs = 1:30, nExp = ndoublets, pK = 0.09)
282 |   png(paste0(out_dir,sampleID, "_doublets_tSNE.png"), units="in", width=6, height=4, res=300)
283 |   print(TSNEPlot(Sample4, group.by=paste0('DF.classifications_0.25_0.09_', ndoublets)))
284 |   dev.off()
285 |   Sample4<-SetIdent(Sample4, value = paste0('DF.classifications_0.25_0.09_', ndoublets))
286 |   Sample4_clean<-subset(Sample4, idents = 'Singlet')
287 |   Sys.time()
288 |   print(paste(sampleID,"doubletFinder done!", sep = ":"))
289 |   
290 |   ### 3.recluster the singlets ####
291 |   Sample4_clean <- CreateSeuratObject(counts = Sample4_clean@assays$RNA@counts, project = sampleID, min.cells = 3, min.features = 200)
292 |   Sample4_clean
293 |   metadata<-Sample4_clean@meta.data
294 |   png(paste0(out_dir,sampleID, "_counts_histogram_after_clean.png"), units="in", width=6, height=4, res=300)
295 |   hist(
296 |     metadata$nFeature_RNA,
297 |     breaks = 1000
298 |   )
299 |   dev.off()
300 |   Sample4_clean[["percent.mt"]] <- PercentageFeatureSet(Sample4_clean, pattern = "^mt-")
301 |   png(paste0(out_dir,sampleID, "_counts_QC_after_clean.png"), units="in", width=8, height=4, res=300)
302 |   print(VlnPlot(Sample4_clean, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
303 |   dev.off()
304 |   Sample4_clean <- NormalizeData(Sample4_clean)
305 |   Sample4_clean <- FindVariableFeatures(Sample4_clean, selection.method = "vst", nfeatures = 2000)
306 |   all.genes <- rownames(Sample4_clean)
307 |   Sample4_clean <- ScaleData(Sample4_clean, features = all.genes)
308 |   Sample4_clean <- RunPCA(Sample4_clean, features = VariableFeatures(object = Sample4_clean), npcs = 50)
309 |   png(paste0(out_dir,sampleID, "_PCA_elbowplot_after_clean.png"), units="in", width=6, height=4, res=300)
310 |   print(ElbowPlot(Sample4_clean, ndims = 50))
311 |   dev.off()
312 |   Sample4_clean <- FindNeighbors(Sample4_clean, dims = 1:25)
313 |   Sample4_clean <- FindClusters(Sample4_clean, resolution = 0.6)
314 |   Sample4_clean <- RunUMAP(Sample4_clean, dims = 1:25, min.dist = 0.6, spread = 3)
315 |   print(paste(sampleID,"final clustering done!", sep = ":"))
316 |   png(paste0(out_dir,sampleID, "_UMAP_after_clean.png"), units="in", width=6, height=4, res=300)
317 |   print(DimPlot(Sample4_clean, reduction = "umap", label = T))
318 |   dev.off()
319 |   png(paste0(out_dir,sampleID, "_markers_after_clean.png"), units="in", width=15, height=15, res=300)
320 |   print(FeaturePlot(Sample4_clean, features = genes_select))
321 |   dev.off()
322 |   png(paste0(out_dir,sampleID, "_nGene_UMAP_after_clean.png"), units="in", width=6, height=4, res=300)
323 |   print(FeaturePlot(Sample4_clean, features = 'nFeature_RNA'))
324 |   dev.off()
325 |   saveRDS(Sample4_clean, file=paste0(out_dir,sampleID,'_seurat_clean.rds'))
326 |   print(paste(sampleID,"all done!", sep = ":"))
327 |   Sys.time()
328 | }
329 | 
330 | 
331 | #' A function to extract gene counts for ploting
332 | #'
333 | #' This function is a modified Seurat::FetchData function to extract gene 
334 | #' counts and the associated meta data for ploting. It returns a dataframe
335 | #' with the requested information from the Seurat object.
336 | #'
337 | #' @param seu_obj A finished Seurat Object with cell type annotation in the active.ident slot
338 | #' @param features Gene names to extract expression data
339 | #' @param cell.types The cell types to be inspected. By default, it will incorporate all cell types.
340 | #' @param data.type The data slot to be accessed. By default, the "data" slot will be used.
341 | #' @param meta.groups The colnames in the meta.data slot you want to include.
342 | #' @return A data frame with the requested info.
343 | #' @export
344 | #' 
345 | extract_gene_count <- function(
346 |   seu_obj, 
347 |   features, 
348 |   cell.types=NULL, 
349 |   data.type="data", 
350 |   meta.groups=NULL
351 |  ){
352 |   if(is.null(cell.types)){
353 |     cell.types=levels(seu_obj)
354 |   }
355 |   seu_obj@meta.data$celltype<-as.character(seu_obj@active.ident)
356 |   if(is.null(meta.groups)){
357 |     meta.groups=colnames(seu_obj@meta.data)
358 |   }
359 |   if(!is.null(cell.types)){
360 |     new_seu<-subset(seu_obj, idents=cell.types)
361 |   }
362 |   feature_count<-Seurat::FetchData(new_seu, slot = data.type, vars = c(features,meta.groups,"celltype"))
363 |   umap_data<-data.frame(new_seu[["umap"]]@cell.embeddings)
364 |   feature_count$UMAP1<-umap_data$UMAP_1
365 |   feature_count$UMAP2<-umap_data$UMAP_2
366 |   feature_count
367 | }
368 | 
369 | 
370 | #' A function to make gene name first letter capital
371 | #'
372 | #' The function is modified from this thread: https://stackoverflow.com/questions/18509527/first-letter-to-upper-case/18509816
373 | #'
374 | #' @param gene Gene name
375 | #' @export
376 | #' 
377 | 
378 | firstup <- function(
379 |   gene
380 |   ){
381 |   x <- tolower(gene)
382 |   substr(x, 1, 1) <- toupper(substr(x, 1, 1))
383 |   x
384 | }
385 | 
386 | #' A function to change the strip background color in ggplot
387 | #' @param ggplt_obj A ggplot object
388 | #' @param type Strip on the "top" or "right" side only or "both" sides
389 | #' @param strip.color A color vector
390 | #' @export
391 | #' 
392 | change_strip_background <- function(
393 |   ggplt_obj, 
394 |   type = "top",
395 |   strip.color=NULL
396 |   ){
397 |   g <- ggplot_gtable(ggplot_build(ggplt_obj))
398 |   if(type == "top"){
399 |     strip_both <- which(grepl('strip-t', g$layout$name))
400 |     fills<-strip.color
401 |     if(is.null(fills)){
402 |     fills<- scales::hue_pal(l=90)(length(strip_both))
403 |     }
404 |   } else if(type=="right"){
405 |     strip_both <- which(grepl('strip-r', g$layout$name))
406 |     fills<-strip.color
407 |     if(is.null(fills)){
408 |       fills<- scales::hue_pal(l=90)(length(strip_both))
409 |     }
410 |   } else {
411 |     strip_t <- which(grepl('strip-t', g$layout$name))
412 |     strip_r <- which(grepl('strip-r', g$layout$name))
413 |     strip_both<-c(strip_t, strip_r)
414 |     fills<-strip.color
415 |     if(is.null(fills)){
416 |       fills<- c(scales::hue_pal(l=90)(length(strip_t)),scales::hue_pal(l=90)(length(strip_r)))
417 |     }
418 |   }
419 |   k <- 1
420 |   for (i in strip_both) {
421 |     j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder))
422 |     g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
423 |     k <- k+1
424 |   }
425 |   g
426 | }
427 | 
428 | #' A function to generate an example dataset for demo
429 | #' @export
430 | Install.example<-function(){
431 |   getGEOSuppFiles(GEO = "GSE139107")
432 |   setwd("GSE139107/")
433 |   all_files<-list.files(pattern = "dge")
434 |   all_ct<-list()
435 |   for(i in 1:length(all_files)){
436 |     ct_data<-read.delim(all_files[i])
437 |     ct_data<-Matrix(as.matrix(ct_data), sparse = T)
438 |     all_ct[[i]]<-ct_data
439 |     print(all_files[i])
440 |   }
441 |   all.count<-RowMergeSparseMatrices(all_ct[[1]], all_ct[-1])
442 |   meta.data<-read.delim('GSE139107_MouseIRI.metadata.txt.gz')
443 |   all.count<-all.count[,rownames(meta.data)]
444 |   iri <- CreateSeuratObject(counts = all.count, min.cells = 0, min.features = 0, meta.data = meta.data)
445 |   iri.list <- SplitObject(iri, split.by = "orig.ident")
446 |   iri.list <- lapply(X = iri.list, FUN = function(x) {
447 |     x <- NormalizeData(x, verbose = FALSE)
448 |     x <- FindVariableFeatures(x, verbose = FALSE)
449 |   })
450 |   features <- SelectIntegrationFeatures(object.list = iri.list)
451 |   iri.list <- lapply(X = iri.list, FUN = function(x) {
452 |     x <- ScaleData(x, features = features, verbose = FALSE)
453 |     x <- RunPCA(x, features = features, verbose = FALSE)
454 |   })
455 |   anchors <- FindIntegrationAnchors(object.list = iri.list, reference = c(1, 2), reduction = "rpca",
456 |                                     dims = 1:50)
457 |   iri.integrated <- IntegrateData(anchorset = anchors, dims = 1:50)
458 |   iri.integrated <- ScaleData(iri.integrated, verbose = FALSE)
459 |   iri.integrated <- RunPCA(iri.integrated, verbose = FALSE)
460 |   iri.integrated <- RunUMAP(iri.integrated, dims = 1:25, min.dist = 0.2)
461 |   iri.integrated<-SetIdent(iri.integrated, value = 'celltype')
462 |   levels(iri.integrated)<-c("PTS1"  ,  "PTS2"  ,  "PTS3", "NewPT1", "NewPT2",
463 |                             "DTL-ATL", "MTAL", "CTAL1" ,  "CTAL2","MD","DCT" ,
464 |                             "DCT-CNT","CNT","PC1","PC2","ICA","ICB","Uro","Pod","PEC",
465 |                             "EC1","EC2","Fib","Per","Mø","Tcell")
466 |   levels(iri.integrated@meta.data$Group)<-c("Control","4hours","12hours","2days","14days","6weeks" )
467 |   DefaultAssay(iri.integrated)<-"RNA"
468 |   setwd("../")
469 |   unlink("GSE139107/",recursive=TRUE)
470 |   iri.integrated
471 | }
472 | 
473 | #' A function to order genes upregulated from the first to the last column
474 | #' @param df A data frames with genes in row and samples in column
475 | #' @export
476 | order_gene_up<-function(df){
477 |   min.col <- function(m, ...) max.col(-m, ...)
478 |   df$celltype<-colnames(df)[min.col(df,ties.method="first")]
479 |   df$celltype<-factor(df$celltype, levels = names(df))
480 |   df<-df[order(df$celltype),]
481 |   gene.names<-list()
482 |   for (i in names(df)){
483 |     aa<-df[df$celltype==i,]
484 |     aa<-aa[order(aa[,i],decreasing = T),]
485 |     gene.names[[i]]<-rownames(aa)
486 |   }
487 |   gene.names<-as.character(unlist(gene.names))
488 |   return(gene.names)
489 | }
490 | 
491 | #' A function to order genes downregulated from the first to the last column
492 | #' @param df A data frames with genes in row and samples in column
493 | #' @export
494 | order_gene_down<-function(df){
495 |   df$celltype<-colnames(df)[max.col(df,ties.method="first")]
496 |   df$celltype<-factor(df$celltype, levels = names(df))
497 |   df<-df[order(df$celltype),]
498 |   gene.names<-list()
499 |   for (i in names(df)){
500 |     aa<-df[df$celltype==i,]
501 |     aa<-aa[order(aa[,i],decreasing = F),]
502 |     gene.names[[i]]<-rownames(aa)
503 |   }
504 |   gene.names<-as.character(unlist(gene.names))
505 |   return(gene.names)
506 | }
507 | 
508 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # plot1cell: a package for advanced single cell data visualization
  2 | This R package allows users to visualize the single cell data on the R object or output files generated by Seurat. It is currently under active development. 
  3 | 
  4 | ## Installation
  5 | ```plot1cell``` R package can be easily installed from Github using devtools. Please make sure you have installed <a href="https://satijalab.org/seurat/">Seurat 4.0</a>, <a href="https://github.com/jokergoo/circlize">circlize</a> and <a href="https://github.com/jokergoo/ComplexHeatmap">ComplexHeatmap</a> packages.
  6 | 
  7 | ```R
  8 | devtools::install_github("TheHumphreysLab/plot1cell")
  9 | ## or the development version, devtools::install_github("HaojiaWu/plot1cell")
 10 | 
 11 | ## You might need to install the dependencies below if they are not available in your R library.
 12 | bioc.packages <- c("biomaRt","GenomeInfoDb","EnsDb.Hsapiens.v86","GEOquery","simplifyEnrichment","ComplexHeatmap")
 13 | BiocManager::install(bioc.packages)
 14 | dev.packages <- c("chris-mcginnis-ucsf/DoubletFinder","Novartis/hdf5r","mojaveazure/loomR")
 15 | devtools::install_github(dev.packages)
 16 | ## If you can't get the hdf5r package installed, please see the fix here:
 17 | ## https://github.com/hhoeflin/hdf5r/issues/94
 18 | 
 19 | ```
 20 | 
 21 | ## Usage
 22 | We provide some example codes to help generate figures from user's provided Seurat object. The Seurat object input to ```plot1cell``` should be a final object with complete clustering and cell type annotation. If a seurat object is not available, we suggest to use the demo data from Satija's lab (https://satijalab.org/seurat/articles/integration_introduction.html). To demonstrate the plotting functions in plot1cell, we re-created a Seurat object from our recent paper <a href="https://www.pnas.org/doi/10.1073/pnas.2005477117">Kirita et al, PNAS 2020</a> by integrating the count matrices we uploaded to GEO ([GSE139107](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE139107)).
 23 | ```R
 24 | library(plot1cell)
 25 | iri.integrated <- Install.example() 
 26 | 
 27 | # Please note that this Seurat object is just for demo purpose and 
 28 | # is not exactly the same as the one we published on PNAS.
 29 | # It takes about 2 hours to run in a linux server with 500GB RAM and 32 CPU cores.
 30 | # You can skip this step and use your own Seurat object instead
 31 | ```
 32 | 
 33 | ### 1. Circlize plot to visualize cell clustering and meta data
 34 | This circlize plot was inspired by the data visualization in a published paper (Figure1, https://www.nature.com/articles/s41586-021-03775-x) from Linnarsson's lab.
 35 | 
 36 | ```R
 37 | ###Check and see the meta data info on your Seurat object
 38 | colnames(iri.integrated@meta.data)  
 39 | 
 40 | ###Prepare data for ploting
 41 | circ_data <- prepare_circlize_data(iri.integrated, scale = 0.8 )
 42 | set.seed(1234)
 43 | cluster_colors<-rand_color(length(levels(iri.integrated)))
 44 | group_colors<-rand_color(length(names(table(iri.integrated$Group))))
 45 | rep_colors<-rand_color(length(names(table(iri.integrated$orig.ident))))
 46 | 
 47 | ###plot and save figures
 48 | png(filename =  'circlize_plot.png', width = 6, height = 6,units = 'in', res = 300)
 49 | plot_circlize(circ_data,do.label = T, pt.size = 0.01, col.use = cluster_colors ,bg.color = 'white', kde2d.n = 200, repel = T, label.cex = 0.6)
 50 | add_track(circ_data, group = "Group", colors = group_colors, track_num = 2) ## can change it to one of the columns in the meta data of your seurat object
 51 | add_track(circ_data, group = "orig.ident",colors = rep_colors, track_num = 3) ## can change it to one of the columns in the meta data of your seurat object
 52 | dev.off()
 53 | ```
 54 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/circlize_plot.png) <br />
 55 | 
 56 | ### 2. Dotplot to show gene expression across groups
 57 | Here is an example to use plot1cell to show one gene expression across different cell types in different groups.
 58 | ```R
 59 | png(filename =  'dotplot_single.png', width = 4, height = 6,units = 'in', res = 100)
 60 | complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groups = "Group")
 61 | dev.off()
 62 | ```
 63 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_single.png) <br />
 64 | If the group factor can be classified by another factor, ```complex_dotplot_single``` allows splitting the group factor by another group factor too. Here is an example for demo.
 65 | ```R
 66 | iri.integrated@meta.data$Phase<-plyr::mapvalues(iri.integrated@meta.data$Group, from = levels(iri.integrated@meta.data$Group), to = c("Healthy",rep("Injury",3), rep("Recovery",2)))
 67 | iri.integrated@meta.data$Phase<-as.character(iri.integrated@meta.data$Phase)
 68 | png(filename =  'dotplot_single_split.png', width = 4, height = 6,units = 'in', res = 100)
 69 | complex_dotplot_single(iri.integrated, feature = "Havcr1",groups = "Group",splitby = "Phase")
 70 | dev.off()
 71 | ```
 72 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_single_split.png) <br />
 73 | 
 74 | To visualize the same gene on multiple group factors, simply add more group factor IDs to the ```groups``` argument.
 75 | ```R
 76 | png(filename =  'dotplot_more_groups.png', width = 8, height = 6,units = 'in', res = 100)
 77 | complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groups= c("Group","Replicates"))
 78 | dev.off()
 79 | ```
 80 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_more_groups.png) <br />
 81 | 
 82 | Each group factor can be further splitted by its own factor if the ```splitby``` argument is provided. Note that in this case, the order of the group factors needs to match the order of splitby factors.
 83 | ```R
 84 | iri.integrated@meta.data$ReplicateID<-plyr::mapvalues(iri.integrated@meta.data$Replicates, from = names(table((iri.integrated@meta.data$Replicates))), to = c(rep("Rep1",3),rep("Rep2",3), rep("Rep3",1)))
 85 | iri.integrated@meta.data$ReplicateID<-as.character(iri.integrated@meta.data$ReplicateID)
 86 | 
 87 | png(filename =  'dotplot_more_groups_split.png', width = 9, height = 6,units = 'in', res = 200)
 88 | complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groups= c("Group","Replicates"), splitby = c("Phase","ReplicateID"))
 89 | dev.off()
 90 | ### In this example, "Phase" is a splitby factor for "Group" and "ReplicateID" is a splitby factor for "Replicates".
 91 | ```
 92 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_more_groups_split.png) <br />
 93 | Note that the Replicates group here is just for showcase purpose. This is not a meaningful group ID in our snRNA-seq dataset.
 94 | 
 95 | To visualize multiple genes in dotplot format, ```complex_dotplot_multiple``` should be used.
 96 | ```R
 97 | png(filename =  'dotplot_multiple.png', width = 10, height = 4,units = 'in', res = 300)
 98 | complex_dotplot_multiple(seu_obj = iri.integrated, features = c("Slc34a1","Slc7a13","Havcr1","Krt20","Vcam1"),group = "Group", celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"))
 99 | dev.off()
100 | ```
101 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_multiple.png) <br />
102 | 
103 | ### 3. Violin plot to show gene expression across groups
104 | #### One gene/one group factor violin plot:
105 | ```R
106 | png(filename =  'vlnplot_single.png', width = 4, height = 6,units = 'in', res = 100)
107 | complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = "Group",celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"))
108 | dev.off()
109 | ```
110 | 
111 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_single.png) <br />
112 | 
113 | Similar to complex_dotplot_single, the complex_vlnplot_single function also allows splitting the group factor by another factor with the argument ```splitby```.
114 | ```R
115 | png(filename =  'vlnplot_single_split.png', width = 4, height = 6,units = 'in', res = 100)
116 | complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = "Group",celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), splitby = "Phase")
117 | dev.off()
118 | ```
119 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_single_split.png) <br />
120 | #### One gene/multiple group factors violin plot:
121 | ```R
122 | png(filename =  'vlnplot_multiple.png', width = 6, height = 6,units = 'in', res = 100)
123 | complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), font.size = 10)
124 | dev.off()
125 | ```
126 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple.png) <br />
127 | 
128 | Similar to the functionality in complex_dotplot, each group factor can also be splitted by another factor in violin plot. For example:
129 | ```R
130 | png(filename =  'vlnplot_multiple_split.png', width = 7, height = 5,units = 'in', res = 200)
131 | complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),
132 |                         celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), 
133 |                         font.size = 10, splitby = c("Phase","ReplicateID"), pt.size=0.05)
134 | dev.off()
135 | ```
136 | 
137 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_split.png) <br />
138 | 
139 | 
140 | #### Multiple genes/one group factor violin plot:
141 | ```R
142 | png(filename =  'vlnplot_multiple_genes.png', width = 6, height = 6,units = 'in', res = 300)
143 | complex_vlnplot_multiple(iri.integrated, features = c("Havcr1",  "Slc34a1", "Vcam1",   "Krt20"  , "Slc7a13", "Slc5a12"), celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), group = "Group", add.dot=T, pt.size=0.01, alpha=0.01, font.size = 10)
144 | dev.off()
145 | ```
146 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_genes.png) <br />
147 | 
148 | #### Multiple genes/multiple group factors.
149 | The violin plot will look too messy in this scenario so it is not included in plot1cell. <br />
150 | 
151 | ### 4. Umap geneplot across groups
152 | ```R
153 | png(filename =  'data/geneplot_umap.png', width = 8, height = 6,units = 'in', res = 100)
154 | complex_featureplot(iri.integrated, features = c("Havcr1",  "Slc34a1", "Vcam1",   "Krt20"  , "Slc7a13"), group = "Group", select = c("Control","12hours","6weeks"), order = F)
155 | dev.off()
156 | ```
157 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/geneplot_umap.png) <br />
158 | 
159 | ### 5. ComplexHeatmap to show unique genes across groups
160 | plot1cell can directly identify the condition specific genes in a selected cell type and plot those genes using ComplexHeatmap. An example is shown below:
161 | ```R
162 | iri.integrated$Group2<-plyr::mapvalues(iri.integrated$Group, from = c("Control", "4hours",  "12hours", "2days",   "14days" , "6weeks" ),
163 | to = c("Ctrl","Hr4","Hr12","Day2", "Day14","Wk6"))
164 | iri.integrated$Group2<-factor(iri.integrated$Group2, levels = c("Ctrl","Hr4","Hr12","Day2", "Day14","Wk6"))
165 | png(filename =  'heatmap_group.png', width = 4, height = 8,units = 'in', res = 100)
166 | complex_heatmap_unique(seu_obj = iri.integrated, celltype = "NewPT2", group = "Group2",gene_highlight = c("Slc22a28","Vcam1","Krt20","Havcr1"))
167 | dev.off()
168 | ```
169 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/heatmap_group.png) <br />
170 | 
171 | ### 6. Upset plot to show the unique and shared DEGs across groups.
172 | 
173 | ```R
174 | png(filename =  'upset_plot.png', width = 8, height = 4,units = 'in', res = 300)
175 | complex_upset_plot(iri.integrated, celltype = "NewPT2", group = "Group", min_size = 10, logfc=0.5)
176 | dev.off()
177 | ```
178 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/upset_plot.png) <br />
179 | 
180 | ### 7. Cell proportion change across groups
181 | 
182 | ```R
183 | png(filename =  'cell_fraction.png', width = 8, height = 4,units = 'in', res = 300)
184 | plot_cell_fraction(iri.integrated,  celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), groupby = "Group", show_replicate = T, rep_colname = "orig.ident")
185 | dev.off()
186 | ```
187 | 
188 | ![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/cell_fraction.png) <br />
189 | 
190 | ### 8. Other ploting functions
191 | There are other functions for plotting/data processing in plot1cell. 
192 | ```R
193 | help(package = plot1cell)
194 | ```
195 | Many more functions will be added in the future package development. For questions, please raise an issue in this github page or contact <a href="https://humphreyslab.com">TheHumphreysLab</a>. 
196 | 
197 | ### 9. Attributions
198 | This package uses many methods from Seurat (https://github.com/satijalab/seurat) to process the data for ploting. The circlize and heatmap plots were generated by the circlize (https://github.com/jokergoo/circlize) and ComplexHeatmap (https://github.com/jokergoo/ComplexHeatmap) packages. The Upset plot was generated by the ComplexUpset package (https://github.com/krassowski/complex-upset). Most of other graphs were generated using ggplot2 (https://github.com/tidyverse/ggplot2). The package benefits from the following dependencies.
199 | 
200 | ```R
201 |     Seurat,
202 |     plotly,
203 |     circlize,
204 |     dplyr,
205 |     ggplot2,
206 |     ggh4x,
207 |     MASS,
208 |     scales,
209 |     progress,
210 |     RColorBrewer,
211 |     grid,
212 |     grDevices,
213 |     biomaRt,
214 |     reshape2,
215 |     ggbeeswarm,
216 |     purrr,
217 |     ComplexUpset,
218 |     matrixStats,
219 |     DoubletFinder,
220 |     methods,
221 |     data.table,
222 |     Matrix,
223 |     hdf5r,
224 |     loomR,
225 |     GenomeInfoDb,
226 |     EnsDb.Hsapiens.v86,
227 |     cowplot,
228 |     rlang,
229 |     GEOquery,
230 |     simplifyEnrichment,
231 |     wordcloud,
232 |     ComplexHeatmap
233 | ```
234 | ### 10. Citation
235 | Please consider citing our paper if you find ```plot1cell``` useful. <br />
236 | https://www.cell.com/cell-metabolism/fulltext/S1550-4131(22)00192-9 <br />
237 | **Cell Metab.** 2022 Jul 5;34(7):1064-1078.e6.<br />
238 | Wu H, Gonzalez Villalobos R, Yao X, Reilly D, Chen T, Rankin M, Myshkin E, Breyer MD, Humphreys BD. <br />
239 | **Mapping the single-cell transcriptomic response of murine diabetic kidney disease to therapies.** 
240 | 
241 | ### Star History
242 | 
243 | [![Star History Chart](https://api.star-history.com/svg?repos=TheHumphreysLab/plot1cell&type=Date)](https://star-history.com/#TheHumphreysLab/plot1cell&Date)
244 | 


--------------------------------------------------------------------------------
/data/cell_fraction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/cell_fraction.png


--------------------------------------------------------------------------------
/data/circlize_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/circlize_plot.png


--------------------------------------------------------------------------------
/data/dotplot_more_groups.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/dotplot_more_groups.png


--------------------------------------------------------------------------------
/data/dotplot_more_groups_split.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/dotplot_more_groups_split.png


--------------------------------------------------------------------------------
/data/dotplot_multiple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/dotplot_multiple.png


--------------------------------------------------------------------------------
/data/dotplot_single.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/dotplot_single.png


--------------------------------------------------------------------------------
/data/dotplot_single_split.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/dotplot_single_split.png


--------------------------------------------------------------------------------
/data/geneplot_umap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/geneplot_umap.png


--------------------------------------------------------------------------------
/data/heatmap_group.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/heatmap_group.png


--------------------------------------------------------------------------------
/data/upset_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/upset_plot.png


--------------------------------------------------------------------------------
/data/vlnplot_multiple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/vlnplot_multiple.png


--------------------------------------------------------------------------------
/data/vlnplot_multiple_genes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/vlnplot_multiple_genes.png


--------------------------------------------------------------------------------
/data/vlnplot_multiple_split.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/vlnplot_multiple_split.png


--------------------------------------------------------------------------------
/data/vlnplot_single.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/vlnplot_single.png


--------------------------------------------------------------------------------
/data/vlnplot_single_split.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheHumphreysLab/plot1cell/5b573491a093bcd3c9c3740cb1a59c19a4aba4df/data/vlnplot_single_split.png


--------------------------------------------------------------------------------
/man/Install.example.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{Install.example}
 4 | \alias{Install.example}
 5 | \title{A function to generate an example dataset for demo}
 6 | \usage{
 7 | Install.example()
 8 | }
 9 | \description{
10 | A function to generate an example dataset for demo
11 | }
12 | 


--------------------------------------------------------------------------------
/man/add_track.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{add_track}
 4 | \alias{add_track}
 5 | \title{Add tracks to the circlize plot}
 6 | \usage{
 7 | add_track(data_plot, group, track_num, colors = NULL)
 8 | }
 9 | \arguments{
10 | \item{data_plot}{Data for circlize plot}
11 | 
12 | \item{group}{The group to be shown on the new track}
13 | 
14 | \item{track_num}{Which number this track is? Value is integer and starts with 2 for the 2nd track, track_num=3 for the 3rd track, etc...}
15 | 
16 | \item{colors}{Color palette to color the group}
17 | }
18 | \value{
19 | A new circlize track adding to the current circlize plot
20 | }
21 | \description{
22 | This function allows users to add more tracks into the circlize plot
23 | }
24 | 


--------------------------------------------------------------------------------
/man/cell_order.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{cell_order}
 4 | \alias{cell_order}
 5 | \title{Order the cells from each cluster}
 6 | \usage{
 7 | cell_order(dat)
 8 | }
 9 | \arguments{
10 | \item{dat}{Data input.}
11 | }
12 | \value{
13 | An vector with ordered cells
14 | }
15 | \description{
16 | This function orders the cells from each cluster by giving a value from
17 | 1 to max
18 | }
19 | 


--------------------------------------------------------------------------------
/man/change_strip_background.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{change_strip_background}
 4 | \alias{change_strip_background}
 5 | \title{A function to change the strip background color in ggplot}
 6 | \usage{
 7 | change_strip_background(ggplt_obj, type = "top", strip.color = NULL)
 8 | }
 9 | \arguments{
10 | \item{ggplt_obj}{A ggplot object}
11 | 
12 | \item{type}{Strip on the "top" or "right" side only or "both" sides}
13 | 
14 | \item{strip.color}{A color vector}
15 | }
16 | \description{
17 | A function to change the strip background color in ggplot
18 | }
19 | 


--------------------------------------------------------------------------------
/man/complex_dotplot_multiple.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_dot.R
 3 | \name{complex_dotplot_multiple}
 4 | \alias{complex_dotplot_multiple}
 5 | \title{Plot multiple genes across groups}
 6 | \usage{
 7 | complex_dotplot_multiple(
 8 |   seu_obj,
 9 |   features,
10 |   celltypes = NULL,
11 |   groups,
12 |   color.palette = NULL,
13 |   strip.color = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{seu_obj}{A complete Seurat object}
18 | 
19 | \item{features}{A vector of gene names.}
20 | 
21 | \item{celltypes}{Cell types to be included in the dot plot. Default: all cell types.}
22 | 
23 | \item{groups}{Group ID must be one of the column names in the meta.data slot of the Seurat object.}
24 | 
25 | \item{color.palette}{Color for gene expression.}
26 | 
27 | \item{strip.color}{Colors for the strip background}
28 | }
29 | \value{
30 | A ggplot object
31 | }
32 | \description{
33 | This function allows for visualization of multiple genes in multiple groups.
34 | It takes the single gene expression data generated by PlotSingleGeneGroup,
35 | concatenate all data, and produces a dotplot graph where the group ID are in
36 | x axis, wrapped by cell types, genes are on the y axis.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/complex_dotplot_single.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_dot.R
 3 | \name{complex_dotplot_single}
 4 | \alias{complex_dotplot_single}
 5 | \title{Plot single gene across groups}
 6 | \usage{
 7 | complex_dotplot_single(
 8 |   seu_obj,
 9 |   feature,
10 |   celltypes = NULL,
11 |   groups,
12 |   splitby = NULL,
13 |   color.palette = NULL,
14 |   font.size = 12,
15 |   strip.color = NULL,
16 |   do.scale = T,
17 |   scale.by = "radius"
18 | )
19 | }
20 | \arguments{
21 | \item{seu_obj}{A complete Seurat object.}
22 | 
23 | \item{feature}{Gene name. Only one gene is allowed.}
24 | 
25 | \item{celltypes}{Cell types to be included in the dot plot. Default: all cell types.}
26 | 
27 | \item{groups}{The group to show on x axis. One of the column names in meta.data.}
28 | 
29 | \item{splitby}{The group to separate the gene expression. One of the column names in meta.data.}
30 | 
31 | \item{color.palette}{Color for gene expression.}
32 | 
33 | \item{font.size}{Font size for the labels.}
34 | 
35 | \item{strip.color}{Colors for the strip background.}
36 | 
37 | \item{do.scale}{Whether or not to scale the dot when percentage expression of the gene is less than 20.}
38 | 
39 | \item{scale.by}{Methods to scale the dot size. "radius" or "size".}
40 | }
41 | \value{
42 | A ggplot object
43 | }
44 | \description{
45 | This function can be used for plotting a single gene expression across
46 | different groups in a study with complex group design.
47 | }
48 | 


--------------------------------------------------------------------------------
/man/complex_featureplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_umap.R
 3 | \name{complex_featureplot}
 4 | \alias{complex_featureplot}
 5 | \title{Plot gene expression on umap}
 6 | \usage{
 7 | complex_featureplot(
 8 |   seu_obj,
 9 |   features,
10 |   group,
11 |   select = NULL,
12 |   cols = NULL,
13 |   label.size = 12,
14 |   order = F,
15 |   strip.color = NULL,
16 |   pt.size = 0.01
17 | )
18 | }
19 | \arguments{
20 | \item{seu_obj}{A complete Seurat object.}
21 | 
22 | \item{features}{Gene names to be plotted.}
23 | 
24 | \item{group}{The group to show on y axis. One of the column names in meta.data.}
25 | 
26 | \item{select}{Select the elements within the group to show.}
27 | 
28 | \item{cols}{Change the color legend.}
29 | 
30 | \item{label.size}{Change the label size.}
31 | 
32 | \item{strip.color}{Colors for the strip background.}
33 | 
34 | \item{pt.size}{Point size for each cell.}
35 | }
36 | \value{
37 | A ggplot object
38 | }
39 | \description{
40 | This function can be used for plotting a single gene or multiple genes expression across
41 | different groups in a seurat featureplot format.
42 | }
43 | 


--------------------------------------------------------------------------------
/man/complex_heatmap_unique.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_heatmap.R
 3 | \name{complex_heatmap_unique}
 4 | \alias{complex_heatmap_unique}
 5 | \title{Plot gene expression across groups using ComplexHeatmap}
 6 | \usage{
 7 | complex_heatmap_unique(
 8 |   seu_obj,
 9 |   celltype,
10 |   group,
11 |   gene_highlight = NULL,
12 |   logfc = 0.5,
13 |   return_marker = FALSE,
14 |   col_fun = colorRamp2(c(-2, -1, 0, 1, 2), rev(c("#BF0080", "#CE6EAE", "#dddddd",
15 |     "#6EAE6E", "#008000")))
16 | )
17 | }
18 | \arguments{
19 | \item{seu_obj}{A complete Seurat object}
20 | 
21 | \item{celltype}{Cell types selected for gene plot.}
22 | 
23 | \item{group}{The group to show on x axis. One of the column names in meta.data.}
24 | 
25 | \item{gene_highlight}{Gene names showing on the rows. Default: all genes}
26 | 
27 | \item{logfc}{Fold change to select the genes}
28 | 
29 | \item{return_marker}{If TRUE, a list of specific gene will be returned.}
30 | 
31 | \item{col_fun}{Heatmap color key.}
32 | }
33 | \value{
34 | A ComplexHeatmap object or/and a gene list
35 | }
36 | \description{
37 | This function is for identifying the group-specific genes in a selected celltype
38 | and plot the expression of those genes in heatmap.
39 | }
40 | 


--------------------------------------------------------------------------------
/man/complex_upset_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_upset.R
 3 | \name{complex_upset_plot}
 4 | \alias{complex_upset_plot}
 5 | \title{UpSet plot visualize the number of unique and shared DEGs across group}
 6 | \usage{
 7 | complex_upset_plot(seu_obj, celltype, group, logfc = 0.5, min_size = 1)
 8 | }
 9 | \arguments{
10 | \item{seu_obj}{A complete Seurat object.}
11 | 
12 | \item{celltype}{The cell type to analyze.}
13 | 
14 | \item{group}{Group factor in meta data.}
15 | 
16 | \item{logfc}{Log fold change to select the DEGs}
17 | 
18 | \item{min_size}{Minimal number of observations in an intersection for it to be included}
19 | }
20 | \value{
21 | An UpSet plot
22 | }
23 | \description{
24 | This function takes Seurat object as input and visualize the genes that
25 | are unique to a particular group or shared by multiple group.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/complex_vlnplot_multiple.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_violin.R
 3 | \name{complex_vlnplot_multiple}
 4 | \alias{complex_vlnplot_multiple}
 5 | \title{Violin plot for multiple genes across groups}
 6 | \usage{
 7 | complex_vlnplot_multiple(
 8 |   seu_obj,
 9 |   features,
10 |   celltypes = NULL,
11 |   group,
12 |   add.dot = T,
13 |   font.size = 12,
14 |   pt.size = 0.1,
15 |   alpha = 0.01,
16 |   strip.color = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{seu_obj}{A complete Seurat object}
21 | 
22 | \item{features}{Gene name. Only one gene is allowed.}
23 | 
24 | \item{celltypes}{Cell types of interest. By default, all cell types are included.}
25 | 
26 | \item{group}{Only one groupID is allowed.}
27 | 
28 | \item{add.dot}{Whether or not to add points on the violins.}
29 | 
30 | \item{font.size}{Font size for the labels.}
31 | 
32 | \item{pt.size}{Point size for the data points on the violin}
33 | 
34 | \item{alpha}{Point transparency. value from 0 to 1.}
35 | 
36 | \item{strip.color}{Colors for the strip background}
37 | }
38 | \value{
39 | A ggplot object
40 | }
41 | \description{
42 | This function generates violin plot(s) to compare the expression of multiple genes across
43 | different groups or cell types. It is designed for visualizing a complicated scenario:
44 | Gene expression of multiple genes on multiple cell types across groups.
45 | }
46 | 


--------------------------------------------------------------------------------
/man/complex_vlnplot_single.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_violin.R
 3 | \name{complex_vlnplot_single}
 4 | \alias{complex_vlnplot_single}
 5 | \title{Violin plot for a single gene across groups}
 6 | \usage{
 7 | complex_vlnplot_single(
 8 |   seu_obj,
 9 |   feature,
10 |   celltypes = NULL,
11 |   groups,
12 |   add.dot = T,
13 |   font.size = 14,
14 |   pt.size = 0.1,
15 |   splitby = NULL,
16 |   alpha = 0.5,
17 |   strip.color = NULL
18 | )
19 | }
20 | \arguments{
21 | \item{seu_obj}{A complete Seurat object.}
22 | 
23 | \item{feature}{Gene name. Only one gene is allowed.}
24 | 
25 | \item{celltypes}{Cell types of interest. By default, all cell types are included.}
26 | 
27 | \item{groups}{Groups selected for plotting. Support multiple groups.}
28 | 
29 | \item{add.dot}{Whether or not to add points on the violins.}
30 | 
31 | \item{font.size}{Font size for the labels.}
32 | 
33 | \item{pt.size}{Point size for the data points on the violin.}
34 | 
35 | \item{splitby}{Group to split the gene expression. Only works when length(groups)==1.}
36 | 
37 | \item{alpha}{Point transparency. value from 0 to 1.}
38 | 
39 | \item{strip.color}{Colors for the strip background.}
40 | }
41 | \value{
42 | A ggplot object
43 | }
44 | \description{
45 | This function generates violin plot(s) to compare the expression of a single gene across
46 | different groups or cell types. It is designed for visualizing a complicated scenario:
47 | Gene expression on multiple cell types and multiple conditions.
48 | }
49 | 


--------------------------------------------------------------------------------
/man/convert_geneid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{convert_geneid}
 4 | \alias{convert_geneid}
 5 | \title{A function to convert ensembl ID to gene name}
 6 | \usage{
 7 | convert_geneid(count, species)
 8 | }
 9 | \arguments{
10 | \item{count}{Count data input}
11 | 
12 | \item{species}{value can be "mouse" or "human"}
13 | }
14 | \value{
15 | A new count data with external gene names in the rows
16 | }
17 | \description{
18 | This function is for replace the ensembl ID with actual gene names
19 | }
20 | 


--------------------------------------------------------------------------------
/man/creat_cellphonedb_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{creat_cellphonedb_file}
 4 | \alias{creat_cellphonedb_file}
 5 | \title{Generate the files required by CellPhoneDB}
 6 | \usage{
 7 | creat_cellphonedb_file(seu_obj, group)
 8 | }
 9 | \arguments{
10 | \item{seu_obj}{A complete Seurat object}
11 | 
12 | \item{group}{The group selected for analysis}
13 | }
14 | \value{
15 | Output two txt files for CellPhoneDB
16 | }
17 | \description{
18 | This function can generate the files used as input into CellPhoneDB analysis.
19 | Two files will be created: normalized count matrix with the designated group
20 | and the metadata to descript each cell.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/create_pyscenic_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{create_pyscenic_file}
 4 | \alias{create_pyscenic_file}
 5 | \title{Generate the files required by pySCENIC}
 6 | \usage{
 7 | create_pyscenic_file(seu_obj, celltypes, min.gene = 1)
 8 | }
 9 | \arguments{
10 | \item{seu_obj}{A complete Seurat object}
11 | 
12 | \item{celltypes}{The cell types being selected for analysis}
13 | 
14 | \item{min.gene}{Cutoff to filter the low expression genes}
15 | }
16 | \value{
17 | A loom file
18 | }
19 | \description{
20 | This function can generate the file used as input into pySCENIC analysis.
21 | A loom file will be created.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/data_processing.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{data_processing}
 4 | \alias{data_processing}
 5 | \title{A function to process the h5 file from CellBender and generate QC plots}
 6 | \usage{
 7 | data_processing(cellbender_h5, sampleID, out_dir, species, type)
 8 | }
 9 | \arguments{
10 | \item{cellbender_h5}{The h5 file from CellBender output}
11 | 
12 | \item{sampleID}{ID given to the run}
13 | 
14 | \item{out_dir}{Output directory}
15 | 
16 | \item{species}{It can be "human" or "mouse"}
17 | 
18 | \item{type}{It can be "cell" or "nucleus". If your data is from snRNA-seq, use "type = nucleus". Otherwise, use "type = cell".}
19 | }
20 | \value{
21 | QC plots and seurat objects before and after QC
22 | }
23 | \description{
24 | This function works only with the h5 file output from CellBender. It takes the
25 | count table, clusters the cells, removes the doublets (DoubletFinder), and
26 | recluster the cells, and finally output a Seurat object
27 | }
28 | 


--------------------------------------------------------------------------------
/man/extract_gene_count.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{extract_gene_count}
 4 | \alias{extract_gene_count}
 5 | \title{A function to extract gene counts for ploting}
 6 | \usage{
 7 | extract_gene_count(
 8 |   seu_obj,
 9 |   features,
10 |   cell.types = NULL,
11 |   data.type = "data",
12 |   meta.groups = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{seu_obj}{A finished Seurat Object with cell type annotation in the active.ident slot}
17 | 
18 | \item{features}{Gene names to extract expression data}
19 | 
20 | \item{cell.types}{The cell types to be inspected. By default, it will incorporate all cell types.}
21 | 
22 | \item{data.type}{The data slot to be accessed. By default, the "data" slot will be used.}
23 | 
24 | \item{meta.groups}{The colnames in the meta.data slot you want to include.}
25 | }
26 | \value{
27 | A data frame with the requested info.
28 | }
29 | \description{
30 | This function is a modified Seurat::FetchData function to extract gene
31 | counts and the associated meta data for ploting. It returns a dataframe
32 | with the requested information from the Seurat object.
33 | }
34 | 


--------------------------------------------------------------------------------
/man/firstup.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{firstup}
 4 | \alias{firstup}
 5 | \title{A function to make gene name first letter capital}
 6 | \usage{
 7 | firstup(gene)
 8 | }
 9 | \arguments{
10 | \item{gene}{Gene name}
11 | }
12 | \description{
13 | The function is modified from this thread: https://stackoverflow.com/questions/18509527/first-letter-to-upper-case/18509816
14 | }
15 | 


--------------------------------------------------------------------------------
/man/get_metadata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{get_metadata}
 4 | \alias{get_metadata}
 5 | \title{Get metadata from a Seurat object}
 6 | \usage{
 7 | get_metadata(seu_obj, reductions = "umap", coord_scale = 0.8, color)
 8 | }
 9 | \arguments{
10 | \item{seu_obj}{SeuratObject}
11 | 
12 | \item{reductions}{reductions methods, e.g."umap" or "tsne".}
13 | 
14 | \item{coord_scale}{value from c(0,1) to adjust the UMAP/tSNE coordinates.}
15 | 
16 | \item{color}{Colors assigned to the cell clusters}
17 | }
18 | \value{
19 | A metadata dataframe
20 | }
21 | \description{
22 | This function extracts the metadata from a Seurat object and transforms the
23 | UMAP/tSNE coordinates.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/get_segment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{get_segment}
 4 | \alias{get_segment}
 5 | \title{Create a segment for each element in a group}
 6 | \usage{
 7 | get_segment(dat, group)
 8 | }
 9 | \arguments{
10 | \item{dat}{Data input.}
11 | 
12 | \item{group}{The group name}
13 | }
14 | \value{
15 | An vector with ordered cells
16 | }
17 | \description{
18 | This function creates a segment for each element within a group
19 | }
20 | 


--------------------------------------------------------------------------------
/man/mk_color_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{mk_color_table}
 4 | \alias{mk_color_table}
 5 | \title{Create a dataframe for color mapping}
 6 | \usage{
 7 | mk_color_table(group)
 8 | }
 9 | \arguments{
10 | \item{group}{Group to be assigned color}
11 | }
12 | \value{
13 | A dataframe with colors assigned to groups
14 | }
15 | \description{
16 | This function assigns a color for each value in a vector
17 | }
18 | 


--------------------------------------------------------------------------------
/man/mk_marker_ct.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{mk_marker_ct}
 4 | \alias{mk_marker_ct}
 5 | \title{Make count matrix for the selected markers}
 6 | \usage{
 7 | mk_marker_ct(seu_obj, features)
 8 | }
 9 | \arguments{
10 | \item{seu_obj}{SeuratObject}
11 | 
12 | \item{features}{Selected marker genes}
13 | }
14 | \value{
15 | A dataframe with cells labeled by marker genes
16 | }
17 | \description{
18 | This function labels the cells based their expression levels of the selected
19 | marker genes.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/order_gene_down.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{order_gene_down}
 4 | \alias{order_gene_down}
 5 | \title{A function to order genes downregulated from the first to the last column}
 6 | \usage{
 7 | order_gene_down(df)
 8 | }
 9 | \arguments{
10 | \item{df}{A data frames with genes in row and samples in column}
11 | }
12 | \description{
13 | A function to order genes downregulated from the first to the last column
14 | }
15 | 


--------------------------------------------------------------------------------
/man/order_gene_up.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{order_gene_up}
 4 | \alias{order_gene_up}
 5 | \title{A function to order genes upregulated from the first to the last column}
 6 | \usage{
 7 | order_gene_up(df)
 8 | }
 9 | \arguments{
10 | \item{df}{A data frames with genes in row and samples in column}
11 | }
12 | \description{
13 | A function to order genes upregulated from the first to the last column
14 | }
15 | 


--------------------------------------------------------------------------------
/man/plot_cell_fraction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_cell_fraction.R
 3 | \name{plot_cell_fraction}
 4 | \alias{plot_cell_fraction}
 5 | \title{Plot cell fractions across groups}
 6 | \usage{
 7 | plot_cell_fraction(
 8 |   seu_obj,
 9 |   celltypes = NULL,
10 |   groupby,
11 |   show_replicate = FALSE,
12 |   rep_colname = NULL,
13 |   strip.color = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{seu_obj}{A complete Seurat object}
18 | 
19 | \item{celltypes}{Cell types to be included in the plot. Default: all cell types.}
20 | 
21 | \item{groupby}{The group to show on x axis. One of the column names in meta.data.}
22 | 
23 | \item{show_replicate}{Whether or not to show the individual replicate on the graph. If TRUE, the replicate column name needs to specify in the argument rep_colname.}
24 | 
25 | \item{rep_colname}{The column name for biological replicates in the meta data.}
26 | 
27 | \item{strip.color}{Colors for the strip background}
28 | }
29 | \value{
30 | A ggplot object
31 | }
32 | \description{
33 | This function is to show the cell fraction changes across groups.
34 | }
35 | 


--------------------------------------------------------------------------------
/man/plot_circlize.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{plot_circlize}
 4 | \alias{plot_circlize}
 5 | \title{Generate a circlize plot outside the tSNE/UMAP}
 6 | \usage{
 7 | plot_circlize(
 8 |   data_plot,
 9 |   do.label = T,
10 |   contour.levels = c(0.2, 0.3),
11 |   pt.size = 0.5,
12 |   kde2d.n = 1000,
13 |   contour.nlevels = 100,
14 |   bg.color = "#F9F2E4",
15 |   col.use = NULL,
16 |   label.cex = 0.5,
17 |   repel = FALSE
18 | )
19 | }
20 | \arguments{
21 | \item{data_plot}{Data frame prepared by the prepare_circlize_data function}
22 | 
23 | \item{do.label}{Whether to label the clusters}
24 | 
25 | \item{contour.levels}{Which contour line to be drawn on the plot. Value: 0-1}
26 | 
27 | \item{pt.size}{Point size of the graph}
28 | 
29 | \item{kde2d.n}{Number of grid points in each direction. A kde2d parameter}
30 | 
31 | \item{contour.nlevels}{Total number of levels in contour}
32 | 
33 | \item{bg.color}{Canvas background color}
34 | 
35 | \item{col.use}{Colors used to label the cell type}
36 | 
37 | \item{label.cex}{Label font size}
38 | 
39 | \item{repel}{Whether or not to repel the cell type names on umap}
40 | }
41 | \value{
42 | Return a circlize plot
43 | }
44 | \description{
45 | This function generates a circlize plot outside the tSNE/UMAP
46 | }
47 | 


--------------------------------------------------------------------------------
/man/plot_qpcr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_extra.R
 3 | \name{plot_qpcr}
 4 | \alias{plot_qpcr}
 5 | \title{Plot the qPCR results}
 6 | \usage{
 7 | plot_qpcr(qPCR_file, metadata_file, ref_gene, ref_sample, file_name)
 8 | }
 9 | \arguments{
10 | \item{qPCR_file}{Path to the Cq file from qPCR machine. Must be in csv format.}
11 | 
12 | \item{metadata_file}{Path to the metadata file to assign your samples into groups. Must be in csv format.}
13 | 
14 | \item{ref_gene}{The gene used for normalization. e.g. GAPDH.}
15 | 
16 | \item{ref_sample}{The sample used as reference sample. e.g. sample from the control group.}
17 | 
18 | \item{file_name}{The output file name}
19 | }
20 | \value{
21 | There are three output files produced from this script. The csv file contains the average quantitative value (2^-ΔΔCt) for each sample (and each gene) after normalized by the reference gene (e.g. GAPDH) and the reference sample (e.g. sample from the control group). This file can be input into Graphpad Prism. The txt file includes all statistics from comparisons of any two given groups. If the run has two groups only, Welch's t test will be performed. Otherwise, one-way ANOVA with post-hoc Tukey's test will be performed. Finally, the tiff file is a boxplot graph to visualize the gene expression across groups.
22 | }
23 | \description{
24 | This function is to process the Cq file generated by the qPCR machine
25 | in our lab.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/prepare_circlize_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{prepare_circlize_data}
 4 | \alias{prepare_circlize_data}
 5 | \title{Prepare circlize data for plotting}
 6 | \usage{
 7 | prepare_circlize_data(seu_obj, scale = 0.8)
 8 | }
 9 | \arguments{
10 | \item{seu_obj}{Seurat object}
11 | 
12 | \item{scale}{Scale factor to zoom in our zoom out the tSNE/UMAP proportionally}
13 | }
14 | \value{
15 | A data frame for plotting
16 | }
17 | \description{
18 | This function creates a segment for each element within a group
19 | }
20 | 


--------------------------------------------------------------------------------
/man/run_correlation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{run_correlation}
 4 | \alias{run_correlation}
 5 | \title{Compute and plot correlations between two datasets}
 6 | \usage{
 7 | run_correlation(
 8 |   data1,
 9 |   data2,
10 |   ngenes = 2000,
11 |   method.use = "pearson",
12 |   color.use = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{data1}{The first data frame with genes in rows and samples in columns}
17 | 
18 | \item{data2}{The second data frame with genes in rows and samples in columns}
19 | 
20 | \item{ngenes}{Number of top variable genes used for the computation}
21 | 
22 | \item{method.use}{Default is "pearson". Other methods include kendall" or "spearman". See ?cor.}
23 | 
24 | \item{color.use}{Color palette for the heatmap plot.}
25 | }
26 | \value{
27 | A heatmap plot
28 | }
29 | \description{
30 | This function can compute the correlations on the samples from two count matrix.
31 | It will use Pearson method as default.
32 | }
33 | 


--------------------------------------------------------------------------------
/man/transform_coordinates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_circlize.R
 3 | \name{transform_coordinates}
 4 | \alias{transform_coordinates}
 5 | \title{Convert coordinates}
 6 | \usage{
 7 | transform_coordinates(coord_data, zoom)
 8 | }
 9 | \arguments{
10 | \item{coord_data}{Cartesian coordinates from tSNE, UMAP, etc.}
11 | 
12 | \item{zoom}{Value from c(0,1) to adjust the coordinates.}
13 | }
14 | \value{
15 | A matrix with polar coordinates
16 | }
17 | \description{
18 | This function converts the Cartesian coordinates to Polar coordinates.
19 | Input data can be the coordinates from tSNE or UMAP. It outputs a matrix with
20 | polar coordinates.
21 | }
22 | 


--------------------------------------------------------------------------------
/plot1cell.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | Encoding: UTF-8
 9 | 
10 | AutoAppendNewline: Yes
11 | StripTrailingWhitespace: Yes
12 | LineEndingConversion: Posix
13 | 
14 | BuildType: Package
15 | PackageUseDevtools: Yes
16 | PackageInstallArgs: --no-multiarch --with-keep.source
17 | PackageRoxygenize: rd,collate,namespace
18 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------