├── CSV-based flow analysis ├── 00_src │ ├── ce_diff_test.r │ ├── ce_diff_test_tsne.r │ ├── ce_diff_test_umap.r │ ├── plot_all_dmrd_figures.r │ ├── plot_all_embedsom_figures.R │ ├── plot_dimensionality_reduction.r │ └── plot_dimensionality_reduction_embedSOM.R ├── Exporting data in csv format.PNG ├── Setting axis in FlowJo for Aurora data.pptx ├── analyze_flow_cytometry_csv.R ├── analyze_flow_cytometry_parameter_csv.R ├── flowcytoscript_setup.R └── highlight_changed_regions.R ├── README.md ├── flow analysis ├── analyze_flow_cytometry.r ├── analyze_flow_cytometry.sh ├── analyze_flow_cytometry_parameter.r ├── ce_diff_test.r ├── ce_diff_test_tsne.r ├── ce_diff_test_umap.r ├── get_channels.R ├── plot_all_dmrd_figures.r └── plot_dimensionality_reduction.r └── scRNA-seq analysis └── t-SNE_analysis_full.rmd /CSV-based flow analysis/00_src/ce_diff_test.r: -------------------------------------------------------------------------------- 1 | 2 | # calculates test on differences of cross-entropy 3 | 4 | ce.diff.test <- function( 5 | cross.entropy, 6 | event.partition, 7 | partition.label, partition.color, partition.line.type, 8 | base.test, base.dist, 9 | dendrogram.order.weight, 10 | result, cdf.figure, dendrogram.figure 11 | ) 12 | { 13 | partition <- levels( event.partition ) 14 | partition.n <- length( partition ) 15 | 16 | cross.entropy.split <- split( cross.entropy, event.partition ) 17 | 18 | if ( base.test == "ks" ) 19 | { 20 | if ( partition.n == 2 ) 21 | { 22 | ks.test.res <- ks.test( cross.entropy.split[[ 1 ]], 23 | cross.entropy.split[[ 2 ]] ) 24 | 25 | test.res <- list( ks.single = ks.test.res ) 26 | } 27 | else if ( partition.n > 2 ) 28 | { 29 | comparison.n <- partition.n * ( partition.n - 1 ) / 2 30 | 31 | ks.pair <- vector( "list", comparison.n ) 32 | comparison <- character( comparison.n ) 33 | D.stat <- numeric( comparison.n ) 34 | p.value <- numeric( comparison.n ) 35 | 36 | k <- 1 37 | 38 | for ( i in 1 : ( partition.n - 1 ) ) 39 | for ( j in (i+1) : partition.n ) 40 | { 41 | ks.pair[[ k ]] <- ks.test( cross.entropy.split[[ i ]], 42 | cross.entropy.split[[ j ]] ) 43 | 44 | comparison[ k ] <- sprintf( "%s - %s", 45 | partition.label[ partition[ i ] ], 46 | partition.label[ partition[ j ] ] ) 47 | 48 | D.stat[ k ] <- ks.pair[[ k ]]$statistic 49 | 50 | p.value[ k ] <- ks.pair[[ k ]]$p.value 51 | 52 | k <- k + 1 53 | } 54 | 55 | p.value.adj <- p.adjust( p.value, "holm" ) 56 | 57 | test.res <- list( ks.multiple = list( ks.pair = ks.pair, 58 | comparison = comparison, D.stat = D.stat, 59 | p.value = p.value, p.value.adj = p.value.adj ) ) 60 | } 61 | else 62 | stop( "no partitions for testing cross-entropy differences" ) 63 | } 64 | else if ( base.test == "rank" ) 65 | { 66 | if ( partition.n == 2 ) 67 | { 68 | wilcox.test.res <- wilcox.test( cross.entropy.split[[ 1 ]], 69 | cross.entropy.split[[ 2 ]] ) 70 | 71 | test.res <- list( rank.single = wilcox.test.res ) 72 | } 73 | else if ( partition.n > 2 ) 74 | { 75 | kruskal.test.res <- kruskal.test( cross.entropy, 76 | event.partition ) 77 | 78 | dunn.test.res <- dunn.test( cross.entropy, event.partition, 79 | method = "holm", alpha = fcs.ce.diff.test.alpha, altp = TRUE, 80 | kw = FALSE, table = FALSE, list = TRUE ) 81 | 82 | test.res <- list( rank.multiple = list( 83 | kruskal = kruskal.test.res, dunn = dunn.test.res ) ) 84 | } 85 | else 86 | stop( "no partitions for testing cross-entropy differences" ) 87 | } 88 | else 89 | stop( "wrong base test for cross-entropy differences" ) 90 | 91 | if ( ! is.null( result ) ) 92 | { 93 | result.file <- file( result, "w" ) 94 | sink( result.file ) 95 | 96 | tr.name <- names( test.res ) 97 | stopifnot( length( tr.name ) == 1 ) 98 | 99 | if ( tr.name == "ks.single" ) 100 | { 101 | cat( "\n** Kolmogorov-Smirnov test\n") 102 | 103 | print( test.res$ks.single ) 104 | } 105 | else if ( tr.name == "ks.multiple" ) 106 | { 107 | cat( "\n** Multiple Kolmogorov-Smirnov tests with Holm correction\n\n") 108 | 109 | comparison.width <- max( nchar( test.res$ks.multiple$comparison ) ) 110 | 111 | for ( i in 1 : length( test.res$ks.multiple$comparison ) ) 112 | cat( sprintf( "%-*s\t\tD = %g\t\tpv = %g\t\tadj-pv = %g\n", 113 | comparison.width, 114 | test.res$ks.multiple$comparison[ i ], 115 | test.res$ks.multiple$D.stat[ i ], 116 | test.res$ks.multiple$p.value[ i ], 117 | test.res$ks.multiple$p.value.adj[ i ] ) ) 118 | } 119 | else if ( tr.name == "rank.single" ) 120 | { 121 | cat( "\n** Wilcoxon rank sum test\n") 122 | 123 | print( test.res$rank.single ) 124 | } 125 | else if ( tr.name == "rank.multiple" ) 126 | { 127 | cat( "\n** Kruskal-Wallis rank sum test\n") 128 | 129 | print( test.res$rank.multiple$kruskal ) 130 | 131 | cat( "\n** Dunn post-hoc test with Holm correction\n\n") 132 | 133 | comparison.width <- max( nchar( 134 | test.res$rank.multiple$dunn$comparison ) ) 135 | 136 | for ( i in 1 : length( test.res$rank.multiple$dunn$comparisons ) ) 137 | cat( sprintf( "%-*s\t\tZ = %g\t\tpv = %g\t\tadj-pv = %g\n", 138 | comparison.width, 139 | test.res$rank.multiple$dunn$comparisons[ i ], 140 | test.res$rank.multiple$dunn$Z[ i ], 141 | test.res$rank.multiple$dunn$altP[ i ], 142 | test.res$rank.multiple$dunn$altP.adjusted[ i ] ) ) 143 | } 144 | else 145 | { 146 | sink() 147 | close( result.file ) 148 | stop( "unknown test in ce-diff result" ) 149 | } 150 | 151 | sink() 152 | close( result.file ) 153 | } 154 | 155 | if ( ! is.null( cdf.figure ) ) 156 | { 157 | if ( is.null( partition.label ) ) 158 | partition.label = partition 159 | 160 | if ( is.null( partition.color ) ) 161 | partition.color <- rainbow( partition.n ) 162 | 163 | if ( is.null( partition.line.type ) ) 164 | partition.line.type <- rep( 1, partition.n ) 165 | 166 | png( filename = cdf.figure, width = fcs.ce.diff.figure.cdf.width, 167 | height = fcs.ce.diff.figure.cdf.height ) 168 | 169 | par( mar = c( 5.5, 6, 2, 1.5 ) ) 170 | 171 | plot( ecdf( cross.entropy ), ylim = c( 0, 1 ), 172 | xlab = "Cross-entropy", ylab = "CDF", main = "", 173 | cex.lab = 3, cex.axis = 2.5, 174 | col = fcs.ce.diff.figure.cdf.all.color, 175 | lwd = fcs.ce.diff.figure.line.width - 1, do.points = FALSE ) 176 | 177 | for ( pall in partition ) 178 | { 179 | ces <- cross.entropy.split[[ pall ]] 180 | ces.n <- length( ces ) 181 | 182 | if ( ces.n < fcs.ce.diff.figure.cdf.resolution ) { 183 | plot( ecdf( ces ), col = partition.color[ pall ], 184 | lty = partition.line.type[ pall ], 185 | lwd = fcs.ce.diff.figure.line.width, 186 | do.points = FALSE, add = TRUE ) 187 | } 188 | else { 189 | ecdf.x <- sort( ces ) 190 | ecdf.y <- 1 : ces.n / ces.n 191 | 192 | lines( ecdf.x, ecdf.y, col = partition.color[ pall ], 193 | lty = partition.line.type[ pall ], 194 | lwd = fcs.ce.diff.figure.line.width ) 195 | } 196 | } 197 | 198 | legend( "bottomright", 199 | legend = c( fcs.ce.diff.figure.cdf.all.label, partition.label ), 200 | col = c( fcs.ce.diff.figure.cdf.all.color, partition.color ), 201 | lty = partition.line.type, 202 | lwd = fcs.ce.diff.figure.line.width, 203 | cex = fcs.ce.diff.figure.font.size ) 204 | 205 | dev.off() 206 | } 207 | 208 | if ( ! is.null( dendrogram.figure ) && partition.n > 2 ) 209 | { 210 | cross.entropy.dist <- matrix( 0, nrow = partition.n, 211 | ncol = partition.n ) 212 | 213 | for ( i in 1 : ( partition.n - 1 ) ) 214 | for ( j in (i+1) : partition.n ) 215 | { 216 | if ( base.dist == "ks" ) 217 | cross.entropy.dist[ i, j ] <- ks.test( 218 | cross.entropy.split[[ i ]], 219 | cross.entropy.split[[ j ]] 220 | )$statistic 221 | else if ( base.dist == "median" ) 222 | cross.entropy.dist[ i, j ] <- abs( 223 | median( cross.entropy.split[[ i ]] ) - 224 | median( cross.entropy.split[[ j ]] ) 225 | ) 226 | else 227 | stop( "wrong base dist for cross-entropy differences" ) 228 | 229 | cross.entropy.dist[ j, i ] <- cross.entropy.dist[ i, j ] 230 | } 231 | 232 | cross.entropy.hclust <- hclust( as.dist( cross.entropy.dist ) ) 233 | 234 | if ( ! is.null( dendrogram.order.weight ) ) 235 | cross.entropy.hclust <- as.hclust( reorder( 236 | as.dendrogram( cross.entropy.hclust ), 237 | dendrogram.order.weight, 238 | agglo.FUN = mean 239 | ) ) 240 | 241 | if ( is.null( partition.label ) ) 242 | partition.label = partition 243 | 244 | png( filename = dendrogram.figure, 245 | width = fcs.ce.diff.figure.dendrogram.width, 246 | height = fcs.ce.diff.figure.dendrogram.height ) 247 | 248 | par( mar = c( 5, 5.6, 4, 1.4 ) ) 249 | 250 | plot( cross.entropy.hclust, 251 | labels = partition.label, hang = -1, 252 | xlab = "", ylab = "", main = "", sub = "", cex.axis = 3, 253 | cex = fcs.ce.diff.figure.font.size ) 254 | 255 | dev.off() 256 | } 257 | 258 | test.res 259 | } 260 | 261 | -------------------------------------------------------------------------------- /CSV-based flow analysis/00_src/ce_diff_test_tsne.r: -------------------------------------------------------------------------------- 1 | 2 | # calculates cross-entropy for tsne plots and calls ce.diff.test 3 | 4 | 5 | ce.diff.test.tsne <- function( 6 | orig.data, tsne.data, 7 | event.partition, 8 | partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 9 | base.test = "ks", base.dist = "ks", 10 | prob.sample.n = NULL, dendrogram.order.weight = NULL, 11 | result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 12 | ) 13 | { 14 | stopifnot( nrow( orig.data ) == nrow( tsne.data ) && 15 | nrow( orig.data ) == length( event.partition ) ) 16 | 17 | data.n <- nrow( orig.data ) 18 | 19 | if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n ) 20 | prob.sample.idx <- sample( data.n, prob.sample.n ) 21 | else 22 | prob.sample.idx <- 1 : data.n 23 | 24 | if ( fcs.use.cached.results && 25 | file.exists( fcs.ce.diff.tsne.cache.file.path ) ) 26 | { 27 | cat( "Using cached results for probability\n" ) 28 | 29 | load( fcs.ce.diff.tsne.cache.file.path ) 30 | } 31 | else 32 | { 33 | cat( "Calculating probability\n" ) 34 | 35 | # sampling here temporary, until optimizing dist( tsne.dat ) below 36 | orig.tsne.prob <- calculate.probability.tsne( 37 | orig.data[ prob.sample.idx, ], 38 | tsne.data[ prob.sample.idx, ] 39 | ) 40 | 41 | save( orig.tsne.prob, file = fcs.ce.diff.tsne.cache.file.path ) 42 | } 43 | 44 | cross.entropy.all <- calculate.cross.entropy( orig.tsne.prob$orig, 45 | orig.tsne.prob$tsne ) 46 | 47 | event.partition.all <- event.partition[ prob.sample.idx ] 48 | 49 | ce.diff.test( 50 | cross.entropy.all, 51 | event.partition.all, 52 | partition.label, partition.color, partition.line.type, 53 | base.test, base.dist, 54 | dendrogram.order.weight, 55 | result, cdf.figure, dendrogram.figure 56 | ) 57 | } 58 | 59 | 60 | calculate.probability.tsne <- function( orig.dat, tsne.dat ) 61 | { 62 | orig.dat.n <- nrow( orig.dat ) 63 | tsne.dat.n <- nrow( tsne.dat ) 64 | 65 | stopifnot( orig.dat.n == tsne.dat.n ) 66 | 67 | # find nearest neighbors in original space and their distances 68 | 69 | orig.dat.nn2 <- nn2( normalize_input( orig.dat ), 70 | k = fcs.ce.diff.tsne.perplexity.factor * fcs.tsne.perplexity + 1 ) 71 | 72 | orig.dat.self.idx <- sapply( 1 : orig.dat.n, function( ri ) { 73 | ri.idx <- which( orig.dat.nn2$nn.idx[ ri, ] == ri ) 74 | ifelse( length( ri.idx ) == 1, ri.idx, NA ) 75 | } ) 76 | 77 | stopifnot( ! is.na( orig.dat.self.idx ) ) 78 | 79 | orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 80 | orig.dat.nn2$nn.idx[ ri, - orig.dat.self.idx[ ri ] ] ) ) 81 | 82 | orig.dist2 <- t( sapply( 1 : orig.dat.n, function( ri ) 83 | orig.dat.nn2$nn.dists[ ri, - orig.dat.self.idx[ ri ] ]^2 ) ) 84 | 85 | # calculate probabilities associated to distances in original space 86 | 87 | orig.stdev <- apply( orig.dist2, 1, function( dd2 ) { 88 | tsne.perplexity.error <- function( ss, dd2 ) { 89 | p <- exp( - dd2 / (2*ss^2) ) 90 | if ( sum( p ) < .Machine$double.eps ) 91 | p <- 1 92 | p <- p / sum( p ) 93 | p <- p[ p > 0 ] 94 | 2^( - sum( p * log2( p ) ) ) - fcs.tsne.perplexity 95 | } 96 | 97 | dd2.min.idx <- 1 98 | dd2.ascen <- sort( dd2 ) 99 | while( dd2.ascen[ dd2.min.idx ] == 0 ) 100 | dd2.min.idx <- dd2.min.idx + 1 101 | ss.lower <- dd2.ascen[ dd2.min.idx ] 102 | 103 | dd2.max.idx <- 1 104 | dd2.descen <- sort( dd2, decreasing = TRUE ) 105 | while( is.infinite( dd2.descen[ dd2.max.idx ] ) ) 106 | dd2.max.idx <- dd2.max.idx + 1 107 | ss.upper <- dd2.descen[ dd2.max.idx ] 108 | 109 | while( tsne.perplexity.error( ss.upper, dd2 ) < 0 ) 110 | { 111 | ss.lower <- ss.upper 112 | ss.upper <- 2 * ss.upper 113 | } 114 | 115 | while( tsne.perplexity.error( ss.lower, dd2 ) > 0 ) 116 | { 117 | ss.upper <- ss.lower 118 | ss.lower <- ss.lower / 2 119 | } 120 | 121 | uniroot( tsne.perplexity.error, dd2, 122 | interval = c( ss.lower, ss.upper ), 123 | tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root 124 | } ) 125 | 126 | orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) { 127 | p <- exp( - orig.dist2[ i, ] / ( 2 * orig.stdev[ i ]^2 ) ) 128 | p / sum( p ) 129 | } ) ) 130 | 131 | # symmetrize probabilities in original space 132 | 133 | for ( i in 1 : orig.dat.n ) 134 | for ( j2 in 1 : length( orig.neigh[ i, ] ) ) 135 | { 136 | j <- orig.neigh[ i, j2 ] 137 | 138 | i2 <- match( i, orig.neigh[ j, ] ) 139 | 140 | if ( ! is.na( i2 ) ) 141 | { 142 | if ( j > i ) 143 | { 144 | sym.prob <- ( orig.prob[ i, j2 ] + orig.prob[ j, i2 ] ) / 2 145 | orig.prob[ i, j2 ] <- sym.prob 146 | orig.prob[ j, i2 ] <- sym.prob 147 | } 148 | } 149 | else 150 | orig.prob[ i, j2 ] <- orig.prob[ i, j2 ] / 2 151 | } 152 | 153 | orig.prob <- sweep( orig.prob, 1, rowSums( orig.prob ), "/" ) 154 | 155 | # get distances in tsne space for closest neighbors in original space 156 | 157 | tsne.dist2 <- t( sapply( 1 : tsne.dat.n, function( i ) 158 | sapply( orig.neigh[ i, ], function( j ) 159 | sum( ( tsne.dat[ i, ] - tsne.dat[ j, ] )^2 ) 160 | ) 161 | ) ) 162 | 163 | # calculate probabilities associated to distances in tsne representation 164 | 165 | tsne.prob.factor <- tsne.dat.n / 166 | ( 2 * sum( 1 / ( 1 + dist( tsne.dat )^2 ) ) ) 167 | 168 | tsne.prob <- t( apply( tsne.dist2, 1, function( dd2 ) 169 | p <- tsne.prob.factor / ( 1 + dd2 ) 170 | ) ) 171 | 172 | list( orig = orig.prob, tsne = tsne.prob ) 173 | } 174 | 175 | 176 | calculate.cross.entropy <- function( prim.prob, secd.prob ) 177 | { 178 | prim.prob.n <- nrow( prim.prob ) 179 | secd.prob.n <- nrow( secd.prob ) 180 | 181 | prim.prob.m <- ncol( prim.prob ) 182 | secd.prob.m <- ncol( secd.prob ) 183 | 184 | stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m ) 185 | 186 | sapply( 1 : prim.prob.n, function( i ) 187 | - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) ) 188 | ) 189 | } 190 | 191 | -------------------------------------------------------------------------------- /CSV-based flow analysis/00_src/ce_diff_test_umap.r: -------------------------------------------------------------------------------- 1 | 2 | # calculates cross-entropy for umap plots and calls ce.diff.test 3 | 4 | 5 | ce.diff.test.umap <- function( 6 | orig.dist, orig.knn, umap.data, umap.param, 7 | event.partition, 8 | partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 9 | base.test = "ks", base.dist = "ks", 10 | prob.sample.n = NULL, dendrogram.order.weight = NULL, 11 | result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 12 | ) 13 | { 14 | stopifnot( nrow( orig.dist ) == nrow( umap.data ) && 15 | nrow( orig.dist ) == length( event.partition ) ) 16 | 17 | data.n <- nrow( orig.dist ) 18 | 19 | if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n ) 20 | prob.sample.idx <- sample( data.n, prob.sample.n ) 21 | else 22 | prob.sample.idx <- 1 : data.n 23 | 24 | if ( fcs.use.cached.results && 25 | file.exists( fcs.ce.diff.umap.cache.file.path ) ) 26 | { 27 | cat( "Using cached results for probability\n" ) 28 | 29 | load( fcs.ce.diff.umap.cache.file.path ) 30 | } 31 | else 32 | { 33 | cat( "Calculating probability\n" ) 34 | 35 | orig.umap.prob <- calculate.probability.umap( orig.dist, orig.knn, 36 | umap.data, umap.param ) 37 | 38 | save( orig.umap.prob, file = fcs.ce.diff.umap.cache.file.path ) 39 | } 40 | 41 | cross.entropy.all <- calculate.fuzzy.cross.entropy( 42 | orig.umap.prob$orig[ prob.sample.idx, ], 43 | orig.umap.prob$umap[ prob.sample.idx, ] 44 | ) 45 | 46 | event.partition.all <- event.partition[ prob.sample.idx ] 47 | 48 | ce.diff.test( 49 | cross.entropy.all, 50 | event.partition.all, 51 | partition.label, partition.color, partition.line.type, 52 | base.test, base.dist, 53 | dendrogram.order.weight, 54 | result, cdf.figure, dendrogram.figure 55 | ) 56 | } 57 | 58 | 59 | calculate.probability.umap <- function( orig.dis, orig.kn, umap.dat, 60 | umap.param ) 61 | { 62 | orig.dat.n <- nrow( orig.dis ) 63 | umap.dat.n <- nrow( umap.dat ) 64 | 65 | stopifnot( orig.dat.n == umap.dat.n ) 66 | 67 | # get nearest neighbors in original space and their distances 68 | 69 | orig.self.idx <- sapply( 1 : orig.dat.n, function( ri ) { 70 | ri.idx <- which( orig.kn[ ri, ] == ri ) 71 | ifelse( length( ri.idx ) == 1, ri.idx, NA ) 72 | } ) 73 | 74 | stopifnot( ! is.na( orig.self.idx ) ) 75 | 76 | orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 77 | orig.kn[ ri, - orig.self.idx[ ri ] ] ) ) 78 | 79 | orig.dis.reduc <- t( sapply( 1 : orig.dat.n, function( ri ) 80 | orig.dis[ ri, - orig.self.idx[ ri ] ] ) ) 81 | 82 | # calculate probabilities associated to distances in original space 83 | 84 | umap.sigma <- apply( orig.dis.reduc, 1, function( dd ) { 85 | umap.sigma.error <- function( ss, dd ) { 86 | p <- exp( - pmax( 0, dd - min( dd ) ) / ss ) 87 | sum( p ) - log2( length( p ) ) 88 | } 89 | 90 | dd.ascen <- sort( dd ) 91 | dd.ascen <- dd.ascen[ dd.ascen > 0 ] 92 | ss.lower <- dd.ascen[ 1 ] 93 | 94 | dd.descen <- sort( dd, decreasing = TRUE ) 95 | dd.descen <- dd.descen[ ! is.infinite( dd.descen ) ] 96 | ss.upper <- dd.descen[ 1 ] 97 | 98 | while( umap.sigma.error( ss.upper, dd ) < 0 ) 99 | { 100 | ss.lower <- ss.upper 101 | ss.upper <- 2 * ss.upper 102 | } 103 | 104 | while( umap.sigma.error( ss.lower, dd ) > 0 ) 105 | { 106 | ss.upper <- ss.lower 107 | ss.lower <- ss.lower / 2 108 | } 109 | 110 | uniroot( umap.sigma.error, dd, 111 | interval = c( ss.lower, ss.upper ), 112 | tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root 113 | } ) 114 | 115 | orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) 116 | exp( - pmax( 0, orig.dis.reduc[ i, ] - min( orig.dis.reduc[ i, ] ) ) / 117 | umap.sigma[ i ] ) 118 | ) ) 119 | 120 | # symmetrize probabilities in original space 121 | 122 | for ( i in 1 : orig.dat.n ) 123 | for ( j2 in 1 : length( orig.neigh[ i, ] ) ) 124 | { 125 | j <- orig.neigh[ i, j2 ] 126 | 127 | i2 <- match( i, orig.neigh[ j, ] ) 128 | 129 | if ( ! is.na( i2 ) ) 130 | { 131 | if ( j > i ) 132 | { 133 | sym.prob <- orig.prob[ i, j2 ] + orig.prob[ j, i2 ] - 134 | orig.prob[ i, j2 ] * orig.prob[ j, i2 ] 135 | orig.prob[ i, j2 ] <- sym.prob 136 | orig.prob[ j, i2 ] <- sym.prob 137 | } 138 | } 139 | } 140 | 141 | # get distances in umap space for closest neighbors in original space 142 | 143 | umap.dist2 <- t( sapply( 1 : umap.dat.n, function( i ) 144 | sapply( orig.neigh[ i, ], function( j ) 145 | sum( ( umap.dat[ i, ] - umap.dat[ j, ] )^2 ) 146 | ) 147 | ) ) 148 | 149 | # calculate probabilities associated to distances in umap representation 150 | 151 | umap.a <- umap.param$a 152 | umap.b <- umap.param$b 153 | 154 | umap.prob <- t( apply( umap.dist2, 1, function( dd2 ) 155 | p <- 1 / ( 1 + umap.a * dd2 ^ umap.b ) 156 | ) ) 157 | 158 | list( orig = orig.prob, umap = umap.prob ) 159 | } 160 | 161 | 162 | calculate.fuzzy.cross.entropy <- function( prim.prob, secd.prob ) 163 | { 164 | prim.prob.n <- nrow( prim.prob ) 165 | secd.prob.n <- nrow( secd.prob ) 166 | 167 | prim.prob.m <- ncol( prim.prob ) 168 | secd.prob.m <- ncol( secd.prob ) 169 | 170 | stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m ) 171 | 172 | sapply( 1 : prim.prob.n, function( i ) 173 | - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) + 174 | ( 1 - prim.prob[ i, ] ) * log( 1 - secd.prob[ i, ] ) ) 175 | ) 176 | } 177 | 178 | -------------------------------------------------------------------------------- /CSV-based flow analysis/00_src/plot_all_dmrd_figures.r: -------------------------------------------------------------------------------- 1 | 2 | # plots all dimensionality reduction figures 3 | 4 | 5 | plot.all.dmrd.figures <- function( 6 | redu.data, redu.data.max, 7 | redu.figure.lims.factor, redu.figure.point.size, 8 | redu.figure.dir, redu.figure.plot, 9 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 10 | ) 11 | { 12 | redu.lims <- redu.figure.lims.factor * redu.data.max * c( -1, 1 ) 13 | 14 | the.dmrd.figure.width <- fcs.dmrd.figure.width + 15 | fcs.dmrd.label.factor.width * 16 | max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 17 | nchar( fcs.cluster.label ) ) 18 | 19 | the.dmrd.figure.width.multi <- 20 | fcs.dmrd.figure.ncol * fcs.dmrd.figure.width + 21 | fcs.dmrd.label.factor.width * 22 | max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 23 | nchar( fcs.cluster.label ) ) 24 | 25 | the.dmrd.figure.height <- fcs.dmrd.figure.height 26 | 27 | the.dmrd.figure.height.multi <- 28 | fcs.dmrd.figure.nrow * fcs.dmrd.figure.height 29 | 30 | # plot all events colored by cluster 31 | 32 | redu.plot <- plot.dimensionality.reduction( 33 | redu.data[ , 1 ], redu.data[ , 2 ], 34 | event.partition = dmrd.event.cluster, 35 | partition.label = fcs.cluster.label, 36 | partition.color = fcs.cluster.color, 37 | dmrd.lims = redu.lims, 38 | point.size = redu.figure.point.size, 39 | show.guide = TRUE 40 | ) 41 | 42 | ggsave( 43 | file.path( redu.figure.dir, 44 | sprintf( "%s_all_events__cluster.png", redu.figure.plot ) ), 45 | redu.plot, 46 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 47 | ) 48 | 49 | # plot all events colored by condition 50 | 51 | redu.plot <- plot.dimensionality.reduction( 52 | redu.data[ , 1 ], redu.data[ , 2 ], 53 | event.partition = dmrd.event.condition, 54 | partition.label = fcs.condition.label, 55 | partition.color = adjustcolor( fcs.condition.color, 56 | alpha.f = fcs.dmrd.color.alpha ), 57 | dmrd.lims = redu.lims, 58 | point.size = redu.figure.point.size, 59 | show.guide = TRUE 60 | ) 61 | 62 | ggsave( 63 | file.path( redu.figure.dir, 64 | sprintf( "%s_all_events__condition.png", redu.figure.plot ) ), 65 | redu.plot, 66 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 67 | ) 68 | 69 | # plot all events colored by each marker level 70 | 71 | for ( fch in fcs.channel ) 72 | { 73 | dmrd.event.level <- dmrd.data[ , fch ] 74 | 75 | redu.plot <- plot.dimensionality.reduction( 76 | redu.data[ , 1 ], redu.data[ , 2 ], 77 | event.level = dmrd.event.level, 78 | dmrd.lims = redu.lims, 79 | point.size = redu.figure.point.size, 80 | show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 81 | ) 82 | 83 | ggsave( 84 | file.path( redu.figure.dir, 85 | sprintf( "%s_all_events__%s.png", redu.figure.plot, 86 | fcs.channel.label[ fch ] ) ), 87 | redu.plot, 88 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 89 | ) 90 | } 91 | 92 | # plot all conditions colored by cluster 93 | 94 | redu.plot <- plot.dimensionality.reduction( 95 | redu.data[ , 1 ], redu.data[ , 2 ], 96 | event.group = dmrd.event.condition, 97 | group.label = fcs.condition.label, 98 | event.partition = dmrd.event.cluster, 99 | partition.label = fcs.cluster.label, 100 | partition.color = fcs.cluster.color, 101 | dmrd.lims = redu.lims, 102 | dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 103 | point.size = redu.figure.point.size, 104 | show.guide = TRUE 105 | ) 106 | 107 | ggsave( 108 | file.path( redu.figure.dir, 109 | sprintf( "%s_all_conditions__cluster.png", redu.figure.plot ) ), 110 | redu.plot, 111 | width = the.dmrd.figure.width.multi, 112 | height = the.dmrd.figure.height.multi 113 | ) 114 | 115 | # plot all conditions colored by each marker level 116 | 117 | for ( fch in fcs.channel ) 118 | { 119 | dmrd.event.level <- dmrd.data[ , fch ] 120 | 121 | redu.plot <- plot.dimensionality.reduction( 122 | redu.data[ , 1 ], redu.data[ , 2 ], 123 | event.group = dmrd.event.condition, 124 | group.label = fcs.condition.label, 125 | event.level = dmrd.event.level, 126 | dmrd.lims = redu.lims, 127 | dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 128 | point.size = redu.figure.point.size, 129 | show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 130 | ) 131 | 132 | ggsave( 133 | file.path( redu.figure.dir, 134 | sprintf( "%s_all_conditions__%s.png", redu.figure.plot, 135 | fcs.channel.label[ fch ] ) ), 136 | redu.plot, 137 | width = the.dmrd.figure.width.multi, 138 | height = the.dmrd.figure.height.multi 139 | ) 140 | } 141 | 142 | # plot each condition colored by cluster 143 | 144 | for ( cond in fcs.condition ) 145 | { 146 | dmrd.cond.idx <- which( dmrd.event.condition == cond ) 147 | 148 | redu.cond.data <- redu.data[ dmrd.cond.idx, ] 149 | dmrd.cond.event.cluster <- dmrd.event.cluster[ dmrd.cond.idx ] 150 | 151 | redu.plot <- plot.dimensionality.reduction( 152 | redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 153 | event.partition = dmrd.cond.event.cluster, 154 | partition.label = fcs.cluster.label, 155 | partition.color = fcs.cluster.color, 156 | dmrd.lims = redu.lims, 157 | point.size = redu.figure.point.size, 158 | show.guide = TRUE 159 | ) 160 | 161 | ggsave( 162 | file.path( redu.figure.dir, 163 | sprintf( "%s_%s__cluster.png", redu.figure.plot, 164 | fcs.condition.label[ cond ] ) ), 165 | redu.plot, 166 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 167 | ) 168 | } 169 | 170 | # plot each condition colored by sample 171 | 172 | for ( cond in fcs.condition ) 173 | { 174 | dmrd.cond.idx <- which( dmrd.event.condition == cond ) 175 | 176 | redu.cond.data <- redu.data[ dmrd.cond.idx, ] 177 | dmrd.cond.event.sample <- dmrd.event.sample[ dmrd.cond.idx ] 178 | 179 | redu.plot <- plot.dimensionality.reduction( 180 | redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 181 | event.partition = dmrd.cond.event.sample, 182 | partition.label = flow.sample.label, 183 | partition.color = adjustcolor( flow.sample.color.single, 184 | alpha.f = fcs.dmrd.color.alpha ), 185 | dmrd.lims = redu.lims, 186 | point.size = redu.figure.point.size, 187 | show.guide = TRUE 188 | ) 189 | 190 | ggsave( 191 | file.path( redu.figure.dir, 192 | sprintf( "%s_%s__sample.png", redu.figure.plot, 193 | fcs.condition.label[ cond ] ) ), 194 | redu.plot, 195 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 196 | ) 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /CSV-based flow analysis/00_src/plot_all_embedsom_figures.R: -------------------------------------------------------------------------------- 1 | 2 | # plots all dimensionality reduction figures 3 | 4 | 5 | plot.all.embedsom.figures <- function( 6 | redu.data, redu.data.max, 7 | redu.figure.lims.factor, redu.figure.point.size, 8 | redu.figure.dir, redu.figure.plot, 9 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 10 | ) 11 | { 12 | redu.lims <- redu.figure.lims.factor * redu.data.max * c( 0, 1) 13 | 14 | the.dmrd.figure.width <- fcs.dmrd.figure.width + 15 | fcs.dmrd.label.factor.width * 16 | max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 17 | nchar( fcs.cluster.label ) ) 18 | 19 | the.dmrd.figure.width.multi <- 20 | fcs.dmrd.figure.ncol * fcs.dmrd.figure.width + 21 | fcs.dmrd.label.factor.width * 22 | max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 23 | nchar( fcs.cluster.label ) ) 24 | 25 | the.dmrd.figure.height <- fcs.dmrd.figure.height 26 | 27 | the.dmrd.figure.height.multi <- 28 | fcs.dmrd.figure.nrow * fcs.dmrd.figure.height 29 | 30 | # plot all events colored by cluster 31 | 32 | redu.plot <- plot.dimensionality.reduction( 33 | redu.data[ , 1 ], redu.data[ , 2 ], 34 | event.partition = dmrd.event.cluster, 35 | partition.label = fcs.cluster.label, 36 | partition.color = adjustcolor( fcs.cluster.color, 37 | alpha.f = fcs.dmrd.color.alpha ), 38 | dmrd.lims = redu.lims, 39 | point.size = redu.figure.point.size, 40 | show.guide = TRUE 41 | ) 42 | 43 | ggsave( 44 | file.path( redu.figure.dir, 45 | sprintf( "%s_all_events__cluster.png", redu.figure.plot ) ), 46 | redu.plot, 47 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 48 | ) 49 | 50 | # plot all events colored by condition 51 | 52 | redu.plot <- plot.dimensionality.reduction( 53 | redu.data[ , 1 ], redu.data[ , 2 ], 54 | event.partition = dmrd.event.condition, 55 | partition.label = fcs.condition.label, 56 | partition.color = adjustcolor( fcs.condition.color, 57 | alpha.f = fcs.dmrd.color.alpha ), 58 | dmrd.lims = redu.lims, 59 | point.size = redu.figure.point.size, 60 | show.guide = TRUE 61 | ) 62 | 63 | ggsave( 64 | file.path( redu.figure.dir, 65 | sprintf( "%s_all_events__condition.png", redu.figure.plot ) ), 66 | redu.plot, 67 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 68 | ) 69 | 70 | # plot all events colored by each marker level 71 | 72 | for ( fch in fcs.channel ) 73 | { 74 | dmrd.event.level <- dmrd.data[ , fch ] 75 | 76 | redu.plot <- plot.dimensionality.reduction( 77 | redu.data[ , 1 ], redu.data[ , 2 ], 78 | event.level = dmrd.event.level, 79 | dmrd.lims = redu.lims, 80 | point.size = redu.figure.point.size, 81 | show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 82 | ) 83 | 84 | ggsave( 85 | file.path( redu.figure.dir, 86 | sprintf( "%s_all_events__%s.png", redu.figure.plot, 87 | fcs.channel.label[ fch ] ) ), 88 | redu.plot, 89 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 90 | ) 91 | } 92 | 93 | # plot all conditions colored by cluster 94 | 95 | redu.plot <- plot.dimensionality.reduction( 96 | redu.data[ , 1 ], redu.data[ , 2 ], 97 | event.group = dmrd.event.condition, 98 | group.label = fcs.condition.label, 99 | event.partition = dmrd.event.cluster, 100 | partition.label = fcs.cluster.label, 101 | partition.color = fcs.cluster.color, 102 | dmrd.lims = redu.lims, 103 | dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 104 | point.size = redu.figure.point.size, 105 | show.guide = TRUE 106 | ) 107 | 108 | ggsave( 109 | file.path( redu.figure.dir, 110 | sprintf( "%s_all_conditions__cluster.png", redu.figure.plot ) ), 111 | redu.plot, 112 | width = the.dmrd.figure.width.multi, 113 | height = the.dmrd.figure.height.multi 114 | ) 115 | 116 | # plot all conditions colored by each marker level 117 | 118 | for ( fch in fcs.channel ) 119 | { 120 | dmrd.event.level <- dmrd.data[ , fch ] 121 | 122 | redu.plot <- plot.dimensionality.reduction( 123 | redu.data[ , 1 ], redu.data[ , 2 ], 124 | event.group = dmrd.event.condition, 125 | group.label = fcs.condition.label, 126 | event.level = dmrd.event.level, 127 | dmrd.lims = redu.lims, 128 | dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 129 | point.size = redu.figure.point.size, 130 | show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 131 | ) 132 | 133 | ggsave( 134 | file.path( redu.figure.dir, 135 | sprintf( "%s_all_conditions__%s.png", redu.figure.plot, 136 | fcs.channel.label[ fch ] ) ), 137 | redu.plot, 138 | width = the.dmrd.figure.width.multi, 139 | height = the.dmrd.figure.height.multi 140 | ) 141 | } 142 | 143 | # plot each condition colored by cluster 144 | 145 | for ( cond in fcs.condition ) 146 | { 147 | dmrd.cond.idx <- which( dmrd.event.condition == cond ) 148 | 149 | redu.cond.data <- redu.data[ dmrd.cond.idx, ] 150 | dmrd.cond.event.cluster <- dmrd.event.cluster[ dmrd.cond.idx ] 151 | 152 | redu.plot <- plot.dimensionality.reduction( 153 | redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 154 | event.partition = dmrd.cond.event.cluster, 155 | partition.label = fcs.cluster.label, 156 | partition.color = fcs.cluster.color, 157 | dmrd.lims = redu.lims, 158 | point.size = redu.figure.point.size, 159 | show.guide = TRUE 160 | ) 161 | 162 | ggsave( 163 | file.path( redu.figure.dir, 164 | sprintf( "%s_%s__cluster.png", redu.figure.plot, 165 | fcs.condition.label[ cond ] ) ), 166 | redu.plot, 167 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 168 | ) 169 | } 170 | 171 | # plot each condition colored by sample 172 | 173 | for ( cond in fcs.condition ) 174 | { 175 | dmrd.cond.idx <- which( dmrd.event.condition == cond ) 176 | 177 | redu.cond.data <- redu.data[ dmrd.cond.idx, ] 178 | dmrd.cond.event.sample <- dmrd.event.sample[ dmrd.cond.idx ] 179 | 180 | redu.plot <- plot.dimensionality.reduction( 181 | redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 182 | event.partition = dmrd.cond.event.sample, 183 | partition.label = flow.sample.label, 184 | partition.color = adjustcolor( flow.sample.color.single, 185 | alpha.f = fcs.dmrd.color.alpha ), 186 | dmrd.lims = redu.lims, 187 | point.size = redu.figure.point.size, 188 | show.guide = TRUE 189 | ) 190 | 191 | ggsave( 192 | file.path( redu.figure.dir, 193 | sprintf( "%s_%s__sample.png", redu.figure.plot, 194 | fcs.condition.label[ cond ] ) ), 195 | redu.plot, 196 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 197 | ) 198 | } 199 | } 200 | 201 | -------------------------------------------------------------------------------- /CSV-based flow analysis/00_src/plot_dimensionality_reduction.r: -------------------------------------------------------------------------------- 1 | 2 | # plots one dimensionality reduction figure 3 | 4 | 5 | plot.dimensionality.reduction <- function( 6 | dmrd.x, dmrd.y, 7 | event.group = NULL, group.label = NULL, 8 | event.partition = NULL, partition.label = NULL, partition.color = NULL, 9 | event.level = NULL, 10 | dmrd.lims = NULL, dmrd.nrow = NULL, dmrd.ncol = NULL, 11 | point.size = NULL, show.guide = FALSE, guide.name = NULL 12 | ) 13 | { 14 | if ( ! is.null( event.partition ) ) 15 | { 16 | if ( is.null( event.group ) ) 17 | ggdf <- data.frame( dmrd.x, dmrd.y, event.partition ) 18 | else 19 | ggdf <- data.frame( dmrd.x, dmrd.y, event.partition, event.group ) 20 | 21 | if ( is.null( partition.label ) ) 22 | plot.label <- waiver() 23 | else 24 | plot.label <- partition.label 25 | 26 | if ( show.guide ) 27 | plot.guide <- guide_legend( keyheight = 0.8, 28 | override.aes = list( size = fcs.dmrd.legend.point.size ), 29 | label.theme = element_text( size = fcs.dmrd.legend.label.size ), 30 | title = guide.name, title.position = "top", 31 | title.theme = element_text( size = fcs.dmrd.legend.title.size ) ) 32 | else 33 | plot.guide <- FALSE 34 | 35 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, 36 | y = dmrd.y, color = event.partition ) ) + 37 | scale_color_manual( values = partition.color, labels = plot.label, 38 | guide = plot.guide ) 39 | } 40 | else if ( ! is.null( event.level ) ) 41 | { 42 | if ( is.null( event.group ) ) 43 | ggdf <- data.frame( dmrd.x, dmrd.y, event.level ) 44 | else 45 | ggdf <- data.frame( dmrd.x, dmrd.y, event.level, event.group ) 46 | 47 | if ( show.guide ) 48 | plot.guide <- guide_colorbar( barwidth = 0.8, barheight = 10, 49 | title = guide.name, title.position = "top", 50 | title.theme = element_text( size = fcs.dmrd.legend.title.size ) ) 51 | else 52 | plot.guide <- FALSE 53 | 54 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y, 55 | color = event.level ) ) + 56 | scale_color_gradientn( colors = fcs.dmrd.density.palette, 57 | labels = NULL, guide = plot.guide ) 58 | } 59 | else 60 | { 61 | if ( is.null( event.group ) ) 62 | ggdf <- data.frame( dmrd.x, dmrd.y ) 63 | else 64 | ggdf <- data.frame( dmrd.x, dmrd.y, event.group ) 65 | 66 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y ) ) 67 | } 68 | 69 | if ( is.null( dmrd.lims ) ) { 70 | dmrd.xy.max <- max( abs( c( dmrd.x, dmrd.y ) ) ) 71 | dmrd.lims <- dmrd.xy.max * c( -1, 1 ) 72 | } 73 | 74 | if ( is.null( point.size ) ) 75 | point.size <- 0.5 76 | 77 | dmrd.plot <- dmrd.plot + 78 | coord_fixed() + 79 | lims( x = dmrd.lims, y = dmrd.lims ) + 80 | geom_point( shape = 20, stroke = 0, size = point.size ) + 81 | theme_bw() + 82 | theme( axis.title = element_blank(), 83 | axis.text = element_blank(), 84 | axis.ticks = element_blank(), 85 | panel.grid.major = element_blank(), 86 | panel.grid.minor = element_blank() ) 87 | 88 | if ( ! is.null( event.group ) ) 89 | { 90 | dmrd.plot <- dmrd.plot + 91 | facet_wrap( vars( event.group ), 92 | labeller = as_labeller( group.label ), 93 | nrow = dmrd.nrow, ncol = dmrd.ncol ) + 94 | theme( strip.background = element_rect( fill = "white" ), 95 | strip.text = element_text( size = fcs.dmrd.group.title.size ) ) 96 | } 97 | 98 | dmrd.plot 99 | } 100 | 101 | -------------------------------------------------------------------------------- /CSV-based flow analysis/00_src/plot_dimensionality_reduction_embedSOM.R: -------------------------------------------------------------------------------- 1 | 2 | # plots one dimensionality reduction figure 3 | 4 | 5 | plot.dimensionality.reduction.embedsom <- function( 6 | dmrd.x, dmrd.y, 7 | event.group = NULL, group.label = NULL, 8 | event.partition = NULL, partition.label = NULL, partition.color = NULL, 9 | event.level = NULL, 10 | dmrd.lims = NULL, dmrd.nrow = NULL, dmrd.ncol = NULL, 11 | point.size = NULL, show.guide = FALSE, guide.name = NULL 12 | ) 13 | { 14 | if ( ! is.null( event.partition ) ) 15 | { 16 | if ( is.null( event.group ) ) 17 | ggdf <- data.frame( dmrd.x, dmrd.y, event.partition ) 18 | else 19 | ggdf <- data.frame( dmrd.x, dmrd.y, event.partition, event.group ) 20 | 21 | if ( is.null( partition.label ) ) 22 | plot.label <- waiver() 23 | else 24 | plot.label <- partition.label 25 | 26 | if ( show.guide ) 27 | plot.guide <- guide_legend( keyheight = 0.8, 28 | override.aes = list( size = fcs.dmrd.legend.point.size ), 29 | label.theme = element_text( size = fcs.dmrd.legend.label.size ), 30 | title = guide.name, title.position = "top", 31 | title.theme = element_text( size = fcs.dmrd.legend.title.size ) ) 32 | else 33 | plot.guide <- FALSE 34 | 35 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, 36 | y = dmrd.y, color = event.partition ) ) + 37 | scale_color_manual( values = partition.color, labels = plot.label, 38 | guide = plot.guide ) 39 | } 40 | else if ( ! is.null( event.level ) ) 41 | { 42 | if ( is.null( event.group ) ) 43 | ggdf <- data.frame( dmrd.x, dmrd.y, event.level ) 44 | else 45 | ggdf <- data.frame( dmrd.x, dmrd.y, event.level, event.group ) 46 | 47 | if ( show.guide ) 48 | plot.guide <- guide_colorbar( barwidth = 0.8, barheight = 10, 49 | title = guide.name, title.position = "top", 50 | title.theme = element_text( size = fcs.dmrd.legend.title.size ) ) 51 | else 52 | plot.guide <- FALSE 53 | 54 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y, 55 | color = event.level ) ) + 56 | scale_color_gradientn( colors = fcs.dmrd.density.palette, 57 | labels = NULL, guide = plot.guide ) 58 | } 59 | else 60 | { 61 | if ( is.null( event.group ) ) 62 | ggdf <- data.frame( dmrd.x, dmrd.y ) 63 | else 64 | ggdf <- data.frame( dmrd.x, dmrd.y, event.group ) 65 | 66 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y ) ) 67 | } 68 | 69 | if ( is.null( dmrd.lims ) ) { 70 | dmrd.xy.max <- max( abs( c( dmrd.x, dmrd.y ) ) ) 71 | dmrd.lims <- dmrd.xy.max * c( -1, 1 ) 72 | } 73 | 74 | if ( is.null( point.size ) ) 75 | point.size <- 0.5 76 | 77 | dmrd.plot <- dmrd.plot + 78 | coord_fixed() + 79 | lims( x = dmrd.lims, y = dmrd.lims ) + 80 | geom_point( shape = 20, stroke = 0, size = point.size, alpha = fcs.dmrd.color.alpha ) + 81 | theme_bw() + 82 | theme( axis.title = element_blank(), 83 | axis.text = element_blank(), 84 | axis.ticks = element_blank(), 85 | panel.grid.major = element_blank(), 86 | panel.grid.minor = element_blank() ) 87 | 88 | if ( ! is.null( event.group ) ) 89 | { 90 | dmrd.plot <- dmrd.plot + 91 | facet_wrap( vars( event.group ), 92 | labeller = as_labeller( group.label ), 93 | nrow = dmrd.nrow, ncol = dmrd.ncol ) + 94 | theme( strip.background = element_rect( fill = "white" ), 95 | strip.text = element_text( size = fcs.dmrd.group.title.size ) ) 96 | } 97 | 98 | dmrd.plot 99 | } 100 | 101 | -------------------------------------------------------------------------------- /CSV-based flow analysis/Exporting data in csv format.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdrianListon/Cross-Entropy-test/8652d6a98a7ba9ee1bb81c736e45ce9ad191abc6/CSV-based flow analysis/Exporting data in csv format.PNG -------------------------------------------------------------------------------- /CSV-based flow analysis/Setting axis in FlowJo for Aurora data.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdrianListon/Cross-Entropy-test/8652d6a98a7ba9ee1bb81c736e45ce9ad191abc6/CSV-based flow analysis/Setting axis in FlowJo for Aurora data.pptx -------------------------------------------------------------------------------- /CSV-based flow analysis/analyze_flow_cytometry_csv.R: -------------------------------------------------------------------------------- 1 | 2 | # script for the analysis of flow cytometry data--------------- 3 | 4 | library( digest ) 5 | require( dunn.test ) 6 | library( ggplot2 ) 7 | library( ggridges ) 8 | require( RANN ) 9 | library( RColorBrewer ) 10 | library( reshape2 ) 11 | library( Rtsne ) 12 | library( umap ) 13 | library( dplyr ) 14 | library( EmbedSOM ) 15 | 16 | # function to set random seed depending on base number and string--------------- 17 | 18 | set.seed.here <- function( seed.base, seed.char ) 19 | { 20 | seed.add <- strtoi( substr( digest( seed.char, "xxhash32" ), 2, 8 ), 16 ) 21 | seed.new <- seed.base + seed.add 22 | set.seed( seed.new ) 23 | invisible( seed.new ) 24 | } 25 | 26 | # source parameters--------------- 27 | 28 | param.filename <- "./analyze_flow_cytometry_parameter_csv.r" 29 | 30 | source( param.filename ) 31 | 32 | # select channels for analysis--------------- 33 | 34 | channel.selection.data <- read.csv( list.files(fcs.data.dir, "\\.csv$", full.names = TRUE)[1] ) 35 | channels <- data.frame( desc = unname( colnames( channel.selection.data ) )) 36 | for( i in 1:dim(channels)[1] ){ 37 | channels$out[i] = paste0("\"", channels$desc[i], "\" = \"", channels$desc[i], "\",\n") 38 | } 39 | descs.filtered <- channels$desc[!is.na(channels$desc) & channels$desc != '-'] 40 | channels.filtered <- filter(channels, desc %in% descs.filtered) 41 | 42 | # paste the output below into fcs.channel in the parameter file, deleting unwanted channels 43 | cat('"', paste0(channels.filtered$desc, collapse = '","'), '"', sep = "") 44 | 45 | source( param.filename ) 46 | 47 | # add the output to fcs.channel.label in the parameter file 48 | temp <- channels$out[channels$desc %in% fcs.channel] 49 | temp[length(temp)] <- gsub(',', '', temp[length(temp)]) 50 | cat(paste(temp, collapse = "")) 51 | 52 | source( param.filename ) 53 | 54 | 55 | # check consistency in parameters--------------- 56 | 57 | stopifnot( names( fcs.channel.label ) == fcs.channel ) 58 | 59 | stopifnot( names( fcs.condition.label ) == fcs.condition ) 60 | stopifnot( names( fcs.condition.color ) == fcs.condition ) 61 | stopifnot( names( fcs.condition.line.type ) == fcs.condition ) 62 | 63 | stopifnot( names( fcs.cluster.label ) == fcs.cluster ) 64 | stopifnot( names( fcs.cluster.color ) == fcs.cluster ) 65 | stopifnot( names( fcs.cluster.line.type ) == fcs.cluster ) 66 | 67 | stopifnot( sum( is.null( fcs.dmrd.data.sample.n ), 68 | is.null( fcs.dmrd.data.sample.n.per.condition ), 69 | is.null( fcs.dmrd.data.sample.n.per.sample ) ) >= 2 ) 70 | 71 | stopifnot( unlist( fcs.cluster.group ) >= 1 & 72 | unlist( fcs.cluster.group ) <= fcs.cluster.n ) 73 | 74 | 75 | # source functions--------------- 76 | 77 | source( file.path( fcs.src.dir, "ce_diff_test.r" ) ) 78 | source( file.path( fcs.src.dir, "ce_diff_test_tsne.r" ) ) 79 | source( file.path( fcs.src.dir, "ce_diff_test_umap.r" ) ) 80 | source( file.path( fcs.src.dir, "plot_all_dmrd_figures.r" ) ) 81 | source( file.path( fcs.src.dir, "plot_all_embedsom_figures.r" ) ) 82 | source( file.path( fcs.src.dir, "plot_dimensionality_reduction.r" ) ) 83 | source( file.path( fcs.src.dir, "plot_dimensionality_reduction_embedSOM.r" ) ) 84 | 85 | 86 | # create dirs--------------- 87 | 88 | figure.dir <- c( 89 | fcs.ce.diff.tsne.figure.dir, 90 | fcs.ce.diff.umap.figure.dir, 91 | fcs.density.figure.dir, 92 | fcs.heatmap.figure.dir, 93 | fcs.histogram.figure.dir, 94 | fcs.tsne.figure.dir, 95 | fcs.umap.figure.dir, 96 | fcs.embedsom.figure.dir, 97 | fcs.ce.diff.embedsom.figure.dir 98 | ) 99 | 100 | table.dir <- fcs.cluster.table.dir 101 | 102 | data.dir <- sapply( names( fcs.cluster.group ), function( fcg.name ) 103 | sprintf( "%s/%s_%s", fcs.cluster.data.dir, fcs.cluster.data, fcg.name ) ) 104 | 105 | for ( the.dir in c( figure.dir, table.dir, data.dir ) ) 106 | if ( ! file.exists( the.dir ) ) 107 | dir.create( the.dir, recursive = TRUE ) 108 | 109 | 110 | # read csv data--------------- 111 | 112 | flow.data.filename.all <- list.files( fcs.data.dir, "\\.csv$" ) 113 | 114 | flow.data.filename <- grep( paste0( fcs.condition, collapse = "|" ), 115 | flow.data.filename.all, value = TRUE ) 116 | 117 | sample.name.format <- paste0( "%s.%0", fcs.sample.number.width, "d" ) 118 | event.name.format <- paste0( "%s.%0", fcs.event.number.width, "d" ) 119 | 120 | flow.data.filename.sample <- rep( "", length( flow.data.filename ) ) 121 | names( flow.data.filename.sample ) <- flow.data.filename 122 | 123 | sample.idx.next <- rep( 1, fcs.condition.n ) 124 | names( sample.idx.next ) <- fcs.condition 125 | 126 | flow.data <- lapply( flow.data.filename, function( flow.data.fn ) { 127 | # cat( flow.data.fn, "\n" ) 128 | sample.data <- as.matrix(read.csv( file.path( fcs.data.dir, flow.data.fn ) )) 129 | 130 | condition <- fcs.condition[ sapply( fcs.condition, grepl, flow.data.fn ) ] 131 | stopifnot( length( condition ) == 1 ) 132 | 133 | if ( ! all( fcs.channel %in% colnames( sample.data ) ) ) 134 | { 135 | cat( sprintf( "File: %s\n", flow.data.fn ) ) 136 | print( sort( fcs.channel[ 137 | ! fcs.channel %in% colnames( sample.data ) ] ) ) 138 | print( sort( colnames( sample.data ) ) ) 139 | stop( "mismatch in names of channels" ) 140 | } 141 | 142 | sample.name <- sprintf( sample.name.format, condition, 143 | sample.idx.next[ condition ] ) 144 | 145 | sample.data <- sample.data[ , fcs.channel, drop = FALSE ] 146 | 147 | event.n <- nrow( sample.data ) 148 | if ( event.n > 0 ) { 149 | event.name <- sprintf( event.name.format, sample.name, 1 : event.n ) 150 | rownames( sample.data ) <- event.name 151 | } 152 | 153 | flow.data.filename.sample[ flow.data.fn ] <<- sample.name 154 | sample.idx.next[ condition ] <<- sample.idx.next[ condition ] + 1 155 | 156 | sample.data 157 | } ) 158 | 159 | flow.data <- do.call( rbind, flow.data ) 160 | 161 | # define samples--------------- 162 | flow.sample <- flow.data.filename.sample 163 | names( flow.sample ) <- NULL 164 | 165 | stopifnot( flow.sample == 166 | unique( sub( "\\.[0-9]+$", "", rownames( flow.data ) ) ) ) 167 | 168 | flow.sample.n <- length ( flow.sample ) 169 | 170 | flow.sample.condition <- factor( sub( "\\.[0-9]+$", "", flow.sample ), 171 | levels = fcs.condition ) 172 | names( flow.sample.condition ) <- flow.sample 173 | 174 | # reorder samples to follow order of conditions 175 | flow.sample <- flow.sample[ order( flow.sample.condition ) ] 176 | flow.sample.condition <- flow.sample.condition[ flow.sample ] 177 | 178 | flow.sample.label <- sapply( flow.sample, function( fs ) { 179 | sample.cond <- sub( "^(.*)\\.[0-9]+$", "\\1", fs ) 180 | sample.num <- sub( "^.*\\.([0-9]+)$", "\\1", fs ) 181 | sprintf( "%s-%s", fcs.condition.label[ sample.cond ], sample.num ) 182 | } ) 183 | 184 | flow.sample.filename <- sapply( flow.sample, function( fs ) 185 | names( which( flow.data.filename.sample == fs ) ) ) 186 | 187 | # define events 188 | flow.event <- rownames( flow.data ) 189 | flow.event.n <- length( flow.event ) 190 | 191 | flow.event.sample <- factor( sub( "\\.[0-9]+$", "", flow.event ), 192 | levels = flow.sample ) 193 | names( flow.event.sample ) <- flow.event 194 | 195 | flow.event.condition <- factor( sub( "\\.[0-9]+$", "", flow.event.sample ), 196 | levels = fcs.condition ) 197 | names( flow.event.condition ) <- flow.event 198 | 199 | # reorder events to follow order of samples 200 | flow.event.order <- order( flow.event.sample ) 201 | 202 | flow.data <- flow.data[ flow.event.order, ] 203 | flow.event <- flow.event[ flow.event.order ] 204 | flow.event.sample <- flow.event.sample[ flow.event.order ] 205 | flow.event.condition <- flow.event.condition[ flow.event.order ] 206 | 207 | flow.event.sample.n <- as.vector( table( flow.event.sample ) ) 208 | names( flow.event.sample.n ) <- flow.sample 209 | 210 | flow.event.condition.n <- as.vector( table( flow.event.condition ) ) 211 | names( flow.event.condition.n ) <- fcs.condition 212 | 213 | flow.data.filename 214 | 215 | table( flow.sample.condition ) 216 | 217 | str( flow.data ) 218 | flow.event.condition.n 219 | flow.event.sample.n 220 | 221 | 222 | # define figure parameters for samples--------------- 223 | 224 | flow.sample.color <- fcs.condition.color[ flow.sample.condition ] 225 | names( flow.sample.color ) <- flow.sample 226 | 227 | flow.sample.color.single <- unlist( lapply( fcs.condition, function( fc ) { 228 | cond.sample.n <- sum( flow.sample.condition == fc ) 229 | rep( 230 | fcs.color.pool, 231 | ceiling( cond.sample.n / fcs.color.pool.n ) 232 | )[ 1 : cond.sample.n ] 233 | } ) ) 234 | names( flow.sample.color.single ) <- flow.sample 235 | 236 | flow.sample.line.type <- fcs.condition.line.type[ flow.sample.condition ] 237 | names( flow.sample.line.type ) <- flow.sample 238 | 239 | flow.sample.line.type.single <- unlist( lapply( fcs.condition, function( fc ) { 240 | cond.sample.n <- sum( flow.sample.condition == fc ) 241 | rep( 242 | fcs.line.type.pool, 243 | ceiling( cond.sample.n / fcs.line.type.pool.n ) 244 | )[ 1 : cond.sample.n ] 245 | } ) ) 246 | names( flow.sample.line.type.single ) <- flow.sample 247 | 248 | flow.ce.diff.figure.dendrogram.weight.sample <- 249 | fcs.ce.diff.figure.dendrogram.weight.condition[ flow.sample.condition ] 250 | names( flow.ce.diff.figure.dendrogram.weight.sample ) <- flow.sample 251 | 252 | 253 | # plot density distributions of transformed data--------------- 254 | 255 | set.seed.here( fcs.seed.base, "plot density distributions of transformed data" ) 256 | 257 | { 258 | if ( ! is.null( fcs.density.data.sample.n ) && 259 | fcs.density.data.sample.n < flow.event.n ) 260 | density.data.idx <- sort( sample( flow.event.n, 261 | fcs.density.data.sample.n ) ) 262 | else 263 | density.data.idx <- 1 : flow.event.n 264 | } 265 | 266 | density.data <- flow.data[ density.data.idx, ] 267 | 268 | density.data.ggdf.all <- melt( density.data, 269 | varnames = c( "partition", "channel" ), value.name = "density.value" ) 270 | density.data.ggdf.all$partition <- fcs.density.partition.all 271 | density.data.ggdf.all$channel <- as.character( density.data.ggdf.all$channel ) 272 | 273 | density.data.ggdf.condition <- lapply( fcs.condition, function( fc ) { 274 | ggdf.condition <- melt( 275 | density.data[ flow.event.condition[ density.data.idx ] == fc, , 276 | drop = FALSE], 277 | varnames = c( "partition", "channel" ), 278 | value.name = "density.value" ) 279 | if ( nrow( ggdf.condition ) > 0 ) { 280 | ggdf.condition$partition <- fc 281 | ggdf.condition$channel <- as.character( ggdf.condition$channel ) 282 | } 283 | ggdf.condition 284 | } ) 285 | density.data.ggdf.condition <- do.call( rbind, density.data.ggdf.condition ) 286 | 287 | density.data.ggdf.sample <- lapply( flow.sample, function( fs ) { 288 | ggdf.sample <- melt( 289 | density.data[ flow.event.sample[ density.data.idx ] == fs, , 290 | drop = FALSE ], 291 | varnames = c( "partition", "channel" ), 292 | value.name = "density.value" ) 293 | if ( nrow( ggdf.sample ) > 0 ) { 294 | ggdf.sample$partition <- fs 295 | ggdf.sample$channel <- as.character( ggdf.sample$channel ) 296 | } 297 | ggdf.sample 298 | } ) 299 | density.data.ggdf.sample <- do.call( rbind, density.data.ggdf.sample ) 300 | 301 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.condition, 302 | density.data.ggdf.sample ) 303 | 304 | density.data.partition <- c( fcs.density.partition.all, fcs.condition, 305 | flow.sample ) 306 | density.data.partition.n <- length( density.data.partition ) 307 | 308 | density.data.partition.label <- c( fcs.density.partition.all.label, 309 | fcs.condition.label, flow.sample.label ) 310 | 311 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 312 | levels = density.data.partition ) 313 | 314 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 315 | levels = fcs.channel ) 316 | 317 | density.plot.color <- c( fcs.density.partition.all.color, fcs.condition.color, 318 | flow.sample.color ) 319 | 320 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 321 | y = partition, color = partition, fill = partition ) ) + 322 | geom_density_ridges( size = fcs.density.line.size, 323 | alpha = fcs.density.line.alpha, show.legend = FALSE ) + 324 | labs( x = NULL, y = NULL ) + 325 | scale_y_discrete( limits = rev( density.data.partition ), 326 | breaks = rev( density.data.partition ), 327 | labels = rev( density.data.partition.label ) ) + 328 | scale_color_manual( values = density.plot.color ) + 329 | scale_fill_manual( values = density.plot.color ) + 330 | facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 331 | channel = fcs.channel.label ) ) + 332 | theme_ridges( line_size = fcs.density.line.size, 333 | font_size = fcs.density.font.size ) + 334 | theme( strip.background = element_rect( fill = "white" ) ) 335 | 336 | ggsave( 337 | file.path( fcs.density.figure.dir, 338 | sprintf( "%s.png", fcs.density.figure.sample ) ), 339 | density.plot, 340 | width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 341 | height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 342 | ) 343 | 344 | 345 | # select data for dimensionality reduction--------------- 346 | 347 | set.seed.here( fcs.seed.base, "select data for dimensionality reduction" ) 348 | 349 | { 350 | if ( ! is.null( fcs.dmrd.data.sample.n ) ) 351 | { 352 | if ( fcs.dmrd.data.sample.n < flow.event.n ) 353 | dmrd.data.idx <- sort( sample( flow.event.n, fcs.dmrd.data.sample.n ) ) 354 | else 355 | dmrd.data.idx <- 1 : flow.event.n 356 | } 357 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.condition ) ) 358 | { 359 | dmrd.data.idx <- unlist( sapply( fcs.condition, function( fc ) { 360 | fc.idx <- which( flow.event.condition == fc ) 361 | if ( fcs.dmrd.data.sample.n.per.condition < length( fc.idx ) ) 362 | sort( sample( fc.idx, fcs.dmrd.data.sample.n.per.condition ) ) 363 | else 364 | fc.idx 365 | } ) ) 366 | names( dmrd.data.idx ) <- NULL 367 | } 368 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.sample ) ) 369 | { 370 | dmrd.data.idx <- unlist( sapply( flow.sample, function( fs ) { 371 | fs.idx <- which( flow.event.sample == fs ) 372 | if ( fcs.dmrd.data.sample.n.per.sample < length( fs.idx ) ) 373 | sort( sample( fs.idx, fcs.dmrd.data.sample.n.per.sample ) ) 374 | else 375 | fs.idx 376 | } ) ) 377 | names( dmrd.data.idx ) <- NULL 378 | } 379 | else 380 | dmrd.data.idx <- 1 : flow.event.n 381 | } 382 | 383 | dmrd.data <- flow.data[ dmrd.data.idx, ] 384 | 385 | dmrd.event.sample <- flow.event.sample[ dmrd.data.idx ] 386 | dmrd.event.condition <- flow.event.condition[ dmrd.data.idx ] 387 | 388 | str( dmrd.data ) 389 | table( dmrd.event.condition ) 390 | table( dmrd.event.sample ) 391 | 392 | 393 | # get flowsom clusters--------------- 394 | 395 | set.seed.here( fcs.seed.base, "get flowsom clusters" ) 396 | 397 | # build som objects--embedsom method 398 | flow.som <- EmbedSOM::SOM(flow.data, xdim = fcs.flow.som.dim, 399 | ydim = fcs.flow.som.dim, batch = TRUE, 400 | parallel = TRUE, threads = fcs.tsne.thread.n ) 401 | 402 | # get clusters 403 | embedsom.cluster <- hclust( dist( flow.som$codes ) ) 404 | flow.som.event.cluster <- cutree( embedsom.cluster, fcs.cluster.n )[flow.som$mapping[ , 1] ] 405 | 406 | # check quality of flowSOM clustering (optional)--------------- 407 | embed.som <- EmbedSOM::EmbedSOM(data=flow.data, map=flow.som, 408 | parallel = TRUE, threads = fcs.tsne.thread.n) 409 | 410 | # reorder clusters from bigger to smaller--------------- 411 | flow.som.cluster.rank <- 1 + fcs.cluster.n - 412 | rank( table( flow.som.event.cluster ), ties.method = "last" ) 413 | flow.som.event.cluster <- flow.som.cluster.rank[ flow.som.event.cluster ] 414 | names( flow.som.event.cluster ) <- NULL 415 | 416 | # set clusters as a factor 417 | flow.som.event.cluster <- factor( flow.som.event.cluster, 418 | levels = 1 : fcs.cluster.n ) 419 | levels( flow.som.event.cluster ) <- fcs.cluster 420 | 421 | # reorder events 422 | flow.event.cluster <- flow.som.event.cluster 423 | dmrd.event.cluster <- flow.event.cluster[ dmrd.data.idx ] 424 | 425 | flow.event.cluster.n <- as.vector( table( flow.event.cluster ) ) 426 | names( flow.event.cluster.n ) <- fcs.cluster 427 | 428 | dmrd.event.cluster.n <- as.vector( table( dmrd.event.cluster ) ) 429 | names( dmrd.event.cluster.n ) <- fcs.cluster 430 | 431 | length( flow.event.cluster ) 432 | table( flow.event.cluster ) 433 | 434 | length( dmrd.event.cluster ) 435 | table( dmrd.event.cluster ) 436 | 437 | 438 | # save cluster counts--------------- 439 | 440 | flow.cluster.count <- sapply( fcs.cluster, function( fc ) 441 | table( flow.event.sample[ flow.event.cluster == fc ] ) ) 442 | 443 | stopifnot( rownames( flow.cluster.count ) == flow.sample ) 444 | stopifnot( colnames( flow.cluster.count ) == fcs.cluster ) 445 | 446 | rownames( flow.cluster.count ) <- flow.sample.label 447 | colnames( flow.cluster.count ) <- fcs.cluster.label 448 | 449 | write.csv( flow.cluster.count, file = file.path( fcs.cluster.table.dir, 450 | sprintf( "%s.csv", fcs.cluster.table.counts ) ) ) 451 | 452 | flow.cluster.percent <- flow.cluster.count/rowSums(flow.cluster.count)*100 453 | 454 | write.csv( flow.cluster.percent, file = file.path( fcs.cluster.table.dir, 455 | sprintf( "%s.csv", "cluster_proportions" ) ) ) 456 | 457 | 458 | # plot density distributions by cluster--------------- 459 | 460 | density.data.ggdf.cluster <- lapply( fcs.cluster, function( fc ) { 461 | ggdf.cluster <- melt( 462 | density.data[ flow.event.cluster[ density.data.idx ] == fc, , 463 | drop = FALSE ], 464 | varnames = c( "partition", "channel" ), 465 | value.name = "density.value" 466 | ) 467 | if ( nrow( ggdf.cluster ) > 0 ) { 468 | ggdf.cluster$partition <- fc 469 | ggdf.cluster$channel <- as.character( ggdf.cluster$channel ) 470 | } 471 | ggdf.cluster 472 | } ) 473 | density.data.ggdf.cluster <- do.call( rbind, density.data.ggdf.cluster ) 474 | 475 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.cluster ) 476 | 477 | density.data.partition <- c( fcs.density.partition.all, fcs.cluster ) 478 | density.data.partition.n <- length( density.data.partition ) 479 | 480 | density.data.partition.label <- c( fcs.density.partition.all.label, 481 | fcs.cluster.label ) 482 | 483 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 484 | levels = density.data.partition ) 485 | 486 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 487 | levels = fcs.channel ) 488 | 489 | density.plot.color <- c( fcs.density.partition.all.color, fcs.cluster.color ) 490 | 491 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 492 | y = partition, color = partition, fill = partition ) ) + 493 | geom_density_ridges( size = fcs.density.line.size, 494 | alpha = fcs.density.line.alpha, show.legend = FALSE ) + 495 | labs( x = NULL, y = NULL ) + 496 | scale_y_discrete( limits = rev( density.data.partition ), 497 | breaks = rev( density.data.partition ), 498 | labels = rev( density.data.partition.label ) ) + 499 | scale_color_manual( values = density.plot.color ) + 500 | scale_fill_manual( values = density.plot.color ) + 501 | facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 502 | channel = fcs.channel.label ) ) + 503 | theme_ridges( line_size = fcs.density.line.size, 504 | font_size = fcs.density.font.size ) + 505 | theme( strip.background = element_rect( fill = "white" ) ) 506 | 507 | ggsave( 508 | file.path( fcs.density.figure.dir, 509 | sprintf( "%s.png", fcs.density.figure.cluster ) ), 510 | density.plot, 511 | width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 512 | height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 513 | ) 514 | 515 | 516 | # plot embedsom figures--------------- 517 | 518 | embedsom.data.max <- max( embed.som ) 519 | 520 | plot.all.embedsom.figures( 521 | embed.som, embedsom.data.max, 522 | fcs.embedsom.figure.lims.factor, fcs.embedsom.figure.point.size, 523 | fcs.embedsom.figure.dir, "embedSOM_plot", 524 | flow.data, flow.event.cluster, flow.event.condition 525 | ) 526 | 527 | 528 | # plot heatmaps--------------- 529 | 530 | heatmap.type <- c( "by_condition", "by_sample", "by_cluster" ) 531 | 532 | for ( ht in heatmap.type ) 533 | { 534 | if ( ht == "by_condition" ) { 535 | flow.data.group.median <- apply( flow.data, 2, tapply, 536 | flow.event.condition, median ) 537 | margin.col <- fcs.heatmap.label.factor.col * 538 | max( nchar( fcs.condition.label ) ) 539 | group.label <- fcs.condition.label 540 | group.color <- fcs.condition.color 541 | } 542 | else if ( ht == "by_sample" ) { 543 | flow.data.group.median <- apply( flow.data, 2, tapply, 544 | flow.event.sample, median ) 545 | margin.col <- fcs.heatmap.label.factor.col * 546 | max( nchar( flow.sample.label ) ) 547 | group.label <- flow.sample.label 548 | group.color <- flow.sample.color 549 | } 550 | else if ( ht == "by_cluster" ) { 551 | flow.data.group.median <- apply( flow.data, 2, tapply, 552 | flow.event.cluster, median ) 553 | margin.col <- fcs.heatmap.label.factor.col * 554 | max( nchar( fcs.cluster.label ) ) 555 | group.label <- fcs.cluster.label 556 | group.color <- fcs.cluster.color 557 | } 558 | else 559 | stop( "wrong heatmap type" ) 560 | 561 | margin.row <- 562 | fcs.heatmap.label.factor.row * max( nchar( fcs.channel.label ) ) 563 | 564 | if ( margin.row < 5 ) 565 | margin.row <- 5 566 | if ( margin.col < 5 ) 567 | margin.col <- 5 568 | 569 | png( filename = file.path( fcs.heatmap.figure.dir, 570 | sprintf( "%s_%s.png", fcs.heatmap.figure, ht ) ), 571 | width = fcs.heatmap.width, height = fcs.heatmap.height ) 572 | heatmap( t( flow.data.group.median ), scale = "row", 573 | labRow = fcs.channel.label, labCol = group.label, 574 | col = fcs.heatmap.palette, ColSideColors = group.color, 575 | cexRow = fcs.heatmap.font.size, cexCol = fcs.heatmap.font.size, 576 | margins = c( margin.col, margin.row ) ) 577 | dev.off() 578 | } 579 | 580 | 581 | # plot histograms--------------- 582 | 583 | histogram.type <- c( "by_cluster", "by_sample", "by_assay" ) 584 | 585 | condition.sample.n <- as.vector( table( flow.sample.condition ) ) 586 | 587 | for ( ht in histogram.type ) 588 | { 589 | flow.data.cluster.fraction <- lapply( fcs.cluster, function( fc ) { 590 | sample.event.n <- as.vector( table( 591 | flow.event.sample[ flow.event.cluster == fc ] ) ) 592 | 593 | if ( ht == "by_cluster" ) 594 | { 595 | sample.fraction <- log2( 596 | ( sample.event.n / flow.event.cluster.n[ fc ] ) / 597 | ( flow.event.sample.n / flow.event.n ) 598 | ) 599 | sample.fraction[ is.infinite( sample.fraction ) ] <- NA 600 | fraction <- tapply( sample.fraction, flow.sample.condition, mean, 601 | na.rm = TRUE ) 602 | std.err <- tapply( sample.fraction, flow.sample.condition, sd, 603 | na.rm = TRUE ) / sqrt( condition.sample.n ) 604 | } 605 | else if ( ht == "by_sample" ) 606 | { 607 | sample.fraction <- 100 * sample.event.n / flow.event.sample.n 608 | fraction <- tapply( sample.fraction, flow.sample.condition, mean, 609 | na.rm = TRUE ) 610 | std.err <- tapply( sample.fraction, flow.sample.condition, sd, 611 | na.rm = TRUE ) / sqrt( condition.sample.n ) 612 | } 613 | else if ( ht == "by_assay" ) 614 | { 615 | sample.fraction <- 100 * sample.event.n / flow.event.n 616 | fraction <- tapply( sample.fraction, flow.sample.condition, sum, 617 | na.rm = TRUE ) 618 | std.err <- NA 619 | } 620 | else 621 | stop( "wrong histogram type" ) 622 | 623 | data.frame( cluster = fc, condition = fcs.condition, fraction, std.err ) 624 | } ) 625 | 626 | flow.data.cluster.fraction <- do.call( rbind, flow.data.cluster.fraction ) 627 | flow.data.cluster.fraction$condition <- factor( 628 | flow.data.cluster.fraction$condition, levels = fcs.condition ) 629 | 630 | histogram.plot <- ggplot( flow.data.cluster.fraction, 631 | aes( x = cluster, y = fraction, fill = condition ) ) + 632 | geom_bar( stat = "identity", position = position_dodge2() ) + 633 | geom_errorbar( aes( ymin = fraction - std.err, 634 | ymax = fraction + std.err ), 635 | size = fcs.histogram.error.bar.size, 636 | position = position_dodge2() ) + 637 | scale_x_discrete( labels = fcs.cluster.label, name = "" ) + 638 | scale_fill_manual( values = fcs.condition.color, 639 | limits = fcs.condition, breaks = fcs.condition, 640 | labels = fcs.condition.label ) + 641 | theme_bw() + 642 | theme( 643 | axis.text = element_text( size = fcs.histogram.font.size, 644 | angle = 90 ), 645 | axis.title = element_text( size = fcs.histogram.font.size + 1 ), 646 | legend.title = element_blank(), 647 | legend.text = element_text( size = fcs.histogram.font.size + 1 ), 648 | legend.key.size = unit( fcs.histogram.legend.key.size, "lines" ), 649 | panel.grid.major = element_blank(), 650 | panel.grid.minor = element_blank() 651 | ) 652 | 653 | if ( ht == "by_cluster" ) 654 | histogram.plot <- histogram.plot + 655 | ylab( "Average log2( frequency ratio )" ) 656 | else if ( ht == "by_sample" ) 657 | histogram.plot <- histogram.plot + ylab( "Average frequency" ) 658 | else if ( ht == "by_assay" ) 659 | histogram.plot <- histogram.plot + ylab( "Frequency" ) 660 | 661 | ggsave( 662 | file.path( fcs.histogram.figure.dir, 663 | sprintf( "%s_%s.png", fcs.histogram.figure, ht ) ), 664 | histogram.plot, 665 | width = fcs.histogram.width, 666 | height = fcs.histogram.height + 667 | fcs.histogram.label.factor.height * 668 | max( nchar( fcs.cluster.label ) ) 669 | ) 670 | } 671 | 672 | 673 | # calculate tsne representation--------------- 674 | 675 | { 676 | if ( fcs.use.cached.results && file.exists( fcs.tsne.cache.file.path ) ) 677 | { 678 | cat( "Using cached results for tsne\n" ) 679 | 680 | load( fcs.tsne.cache.file.path ) 681 | } 682 | else 683 | { 684 | set.seed.here( fcs.seed.base, "calculate tsne representation" ) 685 | 686 | cat( "Calculating tsne\n" ) 687 | 688 | tsne.result <- Rtsne( dmrd.data, perplexity = fcs.tsne.perplexity, 689 | theta = fcs.tsne.theta, 690 | exaggeration_factor = fcs.tsne.exaggeration.factor, 691 | max_iter = fcs.tsne.iter.n, check_duplicates = FALSE, pca = FALSE, 692 | num_threads = fcs.tsne.thread.n ) 693 | 694 | save( tsne.result, file = fcs.tsne.cache.file.path ) 695 | } 696 | } 697 | 698 | tsne.data <- tsne.result$Y 699 | 700 | str( tsne.result ) 701 | 702 | 703 | # plot tsne convergence--------------- 704 | 705 | tsne.iter <- 1 + 50 * ( 1 : length( tsne.result$itercosts ) - 1 ) 706 | tsne.cost <- tsne.result$itercosts 707 | 708 | png( filename = file.path( fcs.tsne.figure.dir, 709 | sprintf( "%s.png", fcs.tsne.figure.convergence ) ), 710 | width = fcs.tsne.figure.convergence.width, 711 | height = fcs.tsne.figure.convergence.height ) 712 | par( mar = c( 5, 5.8, 2, 1.4 ) ) 713 | plot( tsne.iter, tsne.cost, log = "y", xlab = "Iteration", 714 | ylab = "Total cost", xlim = c( 0, fcs.tsne.iter.n ), 715 | ylim = c( 1, max( tsne.cost ) ), col = "blue3", 716 | pch = 20, cex = 1, cex.lab = 2.5, cex.axis = 2 ) 717 | abline( h = tsne.cost[ length( tsne.cost ) ], lty = 2, lwd = 1.5, 718 | col = "blue3" ) 719 | dev.off() 720 | 721 | 722 | # plot tsne representations--------------- 723 | 724 | tsne.data.max <- max( abs( tsne.data ) ) 725 | 726 | plot.all.dmrd.figures( 727 | tsne.data, tsne.data.max, 728 | fcs.tsne.figure.lims.factor, fcs.tsne.figure.point.size, 729 | fcs.tsne.figure.dir, fcs.tsne.figure.plot, 730 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 731 | ) 732 | 733 | 734 | # calculate umap representation--------------- 735 | 736 | { 737 | if ( fcs.use.cached.results && file.exists( fcs.umap.cache.file.path ) ) 738 | { 739 | cat( "Using cached results for umap\n" ) 740 | 741 | load( fcs.umap.cache.file.path ) 742 | } 743 | else 744 | { 745 | set.seed.here( fcs.seed.base, "calculate umap representation" ) 746 | 747 | cat( "Calculating umap\n" ) 748 | 749 | umap.config <- umap.defaults 750 | umap.config$n_epochs <- fcs.umap.iter.n 751 | umap.config$verbose <- TRUE 752 | 753 | umap.result <- umap( dmrd.data, config = umap.config ) 754 | 755 | save( umap.result, file = fcs.umap.cache.file.path ) 756 | } 757 | } 758 | 759 | umap.data <- umap.result$layout 760 | dimnames( umap.data ) <- NULL 761 | 762 | str( umap.result ) 763 | 764 | 765 | # plot umap representations--------------- 766 | 767 | umap.data.max <- max( abs( apply( umap.data, 2, function( x ) 768 | quantile( x, c( 0.25, 0.75 ) ) + c( -1, 1 ) * IQR( x ) ) ) ) 769 | 770 | plot.all.dmrd.figures( 771 | umap.data, umap.data.max, 772 | fcs.umap.figure.lims.factor, fcs.umap.figure.point.size, 773 | fcs.umap.figure.dir, fcs.umap.figure.plot, 774 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 775 | ) 776 | 777 | 778 | # calculate cross-entropy test for tsne by condition--------------- 779 | 780 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" ) 781 | 782 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 783 | dmrd.data, tsne.data, 784 | dmrd.event.condition, 785 | partition.label = fcs.condition.label, 786 | partition.color = fcs.condition.color, 787 | partition.line.type = fcs.condition.line.type, 788 | base.test = fcs.ce.diff.base.test, 789 | base.dist = fcs.ce.diff.base.dist, 790 | prob.sample.n = fcs.ce.diff.prob.sample.n, 791 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 792 | result = file.path( fcs.ce.diff.tsne.figure.dir, 793 | sprintf( "%s_condition.txt", fcs.ce.diff.tsne.result ) ), 794 | cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 795 | sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.cdf ) ), 796 | dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 797 | sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.dendrogram ) ) ) 798 | 799 | print( ce.diff.test.tsne.res ) 800 | 801 | 802 | # calculate cross-entropy test for tsne by sample--------------- 803 | 804 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" ) 805 | 806 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 807 | dmrd.data, tsne.data, 808 | dmrd.event.sample, 809 | partition.label = flow.sample.label, 810 | partition.color = flow.sample.color, 811 | partition.line.type = flow.sample.line.type.single, 812 | base.test = fcs.ce.diff.base.test, 813 | base.dist = fcs.ce.diff.base.dist, 814 | prob.sample.n = fcs.ce.diff.prob.sample.n, 815 | dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 816 | result = file.path( fcs.ce.diff.tsne.figure.dir, 817 | sprintf( "%s_sample.txt", fcs.ce.diff.tsne.result ) ), 818 | cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 819 | sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.cdf ) ), 820 | dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 821 | sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.dendrogram ) ) ) 822 | 823 | print( ce.diff.test.tsne.res ) 824 | 825 | 826 | # calculate cross-entropy test for tsne by cluster--------------- 827 | 828 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" ) 829 | 830 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 831 | dmrd.data, tsne.data, 832 | dmrd.event.cluster, 833 | partition.label = fcs.cluster.label, 834 | partition.color = fcs.cluster.color, 835 | partition.line.type = fcs.cluster.line.type, 836 | base.test = fcs.ce.diff.base.test, 837 | base.dist = fcs.ce.diff.base.dist, 838 | prob.sample.n = fcs.ce.diff.prob.sample.n, 839 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 840 | result = file.path( fcs.ce.diff.tsne.figure.dir, 841 | sprintf( "%s_cluster.txt", fcs.ce.diff.tsne.result ) ), 842 | cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 843 | sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.cdf ) ), 844 | dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 845 | sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.dendrogram ) ) ) 846 | 847 | print( ce.diff.test.tsne.res ) 848 | 849 | 850 | # calculate cross-entropy test for umap by condition--------------- 851 | 852 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" ) 853 | 854 | ce.diff.test.umap.res <- ce.diff.test.umap( 855 | umap.result$knn$distances, umap.result$knn$indexes, 856 | umap.data, umap.result$config, 857 | dmrd.event.condition, 858 | partition.label = fcs.condition.label, 859 | partition.color = fcs.condition.color, 860 | partition.line.type = fcs.condition.line.type, 861 | base.test = fcs.ce.diff.base.test, 862 | base.dist = fcs.ce.diff.base.dist, 863 | prob.sample.n = fcs.ce.diff.prob.sample.n, 864 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 865 | result = file.path( fcs.ce.diff.umap.figure.dir, 866 | sprintf( "%s_condition.txt", fcs.ce.diff.umap.result ) ), 867 | cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 868 | sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.cdf ) ), 869 | dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 870 | sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.dendrogram ) ) ) 871 | 872 | print( ce.diff.test.umap.res ) 873 | 874 | 875 | # calculate cross-entropy test for umap by sample--------------- 876 | 877 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" ) 878 | 879 | ce.diff.test.umap.res <- ce.diff.test.umap( 880 | umap.result$knn$distances, umap.result$knn$indexes, 881 | umap.data, umap.result$config, 882 | dmrd.event.sample, 883 | partition.label = flow.sample.label, 884 | partition.color = flow.sample.color, 885 | partition.line.type = flow.sample.line.type.single, 886 | base.test = fcs.ce.diff.base.test, 887 | base.dist = fcs.ce.diff.base.dist, 888 | prob.sample.n = fcs.ce.diff.prob.sample.n, 889 | dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 890 | result = file.path( fcs.ce.diff.umap.figure.dir, 891 | sprintf( "%s_sample.txt", fcs.ce.diff.umap.result ) ), 892 | cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 893 | sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.cdf ) ), 894 | dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 895 | sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.dendrogram ) ) ) 896 | 897 | print( ce.diff.test.umap.res ) 898 | 899 | 900 | # calculate cross-entropy test for umap by cluster--------------- 901 | 902 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" ) 903 | 904 | ce.diff.test.umap.res <- ce.diff.test.umap( 905 | umap.result$knn$distances, umap.result$knn$indexes, 906 | umap.data, umap.result$config, 907 | dmrd.event.cluster, 908 | partition.label = fcs.cluster.label, 909 | partition.color = fcs.cluster.color, 910 | partition.line.type = fcs.cluster.line.type, 911 | base.test = fcs.ce.diff.base.test, 912 | base.dist = fcs.ce.diff.base.dist, 913 | prob.sample.n = fcs.ce.diff.prob.sample.n, 914 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 915 | result = file.path( fcs.ce.diff.umap.figure.dir, 916 | sprintf( "%s_cluster.txt", fcs.ce.diff.umap.result ) ), 917 | cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 918 | sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.cdf ) ), 919 | dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 920 | sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.dendrogram ) ) ) 921 | 922 | print( ce.diff.test.umap.res ) 923 | 924 | 925 | # calculate cross-entropy test for embedsom by condition--------------- 926 | 927 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for embedSOM" ) 928 | 929 | ce.diff.test.embedsom.res <- ce.diff.test.tsne( 930 | dmrd.data, embed.som, 931 | dmrd.event.condition, 932 | partition.label = fcs.condition.label, 933 | partition.color = fcs.condition.color, 934 | partition.line.type = fcs.condition.line.type, 935 | base.test = fcs.ce.diff.base.test, 936 | base.dist = fcs.ce.diff.base.dist, 937 | prob.sample.n = fcs.ce.diff.prob.sample.n, 938 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 939 | result = file.path( fcs.ce.diff.embedsom.figure.dir, 940 | sprintf( "%s_condition.txt", fcs.ce.diff.embedsom.result ) ), 941 | cdf.figure = file.path( fcs.ce.diff.embedsom.figure.dir, 942 | sprintf( "%s_condition.png", fcs.ce.diff.embedsom.figure.cdf ) ), 943 | dendrogram.figure = file.path( fcs.ce.diff.embedsom.figure.dir, 944 | sprintf( "%s_condition.png", fcs.ce.diff.embedsom.figure.dendrogram ) ) ) 945 | 946 | print( ce.diff.test.embedsom.res ) 947 | 948 | 949 | # output session info--------------- 950 | 951 | sessionInfo() 952 | 953 | -------------------------------------------------------------------------------- /CSV-based flow analysis/analyze_flow_cytometry_parameter_csv.R: -------------------------------------------------------------------------------- 1 | # Analysis of 20210608 tSNE-diff data 2 | 3 | 4 | # data parameters--------------- 5 | 6 | # Tip: create two folders inside your analysis directory called "Data" and "Output". 7 | # Put the fcs files in the Data folder. 8 | # Put the parameter file in the Output folder. Double click on it to start Rstudio. 9 | # Opening in this way will set your working directory to the Output folder. 10 | # Type getwd(). Copy the result into the line below, swapping "Data" for "Output" 11 | fcs.data.dir <- "D:/tSNE-diff paper/Analysis/20210608 tSNE-diff flow/Data/csv 20k events" 12 | 13 | fcs.condition <- c( "LN", "Spleen", 14 | "siLPL" ) 15 | 16 | fcs.condition.label <- c( 17 | "LN" = "LN", "Spleen" = "Spleen", 18 | "siLPL" = "siLPL" 19 | ) 20 | 21 | # Run the analyze_flow_cytometry_csv script through line 50 and insert the output below. 22 | # Remove unwanted channels. 23 | 24 | fcs.channel <- c( 25 | "RORgT","CD44","Gr.1","IgM","F4_80","Ki67","CD90.2","CCR9", 26 | "TCRgd","PDCA.1","CD11c","Ly.6C","CD103","IgD","NK1.1", 27 | "CTLA.4","c.Kit","CD62L","GITR","CD150","CXCR3","Siglec.F", 28 | "TCRb","PD.1","XCR1","CD127","CCR2","CD45","CD4","CD8","CD3", 29 | "CD19","CD11b","CD38","GATA.3","CD86","Foxp3","CD172a","CD64", 30 | "Helios","CD24", 31 | "MHCII","NKp46","CD69","B220","CD25","ICOS","KLRG1","T.bet" 32 | ) 33 | 34 | fcs.channel.label <- c( 35 | "RORgT" = "RORgT", 36 | "CD44" = "CD44", 37 | "Gr.1" = "Gr.1", 38 | "IgM" = "IgM", 39 | "F4_80" = "F4_80", 40 | "Ki67" = "Ki67", 41 | "CD90.2" = "CD90.2", 42 | "CCR9" = "CCR9", 43 | "TCRgd" = "TCRgd", 44 | "PDCA.1" = "PDCA.1", 45 | "CD11c" = "CD11c", 46 | "Ly.6C" = "Ly.6C", 47 | "CD103" = "CD103", 48 | "IgD" = "IgD", 49 | "NK1.1" = "NK1.1", 50 | "CTLA.4" = "CTLA.4", 51 | "c.Kit" = "c.Kit", 52 | "CD62L" = "CD62L", 53 | "GITR" = "GITR", 54 | "CD150" = "CD150", 55 | "CXCR3" = "CXCR3", 56 | "Siglec.F" = "Siglec.F", 57 | "TCRb" = "TCRb", 58 | "PD.1" = "PD.1", 59 | "XCR1" = "XCR1", 60 | "CD127" = "CD127", 61 | "CCR2" = "CCR2", 62 | "CD45" = "CD45", 63 | "CD4" = "CD4", 64 | "CD8" = "CD8", 65 | "CD3" = "CD3", 66 | "CD19" = "CD19", 67 | "CD11b" = "CD11b", 68 | "CD38" = "CD38", 69 | "GATA.3" = "GATA.3", 70 | "CD86" = "CD86", 71 | "Foxp3" = "Foxp3", 72 | "CD172a" = "CD172a", 73 | "CD64" = "CD64", 74 | "Helios" = "Helios", 75 | "CD24" = "CD24", 76 | "MHCII" = "MHCII", 77 | "NKp46" = "NKp46", 78 | "CD69" = "CD69", 79 | "B220" = "B220", 80 | "CD25" = "CD25", 81 | "ICOS" = "ICOS", 82 | "KLRG1" = "KLRG1", 83 | "T.bet" = "T.bet" 84 | ) 85 | 86 | fcs.condition.n <- length( fcs.condition ) 87 | fcs.channel.n <- length( fcs.channel ) 88 | 89 | 90 | # general parameters--------------- 91 | # Tip: edit the directory to match the location of the script files. 92 | fcs.src.dir <- "D:/Flowcytoscript/Modifications/CSV version/00_src" 93 | 94 | # Tip: use today's date 95 | fcs.seed.base <- 20220623 96 | 97 | 98 | # Tip: Set to FALSE while optimizing the tSNE and FlowSOM. 99 | # Once you have a good run, set to TRUE and then change labels, colors, etc. 100 | fcs.use.cached.results <- TRUE 101 | 102 | fcs.sample.number.width <- 2 103 | fcs.event.number.width <- 6 104 | 105 | 106 | # graphics parameters--------------- 107 | 108 | fcs.color.pool <- c( 109 | brewer.pal( 8, "Set1" )[ -6 ], 110 | brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 111 | adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 112 | red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 113 | adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 114 | red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 115 | adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 116 | red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 117 | adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 118 | red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 119 | adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 120 | red.f = 0.3, green.f = 0.3, blue.f = 0.3 ), 121 | adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 122 | red.f = 0.3, green.f = 0.3, blue.f = 0.3 ) ) 123 | fcs.color.pool.n <- length( fcs.color.pool ) 124 | 125 | fcs.line.type.pool <- 1:6 126 | fcs.line.type.pool.n <- length( fcs.line.type.pool ) 127 | 128 | fcs.condition.color <- rep( 129 | fcs.color.pool, 130 | ceiling( fcs.condition.n / fcs.color.pool.n ) 131 | )[ 1 : fcs.condition.n ] 132 | names( fcs.condition.color ) <- fcs.condition 133 | 134 | fcs.condition.line.type <- rep( 135 | fcs.line.type.pool, 136 | ceiling( fcs.condition.n / fcs.line.type.pool.n ) 137 | )[ 1 : fcs.condition.n ] 138 | names( fcs.condition.line.type ) <- fcs.condition 139 | 140 | 141 | # density parameters--------------- 142 | # Sampling largely irrelevant for this method 143 | 144 | fcs.density.data.sample.n <- NULL 145 | 146 | fcs.density.partition.all <- "all" 147 | fcs.density.partition.all.label <- c( "all" = "All" ) 148 | fcs.density.partition.all.color <- c( "all" = "grey" ) 149 | 150 | fcs.density.font.size <- 4.5 151 | 152 | fcs.density.line.size <- 0.2 153 | fcs.density.line.alpha <- 0.3 154 | 155 | fcs.density.figure.width.base <- 0.4 156 | fcs.density.figure.height.base <- 0.1 157 | 158 | fcs.density.figure.dir <- "./figure_density" 159 | 160 | fcs.density.figure.sample <- "density_sample" 161 | fcs.density.figure.cluster <- "density_cluster" 162 | 163 | 164 | # cluster parameters--------------- 165 | # Tip: use more clusters for more diverse cell collections. Use as few as possible to speed up processing. 166 | # For example, for a broad immune phenotyping panel use 40-50 clusters. 167 | # For pre-gated cell types (e.g., CD8s), use 10-12 clusters. 168 | 169 | fcs.cluster.n <- 40 170 | 171 | # Tip: naming clusters can be done automatically as clusters 1:n via the first command below. 172 | # To rename your clusters based on marker expression, insert a # before the first command, 173 | # remove the # before the second command, 174 | # and rename the clusters appropriately. 175 | 176 | fcs.cluster <- sprintf( "%02d", 1 : fcs.cluster.n ) 177 | #fcs.cluster <- c("B cells", "CD4 T cells", "CD8 T cells", "Monocytes", 178 | # "Neutrophils", "NK cells", "Eosinophils", 179 | # "cDC", "Macrophages") 180 | 181 | fcs.cluster.label <- fcs.cluster 182 | names( fcs.cluster.label ) <- fcs.cluster 183 | 184 | # Tip: this controls the grouping of the fcs files that are generated based on the flowSOM clustering. 185 | # If some of the clusters are related (e.g., CD14+ and CD16+ monocytes), 186 | # you might wish to group these into a single output folder. 187 | # If so, you may use a command such as "a" = c(1, 5, 7), 188 | # where the numbers are the numbered flowSOM clusters. 189 | fcs.cluster.group <- as.list(1:fcs.cluster.n) 190 | names(fcs.cluster.group) <- make.unique(rep(letters, length.out = fcs.cluster.n), sep='') 191 | 192 | fcs.cluster.color <- rep( 193 | fcs.color.pool, 194 | ceiling( fcs.cluster.n / fcs.color.pool.n ) 195 | )[ 1 : fcs.cluster.n ] 196 | names( fcs.cluster.color ) <- fcs.cluster 197 | 198 | fcs.cluster.line.type <- rep( 199 | fcs.line.type.pool, 200 | ceiling( fcs.cluster.n / fcs.line.type.pool.n ) 201 | )[ 1 : fcs.cluster.n ] 202 | names( fcs.cluster.line.type ) <- fcs.cluster 203 | 204 | # 24 is recommended for EmbedSOM 205 | fcs.flow.som.dim <- 24 206 | 207 | fcs.cluster.table.dir <- "./table_cluster" 208 | fcs.cluster.data.dir <- "./data_cluster" 209 | 210 | fcs.cluster.table.counts <- "cluster_counts" 211 | 212 | fcs.cluster.data <- "cluster_group" 213 | 214 | 215 | # heatmap parameters--------------- 216 | 217 | fcs.heatmap.palette.n <- 100 218 | fcs.heatmap.palette <- colorRampPalette( brewer.pal( 9, "YlOrRd" ) )( 219 | fcs.heatmap.palette.n ) 220 | 221 | fcs.heatmap.font.size <- 2.5 222 | 223 | fcs.heatmap.label.factor.row <- 1.4 224 | fcs.heatmap.label.factor.col <- 1.4 225 | 226 | fcs.heatmap.width <- 2000 227 | fcs.heatmap.height <- 2000 228 | 229 | fcs.heatmap.figure.dir <- "./figure_heatmap" 230 | 231 | fcs.heatmap.figure <- "heatmap" 232 | 233 | 234 | # histogram parameters--------------- 235 | 236 | fcs.histogram.font.size <- 7 237 | fcs.histogram.error.bar.size <- 0.2 238 | fcs.histogram.legend.key.size <- 0.7 239 | 240 | fcs.histogram.label.factor.height <- 0.05 241 | 242 | fcs.histogram.width <- 4.5 243 | fcs.histogram.height <- 3 244 | 245 | fcs.histogram.figure.dir <- "./figure_histogram" 246 | 247 | fcs.histogram.figure <- "histogram" 248 | 249 | 250 | # dimensionality reduction parameters--------------- 251 | # Tip: Use the third option in most cases. 252 | # More cells will take longer. 253 | # You might downsample to 50-100k cells at first, then run again with more cells if you like the result. 254 | 255 | fcs.dmrd.data.sample.n <- NULL 256 | fcs.dmrd.data.sample.n.per.condition <- NULL 257 | fcs.dmrd.data.sample.n.per.sample <- 2000 258 | 259 | fcs.dmrd.gradient.color <- c( "black", "blue", "green", "yellow", "red" ) 260 | fcs.dmrd.gradient.palette.n <- 100 261 | fcs.dmrd.density.palette <- colorRampPalette( fcs.dmrd.gradient.color )( 262 | fcs.dmrd.gradient.palette.n ) 263 | 264 | fcs.dmrd.color.alpha <- 0.3 265 | 266 | fcs.dmrd.group.title.size <- 8 267 | 268 | fcs.dmrd.legend.title.size <- 7 269 | fcs.dmrd.legend.label.size <- 7 270 | fcs.dmrd.legend.point.size <- 3 271 | 272 | fcs.dmrd.label.factor.width <- 0.1 273 | 274 | # Tip: set the number of rows in the output figures here. 275 | fcs.dmrd.figure.nrow <- 1 276 | fcs.dmrd.figure.ncol <- ceiling( fcs.condition.n / fcs.dmrd.figure.nrow ) 277 | 278 | # Tip: set the figure size here. 279 | fcs.dmrd.figure.width <- 2 280 | fcs.dmrd.figure.height <- 2 281 | 282 | 283 | # tsne parameters--------------- 284 | # Tip: more iterations take longer. 285 | # More iterations are needed for more cells 286 | # For a first look, try 1000. 287 | # For a final figure on ~100k cells, use 5000. 288 | # Tip 2: Set fcs.tsne.thread.n to 0 for max processing speed. 289 | # To enable you to do something else meanwhile, 290 | # set it to one or two less than the number of threads on your processor (check specs online). 291 | 292 | fcs.tsne.iter.n <- 5000 293 | fcs.tsne.thread.n <- 0 294 | 295 | # Tip: don't change this unless you know why 296 | fcs.tsne.perplexity <- 30 297 | fcs.tsne.theta <- 0.5 298 | fcs.tsne.exaggeration.factor <- 12 299 | 300 | fcs.tsne.figure.lims.factor <- 1.0 301 | fcs.tsne.figure.point.size <- 1.2 302 | 303 | fcs.tsne.figure.convergence.width <- 1200 304 | fcs.tsne.figure.convergence.height <- 800 305 | 306 | fcs.tsne.figure.dir <- "./figure_tsne" 307 | 308 | fcs.tsne.figure.convergence <- "tsne_convergence" 309 | fcs.tsne.figure.plot <- "tsne_plot" 310 | 311 | fcs.tsne.cache.file.path <- "./tsne_cache.dat" 312 | 313 | 314 | # umap parameters--------------- 315 | # Tip: you don't generally need to change the UMAP iterations 316 | 317 | fcs.umap.iter.n <- 1000 318 | 319 | fcs.umap.figure.lims.factor <- 0.8 320 | fcs.umap.figure.point.size <- 1.2 321 | 322 | fcs.umap.figure.dir <- "./figure_umap" 323 | 324 | fcs.umap.figure.plot <- "umap_plot" 325 | 326 | fcs.umap.cache.file.path <- "./umap_cache.dat" 327 | 328 | # embedsom parameters--------------- 329 | fcs.embedsom.figure.lims.factor <- 1.0 330 | fcs.embedsom.figure.point.size <- 0.05 331 | fcs.ce.diff.embedsom.figure.dir <- "./figure_embedsom_ce_diff" 332 | fcs.embedsom.figure.dir <- "./figure_embedsom" 333 | fcs.ce.diff.embedsom.result <- "embedsom_ce_diff_result" 334 | fcs.ce.diff.embedsom.cache.file.path <- "./embedsom_ce_diff_cache.dat" 335 | fcs.ce.diff.embedsom.figure.cdf <- "embedsom_ce_diff_cdf" 336 | fcs.ce.diff.embedsom.figure.dendrogram <- "embedsom_ce_diff_dendrogram" 337 | 338 | # cross-entropy test parameters--------------- 339 | # Tips: Set this depending on your RAM and number of groups. 340 | # You won't be able to analyze more than about 100k cells unless you have >32GB RAM. 341 | # The crossentropy test works best with at least 10k cells per group. 342 | # Multiple hypothesis testing will greatly reduce your ability to distinguish statistical differences. 343 | 344 | 345 | fcs.ce.diff.prob.sample.n <- 120000 346 | 347 | # Tip: set to "ks" unless you have a good statistical reason for using rank testing. 348 | # In that case, use "rank" and "median". 349 | 350 | fcs.ce.diff.base.test <- "ks" 351 | fcs.ce.diff.base.dist <- "ks" 352 | 353 | fcs.ce.diff.test.alpha <- 0.05 354 | 355 | fcs.ce.diff.figure.font.size <- 2 356 | fcs.ce.diff.figure.line.width <- 3 357 | 358 | fcs.ce.diff.figure.cdf.resolution <- 500 359 | fcs.ce.diff.figure.cdf.all.color <- "black" 360 | fcs.ce.diff.figure.cdf.all.label <- "All" 361 | 362 | fcs.ce.diff.figure.dendrogram.weight.condition <- 1 : fcs.condition.n 363 | names( fcs.ce.diff.figure.dendrogram.weight.condition ) <- fcs.condition 364 | 365 | fcs.ce.diff.figure.dendrogram.weight.cluster <- 1 : fcs.cluster.n 366 | names( fcs.ce.diff.figure.dendrogram.weight.cluster ) <- fcs.cluster 367 | 368 | fcs.ce.diff.figure.cdf.width <- 1200 369 | fcs.ce.diff.figure.cdf.height <- 800 370 | 371 | fcs.ce.diff.figure.dendrogram.width <- 2000 372 | fcs.ce.diff.figure.dendrogram.height <- 800 373 | 374 | 375 | # cross-entropy test parameters for tsne-------------- 376 | 377 | fcs.ce.diff.tsne.perplexity.factor <- 3 378 | 379 | fcs.ce.diff.tsne.figure.dir <- "./figure_tsne_ce_diff" 380 | 381 | fcs.ce.diff.tsne.figure.cdf <- "tsne_ce_diff_cdf" 382 | fcs.ce.diff.tsne.figure.dendrogram <- "tsne_ce_diff_dendrogram" 383 | fcs.ce.diff.tsne.result <- "tsne_ce_diff_result" 384 | 385 | fcs.ce.diff.tsne.cache.file.path <- "./tsne_ce_diff_cache.dat" 386 | 387 | 388 | # cross-entropy test parameters for umap--------------- 389 | 390 | fcs.ce.diff.umap.figure.dir <- "./figure_umap_ce_diff" 391 | 392 | fcs.ce.diff.umap.figure.cdf <- "umap_ce_diff_cdf" 393 | fcs.ce.diff.umap.figure.dendrogram <- "umap_ce_diff_dendrogram" 394 | fcs.ce.diff.umap.result <- "umap_ce_diff_result" 395 | 396 | fcs.ce.diff.umap.cache.file.path <- "./umap_ce_diff_cache.dat" 397 | 398 | -------------------------------------------------------------------------------- /CSV-based flow analysis/flowcytoscript_setup.R: -------------------------------------------------------------------------------- 1 | # Installation of required packages for csv version of flowcytoscript 2 | # Run this once to set things up. 3 | # Before running, you may wish to update R and/or RStudio. 4 | 5 | install.packages("digest") 6 | install.packages("dunn.test") 7 | install.packages("ggplot2") 8 | install.packages("ggridges") 9 | install.packages("RANN") 10 | install.packages("RColorBrewer") 11 | install.packages("reshape2") 12 | install.packages("Rtsne") 13 | install.packages("umap") 14 | install.packages("dplyr") 15 | install.packages("FNN") 16 | install.packages("devtools") 17 | 18 | if (!require("BiocManager", quietly = TRUE)) 19 | install.packages("BiocManager") 20 | 21 | BiocManager::install("FlowSOM") 22 | 23 | library(devtools) 24 | devtools::install_github('exaexa/EmbedSOM') -------------------------------------------------------------------------------- /CSV-based flow analysis/highlight_changed_regions.R: -------------------------------------------------------------------------------- 1 | # Optional add-on to flowcytoscript analysis 2 | # Use T-REX approach from Cytolab to determine changing knn regions 3 | 4 | # first run tSNE, UMAP and/or EmbedSOM using flowcytoscript 5 | 6 | library(FNN) 7 | 8 | ## Set things up------------- 9 | # pick two groups for identifying differences from the output below 10 | # copy into trex.condition <- c() between parenthesis 11 | print(fcs.condition) 12 | trex.condition <- c("Spleen", "siLPL") 13 | 14 | 15 | # subset data based on those groups 16 | trex.dmrd.data <- subset(dmrd.data, dmrd.event.condition %in% trex.condition) 17 | trex.umap.data <- umap.data 18 | rownames(trex.umap.data) <- rownames(dmrd.data) 19 | trex.umap.data <- subset(trex.umap.data, dmrd.event.condition %in% trex.condition) 20 | trex.tsne.data <- tsne.data 21 | rownames(trex.tsne.data) <- rownames(dmrd.data) 22 | trex.tsne.data <- subset(trex.tsne.data, dmrd.event.condition %in% trex.condition) 23 | trex.embed.som <- embed.som 24 | rownames(trex.embed.som) <- rownames(flow.data) 25 | trex.embed.som <- subset(trex.embed.som, flow.event.condition %in% trex.condition) 26 | kvalue <- 60 27 | 28 | 29 | ## Now you may generate plots showing regions that are disproportionately in one condition or the other. 30 | ## Use the three sections below to generate these with either tSNE, UMAP or EmbedSOM plots 31 | 32 | 33 | ## Using tSNE as the plot-------------------------- 34 | # KNN search per cell 35 | tsne.neighbor.index <- knnx.index(trex.tsne.data,trex.tsne.data,k=kvalue) 36 | first.condition.length <- nrow(subset(dmrd.data, dmrd.event.condition == trex.condition[1])) 37 | tsne.neighbor.index[tsne.neighbor.index <= first.condition.length] <- 0 38 | tsne.neighbor.index[tsne.neighbor.index > first.condition.length] <- 1 39 | 40 | # calculate percent change in each KNN region 41 | percent.change.tsne <- (rowSums(tsne.neighbor.index) / kvalue * 100) 42 | 43 | # binning and plot info 44 | range <- apply(apply( trex.tsne.data, 2, range), 2, diff) 45 | graphical.ratio <- (range[1] / range[2]) 46 | test.round <- round(percent.change.tsne) 47 | trex.plot.tsne <- 48 | data.frame(x = trex.tsne.data[, 1], y = trex.tsne.data[, 2], col = test.round) 49 | trex.plot.tsne$cuts <- cut(trex.plot.tsne$col, c(0, 5, 15, 85, 95, 100), include.lowest = TRUE, right = FALSE) 50 | trex.plot.tsne$cuts <- factor(trex.plot.tsne$cuts, 51 | levels = c("[15,85)", "[5,15)", "[0,5)", "[85,95)", "[95,100]")) 52 | ordered.plot.tsne <- trex.plot.tsne[order(trex.plot.tsne$cuts), ] 53 | 54 | 55 | # create T-REX plot 56 | 57 | trex_comparison <- paste(trex.condition[1], " vs ", trex.condition[2], sep = "") 58 | dir.create("figure_trex_tsne") 59 | png( 60 | paste( 61 | "./figure_trex_tsne/", 62 | strftime(Sys.time(), "%Y-%m-%d_%H%M%S"), 63 | " tSNE TREX plot.png", 64 | sep = "" 65 | ), 66 | res = 200, 67 | width = 1500, 68 | height = 1500 69 | ) 70 | final.trex.plot.tsne <- 71 | ggplot(ordered.plot.tsne) + geom_point(aes(x = x, y = y, colour = cuts), cex = 1) + 72 | scale_color_manual( 73 | name = "Ratio", 74 | values = c( 75 | "[15,85)" = "lightgray", 76 | "[5,15)" = "lightskyblue", 77 | "[0,5)" = "navyblue", 78 | "[85,95)" = "lightcoral", 79 | "[95,100]" = "darkred" 80 | ) 81 | ) + 82 | theme_bw() + theme(panel.grid.major = element_blank(), 83 | panel.grid.minor = element_blank()) + 84 | labs (x = "tSNE x", y = "tSNE y", title = paste( trex_comparison," - Percent Change",sep = "")) + 85 | coord_fixed(ratio = graphical.ratio)+ 86 | guides(color = guide_legend(override.aes = list(size=3))) 87 | print(final.trex.plot.tsne) 88 | dev.off() 89 | 90 | print(final.trex.plot.tsne) 91 | 92 | 93 | ## Using UMAP as the plot-------------------------- 94 | # KNN search per cell 95 | umap.neighbor.index <- knnx.index(trex.umap.data,trex.umap.data,k=kvalue) 96 | first.condition.length <- nrow(subset(dmrd.data, dmrd.event.condition == trex.condition[1])) 97 | umap.neighbor.index[umap.neighbor.index <= first.condition.length] <- 0 98 | umap.neighbor.index[umap.neighbor.index > first.condition.length] <- 1 99 | 100 | # calculate percent change in each KNN region 101 | percent.change.umap <- (rowSums(umap.neighbor.index) / kvalue * 100) 102 | 103 | # binning and plot info 104 | range <- apply(apply( trex.umap.data, 2, range), 2, diff) 105 | graphical.ratio <- (range[1] / range[2]) 106 | test.round <- round(percent.change.umap) 107 | trex.plot.umap <- 108 | data.frame(x = trex.umap.data[, 1], y = trex.umap.data[, 2], col = test.round) 109 | trex.plot.umap$cuts <- cut(trex.plot.umap$col, c(0, 5, 15, 85, 95, 100), include.lowest = TRUE, right = FALSE) 110 | trex.plot.umap$cuts <- factor(trex.plot.umap$cuts, 111 | levels = c("[15,85)", "[5,15)", "[0,5)", "[85,95)", "[95,100]")) 112 | ordered.plot.umap <- trex.plot.umap[order(trex.plot.umap$cuts), ] 113 | range <- apply(apply(trex.umap.data, 2, range), 2, diff) 114 | graphical.ratio <- (range[1] / range[2]) 115 | 116 | # create T-REX plot 117 | 118 | trex_comparison <- paste(trex.condition[1], " vs ", trex.condition[2], sep = "") 119 | dir.create("figure_trex_umap") 120 | png( 121 | paste( 122 | "./figure_trex_umap/", 123 | strftime(Sys.time(), "%Y-%m-%d_%H%M%S"), 124 | " UMAP TREX plot.png", 125 | sep = "" 126 | ), 127 | res = 200, 128 | width = 1500, 129 | height = 1500 130 | ) 131 | final.trex.plot.umap <- 132 | ggplot(ordered.plot.umap) + geom_point(aes(x = x, y = y, colour = cuts), cex = 1) + 133 | scale_color_manual( 134 | name = "Ratio", 135 | values = c( 136 | "[15,85)" = "lightgray", 137 | "[5,15)" = "lightskyblue", 138 | "[0,5)" = "navyblue", 139 | "[85,95)" = "lightcoral", 140 | "[95,100]" = "darkred" 141 | ) 142 | ) + 143 | theme_bw() + theme(panel.grid.major = element_blank(), 144 | panel.grid.minor = element_blank()) + 145 | labs (x = "UMAP1", y = "UMAP2", title = paste( trex_comparison," - Percent Change",sep = "")) + 146 | coord_fixed(ratio = graphical.ratio)+ 147 | guides(color = guide_legend(override.aes = list(size=3))) 148 | print(final.trex.plot.umap) 149 | dev.off() 150 | 151 | print(final.trex.plot.umap) 152 | 153 | 154 | 155 | ## Using EmbedSOM as the plot-------------------------- 156 | # KNN search per cell 157 | som.neighbor.index <- knnx.index(trex.embed.som,trex.embed.som,k=kvalue) 158 | first.condition.length <- nrow(subset(flow.data, flow.event.condition == trex.condition[1])) 159 | som.neighbor.index[som.neighbor.index <= first.condition.length] <- 0 160 | som.neighbor.index[som.neighbor.index > first.condition.length] <- 1 161 | 162 | # calculate percent change in each KNN region 163 | percent.change.som <- (rowSums(som.neighbor.index) / kvalue * 100) 164 | 165 | # binning and plot info 166 | range <- apply(apply( trex.embed.som, 2, range), 2, diff) 167 | graphical.ratio <- (range[1] / range[2]) 168 | test.round <- round(percent.change.som) 169 | trex.plot.som <- 170 | data.frame(x = trex.embed.som[, 1], y = trex.embed.som[, 2], col = test.round) 171 | trex.plot.som$cuts <- cut(trex.plot.som$col, c(0, 5, 15, 85, 95, 100), include.lowest = TRUE, right = FALSE) 172 | trex.plot.som$cuts <- factor(trex.plot.som$cuts, 173 | levels = c("[15,85)", "[5,15)", "[0,5)", "[85,95)", "[95,100]")) 174 | ordered.plot.som <- trex.plot.som[order(trex.plot.som$cuts), ] 175 | range <- apply(apply(trex.embed.som, 2, range), 2, diff) 176 | graphical.ratio <- (range[1] / range[2]) 177 | 178 | # create T-REX plot 179 | 180 | trex_comparison <- paste(trex.condition[1], " vs ", trex.condition[2], sep = "") 181 | dir.create("figure_trex_embedsom") 182 | png( 183 | paste( 184 | "./figure_trex_embedsom/", 185 | strftime(Sys.time(), "%Y-%m-%d_%H%M%S"), 186 | " EmbedSOM TREX plot.png", 187 | sep = "" 188 | ), 189 | res = 200, 190 | width = 1500, 191 | height = 1500 192 | ) 193 | final.trex.plot.som <- 194 | ggplot(ordered.plot.som) + geom_point(aes(x = x, y = y, colour = cuts), cex = 1) + 195 | scale_color_manual( 196 | name = "Ratio", 197 | values = c( 198 | "[15,85)" = "lightgray", 199 | "[5,15)" = "lightskyblue", 200 | "[0,5)" = "navyblue", 201 | "[85,95)" = "lightcoral", 202 | "[95,100]" = "darkred" 203 | ) 204 | ) + 205 | theme_bw() + theme(panel.grid.major = element_blank(), 206 | panel.grid.minor = element_blank()) + 207 | labs (x = "EmbedSOM1", y = "EmbedSOM2", title = paste( trex_comparison," - Percent Change",sep = "")) + 208 | coord_fixed(ratio = graphical.ratio)+ 209 | guides(color = guide_legend(override.aes = list(size=3))) 210 | print(final.trex.plot.som) 211 | dev.off() 212 | 213 | print(final.trex.plot.som) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cross-Entropy-test 2 | Unified script for applying the t-SNE diff method on flow cytometry or single cell RNA-seq data 3 | 4 | Code is available and free for academic users. Commercial users should contact Adrian Liston to discuss licensing options. 5 | 6 | For flow cytometry analysis, we recommend using the CSV-based method. This allows users to set the data scales (transformations) 7 | in FlowJo or any other standard flow cytometry data analysis software. Scaling of the data is critical for optimal visualization and clustering. 8 | To understand how to set scales in FlowJo, see the instruction file "Setting axis in FlowJo for Aurora data.pptx". Use the channel values format. 9 | To export your data in CSV format, preserving the transformations from FlowJo, see the instructions in "Exporting data in csv format.PNG". For more 10 | detail, see https://docs.flowjo.com/flowjo/graphs-and-gating/gw-transform-overview/ 11 | 12 | The CSV-based version incorporates EmbedSOM for fast parallelized SOM clustering and visualization. There is also an option to use the Irish lab's 13 | T-REX method as an add-on for visualizing and identifying changed regions in the tSNE, UMAP or EmbedSOM plots. 14 | https://elifesciences.org/articles/64653 15 | 16 | Alternatively, you may use the legacy version "flow analysis", which starts with FCS files. 17 | 18 | For all flow cytometry analysis, we recommend pre-gating and exporting the populations of cells you are interested in. While exporting, 19 | adding group or variable tags to the file names will help you sort the files with the script. For a tutorial on using the script, see the lab's website: 20 | http://www.listonlab.uk/cross-entropy-test/ 21 | 22 | Publication here: 23 | https://www.cell.com/cell-reports-methods/pdfExtended/S2667-2375(22)00295-8 24 | 25 | To install all the packages and dependencies for running the script, run flowcytoscript_setup.R 26 | 27 | The parameter file (analyze_flow_cytometry_parameter_csv.R) contains all the variables you may wish to modify, 28 | so no changes should be necessary in the main script (analyze_flow_cytometry_csv.R) or source (00_src) files. 29 | -------------------------------------------------------------------------------- /flow analysis/analyze_flow_cytometry.r: -------------------------------------------------------------------------------- 1 | 2 | # script for the analysis of flow cytometry data 3 | 4 | 5 | library( ConsensusClusterPlus ) 6 | library( digest ) 7 | require( dunn.test ) 8 | library( flowCore ) 9 | library( FlowSOM ) 10 | library( ggplot2 ) 11 | library( ggridges ) 12 | require( RANN ) 13 | library( RColorBrewer ) 14 | library( reshape2 ) 15 | library( Rtsne ) 16 | library( umap ) 17 | 18 | 19 | # function to set random seed depending on base number and string 20 | 21 | set.seed.here <- function( seed.base, seed.char ) 22 | { 23 | seed.add <- strtoi( substr( digest( seed.char, "xxhash32" ), 2, 8 ), 16 ) 24 | seed.new <- seed.base + seed.add 25 | set.seed( seed.new ) 26 | invisible( seed.new ) 27 | } 28 | 29 | 30 | # source parameters 31 | 32 | param.filename <- "./analyze_flow_cytometry_parameter.r" 33 | 34 | source( param.filename ) 35 | 36 | 37 | # check consistency in parameters 38 | 39 | stopifnot( names( fcs.channel.label ) == fcs.channel ) 40 | stopifnot( names( fcs.channel.asinh.scale ) == fcs.channel ) 41 | 42 | stopifnot( names( fcs.condition.label ) == fcs.condition ) 43 | stopifnot( names( fcs.condition.color ) == fcs.condition ) 44 | stopifnot( names( fcs.condition.line.type ) == fcs.condition ) 45 | 46 | stopifnot( names( fcs.cluster.label ) == fcs.cluster ) 47 | stopifnot( names( fcs.cluster.color ) == fcs.cluster ) 48 | stopifnot( names( fcs.cluster.line.type ) == fcs.cluster ) 49 | 50 | stopifnot( sum( is.null( fcs.dmrd.data.sample.n ), 51 | is.null( fcs.dmrd.data.sample.n.per.condition ), 52 | is.null( fcs.dmrd.data.sample.n.per.sample ) ) >= 2 ) 53 | 54 | stopifnot( unlist( fcs.cluster.group ) >= 1 & 55 | unlist( fcs.cluster.group ) <= fcs.cluster.n ) 56 | 57 | 58 | # source functions 59 | 60 | source( file.path( fcs.src.dir, "ce_diff_test.r" ) ) 61 | source( file.path( fcs.src.dir, "ce_diff_test_tsne.r" ) ) 62 | source( file.path( fcs.src.dir, "ce_diff_test_umap.r" ) ) 63 | source( file.path( fcs.src.dir, "plot_all_dmrd_figures.r" ) ) 64 | source( file.path( fcs.src.dir, "plot_dimensionality_reduction.r" ) ) 65 | 66 | 67 | # create dirs 68 | 69 | figure.dir <- c( 70 | fcs.ce.diff.tsne.figure.dir, 71 | fcs.ce.diff.umap.figure.dir, 72 | fcs.density.figure.dir, 73 | fcs.heatmap.figure.dir, 74 | fcs.histogram.figure.dir, 75 | fcs.tsne.figure.dir, 76 | fcs.umap.figure.dir 77 | ) 78 | 79 | table.dir <- fcs.cluster.table.dir 80 | 81 | data.dir <- sapply( names( fcs.cluster.group ), function( fcg.name ) 82 | sprintf( "%s/%s_%s", fcs.cluster.data.dir, fcs.cluster.data, fcg.name ) ) 83 | 84 | for ( the.dir in c( figure.dir, table.dir, data.dir ) ) 85 | if ( ! file.exists( the.dir ) ) 86 | dir.create( the.dir, recursive = TRUE ) 87 | 88 | 89 | # read fcs data 90 | 91 | flow.data.filename.all <- list.files( fcs.data.dir, "\\.fcs$" ) 92 | 93 | flow.data.filename <- grep( paste0( fcs.condition, collapse = "|" ), 94 | flow.data.filename.all, value = TRUE ) 95 | 96 | sample.name.format <- paste0( "%s.%0", fcs.sample.number.width, "d" ) 97 | event.name.format <- paste0( "%s.%0", fcs.event.number.width, "d" ) 98 | 99 | flow.data.filename.sample <- rep( "", length( flow.data.filename ) ) 100 | names( flow.data.filename.sample ) <- flow.data.filename 101 | 102 | sample.idx.next <- rep( 1, fcs.condition.n ) 103 | names( sample.idx.next ) <- fcs.condition 104 | 105 | flow.data <- lapply( flow.data.filename, function( flow.data.fn ) { 106 | # cat( flow.data.fn, "\n" ) 107 | sample.flow.frame <- read.FCS( file.path( fcs.data.dir, flow.data.fn ), 108 | transformation = NULL, truncate_max_range = FALSE ) 109 | 110 | condition <- fcs.condition[ sapply( fcs.condition, grepl, flow.data.fn ) ] 111 | stopifnot( length( condition ) == 1 ) 112 | 113 | sample.data <- exprs( sample.flow.frame ) 114 | 115 | if ( ! all( fcs.channel %in% colnames( sample.data ) ) ) 116 | { 117 | cat( sprintf( "File: %s\n", flow.data.fn ) ) 118 | print( sort( fcs.channel[ 119 | ! fcs.channel %in% colnames( sample.data ) ] ) ) 120 | print( sort( colnames( sample.data ) ) ) 121 | stop( "mismatch in names of fcs channels" ) 122 | } 123 | 124 | sample.name <- sprintf( sample.name.format, condition, 125 | sample.idx.next[ condition ] ) 126 | 127 | sample.data <- sample.data[ , fcs.channel, drop = FALSE ] 128 | 129 | event.n <- nrow( sample.data ) 130 | if ( event.n > 0 ) { 131 | event.name <- sprintf( event.name.format, sample.name, 1 : event.n ) 132 | rownames( sample.data ) <- event.name 133 | } 134 | 135 | flow.data.filename.sample[ flow.data.fn ] <<- sample.name 136 | sample.idx.next[ condition ] <<- sample.idx.next[ condition ] + 1 137 | 138 | sample.data 139 | } ) 140 | 141 | flow.data <- do.call( rbind, flow.data ) 142 | 143 | # define samples 144 | flow.sample <- flow.data.filename.sample 145 | names( flow.sample ) <- NULL 146 | 147 | stopifnot( flow.sample == 148 | unique( sub( "\\.[0-9]+$", "", rownames( flow.data ) ) ) ) 149 | 150 | flow.sample.n <- length ( flow.sample ) 151 | 152 | flow.sample.condition <- factor( sub( "\\.[0-9]+$", "", flow.sample ), 153 | levels = fcs.condition ) 154 | names( flow.sample.condition ) <- flow.sample 155 | 156 | # reorder samples to follow order of conditions 157 | flow.sample <- flow.sample[ order( flow.sample.condition ) ] 158 | flow.sample.condition <- flow.sample.condition[ flow.sample ] 159 | 160 | flow.sample.label <- sapply( flow.sample, function( fs ) { 161 | sample.cond <- sub( "^(.*)\\.[0-9]+$", "\\1", fs ) 162 | sample.num <- sub( "^.*\\.([0-9]+)$", "\\1", fs ) 163 | sprintf( "%s-%s", fcs.condition.label[ sample.cond ], sample.num ) 164 | } ) 165 | 166 | flow.sample.filename <- sapply( flow.sample, function( fs ) 167 | names( which( flow.data.filename.sample == fs ) ) ) 168 | 169 | # define events 170 | flow.event <- rownames( flow.data ) 171 | flow.event.n <- length( flow.event ) 172 | 173 | flow.event.sample <- factor( sub( "\\.[0-9]+$", "", flow.event ), 174 | levels = flow.sample ) 175 | names( flow.event.sample ) <- flow.event 176 | 177 | flow.event.condition <- factor( sub( "\\.[0-9]+$", "", flow.event.sample ), 178 | levels = fcs.condition ) 179 | names( flow.event.condition ) <- flow.event 180 | 181 | # reorder events to follow order of samples 182 | flow.event.order <- order( flow.event.sample ) 183 | 184 | flow.data <- flow.data[ flow.event.order, ] 185 | flow.event <- flow.event[ flow.event.order ] 186 | flow.event.sample <- flow.event.sample[ flow.event.order ] 187 | flow.event.condition <- flow.event.condition[ flow.event.order ] 188 | 189 | flow.event.sample.n <- as.vector( table( flow.event.sample ) ) 190 | names( flow.event.sample.n ) <- flow.sample 191 | 192 | flow.event.condition.n <- as.vector( table( flow.event.condition ) ) 193 | names( flow.event.condition.n ) <- fcs.condition 194 | 195 | flow.data.filename 196 | 197 | table( flow.sample.condition ) 198 | 199 | str( flow.data ) 200 | flow.event.condition.n 201 | flow.event.sample.n 202 | 203 | 204 | # define figure parameters for samples 205 | 206 | flow.sample.color <- fcs.condition.color[ flow.sample.condition ] 207 | names( flow.sample.color ) <- flow.sample 208 | 209 | flow.sample.color.single <- unlist( lapply( fcs.condition, function( fc ) { 210 | cond.sample.n <- sum( flow.sample.condition == fc ) 211 | rep( 212 | fcs.color.pool, 213 | ceiling( cond.sample.n / fcs.color.pool.n ) 214 | )[ 1 : cond.sample.n ] 215 | } ) ) 216 | names( flow.sample.color.single ) <- flow.sample 217 | 218 | flow.sample.line.type <- fcs.condition.line.type[ flow.sample.condition ] 219 | names( flow.sample.line.type ) <- flow.sample 220 | 221 | flow.sample.line.type.single <- unlist( lapply( fcs.condition, function( fc ) { 222 | cond.sample.n <- sum( flow.sample.condition == fc ) 223 | rep( 224 | fcs.line.type.pool, 225 | ceiling( cond.sample.n / fcs.line.type.pool.n ) 226 | )[ 1 : cond.sample.n ] 227 | } ) ) 228 | names( flow.sample.line.type.single ) <- flow.sample 229 | 230 | flow.ce.diff.figure.dendrogram.weight.sample <- 231 | fcs.ce.diff.figure.dendrogram.weight.condition[ flow.sample.condition ] 232 | names( flow.ce.diff.figure.dendrogram.weight.sample ) <- flow.sample 233 | 234 | 235 | # transform data 236 | 237 | for ( fch in fcs.channel ) 238 | flow.data[ , fch ] <- asinh( flow.data[ , fch ] / 239 | fcs.channel.asinh.scale[ fch ] ) 240 | 241 | 242 | # plot density distributions of transformed data 243 | 244 | set.seed.here( fcs.seed.base, "plot density distributions of transformed data" ) 245 | 246 | { 247 | if ( ! is.null( fcs.density.data.sample.n ) && 248 | fcs.density.data.sample.n < flow.event.n ) 249 | density.data.idx <- sort( sample( flow.event.n, 250 | fcs.density.data.sample.n ) ) 251 | else 252 | density.data.idx <- 1 : flow.event.n 253 | } 254 | 255 | density.data <- flow.data[ density.data.idx, ] 256 | 257 | density.data.ggdf.all <- melt( density.data, 258 | varnames = c( "partition", "channel" ), value.name = "density.value" ) 259 | density.data.ggdf.all$partition <- fcs.density.partition.all 260 | density.data.ggdf.all$channel <- as.character( density.data.ggdf.all$channel ) 261 | 262 | density.data.ggdf.condition <- lapply( fcs.condition, function( fc ) { 263 | ggdf.condition <- melt( 264 | density.data[ flow.event.condition[ density.data.idx ] == fc, , 265 | drop = FALSE], 266 | varnames = c( "partition", "channel" ), 267 | value.name = "density.value" ) 268 | if ( nrow( ggdf.condition ) > 0 ) { 269 | ggdf.condition$partition <- fc 270 | ggdf.condition$channel <- as.character( ggdf.condition$channel ) 271 | } 272 | ggdf.condition 273 | } ) 274 | density.data.ggdf.condition <- do.call( rbind, density.data.ggdf.condition ) 275 | 276 | density.data.ggdf.sample <- lapply( flow.sample, function( fs ) { 277 | ggdf.sample <- melt( 278 | density.data[ flow.event.sample[ density.data.idx ] == fs, , 279 | drop = FALSE ], 280 | varnames = c( "partition", "channel" ), 281 | value.name = "density.value" ) 282 | if ( nrow( ggdf.sample ) > 0 ) { 283 | ggdf.sample$partition <- fs 284 | ggdf.sample$channel <- as.character( ggdf.sample$channel ) 285 | } 286 | ggdf.sample 287 | } ) 288 | density.data.ggdf.sample <- do.call( rbind, density.data.ggdf.sample ) 289 | 290 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.condition, 291 | density.data.ggdf.sample ) 292 | 293 | density.data.partition <- c( fcs.density.partition.all, fcs.condition, 294 | flow.sample ) 295 | density.data.partition.n <- length( density.data.partition ) 296 | 297 | density.data.partition.label <- c( fcs.density.partition.all.label, 298 | fcs.condition.label, flow.sample.label ) 299 | 300 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 301 | levels = density.data.partition ) 302 | 303 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 304 | levels = fcs.channel ) 305 | 306 | density.plot.color <- c( fcs.density.partition.all.color, fcs.condition.color, 307 | flow.sample.color ) 308 | 309 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 310 | y = partition, color = partition, fill = partition ) ) + 311 | geom_density_ridges( size = fcs.density.line.size, 312 | alpha = fcs.density.line.alpha, show.legend = FALSE ) + 313 | labs( x = NULL, y = NULL ) + 314 | scale_y_discrete( limits = rev( density.data.partition ), 315 | breaks = rev( density.data.partition ), 316 | labels = rev( density.data.partition.label ) ) + 317 | scale_color_manual( values = density.plot.color ) + 318 | scale_fill_manual( values = density.plot.color ) + 319 | facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 320 | channel = fcs.channel.label ) ) + 321 | theme_ridges( line_size = fcs.density.line.size, 322 | font_size = fcs.density.font.size ) + 323 | theme( strip.background = element_rect( fill = "white" ) ) 324 | 325 | ggsave( 326 | file.path( fcs.density.figure.dir, 327 | sprintf( "%s.png", fcs.density.figure.sample ) ), 328 | density.plot, 329 | width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 330 | height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 331 | ) 332 | 333 | 334 | # select data for dimensionality reduction 335 | 336 | set.seed.here( fcs.seed.base, "select data for dimensionality reduction" ) 337 | 338 | { 339 | if ( ! is.null( fcs.dmrd.data.sample.n ) ) 340 | { 341 | if ( fcs.dmrd.data.sample.n < flow.event.n ) 342 | dmrd.data.idx <- sort( sample( flow.event.n, fcs.dmrd.data.sample.n ) ) 343 | else 344 | dmrd.data.idx <- 1 : flow.event.n 345 | } 346 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.condition ) ) 347 | { 348 | dmrd.data.idx <- unlist( sapply( fcs.condition, function( fc ) { 349 | fc.idx <- which( flow.event.condition == fc ) 350 | if ( fcs.dmrd.data.sample.n.per.condition < length( fc.idx ) ) 351 | sort( sample( fc.idx, fcs.dmrd.data.sample.n.per.condition ) ) 352 | else 353 | fc.idx 354 | } ) ) 355 | names( dmrd.data.idx ) <- NULL 356 | } 357 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.sample ) ) 358 | { 359 | dmrd.data.idx <- unlist( sapply( flow.sample, function( fs ) { 360 | fs.idx <- which( flow.event.sample == fs ) 361 | if ( fcs.dmrd.data.sample.n.per.sample < length( fs.idx ) ) 362 | sort( sample( fs.idx, fcs.dmrd.data.sample.n.per.sample ) ) 363 | else 364 | fs.idx 365 | } ) ) 366 | names( dmrd.data.idx ) <- NULL 367 | } 368 | else 369 | dmrd.data.idx <- 1 : flow.event.n 370 | } 371 | 372 | dmrd.data <- flow.data[ dmrd.data.idx, ] 373 | 374 | dmrd.event.sample <- flow.event.sample[ dmrd.data.idx ] 375 | dmrd.event.condition <- flow.event.condition[ dmrd.data.idx ] 376 | 377 | str( dmrd.data ) 378 | table( dmrd.event.condition ) 379 | table( dmrd.event.sample ) 380 | 381 | 382 | # get flowsom clusters 383 | 384 | set.seed.here( fcs.seed.base, "get flowsom clusters" ) 385 | 386 | # read fcs data as a flowSet 387 | flow.set <- read.flowSet( flow.data.filename, fcs.data.dir, 388 | column.pattern = paste0( fcs.channel, collapse = "|" ), 389 | transformation = NULL, truncate_max_range = FALSE ) 390 | 391 | stopifnot( sort( fcs.channel ) == sort( colnames( flow.set ) ) ) 392 | 393 | flow.set <- flow.set[ , fcs.channel ] 394 | 395 | # transform data 396 | for ( idx in 1 : length( flow.set ) ) 397 | for ( fch in fcs.channel ) 398 | exprs( flow.set[[ idx ]] )[ , fch ] <- 399 | asinh( exprs( flow.set[[ idx ]] )[ , fch ] / 400 | fcs.channel.asinh.scale[ fch ] ) 401 | 402 | # build som objects 403 | flow.som <- ReadInput( flow.set, transform = FALSE, scale = FALSE ) 404 | flow.som <- BuildSOM( flow.som, xdim = fcs.flow.som.dim, 405 | ydim = fcs.flow.som.dim ) 406 | flow.som.mapping <- flow.som$map$mapping[ , 1 ] 407 | flow.som.codes <- flow.som$map$codes 408 | 409 | # get clusters from som mapping 410 | consensus.cluster <- ConsensusClusterPlus( t( flow.som.codes ), 411 | maxK = fcs.cluster.n, reps = 100, pItem = 0.9, pFeature = 1, 412 | clusterAlg = "hc", innerLinkage = "average", finalLinkage = "average", 413 | distance = "euclidean", 414 | seed = set.seed.here( fcs.seed.base, "get clusters from som mapping" ) ) 415 | 416 | flow.som.event.cluster <- consensus.cluster[[ fcs.cluster.n ]]$ 417 | consensusClass[ flow.som.mapping ] 418 | 419 | # reorder clusters from bigger to smaller 420 | flow.som.cluster.rank <- 1 + fcs.cluster.n - 421 | rank( table( flow.som.event.cluster ), ties.method = "last" ) 422 | flow.som.event.cluster <- flow.som.cluster.rank[ flow.som.event.cluster ] 423 | names( flow.som.event.cluster ) <- NULL 424 | 425 | # set clusters as a factor 426 | flow.som.event.cluster <- factor( flow.som.event.cluster, 427 | levels = 1 : fcs.cluster.n ) 428 | levels( flow.som.event.cluster ) <- fcs.cluster 429 | 430 | # reorder events 431 | flow.event.cluster <- flow.som.event.cluster[ flow.event.order ] 432 | dmrd.event.cluster <- flow.event.cluster[ dmrd.data.idx ] 433 | 434 | flow.event.cluster.n <- as.vector( table( flow.event.cluster ) ) 435 | names( flow.event.cluster.n ) <- fcs.cluster 436 | 437 | dmrd.event.cluster.n <- as.vector( table( dmrd.event.cluster ) ) 438 | names( dmrd.event.cluster.n ) <- fcs.cluster 439 | 440 | length( flow.event.cluster ) 441 | table( flow.event.cluster ) 442 | 443 | length( dmrd.event.cluster ) 444 | table( dmrd.event.cluster ) 445 | 446 | 447 | # save cluster counts 448 | 449 | flow.cluster.count <- sapply( fcs.cluster, function( fc ) 450 | table( flow.event.sample[ flow.event.cluster == fc ] ) ) 451 | 452 | stopifnot( rownames( flow.cluster.count ) == flow.sample ) 453 | stopifnot( colnames( flow.cluster.count ) == fcs.cluster ) 454 | 455 | rownames( flow.cluster.count ) <- flow.sample.label 456 | colnames( flow.cluster.count ) <- fcs.cluster.label 457 | 458 | write.csv( flow.cluster.count, file = file.path( fcs.cluster.table.dir, 459 | sprintf( "%s.csv", fcs.cluster.table.counts ) ) ) 460 | 461 | 462 | # plot density distributions by cluster 463 | 464 | density.data.ggdf.cluster <- lapply( fcs.cluster, function( fc ) { 465 | ggdf.cluster <- melt( 466 | density.data[ flow.event.cluster[ density.data.idx ] == fc, , 467 | drop = FALSE ], 468 | varnames = c( "partition", "channel" ), 469 | value.name = "density.value" 470 | ) 471 | if ( nrow( ggdf.cluster ) > 0 ) { 472 | ggdf.cluster$partition <- fc 473 | ggdf.cluster$channel <- as.character( ggdf.cluster$channel ) 474 | } 475 | ggdf.cluster 476 | } ) 477 | density.data.ggdf.cluster <- do.call( rbind, density.data.ggdf.cluster ) 478 | 479 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.cluster ) 480 | 481 | density.data.partition <- c( fcs.density.partition.all, fcs.cluster ) 482 | density.data.partition.n <- length( density.data.partition ) 483 | 484 | density.data.partition.label <- c( fcs.density.partition.all.label, 485 | fcs.cluster.label ) 486 | 487 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 488 | levels = density.data.partition ) 489 | 490 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 491 | levels = fcs.channel ) 492 | 493 | density.plot.color <- c( fcs.density.partition.all.color, fcs.cluster.color ) 494 | 495 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 496 | y = partition, color = partition, fill = partition ) ) + 497 | geom_density_ridges( size = fcs.density.line.size, 498 | alpha = fcs.density.line.alpha, show.legend = FALSE ) + 499 | labs( x = NULL, y = NULL ) + 500 | scale_y_discrete( limits = rev( density.data.partition ), 501 | breaks = rev( density.data.partition ), 502 | labels = rev( density.data.partition.label ) ) + 503 | scale_color_manual( values = density.plot.color ) + 504 | scale_fill_manual( values = density.plot.color ) + 505 | facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 506 | channel = fcs.channel.label ) ) + 507 | theme_ridges( line_size = fcs.density.line.size, 508 | font_size = fcs.density.font.size ) + 509 | theme( strip.background = element_rect( fill = "white" ) ) 510 | 511 | ggsave( 512 | file.path( fcs.density.figure.dir, 513 | sprintf( "%s.png", fcs.density.figure.cluster ) ), 514 | density.plot, 515 | width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 516 | height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 517 | ) 518 | 519 | 520 | # save cluster data 521 | 522 | for ( fs in flow.sample ) 523 | { 524 | sample.filename <- flow.sample.filename[ fs ] 525 | sample.flow.frame <- read.FCS( file.path( fcs.data.dir, sample.filename ), 526 | transformation = NULL, truncate_max_range = FALSE ) 527 | sample.event.idx <- which( flow.event.sample == fs ) 528 | 529 | for ( fcg.name in names( fcs.cluster.group ) ) 530 | { 531 | cluster.group <- fcs.cluster[ fcs.cluster.group[[ fcg.name ]] ] 532 | cluster.group.event.idx <- which( 533 | ( flow.event.cluster %in% cluster.group )[ sample.event.idx ] ) 534 | 535 | if ( length( cluster.group.event.idx ) > 0 ) 536 | { 537 | cluster.flow.frame <- sample.flow.frame[ cluster.group.event.idx, ] 538 | cluster.data.dir <- sprintf( "%s/%s_%s", fcs.cluster.data.dir, 539 | fcs.cluster.data, fcg.name ) 540 | cluster.data.filename <- sprintf( "%s_%s_%s.fcs", fs, 541 | fcs.cluster.data, fcg.name ) 542 | invisible( write.FCS( cluster.flow.frame, 543 | file.path( cluster.data.dir, cluster.data.filename ) ) ) 544 | } 545 | } 546 | } 547 | 548 | 549 | # plot heatmaps 550 | 551 | heatmap.type <- c( "by_condition", "by_sample", "by_cluster" ) 552 | 553 | for ( ht in heatmap.type ) 554 | { 555 | if ( ht == "by_condition" ) { 556 | flow.data.group.median <- apply( flow.data, 2, tapply, 557 | flow.event.condition, median ) 558 | margin.col <- fcs.heatmap.label.factor.col * 559 | max( nchar( fcs.condition.label ) ) 560 | group.label <- fcs.condition.label 561 | group.color <- fcs.condition.color 562 | } 563 | else if ( ht == "by_sample" ) { 564 | flow.data.group.median <- apply( flow.data, 2, tapply, 565 | flow.event.sample, median ) 566 | margin.col <- fcs.heatmap.label.factor.col * 567 | max( nchar( flow.sample.label ) ) 568 | group.label <- flow.sample.label 569 | group.color <- flow.sample.color 570 | } 571 | else if ( ht == "by_cluster" ) { 572 | flow.data.group.median <- apply( flow.data, 2, tapply, 573 | flow.event.cluster, median ) 574 | margin.col <- fcs.heatmap.label.factor.col * 575 | max( nchar( fcs.cluster.label ) ) 576 | group.label <- fcs.cluster.label 577 | group.color <- fcs.cluster.color 578 | } 579 | else 580 | stop( "wrong heatmap type" ) 581 | 582 | margin.row <- 583 | fcs.heatmap.label.factor.row * max( nchar( fcs.channel.label ) ) 584 | 585 | if ( margin.row < 5 ) 586 | margin.row <- 5 587 | if ( margin.col < 5 ) 588 | margin.col <- 5 589 | 590 | png( filename = file.path( fcs.heatmap.figure.dir, 591 | sprintf( "%s_%s.png", fcs.heatmap.figure, ht ) ), 592 | width = fcs.heatmap.width, height = fcs.heatmap.height ) 593 | heatmap( t( flow.data.group.median ), scale = "row", 594 | labRow = fcs.channel.label, labCol = group.label, 595 | col = fcs.heatmap.palette, ColSideColors = group.color, 596 | cexRow = fcs.heatmap.font.size, cexCol = fcs.heatmap.font.size, 597 | margins = c( margin.col, margin.row ) ) 598 | dev.off() 599 | } 600 | 601 | 602 | # plot histograms 603 | 604 | histogram.type <- c( "by_cluster", "by_sample", "by_assay" ) 605 | 606 | condition.sample.n <- as.vector( table( flow.sample.condition ) ) 607 | 608 | for ( ht in histogram.type ) 609 | { 610 | flow.data.cluster.fraction <- lapply( fcs.cluster, function( fc ) { 611 | sample.event.n <- as.vector( table( 612 | flow.event.sample[ flow.event.cluster == fc ] ) ) 613 | 614 | if ( ht == "by_cluster" ) 615 | { 616 | sample.fraction <- log2( 617 | ( sample.event.n / flow.event.cluster.n[ fc ] ) / 618 | ( flow.event.sample.n / flow.event.n ) 619 | ) 620 | sample.fraction[ is.infinite( sample.fraction ) ] <- NA 621 | fraction <- tapply( sample.fraction, flow.sample.condition, mean, 622 | na.rm = TRUE ) 623 | std.err <- tapply( sample.fraction, flow.sample.condition, sd, 624 | na.rm = TRUE ) / sqrt( condition.sample.n ) 625 | } 626 | else if ( ht == "by_sample" ) 627 | { 628 | sample.fraction <- 100 * sample.event.n / flow.event.sample.n 629 | fraction <- tapply( sample.fraction, flow.sample.condition, mean, 630 | na.rm = TRUE ) 631 | std.err <- tapply( sample.fraction, flow.sample.condition, sd, 632 | na.rm = TRUE ) / sqrt( condition.sample.n ) 633 | } 634 | else if ( ht == "by_assay" ) 635 | { 636 | sample.fraction <- 100 * sample.event.n / flow.event.n 637 | fraction <- tapply( sample.fraction, flow.sample.condition, sum, 638 | na.rm = TRUE ) 639 | std.err <- NA 640 | } 641 | else 642 | stop( "wrong histogram type" ) 643 | 644 | data.frame( cluster = fc, condition = fcs.condition, fraction, std.err ) 645 | } ) 646 | 647 | flow.data.cluster.fraction <- do.call( rbind, flow.data.cluster.fraction ) 648 | flow.data.cluster.fraction$condition <- factor( 649 | flow.data.cluster.fraction$condition, levels = fcs.condition ) 650 | 651 | histogram.plot <- ggplot( flow.data.cluster.fraction, 652 | aes( x = cluster, y = fraction, fill = condition ) ) + 653 | geom_bar( stat = "identity", position = position_dodge2() ) + 654 | geom_errorbar( aes( ymin = fraction - std.err, 655 | ymax = fraction + std.err ), 656 | size = fcs.histogram.error.bar.size, 657 | position = position_dodge2() ) + 658 | scale_x_discrete( labels = fcs.cluster.label, name = "" ) + 659 | scale_fill_manual( values = fcs.condition.color, 660 | limits = fcs.condition, breaks = fcs.condition, 661 | labels = fcs.condition.label ) + 662 | theme_bw() + 663 | theme( 664 | axis.text = element_text( size = fcs.histogram.font.size, 665 | angle = 90 ), 666 | axis.title = element_text( size = fcs.histogram.font.size + 1 ), 667 | legend.title = element_blank(), 668 | legend.text = element_text( size = fcs.histogram.font.size + 1 ), 669 | legend.key.size = unit( fcs.histogram.legend.key.size, "lines" ), 670 | panel.grid.major = element_blank(), 671 | panel.grid.minor = element_blank() 672 | ) 673 | 674 | if ( ht == "by_cluster" ) 675 | histogram.plot <- histogram.plot + 676 | ylab( "Average log2( frequency ratio )" ) 677 | else if ( ht == "by_sample" ) 678 | histogram.plot <- histogram.plot + ylab( "Average frequency" ) 679 | else if ( ht == "by_assay" ) 680 | histogram.plot <- histogram.plot + ylab( "Frequency" ) 681 | 682 | ggsave( 683 | file.path( fcs.histogram.figure.dir, 684 | sprintf( "%s_%s.png", fcs.histogram.figure, ht ) ), 685 | histogram.plot, 686 | width = fcs.histogram.width, 687 | height = fcs.histogram.height + 688 | fcs.histogram.label.factor.height * 689 | max( nchar( fcs.cluster.label ) ) 690 | ) 691 | } 692 | 693 | 694 | # calculate tsne representation 695 | 696 | { 697 | if ( fcs.use.cached.results && file.exists( fcs.tsne.cache.file.path ) ) 698 | { 699 | cat( "Using cached results for tsne\n" ) 700 | 701 | load( fcs.tsne.cache.file.path ) 702 | } 703 | else 704 | { 705 | set.seed.here( fcs.seed.base, "calculate tsne representation" ) 706 | 707 | cat( "Calculating tsne\n" ) 708 | 709 | tsne.result <- Rtsne( dmrd.data, perplexity = fcs.tsne.perplexity, 710 | theta = fcs.tsne.theta, 711 | exaggeration_factor = fcs.tsne.exaggeration.factor, 712 | max_iter = fcs.tsne.iter.n, check_duplicates = FALSE, pca = FALSE, 713 | num_threads = fcs.tsne.thread.n ) 714 | 715 | save( tsne.result, file = fcs.tsne.cache.file.path ) 716 | } 717 | } 718 | 719 | tsne.data <- tsne.result$Y 720 | 721 | str( tsne.result ) 722 | 723 | 724 | # plot tsne convergence 725 | 726 | tsne.iter <- 1 + 50 * ( 1 : length( tsne.result$itercosts ) - 1 ) 727 | tsne.cost <- tsne.result$itercosts 728 | 729 | png( filename = file.path( fcs.tsne.figure.dir, 730 | sprintf( "%s.png", fcs.tsne.figure.convergence ) ), 731 | width = fcs.tsne.figure.convergence.width, 732 | height = fcs.tsne.figure.convergence.height ) 733 | par( mar = c( 5, 5.8, 2, 1.4 ) ) 734 | plot( tsne.iter, tsne.cost, log = "y", xlab = "Iteration", 735 | ylab = "Total cost", xlim = c( 0, fcs.tsne.iter.n ), 736 | ylim = c( 1, max( tsne.cost ) ), col = "blue3", 737 | pch = 20, cex = 1, cex.lab = 2.5, cex.axis = 2 ) 738 | abline( h = tsne.cost[ length( tsne.cost ) ], lty = 2, lwd = 1.5, 739 | col = "blue3" ) 740 | dev.off() 741 | 742 | 743 | # plot tsne representations 744 | 745 | tsne.data.max <- max( abs( tsne.data ) ) 746 | 747 | plot.all.dmrd.figures( 748 | tsne.data, tsne.data.max, 749 | fcs.tsne.figure.lims.factor, fcs.tsne.figure.point.size, 750 | fcs.tsne.figure.dir, fcs.tsne.figure.plot, 751 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 752 | ) 753 | 754 | 755 | # calculate umap representation 756 | 757 | { 758 | if ( fcs.use.cached.results && file.exists( fcs.umap.cache.file.path ) ) 759 | { 760 | cat( "Using cached results for umap\n" ) 761 | 762 | load( fcs.umap.cache.file.path ) 763 | } 764 | else 765 | { 766 | set.seed.here( fcs.seed.base, "calculate umap representation" ) 767 | 768 | cat( "Calculating umap\n" ) 769 | 770 | umap.config <- umap.defaults 771 | umap.config$n_epochs <- fcs.umap.iter.n 772 | umap.config$verbose <- TRUE 773 | 774 | umap.result <- umap( dmrd.data, config = umap.config ) 775 | 776 | save( umap.result, file = fcs.umap.cache.file.path ) 777 | } 778 | } 779 | 780 | umap.data <- umap.result$layout 781 | dimnames( umap.data ) <- NULL 782 | 783 | str( umap.result ) 784 | 785 | 786 | # plot umap representations 787 | 788 | umap.data.max <- max( abs( apply( umap.data, 2, function( x ) 789 | quantile( x, c( 0.25, 0.75 ) ) + c( -1, 1 ) * IQR( x ) ) ) ) 790 | 791 | plot.all.dmrd.figures( 792 | umap.data, umap.data.max, 793 | fcs.umap.figure.lims.factor, fcs.umap.figure.point.size, 794 | fcs.umap.figure.dir, fcs.umap.figure.plot, 795 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 796 | ) 797 | 798 | 799 | # calculate cross-entropy test for tsne by condition 800 | 801 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" ) 802 | 803 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 804 | dmrd.data, tsne.data, 805 | dmrd.event.condition, 806 | partition.label = fcs.condition.label, 807 | partition.color = fcs.condition.color, 808 | partition.line.type = fcs.condition.line.type, 809 | base.test = fcs.ce.diff.base.test, 810 | base.dist = fcs.ce.diff.base.dist, 811 | prob.sample.n = fcs.ce.diff.prob.sample.n, 812 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 813 | result = file.path( fcs.ce.diff.tsne.figure.dir, 814 | sprintf( "%s_condition.txt", fcs.ce.diff.tsne.result ) ), 815 | cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 816 | sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.cdf ) ), 817 | dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 818 | sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.dendrogram ) ) ) 819 | 820 | print( ce.diff.test.tsne.res ) 821 | 822 | 823 | # calculate cross-entropy test for tsne by sample 824 | 825 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" ) 826 | 827 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 828 | dmrd.data, tsne.data, 829 | dmrd.event.sample, 830 | partition.label = flow.sample.label, 831 | partition.color = flow.sample.color, 832 | partition.line.type = flow.sample.line.type.single, 833 | base.test = fcs.ce.diff.base.test, 834 | base.dist = fcs.ce.diff.base.dist, 835 | prob.sample.n = fcs.ce.diff.prob.sample.n, 836 | dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 837 | result = file.path( fcs.ce.diff.tsne.figure.dir, 838 | sprintf( "%s_sample.txt", fcs.ce.diff.tsne.result ) ), 839 | cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 840 | sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.cdf ) ), 841 | dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 842 | sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.dendrogram ) ) ) 843 | 844 | print( ce.diff.test.tsne.res ) 845 | 846 | 847 | # calculate cross-entropy test for tsne by cluster 848 | 849 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" ) 850 | 851 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 852 | dmrd.data, tsne.data, 853 | dmrd.event.cluster, 854 | partition.label = fcs.cluster.label, 855 | partition.color = fcs.cluster.color, 856 | partition.line.type = fcs.cluster.line.type, 857 | base.test = fcs.ce.diff.base.test, 858 | base.dist = fcs.ce.diff.base.dist, 859 | prob.sample.n = fcs.ce.diff.prob.sample.n, 860 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 861 | result = file.path( fcs.ce.diff.tsne.figure.dir, 862 | sprintf( "%s_cluster.txt", fcs.ce.diff.tsne.result ) ), 863 | cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 864 | sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.cdf ) ), 865 | dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 866 | sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.dendrogram ) ) ) 867 | 868 | print( ce.diff.test.tsne.res ) 869 | 870 | 871 | # calculate cross-entropy test for umap by condition 872 | 873 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" ) 874 | 875 | ce.diff.test.umap.res <- ce.diff.test.umap( 876 | umap.result$knn$distances, umap.result$knn$indexes, 877 | umap.data, umap.result$config, 878 | dmrd.event.condition, 879 | partition.label = fcs.condition.label, 880 | partition.color = fcs.condition.color, 881 | partition.line.type = fcs.condition.line.type, 882 | base.test = fcs.ce.diff.base.test, 883 | base.dist = fcs.ce.diff.base.dist, 884 | prob.sample.n = fcs.ce.diff.prob.sample.n, 885 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 886 | result = file.path( fcs.ce.diff.umap.figure.dir, 887 | sprintf( "%s_condition.txt", fcs.ce.diff.umap.result ) ), 888 | cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 889 | sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.cdf ) ), 890 | dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 891 | sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.dendrogram ) ) ) 892 | 893 | print( ce.diff.test.umap.res ) 894 | 895 | 896 | # calculate cross-entropy test for umap by sample 897 | 898 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" ) 899 | 900 | ce.diff.test.umap.res <- ce.diff.test.umap( 901 | umap.result$knn$distances, umap.result$knn$indexes, 902 | umap.data, umap.result$config, 903 | dmrd.event.sample, 904 | partition.label = flow.sample.label, 905 | partition.color = flow.sample.color, 906 | partition.line.type = flow.sample.line.type.single, 907 | base.test = fcs.ce.diff.base.test, 908 | base.dist = fcs.ce.diff.base.dist, 909 | prob.sample.n = fcs.ce.diff.prob.sample.n, 910 | dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 911 | result = file.path( fcs.ce.diff.umap.figure.dir, 912 | sprintf( "%s_sample.txt", fcs.ce.diff.umap.result ) ), 913 | cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 914 | sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.cdf ) ), 915 | dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 916 | sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.dendrogram ) ) ) 917 | 918 | print( ce.diff.test.umap.res ) 919 | 920 | 921 | # calculate cross-entropy test for umap by cluster 922 | 923 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" ) 924 | 925 | ce.diff.test.umap.res <- ce.diff.test.umap( 926 | umap.result$knn$distances, umap.result$knn$indexes, 927 | umap.data, umap.result$config, 928 | dmrd.event.cluster, 929 | partition.label = fcs.cluster.label, 930 | partition.color = fcs.cluster.color, 931 | partition.line.type = fcs.cluster.line.type, 932 | base.test = fcs.ce.diff.base.test, 933 | base.dist = fcs.ce.diff.base.dist, 934 | prob.sample.n = fcs.ce.diff.prob.sample.n, 935 | dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 936 | result = file.path( fcs.ce.diff.umap.figure.dir, 937 | sprintf( "%s_cluster.txt", fcs.ce.diff.umap.result ) ), 938 | cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 939 | sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.cdf ) ), 940 | dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 941 | sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.dendrogram ) ) ) 942 | 943 | print( ce.diff.test.umap.res ) 944 | 945 | 946 | # output session info 947 | 948 | sessionInfo() 949 | 950 | -------------------------------------------------------------------------------- /flow analysis/analyze_flow_cytometry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | src_dir=../00_src 4 | 5 | R CMD BATCH --quiet --vanilla $src_dir/analyze_flow_cytometry.r \ 6 | ./analyze_flow_cytometry.out 7 | 8 | 9 | -------------------------------------------------------------------------------- /flow analysis/analyze_flow_cytometry_parameter.r: -------------------------------------------------------------------------------- 1 | 2 | # tSNE diff paper--20210608 tSNE-diff flow--Figure 5E SpleenPercentLymph LymphPercentSpleen 3 | 4 | 5 | # data parameters 6 | 7 | # Tip: create two folders inside your analysis directory called "Data" and "Output". 8 | # Put the fcs files in the Data folder. 9 | # Put the parameter file in the Output folder. Double click on it to start Rstudio. 10 | # Opening in this way will set your working directory to the Output folder. 11 | # Type getwd(). Copy the result into the line below, swapping "Data" for "Output" 12 | fcs.data.dir <- "D://tSNE-diff paper/Analysis/20210608 tSNE-diff flow/Data/LymphPercentSpleen/" 13 | 14 | fcs.condition <- c( "LymphpercentSpleen", "Sp_", "SpleenpercentLymph" ) 15 | 16 | fcs.condition.label <- c( 17 | "LymphpercentSpleen" = "LymphpercentSpleen", 18 | "Sp_" = "Spleen", 19 | "SpleenpercentLymph" = "SpleenpercentLymph" 20 | ) 21 | 22 | # Run the get_channels script and insert the output below. 23 | fcs.channel <- c( 24 | "FJComp-APC-A", 25 | "FJComp-APC-Fire 750-A", 26 | "FJComp-APC-Fire810-A", 27 | "FJComp-Alexa Fluor 532-A", 28 | "FJComp-Alexa Fluor 561-A", 29 | "FJComp-Alexa Fluor 700-A", 30 | "FJComp-Alexa Fluor 790-A", 31 | "FJComp-BB515-A", 32 | "FJComp-BB660-P2-A", 33 | "FJComp-BB700-A", 34 | "FJComp-BB755-P-A", 35 | "FJComp-BB790-P-A", 36 | "FJComp-BUV395-A", 37 | "FJComp-BUV496-A", 38 | "FJComp-BUV563-A", 39 | "FJComp-BUV615-A", 40 | "FJComp-BUV661-A", 41 | "FJComp-BUV737-A", 42 | "FJComp-BUV805-A", 43 | "FJComp-BV421-A", 44 | "FJComp-BV480-A", 45 | "FJComp-BV510-A", 46 | "FJComp-BV570-A", 47 | "FJComp-BV605-A", 48 | "FJComp-BV650-A", 49 | "FJComp-BV711-A", 50 | "FJComp-BV750-A", 51 | "FJComp-Nova Blue 530-A", 52 | "FJComp-Nova Blue 585-A", 53 | "FJComp-Nova Blue 610-A", 54 | "FJComp-Nova Red 685-A", 55 | "FJComp-Nova Yellow 690-A", 56 | "FJComp-PE Fire 640-A", 57 | "FJComp-PE Fire 810-A", 58 | "FJComp-PE-A", 59 | "FJComp-PE-Cy5-A", 60 | "FJComp-PE-Cy5.5-A", 61 | "FJComp-PE-Cy7-A", 62 | "FJComp-PE-Dazzle594-A", 63 | "FJComp-Pacific Blue-A", 64 | "FJComp-Pacific Orange-A", 65 | "FJComp-PerCP-A", 66 | "FJComp-PerCP-eFluor 710-A", 67 | "FJComp-Qdot 705-A", 68 | "FJComp-SBUV445-A", 69 | "FJComp-SBV515-A", 70 | "FJComp-Super Bright 436-A", 71 | "FJComp-Super Bright 780-A", 72 | "FJComp-eFluor 660-A" 73 | ) 74 | 75 | fcs.channel.label <- c( 76 | "FJComp-APC-A" = "RORgT", 77 | "FJComp-APC-Fire 750-A" = "CD44", 78 | "FJComp-APC-Fire810-A" = "Gr-1", 79 | "FJComp-Alexa Fluor 532-A" = "IgM", 80 | "FJComp-Alexa Fluor 561-A" = "F4-80", 81 | "FJComp-Alexa Fluor 700-A" = "Ki67", 82 | "FJComp-Alexa Fluor 790-A" = "CD90.2", 83 | "FJComp-BB515-A" = "CCR9", 84 | "FJComp-BB660-P2-A" = "TCRgd", 85 | "FJComp-BB700-A" = "PDCA-1", 86 | "FJComp-BB755-P-A" = "CD11c", 87 | "FJComp-BB790-P-A" = "Ly-6C", 88 | "FJComp-BUV395-A" = "CD103", 89 | "FJComp-BUV496-A" = "IgD", 90 | "FJComp-BUV563-A" = "NK1.1", 91 | "FJComp-BUV615-A" = "CTLA-4", 92 | "FJComp-BUV661-A" = "c-Kit", 93 | "FJComp-BUV737-A" = "CD62L", 94 | "FJComp-BUV805-A" = "GITR", 95 | "FJComp-BV421-A" = "CD150", 96 | "FJComp-BV480-A" = "CXCR3", 97 | "FJComp-BV510-A" = "Siglec F", 98 | "FJComp-BV570-A" = "TCRb", 99 | "FJComp-BV605-A" = "PD-1", 100 | "FJComp-BV650-A" = "XCR1", 101 | "FJComp-BV711-A" = "CD127", 102 | "FJComp-BV750-A" = "CCR2", 103 | "FJComp-Nova Blue 530-A" = "CD45", 104 | "FJComp-Nova Blue 585-A" = "CD4", 105 | "FJComp-Nova Blue 610-A" = "CD8", 106 | "FJComp-Nova Red 685-A" = "CD3", 107 | "FJComp-Nova Yellow 690-A" = "CD19", 108 | "FJComp-PE Fire 640-A" = "CD11b", 109 | "FJComp-PE Fire 810-A" = "CD38", 110 | "FJComp-PE-A" = "GATA-3", 111 | "FJComp-PE-Cy5-A" = "CD86", 112 | "FJComp-PE-Cy5.5-A" = "Foxp3", 113 | "FJComp-PE-Cy7-A" = "CD172a", 114 | "FJComp-PE-Dazzle594-A" = "CD64", 115 | "FJComp-Pacific Blue-A" = "Helios", 116 | "FJComp-Pacific Orange-A" = "CD24", 117 | "FJComp-PerCP-A" = "MHCII", 118 | "FJComp-PerCP-eFluor 710-A" = "NKp46", 119 | "FJComp-Qdot 705-A" = "CD69", 120 | "FJComp-SBUV445-A" = "B220", 121 | "FJComp-SBV515-A" = "CD25", 122 | "FJComp-Super Bright 436-A" = "ICOS", 123 | "FJComp-Super Bright 780-A" = "KLRG1", 124 | "FJComp-eFluor 660-A" = "T-bet" 125 | ) 126 | 127 | # Tip: set each to 200 to start. 128 | # Modify (increasing) until the density plots resemble the distributions you are familiar with. 129 | fcs.channel.asinh.scale <- c( 130 | "FJComp-APC-A" = 2000, 131 | "FJComp-APC-Fire 750-A" = 4000, 132 | "FJComp-APC-Fire810-A" = 5000, 133 | "FJComp-Alexa Fluor 532-A" = 2000, 134 | "FJComp-Alexa Fluor 561-A" = 8000, 135 | "FJComp-Alexa Fluor 700-A" = 3000, 136 | "FJComp-Alexa Fluor 790-A" = 2000, 137 | "FJComp-BB515-A" = 4000, 138 | "FJComp-BB660-P2-A" = 2000, 139 | "FJComp-BB700-A" = 4000, 140 | "FJComp-BB755-P-A" = 2000, 141 | "FJComp-BB790-P-A" = 4000, 142 | "FJComp-BUV395-A" = 1000, 143 | "FJComp-BUV496-A" = 2000, 144 | "FJComp-BUV563-A" = 3000, 145 | "FJComp-BUV615-A" = 1000, 146 | "FJComp-BUV661-A" = 2000, 147 | "FJComp-BUV737-A" = 2000, 148 | "FJComp-BUV805-A" = 2000, 149 | "FJComp-BV421-A" = 6000, 150 | "FJComp-BV480-A" = 3000, 151 | "FJComp-BV510-A" = 20000, 152 | "FJComp-BV570-A" = 3000, 153 | "FJComp-BV605-A" = 2000, 154 | "FJComp-BV650-A" = 2000, 155 | "FJComp-BV711-A" = 1000, 156 | "FJComp-BV750-A" = 3000, 157 | "FJComp-Nova Blue 530-A" = 1000, 158 | "FJComp-Nova Blue 585-A" = 4000, 159 | "FJComp-Nova Blue 610-A" = 3000, 160 | "FJComp-Nova Red 685-A" = 2000, 161 | "FJComp-Nova Yellow 690-A" = 5000, 162 | "FJComp-PE Fire 640-A" = 10000, 163 | "FJComp-PE Fire 810-A" = 2500, 164 | "FJComp-PE-A" = 12000, 165 | "FJComp-PE-Cy5-A" = 8000, 166 | "FJComp-PE-Cy5.5-A" = 8000, 167 | "FJComp-PE-Cy7-A" = 3000, 168 | "FJComp-PE-Dazzle594-A" = 3000, 169 | "FJComp-Pacific Blue-A" = 4000, 170 | "FJComp-Pacific Orange-A" = 5000, 171 | "FJComp-PerCP-A" = 6000, 172 | "FJComp-PerCP-eFluor 710-A" = 4000, 173 | "FJComp-Qdot 705-A" = 5000, 174 | "FJComp-SBUV445-A" = 1500, 175 | "FJComp-SBV515-A" = 2000, 176 | "FJComp-Super Bright 436-A" = 8000, 177 | "FJComp-Super Bright 780-A" = 8000, 178 | "FJComp-eFluor 660-A" = 3000 179 | ) 180 | 181 | fcs.condition.n <- length( fcs.condition ) 182 | fcs.channel.n <- length( fcs.channel ) 183 | 184 | 185 | # general parameters 186 | # Tip: edit the directory to match the location of the script files. 187 | fcs.src.dir <- "D:/Carlos tSNE script/00_src" 188 | 189 | # Tip: use today's date 190 | fcs.seed.base <- 20210608 191 | 192 | # Tip: Set to FALSE while optimizing the tSNE and FlowSOM. 193 | # Once you have a good run, set to TRUE and then change labels, colors, etc. 194 | fcs.use.cached.results <- FALSE 195 | 196 | fcs.sample.number.width <- 2 197 | fcs.event.number.width <- 6 198 | 199 | 200 | # graphics parameters 201 | 202 | fcs.color.pool <- c( 203 | brewer.pal( 8, "Set1" )[ -6 ], 204 | brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 205 | adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 206 | red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 207 | adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 208 | red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 209 | adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 210 | red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 211 | adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 212 | red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 213 | adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 214 | red.f = 0.3, green.f = 0.3, blue.f = 0.3 ), 215 | adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 216 | red.f = 0.3, green.f = 0.3, blue.f = 0.3 ) ) 217 | fcs.color.pool.n <- length( fcs.color.pool ) 218 | 219 | fcs.line.type.pool <- 1:6 220 | fcs.line.type.pool.n <- length( fcs.line.type.pool ) 221 | 222 | fcs.condition.color <- rep( 223 | fcs.color.pool, 224 | ceiling( fcs.condition.n / fcs.color.pool.n ) 225 | )[ 1 : fcs.condition.n ] 226 | names( fcs.condition.color ) <- fcs.condition 227 | 228 | fcs.condition.line.type <- rep( 229 | fcs.line.type.pool, 230 | ceiling( fcs.condition.n / fcs.line.type.pool.n ) 231 | )[ 1 : fcs.condition.n ] 232 | names( fcs.condition.line.type ) <- fcs.condition 233 | 234 | 235 | # density parameters 236 | # Tip: set to 50000 while you are setting the asinh scaling. 237 | # Once done, set to NULL and re-run the script on all the data. 238 | 239 | fcs.density.data.sample.n <- NULL 240 | 241 | fcs.density.partition.all <- "all" 242 | fcs.density.partition.all.label <- c( "all" = "All" ) 243 | fcs.density.partition.all.color <- c( "all" = "grey" ) 244 | 245 | fcs.density.font.size <- 4.5 246 | 247 | fcs.density.line.size <- 0.2 248 | fcs.density.line.alpha <- 0.3 249 | 250 | fcs.density.figure.width.base <- 0.4 251 | fcs.density.figure.height.base <- 0.1 252 | 253 | fcs.density.figure.dir <- "./figure_density" 254 | 255 | fcs.density.figure.sample <- "density_sample" 256 | fcs.density.figure.cluster <- "density_cluster" 257 | 258 | 259 | # cluster parameters 260 | # Tip: In general use the same number for fcs.cluster.n and fcs.flow.som.dim unless you know why to do otherwise. 261 | # Tip: use more clusters for more diverse cell collections. Use as few as possible to speed up processing. 262 | # For example, for a broad immune phenotyping panel use 40-50 clusters. 263 | # For pre-gated cell types (e.g., CD8s), use 10-12 clusters. 264 | fcs.cluster.n <- 40 265 | 266 | # Tip: naming clusters can be done automatically as clusters 1:n via the first command below. 267 | # To rename your clusters based on marker expression, insert a # before the first command, 268 | # remove the # before the second command, 269 | # and rename the clusters appropriately. 270 | fcs.cluster <- sprintf( "%02d", 1 : fcs.cluster.n ) 271 | #fcs.cluster <- c("B cells", "Naive CD4 T cells", "Activated CD4 T cells", 272 | # "Naive CD8 T cells", "Activated CD8 T cells", "Macrophages", 273 | # "Monocytes", "Neutrophils", "NK cells", 274 | # "cDCs", "pDCs", "Eosinophils") 275 | 276 | 277 | fcs.cluster.label <- sprintf( "Cluster-%s", fcs.cluster ) 278 | names( fcs.cluster.label ) <- fcs.cluster 279 | 280 | # Tip: this controls the grouping of the fcs files that are generated based on the flowSOM clustering. 281 | # If some of the clusters are related (e.g., CD14+ and CD16+ monocytes), 282 | # you might wish to group these into a single output folder. 283 | # If so, you may use a command such as "a" = c(1, 5, 7), 284 | # where the numbers are the numbered flowSOM clusters. 285 | fcs.cluster.group <- as.list(1:fcs.cluster.n) 286 | names(fcs.cluster.group) <- make.unique(rep(letters, length.out = fcs.cluster.n), sep='') 287 | 288 | fcs.cluster.color <- rep( 289 | fcs.color.pool, 290 | ceiling( fcs.cluster.n / fcs.color.pool.n ) 291 | )[ 1 : fcs.cluster.n ] 292 | names( fcs.cluster.color ) <- fcs.cluster 293 | 294 | fcs.cluster.line.type <- rep( 295 | fcs.line.type.pool, 296 | ceiling( fcs.cluster.n / fcs.line.type.pool.n ) 297 | )[ 1 : fcs.cluster.n ] 298 | names( fcs.cluster.line.type ) <- fcs.cluster 299 | 300 | fcs.flow.som.dim <- 40 301 | 302 | fcs.cluster.table.dir <- "./table_cluster" 303 | fcs.cluster.data.dir <- "./data_cluster" 304 | 305 | fcs.cluster.table.counts <- "cluster_counts" 306 | 307 | fcs.cluster.data <- "cluster_group" 308 | 309 | 310 | # heatmap parameters 311 | 312 | fcs.heatmap.palette.n <- 100 313 | fcs.heatmap.palette <- colorRampPalette( brewer.pal( 9, "YlOrRd" ) )( 314 | fcs.heatmap.palette.n ) 315 | 316 | fcs.heatmap.font.size <- 2.5 317 | 318 | fcs.heatmap.label.factor.row <- 1.4 319 | fcs.heatmap.label.factor.col <- 1.4 320 | 321 | fcs.heatmap.width <- 2500 322 | fcs.heatmap.height <- 2500 323 | 324 | fcs.heatmap.figure.dir <- "./figure_heatmap" 325 | 326 | fcs.heatmap.figure <- "heatmap" 327 | 328 | 329 | # histogram parameters 330 | 331 | fcs.histogram.font.size <- 7 332 | fcs.histogram.error.bar.size <- 0.2 333 | fcs.histogram.legend.key.size <- 0.7 334 | 335 | fcs.histogram.label.factor.height <- 0.05 336 | 337 | fcs.histogram.width <- 4.5 338 | fcs.histogram.height <- 3 339 | 340 | fcs.histogram.figure.dir <- "./figure_histogram" 341 | 342 | fcs.histogram.figure <- "histogram" 343 | 344 | 345 | # dimensionality reduction parameters 346 | # Tip: Use the third option in most cases. 347 | # More cells will take longer. 348 | # You might downsample to 50-100k cells at first, then run again with more cells if you like the result. 349 | fcs.dmrd.data.sample.n <- NULL 350 | fcs.dmrd.data.sample.n.per.condition <- 8297 351 | fcs.dmrd.data.sample.n.per.sample <- NULL 352 | 353 | fcs.dmrd.gradient.color <- c( "black", "blue", "green", "yellow", "red" ) 354 | fcs.dmrd.gradient.palette.n <- 100 355 | fcs.dmrd.density.palette <- colorRampPalette( fcs.dmrd.gradient.color )( 356 | fcs.dmrd.gradient.palette.n ) 357 | 358 | fcs.dmrd.color.alpha <- 0.3 359 | 360 | fcs.dmrd.group.title.size <- 8 361 | 362 | fcs.dmrd.legend.title.size <- 7 363 | fcs.dmrd.legend.label.size <- 7 364 | fcs.dmrd.legend.point.size <- 3 365 | 366 | fcs.dmrd.label.factor.width <- 0.1 367 | 368 | # Tip: set the number of rows in the output figures here. 369 | fcs.dmrd.figure.nrow <- 2 370 | fcs.dmrd.figure.ncol <- ceiling( fcs.condition.n / fcs.dmrd.figure.nrow ) 371 | 372 | fcs.dmrd.figure.width <- 3 373 | fcs.dmrd.figure.height <- 3 374 | 375 | 376 | # tsne parameters 377 | # Tip: more iterations take longer. 378 | # More iterations are needed for more cells 379 | # For a first look, try 1000. 380 | # For a final figure on ~100k cells, use 5000. 381 | # Tip 2: Set fcs.tsne.thread.n to 0 for max processing speed. 382 | # To enable you to do something else meanwhile, 383 | # set it to one or two less than the number of threads on your processor (check specs online). 384 | fcs.tsne.iter.n <- 5000 385 | fcs.tsne.thread.n <- 0 386 | 387 | # Tip: don't change this unless you know why 388 | fcs.tsne.perplexity <- 30 389 | fcs.tsne.theta <- 0.5 390 | fcs.tsne.exaggeration.factor <- 12 391 | 392 | fcs.tsne.figure.lims.factor <- 1.0 393 | fcs.tsne.figure.point.size <- 1.2 394 | 395 | fcs.tsne.figure.convergence.width <- 1200 396 | fcs.tsne.figure.convergence.height <- 800 397 | 398 | fcs.tsne.figure.dir <- "./figure_tsne" 399 | 400 | fcs.tsne.figure.convergence <- "tsne_convergence" 401 | fcs.tsne.figure.plot <- "tsne_plot" 402 | 403 | fcs.tsne.cache.file.path <- "./tsne_cache.dat" 404 | 405 | 406 | # umap parameters 407 | # Tip: you don't generally need to change the UMAP iterations 408 | fcs.umap.iter.n <- 1000 409 | 410 | fcs.umap.figure.lims.factor <- 0.8 411 | fcs.umap.figure.point.size <- 1.2 412 | 413 | fcs.umap.figure.dir <- "./figure_umap" 414 | 415 | fcs.umap.figure.plot <- "umap_plot" 416 | 417 | fcs.umap.cache.file.path <- "./umap_cache.dat" 418 | 419 | 420 | # cross-entropy test parameters 421 | # Tips: Set this depending on your RAM and number of groups. 422 | # You won't be able to analyze more than about 100k cells unless you have >32GB RAM. 423 | # The crossentropy test works best with at least 10k cells per group. 424 | # Multiple hypothesis testing will greatly reduce your ability to distinguish statistical differences. 425 | fcs.ce.diff.prob.sample.n <- NULL 426 | 427 | # Tip: set to "ks" unless you have a good statistical reason for using rank testing. 428 | # In that case, use "rank" and "median". 429 | fcs.ce.diff.base.test <- "ks" 430 | fcs.ce.diff.base.dist <- "ks" 431 | 432 | fcs.ce.diff.test.alpha <- 0.05 433 | 434 | fcs.ce.diff.figure.font.size <- 2 435 | fcs.ce.diff.figure.line.width <- 3 436 | 437 | fcs.ce.diff.figure.cdf.resolution <- 500 438 | fcs.ce.diff.figure.cdf.all.color <- "black" 439 | fcs.ce.diff.figure.cdf.all.label <- "All" 440 | 441 | fcs.ce.diff.figure.dendrogram.weight.condition <- 1 : fcs.condition.n 442 | names( fcs.ce.diff.figure.dendrogram.weight.condition ) <- fcs.condition 443 | 444 | fcs.ce.diff.figure.dendrogram.weight.cluster <- 1 : fcs.cluster.n 445 | names( fcs.ce.diff.figure.dendrogram.weight.cluster ) <- fcs.cluster 446 | 447 | fcs.ce.diff.figure.cdf.width <- 1200 448 | fcs.ce.diff.figure.cdf.height <- 800 449 | 450 | fcs.ce.diff.figure.dendrogram.width <- 2000 451 | fcs.ce.diff.figure.dendrogram.height <- 800 452 | 453 | 454 | # cross-entropy test parameters for tsne 455 | 456 | fcs.ce.diff.tsne.perplexity.factor <- 3 457 | 458 | fcs.ce.diff.tsne.figure.dir <- "./figure_tsne_ce_diff" 459 | 460 | fcs.ce.diff.tsne.figure.cdf <- "tsne_ce_diff_cdf" 461 | fcs.ce.diff.tsne.figure.dendrogram <- "tsne_ce_diff_dendrogram" 462 | fcs.ce.diff.tsne.result <- "tsne_ce_diff_result" 463 | 464 | fcs.ce.diff.tsne.cache.file.path <- "./tsne_ce_diff_cache.dat" 465 | 466 | 467 | # cross-entropy test parameters for umap 468 | 469 | fcs.ce.diff.umap.figure.dir <- "./figure_umap_ce_diff" 470 | 471 | fcs.ce.diff.umap.figure.cdf <- "umap_ce_diff_cdf" 472 | fcs.ce.diff.umap.figure.dendrogram <- "umap_ce_diff_dendrogram" 473 | fcs.ce.diff.umap.result <- "umap_ce_diff_result" 474 | 475 | fcs.ce.diff.umap.cache.file.path <- "./umap_ce_diff_cache.dat" 476 | 477 | -------------------------------------------------------------------------------- /flow analysis/ce_diff_test.r: -------------------------------------------------------------------------------- 1 | 2 | # calculates test on differences of cross-entropy 3 | 4 | ce.diff.test <- function( 5 | cross.entropy, 6 | event.partition, 7 | partition.label, partition.color, partition.line.type, 8 | base.test, base.dist, 9 | dendrogram.order.weight, 10 | result, cdf.figure, dendrogram.figure 11 | ) 12 | { 13 | partition <- levels( event.partition ) 14 | partition.n <- length( partition ) 15 | 16 | cross.entropy.split <- split( cross.entropy, event.partition ) 17 | 18 | if ( base.test == "ks" ) 19 | { 20 | if ( partition.n == 2 ) 21 | { 22 | ks.test.res <- ks.test( cross.entropy.split[[ 1 ]], 23 | cross.entropy.split[[ 2 ]] ) 24 | 25 | test.res <- list( ks.single = ks.test.res ) 26 | } 27 | else if ( partition.n > 2 ) 28 | { 29 | comparison.n <- partition.n * ( partition.n - 1 ) / 2 30 | 31 | ks.pair <- vector( "list", comparison.n ) 32 | comparison <- character( comparison.n ) 33 | D.stat <- numeric( comparison.n ) 34 | p.value <- numeric( comparison.n ) 35 | 36 | k <- 1 37 | 38 | for ( i in 1 : ( partition.n - 1 ) ) 39 | for ( j in (i+1) : partition.n ) 40 | { 41 | ks.pair[[ k ]] <- ks.test( cross.entropy.split[[ i ]], 42 | cross.entropy.split[[ j ]] ) 43 | 44 | comparison[ k ] <- sprintf( "%s - %s", 45 | partition.label[ partition[ i ] ], 46 | partition.label[ partition[ j ] ] ) 47 | 48 | D.stat[ k ] <- ks.pair[[ k ]]$statistic 49 | 50 | p.value[ k ] <- ks.pair[[ k ]]$p.value 51 | 52 | k <- k + 1 53 | } 54 | 55 | p.value.adj <- p.adjust( p.value, "holm" ) 56 | 57 | test.res <- list( ks.multiple = list( ks.pair = ks.pair, 58 | comparison = comparison, D.stat = D.stat, 59 | p.value = p.value, p.value.adj = p.value.adj ) ) 60 | } 61 | else 62 | stop( "no partitions for testing cross-entropy differences" ) 63 | } 64 | else if ( base.test == "rank" ) 65 | { 66 | if ( partition.n == 2 ) 67 | { 68 | wilcox.test.res <- wilcox.test( cross.entropy.split[[ 1 ]], 69 | cross.entropy.split[[ 2 ]] ) 70 | 71 | test.res <- list( rank.single = wilcox.test.res ) 72 | } 73 | else if ( partition.n > 2 ) 74 | { 75 | kruskal.test.res <- kruskal.test( cross.entropy, 76 | event.partition ) 77 | 78 | dunn.test.res <- dunn.test( cross.entropy, event.partition, 79 | method = "holm", alpha = fcs.ce.diff.test.alpha, altp = TRUE, 80 | kw = FALSE, table = FALSE, list = TRUE ) 81 | 82 | test.res <- list( rank.multiple = list( 83 | kruskal = kruskal.test.res, dunn = dunn.test.res ) ) 84 | } 85 | else 86 | stop( "no partitions for testing cross-entropy differences" ) 87 | } 88 | else 89 | stop( "wrong base test for cross-entropy differences" ) 90 | 91 | if ( ! is.null( result ) ) 92 | { 93 | result.file <- file( result, "w" ) 94 | sink( result.file ) 95 | 96 | tr.name <- names( test.res ) 97 | stopifnot( length( tr.name ) == 1 ) 98 | 99 | if ( tr.name == "ks.single" ) 100 | { 101 | cat( "\n** Kolmogorov-Smirnov test\n") 102 | 103 | print( test.res$ks.single ) 104 | } 105 | else if ( tr.name == "ks.multiple" ) 106 | { 107 | cat( "\n** Multiple Kolmogorov-Smirnov tests with Holm correction\n\n") 108 | 109 | comparison.width <- max( nchar( test.res$ks.multiple$comparison ) ) 110 | 111 | for ( i in 1 : length( test.res$ks.multiple$comparison ) ) 112 | cat( sprintf( "%-*s\t\tD = %g\t\tpv = %g\t\tadj-pv = %g\n", 113 | comparison.width, 114 | test.res$ks.multiple$comparison[ i ], 115 | test.res$ks.multiple$D.stat[ i ], 116 | test.res$ks.multiple$p.value[ i ], 117 | test.res$ks.multiple$p.value.adj[ i ] ) ) 118 | } 119 | else if ( tr.name == "rank.single" ) 120 | { 121 | cat( "\n** Wilcoxon rank sum test\n") 122 | 123 | print( test.res$rank.single ) 124 | } 125 | else if ( tr.name == "rank.multiple" ) 126 | { 127 | cat( "\n** Kruskal-Wallis rank sum test\n") 128 | 129 | print( test.res$rank.multiple$kruskal ) 130 | 131 | cat( "\n** Dunn post-hoc test with Holm correction\n\n") 132 | 133 | comparison.width <- max( nchar( 134 | test.res$rank.multiple$dunn$comparison ) ) 135 | 136 | for ( i in 1 : length( test.res$rank.multiple$dunn$comparisons ) ) 137 | cat( sprintf( "%-*s\t\tZ = %g\t\tpv = %g\t\tadj-pv = %g\n", 138 | comparison.width, 139 | test.res$rank.multiple$dunn$comparisons[ i ], 140 | test.res$rank.multiple$dunn$Z[ i ], 141 | test.res$rank.multiple$dunn$altP[ i ], 142 | test.res$rank.multiple$dunn$altP.adjusted[ i ] ) ) 143 | } 144 | else 145 | { 146 | sink() 147 | close( result.file ) 148 | stop( "unknown test in ce-diff result" ) 149 | } 150 | 151 | sink() 152 | close( result.file ) 153 | } 154 | 155 | if ( ! is.null( cdf.figure ) ) 156 | { 157 | if ( is.null( partition.label ) ) 158 | partition.label = partition 159 | 160 | if ( is.null( partition.color ) ) 161 | partition.color <- rainbow( partition.n ) 162 | 163 | if ( is.null( partition.line.type ) ) 164 | partition.line.type <- rep( 1, partition.n ) 165 | 166 | png( filename = cdf.figure, width = fcs.ce.diff.figure.cdf.width, 167 | height = fcs.ce.diff.figure.cdf.height ) 168 | 169 | par( mar = c( 5.5, 6, 2, 1.5 ) ) 170 | 171 | plot( ecdf( cross.entropy ), ylim = c( 0, 1 ), 172 | xlab = "Cross-entropy", ylab = "CDF", main = "", 173 | cex.lab = 3, cex.axis = 2.5, 174 | col = fcs.ce.diff.figure.cdf.all.color, 175 | lwd = fcs.ce.diff.figure.line.width - 1, do.points = FALSE ) 176 | 177 | for ( pall in partition ) 178 | { 179 | ces <- cross.entropy.split[[ pall ]] 180 | ces.n <- length( ces ) 181 | 182 | if ( ces.n < fcs.ce.diff.figure.cdf.resolution ) { 183 | plot( ecdf( ces ), col = partition.color[ pall ], 184 | lty = partition.line.type[ pall ], 185 | lwd = fcs.ce.diff.figure.line.width, 186 | do.points = FALSE, add = TRUE ) 187 | } 188 | else { 189 | ecdf.x <- sort( ces ) 190 | ecdf.y <- 1 : ces.n / ces.n 191 | 192 | lines( ecdf.x, ecdf.y, col = partition.color[ pall ], 193 | lty = partition.line.type[ pall ], 194 | lwd = fcs.ce.diff.figure.line.width ) 195 | } 196 | } 197 | 198 | legend( "bottomright", 199 | legend = c( fcs.ce.diff.figure.cdf.all.label, partition.label ), 200 | col = c( fcs.ce.diff.figure.cdf.all.color, partition.color ), 201 | lty = partition.line.type, 202 | lwd = fcs.ce.diff.figure.line.width, 203 | cex = fcs.ce.diff.figure.font.size ) 204 | 205 | dev.off() 206 | } 207 | 208 | if ( ! is.null( dendrogram.figure ) && partition.n > 2 ) 209 | { 210 | cross.entropy.dist <- matrix( 0, nrow = partition.n, 211 | ncol = partition.n ) 212 | 213 | for ( i in 1 : ( partition.n - 1 ) ) 214 | for ( j in (i+1) : partition.n ) 215 | { 216 | if ( base.dist == "ks" ) 217 | cross.entropy.dist[ i, j ] <- ks.test( 218 | cross.entropy.split[[ i ]], 219 | cross.entropy.split[[ j ]] 220 | )$statistic 221 | else if ( base.dist == "median" ) 222 | cross.entropy.dist[ i, j ] <- abs( 223 | median( cross.entropy.split[[ i ]] ) - 224 | median( cross.entropy.split[[ j ]] ) 225 | ) 226 | else 227 | stop( "wrong base dist for cross-entropy differences" ) 228 | 229 | cross.entropy.dist[ j, i ] <- cross.entropy.dist[ i, j ] 230 | } 231 | 232 | cross.entropy.hclust <- hclust( as.dist( cross.entropy.dist ) ) 233 | 234 | if ( ! is.null( dendrogram.order.weight ) ) 235 | cross.entropy.hclust <- as.hclust( reorder( 236 | as.dendrogram( cross.entropy.hclust ), 237 | dendrogram.order.weight, 238 | agglo.FUN = mean 239 | ) ) 240 | 241 | if ( is.null( partition.label ) ) 242 | partition.label = partition 243 | 244 | png( filename = dendrogram.figure, 245 | width = fcs.ce.diff.figure.dendrogram.width, 246 | height = fcs.ce.diff.figure.dendrogram.height ) 247 | 248 | par( mar = c( 5, 5.6, 4, 1.4 ) ) 249 | 250 | plot( cross.entropy.hclust, 251 | labels = partition.label, hang = -1, 252 | xlab = "", ylab = "", main = "", sub = "", cex.axis = 3, 253 | cex = fcs.ce.diff.figure.font.size ) 254 | 255 | dev.off() 256 | } 257 | 258 | test.res 259 | } 260 | 261 | -------------------------------------------------------------------------------- /flow analysis/ce_diff_test_tsne.r: -------------------------------------------------------------------------------- 1 | 2 | # calculates cross-entropy for tsne plots and calls ce.diff.test 3 | 4 | 5 | ce.diff.test.tsne <- function( 6 | orig.data, tsne.data, 7 | event.partition, 8 | partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 9 | base.test = "ks", base.dist = "ks", 10 | prob.sample.n = NULL, dendrogram.order.weight = NULL, 11 | result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 12 | ) 13 | { 14 | stopifnot( nrow( orig.data ) == nrow( tsne.data ) && 15 | nrow( orig.data ) == length( event.partition ) ) 16 | 17 | data.n <- nrow( orig.data ) 18 | 19 | if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n ) 20 | prob.sample.idx <- sample( data.n, prob.sample.n ) 21 | else 22 | prob.sample.idx <- 1 : data.n 23 | 24 | if ( fcs.use.cached.results && 25 | file.exists( fcs.ce.diff.tsne.cache.file.path ) ) 26 | { 27 | cat( "Using cached results for probability\n" ) 28 | 29 | load( fcs.ce.diff.tsne.cache.file.path ) 30 | } 31 | else 32 | { 33 | cat( "Calculating probability\n" ) 34 | 35 | # sampling here temporary, until optimizing dist( tsne.dat ) below 36 | orig.tsne.prob <- calculate.probability.tsne( 37 | orig.data[ prob.sample.idx, ], 38 | tsne.data[ prob.sample.idx, ] 39 | ) 40 | 41 | save( orig.tsne.prob, file = fcs.ce.diff.tsne.cache.file.path ) 42 | } 43 | 44 | cross.entropy.all <- calculate.cross.entropy( orig.tsne.prob$orig, 45 | orig.tsne.prob$tsne ) 46 | 47 | event.partition.all <- event.partition[ prob.sample.idx ] 48 | 49 | ce.diff.test( 50 | cross.entropy.all, 51 | event.partition.all, 52 | partition.label, partition.color, partition.line.type, 53 | base.test, base.dist, 54 | dendrogram.order.weight, 55 | result, cdf.figure, dendrogram.figure 56 | ) 57 | } 58 | 59 | 60 | calculate.probability.tsne <- function( orig.dat, tsne.dat ) 61 | { 62 | orig.dat.n <- nrow( orig.dat ) 63 | tsne.dat.n <- nrow( tsne.dat ) 64 | 65 | stopifnot( orig.dat.n == tsne.dat.n ) 66 | 67 | # find nearest neighbors in original space and their distances 68 | 69 | orig.dat.nn2 <- nn2( normalize_input( orig.dat ), 70 | k = fcs.ce.diff.tsne.perplexity.factor * fcs.tsne.perplexity + 1 ) 71 | 72 | orig.dat.self.idx <- sapply( 1 : orig.dat.n, function( ri ) { 73 | ri.idx <- which( orig.dat.nn2$nn.idx[ ri, ] == ri ) 74 | ifelse( length( ri.idx ) == 1, ri.idx, NA ) 75 | } ) 76 | 77 | stopifnot( ! is.na( orig.dat.self.idx ) ) 78 | 79 | orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 80 | orig.dat.nn2$nn.idx[ ri, - orig.dat.self.idx[ ri ] ] ) ) 81 | 82 | orig.dist2 <- t( sapply( 1 : orig.dat.n, function( ri ) 83 | orig.dat.nn2$nn.dists[ ri, - orig.dat.self.idx[ ri ] ]^2 ) ) 84 | 85 | # calculate probabilities associated to distances in original space 86 | 87 | orig.stdev <- apply( orig.dist2, 1, function( dd2 ) { 88 | tsne.perplexity.error <- function( ss, dd2 ) { 89 | p <- exp( - dd2 / (2*ss^2) ) 90 | if ( sum( p ) < .Machine$double.eps ) 91 | p <- 1 92 | p <- p / sum( p ) 93 | p <- p[ p > 0 ] 94 | 2^( - sum( p * log2( p ) ) ) - fcs.tsne.perplexity 95 | } 96 | 97 | dd2.min.idx <- 1 98 | dd2.ascen <- sort( dd2 ) 99 | while( dd2.ascen[ dd2.min.idx ] == 0 ) 100 | dd2.min.idx <- dd2.min.idx + 1 101 | ss.lower <- dd2.ascen[ dd2.min.idx ] 102 | 103 | dd2.max.idx <- 1 104 | dd2.descen <- sort( dd2, decreasing = TRUE ) 105 | while( is.infinite( dd2.descen[ dd2.max.idx ] ) ) 106 | dd2.max.idx <- dd2.max.idx + 1 107 | ss.upper <- dd2.descen[ dd2.max.idx ] 108 | 109 | while( tsne.perplexity.error( ss.upper, dd2 ) < 0 ) 110 | { 111 | ss.lower <- ss.upper 112 | ss.upper <- 2 * ss.upper 113 | } 114 | 115 | while( tsne.perplexity.error( ss.lower, dd2 ) > 0 ) 116 | { 117 | ss.upper <- ss.lower 118 | ss.lower <- ss.lower / 2 119 | } 120 | 121 | uniroot( tsne.perplexity.error, dd2, 122 | interval = c( ss.lower, ss.upper ), 123 | tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root 124 | } ) 125 | 126 | orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) { 127 | p <- exp( - orig.dist2[ i, ] / ( 2 * orig.stdev[ i ]^2 ) ) 128 | p / sum( p ) 129 | } ) ) 130 | 131 | # symmetrize probabilities in original space 132 | 133 | for ( i in 1 : orig.dat.n ) 134 | for ( j2 in 1 : length( orig.neigh[ i, ] ) ) 135 | { 136 | j <- orig.neigh[ i, j2 ] 137 | 138 | i2 <- match( i, orig.neigh[ j, ] ) 139 | 140 | if ( ! is.na( i2 ) ) 141 | { 142 | if ( j > i ) 143 | { 144 | sym.prob <- ( orig.prob[ i, j2 ] + orig.prob[ j, i2 ] ) / 2 145 | orig.prob[ i, j2 ] <- sym.prob 146 | orig.prob[ j, i2 ] <- sym.prob 147 | } 148 | } 149 | else 150 | orig.prob[ i, j2 ] <- orig.prob[ i, j2 ] / 2 151 | } 152 | 153 | orig.prob <- sweep( orig.prob, 1, rowSums( orig.prob ), "/" ) 154 | 155 | # get distances in tsne space for closest neighbors in original space 156 | 157 | tsne.dist2 <- t( sapply( 1 : tsne.dat.n, function( i ) 158 | sapply( orig.neigh[ i, ], function( j ) 159 | sum( ( tsne.dat[ i, ] - tsne.dat[ j, ] )^2 ) 160 | ) 161 | ) ) 162 | 163 | # calculate probabilities associated to distances in tsne representation 164 | 165 | tsne.prob.factor <- tsne.dat.n / 166 | ( 2 * sum( 1 / ( 1 + dist( tsne.dat )^2 ) ) ) 167 | 168 | tsne.prob <- t( apply( tsne.dist2, 1, function( dd2 ) 169 | p <- tsne.prob.factor / ( 1 + dd2 ) 170 | ) ) 171 | 172 | list( orig = orig.prob, tsne = tsne.prob ) 173 | } 174 | 175 | 176 | calculate.cross.entropy <- function( prim.prob, secd.prob ) 177 | { 178 | prim.prob.n <- nrow( prim.prob ) 179 | secd.prob.n <- nrow( secd.prob ) 180 | 181 | prim.prob.m <- ncol( prim.prob ) 182 | secd.prob.m <- ncol( secd.prob ) 183 | 184 | stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m ) 185 | 186 | sapply( 1 : prim.prob.n, function( i ) 187 | - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) ) 188 | ) 189 | } 190 | 191 | -------------------------------------------------------------------------------- /flow analysis/ce_diff_test_umap.r: -------------------------------------------------------------------------------- 1 | 2 | # calculates cross-entropy for umap plots and calls ce.diff.test 3 | 4 | 5 | ce.diff.test.umap <- function( 6 | orig.dist, orig.knn, umap.data, umap.param, 7 | event.partition, 8 | partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 9 | base.test = "ks", base.dist = "ks", 10 | prob.sample.n = NULL, dendrogram.order.weight = NULL, 11 | result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 12 | ) 13 | { 14 | stopifnot( nrow( orig.dist ) == nrow( umap.data ) && 15 | nrow( orig.dist ) == length( event.partition ) ) 16 | 17 | data.n <- nrow( orig.dist ) 18 | 19 | if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n ) 20 | prob.sample.idx <- sample( data.n, prob.sample.n ) 21 | else 22 | prob.sample.idx <- 1 : data.n 23 | 24 | if ( fcs.use.cached.results && 25 | file.exists( fcs.ce.diff.umap.cache.file.path ) ) 26 | { 27 | cat( "Using cached results for probability\n" ) 28 | 29 | load( fcs.ce.diff.umap.cache.file.path ) 30 | } 31 | else 32 | { 33 | cat( "Calculating probability\n" ) 34 | 35 | orig.umap.prob <- calculate.probability.umap( orig.dist, orig.knn, 36 | umap.data, umap.param ) 37 | 38 | save( orig.umap.prob, file = fcs.ce.diff.umap.cache.file.path ) 39 | } 40 | 41 | cross.entropy.all <- calculate.fuzzy.cross.entropy( 42 | orig.umap.prob$orig[ prob.sample.idx, ], 43 | orig.umap.prob$umap[ prob.sample.idx, ] 44 | ) 45 | 46 | event.partition.all <- event.partition[ prob.sample.idx ] 47 | 48 | ce.diff.test( 49 | cross.entropy.all, 50 | event.partition.all, 51 | partition.label, partition.color, partition.line.type, 52 | base.test, base.dist, 53 | dendrogram.order.weight, 54 | result, cdf.figure, dendrogram.figure 55 | ) 56 | } 57 | 58 | 59 | calculate.probability.umap <- function( orig.dis, orig.kn, umap.dat, 60 | umap.param ) 61 | { 62 | orig.dat.n <- nrow( orig.dis ) 63 | umap.dat.n <- nrow( umap.dat ) 64 | 65 | stopifnot( orig.dat.n == umap.dat.n ) 66 | 67 | # get nearest neighbors in original space and their distances 68 | 69 | orig.self.idx <- sapply( 1 : orig.dat.n, function( ri ) { 70 | ri.idx <- which( orig.kn[ ri, ] == ri ) 71 | ifelse( length( ri.idx ) == 1, ri.idx, NA ) 72 | } ) 73 | 74 | stopifnot( ! is.na( orig.self.idx ) ) 75 | 76 | orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 77 | orig.kn[ ri, - orig.self.idx[ ri ] ] ) ) 78 | 79 | orig.dis.reduc <- t( sapply( 1 : orig.dat.n, function( ri ) 80 | orig.dis[ ri, - orig.self.idx[ ri ] ] ) ) 81 | 82 | # calculate probabilities associated to distances in original space 83 | 84 | umap.sigma <- apply( orig.dis.reduc, 1, function( dd ) { 85 | umap.sigma.error <- function( ss, dd ) { 86 | p <- exp( - pmax( 0, dd - min( dd ) ) / ss ) 87 | sum( p ) - log2( length( p ) ) 88 | } 89 | 90 | dd.ascen <- sort( dd ) 91 | dd.ascen <- dd.ascen[ dd.ascen > 0 ] 92 | ss.lower <- dd.ascen[ 1 ] 93 | 94 | dd.descen <- sort( dd, decreasing = TRUE ) 95 | dd.descen <- dd.descen[ ! is.infinite( dd.descen ) ] 96 | ss.upper <- dd.descen[ 1 ] 97 | 98 | while( umap.sigma.error( ss.upper, dd ) < 0 ) 99 | { 100 | ss.lower <- ss.upper 101 | ss.upper <- 2 * ss.upper 102 | } 103 | 104 | while( umap.sigma.error( ss.lower, dd ) > 0 ) 105 | { 106 | ss.upper <- ss.lower 107 | ss.lower <- ss.lower / 2 108 | } 109 | 110 | uniroot( umap.sigma.error, dd, 111 | interval = c( ss.lower, ss.upper ), 112 | tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root 113 | } ) 114 | 115 | orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) 116 | exp( - pmax( 0, orig.dis.reduc[ i, ] - min( orig.dis.reduc[ i, ] ) ) / 117 | umap.sigma[ i ] ) 118 | ) ) 119 | 120 | # symmetrize probabilities in original space 121 | 122 | for ( i in 1 : orig.dat.n ) 123 | for ( j2 in 1 : length( orig.neigh[ i, ] ) ) 124 | { 125 | j <- orig.neigh[ i, j2 ] 126 | 127 | i2 <- match( i, orig.neigh[ j, ] ) 128 | 129 | if ( ! is.na( i2 ) ) 130 | { 131 | if ( j > i ) 132 | { 133 | sym.prob <- orig.prob[ i, j2 ] + orig.prob[ j, i2 ] - 134 | orig.prob[ i, j2 ] * orig.prob[ j, i2 ] 135 | orig.prob[ i, j2 ] <- sym.prob 136 | orig.prob[ j, i2 ] <- sym.prob 137 | } 138 | } 139 | } 140 | 141 | # get distances in umap space for closest neighbors in original space 142 | 143 | umap.dist2 <- t( sapply( 1 : umap.dat.n, function( i ) 144 | sapply( orig.neigh[ i, ], function( j ) 145 | sum( ( umap.dat[ i, ] - umap.dat[ j, ] )^2 ) 146 | ) 147 | ) ) 148 | 149 | # calculate probabilities associated to distances in umap representation 150 | 151 | umap.a <- umap.param$a 152 | umap.b <- umap.param$b 153 | 154 | umap.prob <- t( apply( umap.dist2, 1, function( dd2 ) 155 | p <- 1 / ( 1 + umap.a * dd2 ^ umap.b ) 156 | ) ) 157 | 158 | list( orig = orig.prob, umap = umap.prob ) 159 | } 160 | 161 | 162 | calculate.fuzzy.cross.entropy <- function( prim.prob, secd.prob ) 163 | { 164 | prim.prob.n <- nrow( prim.prob ) 165 | secd.prob.n <- nrow( secd.prob ) 166 | 167 | prim.prob.m <- ncol( prim.prob ) 168 | secd.prob.m <- ncol( secd.prob ) 169 | 170 | stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m ) 171 | 172 | sapply( 1 : prim.prob.n, function( i ) 173 | - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) + 174 | ( 1 - prim.prob[ i, ] ) * log( 1 - secd.prob[ i, ] ) ) 175 | ) 176 | } 177 | 178 | -------------------------------------------------------------------------------- /flow analysis/get_channels.R: -------------------------------------------------------------------------------- 1 | # get channels script 2 | # run this to get the channel names for the flowcytoscript parameter file 3 | 4 | library(flowCore) 5 | library(dplyr) 6 | library( RColorBrewer ) 7 | 8 | # source parameters 9 | 10 | param.filename <- "./analyze_flow_cytometry_parameter.r" 11 | 12 | source( param.filename ) 13 | 14 | flowFrame <- read.FCS(list.files(fcs.data.dir, "\\.fcs$", full.names = TRUE)[1], truncate_max_range = FALSE) 15 | 16 | channels <- data.frame(name = unname(pData(parameters(flowFrame))$name), 17 | desc = unname(pData(parameters(flowFrame))$desc)) 18 | 19 | for(i in 1:dim(channels)[1] ){ 20 | channels$out1[i] = paste0("\"", channels$name[i], "\", #", channels$desc[i], "\n") 21 | channels$out2[i] = paste0("\"", channels$name[i], "\" = \"", channels$desc[i], "\",\n") 22 | channels$out3[i] = paste0("\"", channels$name[i], "\" = 200, #", channels$desc[i], "\n") 23 | } 24 | 25 | descs.filtered <- channels$desc[!is.na(channels$desc) & channels$desc != '-'] 26 | channels.filtered <- filter(channels, desc %in% descs.filtered) 27 | cat('"', paste0(channels.filtered$desc, collapse = '","'), '"', sep = "") 28 | 29 | # copy the previous output into the channels.of.interest and delete unnecessary markers 30 | 31 | channels.of.interest = c("Bcl-6","Ki67","CD95", 32 | "Ly-6C","CD127","GATA-3", 33 | "CD62L","CXCR5","CD25","CD44","ICOS","RORgT","PD-1","CXCR3", 34 | "CXCR4","CD86", 35 | "T-bet","IRF4","GL7","CD69") 36 | 37 | 38 | # add the output to fcs.channel in the parameter file 39 | temp <- channels$out1[channels$desc %in% channels.of.interest] 40 | temp[length(temp)] <- gsub(',', '', temp[length(temp)]) 41 | cat(paste(temp, collapse = "")) 42 | 43 | # add the output to fcs.channel.label in the parameter file 44 | temp <- channels$out2[channels$desc %in% channels.of.interest] 45 | temp[length(temp)] <- gsub(',', '', temp[length(temp)]) 46 | cat(paste(temp, collapse = "")) 47 | 48 | # add the output to fcs.channel.asinh.scale in the parameter file 49 | temp <- channels$out3[channels$desc %in% channels.of.interest] 50 | temp[length(temp)] <- gsub(',', '', temp[length(temp)]) 51 | cat(paste(temp, collapse = "")) 52 | 53 | # save the analyze_flow_cytometry_parameter.r file 54 | # and run the analyze_flow_cytometry.r script -------------------------------------------------------------------------------- /flow analysis/plot_all_dmrd_figures.r: -------------------------------------------------------------------------------- 1 | 2 | # plots all dimensionality reduction figures 3 | 4 | 5 | plot.all.dmrd.figures <- function( 6 | redu.data, redu.data.max, 7 | redu.figure.lims.factor, redu.figure.point.size, 8 | redu.figure.dir, redu.figure.plot, 9 | dmrd.data, dmrd.event.cluster, dmrd.event.condition 10 | ) 11 | { 12 | redu.lims <- redu.figure.lims.factor * redu.data.max * c( -1, 1 ) 13 | 14 | the.dmrd.figure.width <- fcs.dmrd.figure.width + 15 | fcs.dmrd.label.factor.width * 16 | max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 17 | nchar( fcs.cluster.label ) ) 18 | 19 | the.dmrd.figure.width.multi <- 20 | fcs.dmrd.figure.ncol * fcs.dmrd.figure.width + 21 | fcs.dmrd.label.factor.width * 22 | max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 23 | nchar( fcs.cluster.label ) ) 24 | 25 | the.dmrd.figure.height <- fcs.dmrd.figure.height 26 | 27 | the.dmrd.figure.height.multi <- 28 | fcs.dmrd.figure.nrow * fcs.dmrd.figure.height 29 | 30 | # plot all events colored by cluster 31 | 32 | redu.plot <- plot.dimensionality.reduction( 33 | redu.data[ , 1 ], redu.data[ , 2 ], 34 | event.partition = dmrd.event.cluster, 35 | partition.label = fcs.cluster.label, 36 | partition.color = fcs.cluster.color, 37 | dmrd.lims = redu.lims, 38 | point.size = redu.figure.point.size, 39 | show.guide = TRUE 40 | ) 41 | 42 | ggsave( 43 | file.path( redu.figure.dir, 44 | sprintf( "%s_all_events__cluster.png", redu.figure.plot ) ), 45 | redu.plot, 46 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 47 | ) 48 | 49 | # plot all events colored by condition 50 | 51 | redu.plot <- plot.dimensionality.reduction( 52 | redu.data[ , 1 ], redu.data[ , 2 ], 53 | event.partition = dmrd.event.condition, 54 | partition.label = fcs.condition.label, 55 | partition.color = adjustcolor( fcs.condition.color, 56 | alpha.f = fcs.dmrd.color.alpha ), 57 | dmrd.lims = redu.lims, 58 | point.size = redu.figure.point.size, 59 | show.guide = TRUE 60 | ) 61 | 62 | ggsave( 63 | file.path( redu.figure.dir, 64 | sprintf( "%s_all_events__condition.png", redu.figure.plot ) ), 65 | redu.plot, 66 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 67 | ) 68 | 69 | # plot all events colored by each marker level 70 | 71 | for ( fch in fcs.channel ) 72 | { 73 | dmrd.event.level <- dmrd.data[ , fch ] 74 | 75 | redu.plot <- plot.dimensionality.reduction( 76 | redu.data[ , 1 ], redu.data[ , 2 ], 77 | event.level = dmrd.event.level, 78 | dmrd.lims = redu.lims, 79 | point.size = redu.figure.point.size, 80 | show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 81 | ) 82 | 83 | ggsave( 84 | file.path( redu.figure.dir, 85 | sprintf( "%s_all_events__%s.png", redu.figure.plot, 86 | fcs.channel.label[ fch ] ) ), 87 | redu.plot, 88 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 89 | ) 90 | } 91 | 92 | # plot all conditions colored by cluster 93 | 94 | redu.plot <- plot.dimensionality.reduction( 95 | redu.data[ , 1 ], redu.data[ , 2 ], 96 | event.group = dmrd.event.condition, 97 | group.label = fcs.condition.label, 98 | event.partition = dmrd.event.cluster, 99 | partition.label = fcs.cluster.label, 100 | partition.color = fcs.cluster.color, 101 | dmrd.lims = redu.lims, 102 | dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 103 | point.size = redu.figure.point.size, 104 | show.guide = TRUE 105 | ) 106 | 107 | ggsave( 108 | file.path( redu.figure.dir, 109 | sprintf( "%s_all_conditions__cluster.png", redu.figure.plot ) ), 110 | redu.plot, 111 | width = the.dmrd.figure.width.multi, 112 | height = the.dmrd.figure.height.multi 113 | ) 114 | 115 | # plot all conditions colored by each marker level 116 | 117 | for ( fch in fcs.channel ) 118 | { 119 | dmrd.event.level <- dmrd.data[ , fch ] 120 | 121 | redu.plot <- plot.dimensionality.reduction( 122 | redu.data[ , 1 ], redu.data[ , 2 ], 123 | event.group = dmrd.event.condition, 124 | group.label = fcs.condition.label, 125 | event.level = dmrd.event.level, 126 | dmrd.lims = redu.lims, 127 | dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 128 | point.size = redu.figure.point.size, 129 | show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 130 | ) 131 | 132 | ggsave( 133 | file.path( redu.figure.dir, 134 | sprintf( "%s_all_conditions__%s.png", redu.figure.plot, 135 | fcs.channel.label[ fch ] ) ), 136 | redu.plot, 137 | width = the.dmrd.figure.width.multi, 138 | height = the.dmrd.figure.height.multi 139 | ) 140 | } 141 | 142 | # plot each condition colored by cluster 143 | 144 | for ( cond in fcs.condition ) 145 | { 146 | dmrd.cond.idx <- which( dmrd.event.condition == cond ) 147 | 148 | redu.cond.data <- redu.data[ dmrd.cond.idx, ] 149 | dmrd.cond.event.cluster <- dmrd.event.cluster[ dmrd.cond.idx ] 150 | 151 | redu.plot <- plot.dimensionality.reduction( 152 | redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 153 | event.partition = dmrd.cond.event.cluster, 154 | partition.label = fcs.cluster.label, 155 | partition.color = fcs.cluster.color, 156 | dmrd.lims = redu.lims, 157 | point.size = redu.figure.point.size, 158 | show.guide = TRUE 159 | ) 160 | 161 | ggsave( 162 | file.path( redu.figure.dir, 163 | sprintf( "%s_%s__cluster.png", redu.figure.plot, 164 | fcs.condition.label[ cond ] ) ), 165 | redu.plot, 166 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 167 | ) 168 | } 169 | 170 | # plot each condition colored by sample 171 | 172 | for ( cond in fcs.condition ) 173 | { 174 | dmrd.cond.idx <- which( dmrd.event.condition == cond ) 175 | 176 | redu.cond.data <- redu.data[ dmrd.cond.idx, ] 177 | dmrd.cond.event.sample <- dmrd.event.sample[ dmrd.cond.idx ] 178 | 179 | redu.plot <- plot.dimensionality.reduction( 180 | redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 181 | event.partition = dmrd.cond.event.sample, 182 | partition.label = flow.sample.label, 183 | partition.color = adjustcolor( flow.sample.color.single, 184 | alpha.f = fcs.dmrd.color.alpha ), 185 | dmrd.lims = redu.lims, 186 | point.size = redu.figure.point.size, 187 | show.guide = TRUE 188 | ) 189 | 190 | ggsave( 191 | file.path( redu.figure.dir, 192 | sprintf( "%s_%s__sample.png", redu.figure.plot, 193 | fcs.condition.label[ cond ] ) ), 194 | redu.plot, 195 | width = the.dmrd.figure.width, height = the.dmrd.figure.height 196 | ) 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /flow analysis/plot_dimensionality_reduction.r: -------------------------------------------------------------------------------- 1 | 2 | # plots one dimensionality reduction figure 3 | 4 | 5 | plot.dimensionality.reduction <- function( 6 | dmrd.x, dmrd.y, 7 | event.group = NULL, group.label = NULL, 8 | event.partition = NULL, partition.label = NULL, partition.color = NULL, 9 | event.level = NULL, 10 | dmrd.lims = NULL, dmrd.nrow = NULL, dmrd.ncol = NULL, 11 | point.size = NULL, show.guide = FALSE, guide.name = NULL 12 | ) 13 | { 14 | if ( ! is.null( event.partition ) ) 15 | { 16 | if ( is.null( event.group ) ) 17 | ggdf <- data.frame( dmrd.x, dmrd.y, event.partition ) 18 | else 19 | ggdf <- data.frame( dmrd.x, dmrd.y, event.partition, event.group ) 20 | 21 | if ( is.null( partition.label ) ) 22 | plot.label <- waiver() 23 | else 24 | plot.label <- partition.label 25 | 26 | if ( show.guide ) 27 | plot.guide <- guide_legend( keyheight = 0.8, 28 | override.aes = list( size = fcs.dmrd.legend.point.size ), 29 | label.theme = element_text( size = fcs.dmrd.legend.label.size ), 30 | title = guide.name, title.position = "top", 31 | title.theme = element_text( size = fcs.dmrd.legend.title.size ) ) 32 | else 33 | plot.guide <- FALSE 34 | 35 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, 36 | y = dmrd.y, color = event.partition ) ) + 37 | scale_color_manual( values = partition.color, labels = plot.label, 38 | guide = plot.guide ) 39 | } 40 | else if ( ! is.null( event.level ) ) 41 | { 42 | if ( is.null( event.group ) ) 43 | ggdf <- data.frame( dmrd.x, dmrd.y, event.level ) 44 | else 45 | ggdf <- data.frame( dmrd.x, dmrd.y, event.level, event.group ) 46 | 47 | if ( show.guide ) 48 | plot.guide <- guide_colorbar( barwidth = 0.8, barheight = 10, 49 | title = guide.name, title.position = "top", 50 | title.theme = element_text( size = fcs.dmrd.legend.title.size ) ) 51 | else 52 | plot.guide <- FALSE 53 | 54 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y, 55 | color = event.level ) ) + 56 | scale_color_gradientn( colors = fcs.dmrd.density.palette, 57 | labels = NULL, guide = plot.guide ) 58 | } 59 | else 60 | { 61 | if ( is.null( event.group ) ) 62 | ggdf <- data.frame( dmrd.x, dmrd.y ) 63 | else 64 | ggdf <- data.frame( dmrd.x, dmrd.y, event.group ) 65 | 66 | dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y ) ) 67 | } 68 | 69 | if ( is.null( dmrd.lims ) ) { 70 | dmrd.xy.max <- max( abs( c( dmrd.x, dmrd.y ) ) ) 71 | dmrd.lims <- dmrd.xy.max * c( -1, 1 ) 72 | } 73 | 74 | if ( is.null( point.size ) ) 75 | point.size <- 0.5 76 | 77 | dmrd.plot <- dmrd.plot + 78 | coord_fixed() + 79 | lims( x = dmrd.lims, y = dmrd.lims ) + 80 | geom_point( shape = 20, stroke = 0, size = point.size ) + 81 | theme_bw() + 82 | theme( axis.title = element_blank(), 83 | axis.text = element_blank(), 84 | axis.ticks = element_blank(), 85 | panel.grid.major = element_blank(), 86 | panel.grid.minor = element_blank() ) 87 | 88 | if ( ! is.null( event.group ) ) 89 | { 90 | dmrd.plot <- dmrd.plot + 91 | facet_wrap( vars( event.group ), 92 | labeller = as_labeller( group.label ), 93 | nrow = dmrd.nrow, ncol = dmrd.ncol ) + 94 | theme( strip.background = element_rect( fill = "white" ), 95 | strip.text = element_text( size = fcs.dmrd.group.title.size ) ) 96 | } 97 | 98 | dmrd.plot 99 | } 100 | 101 | --------------------------------------------------------------------------------