├── CSV-based flow analysis
    ├── 00_src
    │   ├── ce_diff_test.r
    │   ├── ce_diff_test_tsne.r
    │   ├── ce_diff_test_umap.r
    │   ├── plot_all_dmrd_figures.r
    │   ├── plot_all_embedsom_figures.R
    │   ├── plot_dimensionality_reduction.r
    │   └── plot_dimensionality_reduction_embedSOM.R
    ├── Exporting data in csv format.PNG
    ├── Setting axis in FlowJo for Aurora data.pptx
    ├── analyze_flow_cytometry_csv.R
    ├── analyze_flow_cytometry_parameter_csv.R
    ├── flowcytoscript_setup.R
    └── highlight_changed_regions.R
├── README.md
├── flow analysis
    ├── analyze_flow_cytometry.r
    ├── analyze_flow_cytometry.sh
    ├── analyze_flow_cytometry_parameter.r
    ├── ce_diff_test.r
    ├── ce_diff_test_tsne.r
    ├── ce_diff_test_umap.r
    ├── get_channels.R
    ├── plot_all_dmrd_figures.r
    └── plot_dimensionality_reduction.r
└── scRNA-seq analysis
    └── t-SNE_analysis_full.rmd


/CSV-based flow analysis/00_src/ce_diff_test.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # calculates test on differences of cross-entropy
  3 | 
  4 | ce.diff.test <- function( 
  5 |     cross.entropy, 
  6 |     event.partition, 
  7 |     partition.label, partition.color, partition.line.type, 
  8 |     base.test, base.dist, 
  9 |     dendrogram.order.weight, 
 10 |     result, cdf.figure, dendrogram.figure 
 11 | )
 12 | {
 13 |     partition <- levels( event.partition )
 14 |     partition.n <- length( partition )
 15 |     
 16 |     cross.entropy.split <- split( cross.entropy, event.partition )
 17 |     
 18 |     if ( base.test == "ks" )
 19 |     {
 20 |         if ( partition.n == 2 )
 21 |         {
 22 |             ks.test.res <- ks.test( cross.entropy.split[[ 1 ]], 
 23 |                 cross.entropy.split[[ 2 ]] )
 24 |             
 25 |             test.res <- list( ks.single = ks.test.res )
 26 |         }
 27 |         else if ( partition.n > 2 )
 28 |         {
 29 |             comparison.n <- partition.n * ( partition.n - 1 ) / 2
 30 |             
 31 |             ks.pair <- vector( "list", comparison.n )
 32 |             comparison <- character( comparison.n )
 33 |             D.stat <- numeric( comparison.n )
 34 |             p.value <- numeric( comparison.n )
 35 | 
 36 |             k <- 1
 37 |             
 38 |             for ( i in 1 : ( partition.n - 1 ) )
 39 |                 for ( j in (i+1) : partition.n )
 40 |                 {
 41 |                     ks.pair[[ k ]] <- ks.test( cross.entropy.split[[ i ]], 
 42 |                         cross.entropy.split[[ j ]] )
 43 |                     
 44 |                     comparison[ k ] <- sprintf( "%s - %s", 
 45 |                         partition.label[ partition[ i ] ], 
 46 |                         partition.label[ partition[ j ] ] )
 47 |                     
 48 |                     D.stat[ k ] <- ks.pair[[ k ]]$statistic
 49 |                     
 50 |                     p.value[ k ] <- ks.pair[[ k ]]$p.value
 51 |                     
 52 |                     k <- k + 1
 53 |                 }
 54 |             
 55 |             p.value.adj <- p.adjust( p.value, "holm" )
 56 |             
 57 |             test.res <- list( ks.multiple = list( ks.pair = ks.pair, 
 58 |                 comparison = comparison, D.stat = D.stat, 
 59 |                 p.value = p.value, p.value.adj = p.value.adj ) )
 60 |         }
 61 |         else
 62 |             stop( "no partitions for testing cross-entropy differences" )
 63 |     }
 64 |     else if ( base.test == "rank" )
 65 |     {
 66 |         if ( partition.n == 2 )
 67 |         {
 68 |             wilcox.test.res <- wilcox.test( cross.entropy.split[[ 1 ]], 
 69 |                 cross.entropy.split[[ 2 ]] )
 70 |             
 71 |             test.res <- list( rank.single = wilcox.test.res )
 72 |         }
 73 |         else if ( partition.n > 2 )
 74 |         {
 75 |             kruskal.test.res <- kruskal.test( cross.entropy, 
 76 |                 event.partition )
 77 |             
 78 |             dunn.test.res <- dunn.test( cross.entropy, event.partition, 
 79 |                 method = "holm", alpha = fcs.ce.diff.test.alpha, altp = TRUE, 
 80 |                 kw = FALSE, table = FALSE, list = TRUE )
 81 |             
 82 |             test.res <- list( rank.multiple = list( 
 83 |                 kruskal = kruskal.test.res, dunn = dunn.test.res ) )
 84 |         }
 85 |         else
 86 |             stop( "no partitions for testing cross-entropy differences" )
 87 |     }
 88 |     else
 89 |         stop( "wrong base test for cross-entropy differences" )
 90 |     
 91 |     if ( ! is.null( result ) )
 92 |     {
 93 |         result.file <- file( result, "w" )
 94 |         sink( result.file )
 95 |         
 96 |         tr.name <- names( test.res )
 97 |         stopifnot( length( tr.name ) == 1 )
 98 |         
 99 |         if ( tr.name == "ks.single" )
100 |         {
101 |             cat( "\n**  Kolmogorov-Smirnov test\n")
102 |             
103 |             print( test.res$ks.single )
104 |         }
105 |         else if ( tr.name == "ks.multiple" )
106 |         {
107 |             cat( "\n**  Multiple Kolmogorov-Smirnov tests with Holm correction\n\n")
108 |             
109 |             comparison.width <- max( nchar( test.res$ks.multiple$comparison ) )
110 |             
111 |             for ( i in 1 : length( test.res$ks.multiple$comparison ) )
112 |                 cat( sprintf( "%-*s\t\tD = %g\t\tpv = %g\t\tadj-pv = %g\n", 
113 |                     comparison.width, 
114 |                     test.res$ks.multiple$comparison[ i ], 
115 |                     test.res$ks.multiple$D.stat[ i ], 
116 |                     test.res$ks.multiple$p.value[ i ], 
117 |                     test.res$ks.multiple$p.value.adj[ i ] ) )
118 |         }
119 |         else if ( tr.name == "rank.single" )
120 |         {
121 |             cat( "\n**  Wilcoxon rank sum test\n")
122 |             
123 |             print( test.res$rank.single )
124 |         }
125 |         else if ( tr.name == "rank.multiple" )
126 |         {
127 |             cat( "\n**  Kruskal-Wallis rank sum test\n")
128 |             
129 |             print( test.res$rank.multiple$kruskal )
130 |             
131 |             cat( "\n**  Dunn post-hoc test with Holm correction\n\n")
132 |             
133 |             comparison.width <- max( nchar( 
134 |                 test.res$rank.multiple$dunn$comparison ) )
135 |             
136 |             for ( i in 1 : length( test.res$rank.multiple$dunn$comparisons ) )
137 |                 cat( sprintf( "%-*s\t\tZ = %g\t\tpv = %g\t\tadj-pv = %g\n", 
138 |                     comparison.width, 
139 |                     test.res$rank.multiple$dunn$comparisons[ i ], 
140 |                     test.res$rank.multiple$dunn$Z[ i ], 
141 |                     test.res$rank.multiple$dunn$altP[ i ], 
142 |                     test.res$rank.multiple$dunn$altP.adjusted[ i ] ) )
143 |         }
144 |         else
145 |         {
146 |             sink()
147 |             close( result.file )
148 |             stop( "unknown test in ce-diff result" )
149 |         }
150 |         
151 |         sink()
152 |         close( result.file )
153 |     }
154 |     
155 |     if ( ! is.null( cdf.figure ) )
156 |     {
157 |         if ( is.null( partition.label ) )
158 |             partition.label = partition
159 |         
160 |         if ( is.null( partition.color ) )
161 |             partition.color <- rainbow( partition.n )
162 |         
163 |         if ( is.null( partition.line.type ) )
164 |             partition.line.type <- rep( 1, partition.n )
165 |         
166 |         png( filename = cdf.figure, width = fcs.ce.diff.figure.cdf.width, 
167 |             height = fcs.ce.diff.figure.cdf.height )
168 |         
169 |         par( mar = c( 5.5, 6, 2, 1.5 ) )
170 |         
171 |         plot( ecdf( cross.entropy ), ylim = c( 0, 1 ), 
172 |             xlab = "Cross-entropy", ylab = "CDF", main = "", 
173 |             cex.lab = 3, cex.axis = 2.5, 
174 |             col = fcs.ce.diff.figure.cdf.all.color, 
175 |             lwd = fcs.ce.diff.figure.line.width - 1, do.points = FALSE )
176 |         
177 |         for ( pall in partition )
178 |         {
179 |             ces <- cross.entropy.split[[ pall ]]
180 |             ces.n <- length( ces )
181 |             
182 |             if ( ces.n < fcs.ce.diff.figure.cdf.resolution ) {
183 |                 plot( ecdf( ces ), col = partition.color[ pall ], 
184 |                     lty = partition.line.type[ pall ], 
185 |                     lwd = fcs.ce.diff.figure.line.width, 
186 |                     do.points = FALSE, add = TRUE )
187 |             }
188 |             else {
189 |                 ecdf.x <- sort( ces )
190 |                 ecdf.y <- 1 : ces.n / ces.n
191 |                 
192 |                 lines( ecdf.x, ecdf.y, col = partition.color[ pall ], 
193 |                     lty = partition.line.type[ pall ], 
194 |                     lwd = fcs.ce.diff.figure.line.width )
195 |             }
196 |         }
197 |         
198 |         legend( "bottomright", 
199 |             legend = c( fcs.ce.diff.figure.cdf.all.label, partition.label ), 
200 |             col = c( fcs.ce.diff.figure.cdf.all.color, partition.color ), 
201 |             lty = partition.line.type, 
202 |             lwd = fcs.ce.diff.figure.line.width, 
203 |             cex = fcs.ce.diff.figure.font.size )
204 |         
205 |         dev.off()
206 |     }
207 |     
208 |     if ( ! is.null( dendrogram.figure ) && partition.n > 2 )
209 |     {
210 |         cross.entropy.dist <- matrix( 0, nrow = partition.n, 
211 |             ncol = partition.n )
212 |         
213 |         for ( i in 1 : ( partition.n - 1 ) )
214 |             for ( j in (i+1) : partition.n )
215 |             {
216 |                 if ( base.dist == "ks" )
217 |                     cross.entropy.dist[ i, j ] <- ks.test( 
218 |                         cross.entropy.split[[ i ]], 
219 |                         cross.entropy.split[[ j ]] 
220 |                     )$statistic
221 |                 else if ( base.dist == "median" )
222 |                     cross.entropy.dist[ i, j ] <- abs( 
223 |                         median( cross.entropy.split[[ i ]] ) - 
224 |                         median( cross.entropy.split[[ j ]] )
225 |                     )
226 |                 else
227 |                     stop( "wrong base dist for cross-entropy differences" )
228 |                 
229 |                 cross.entropy.dist[ j, i ] <- cross.entropy.dist[ i, j ]
230 |             }
231 |         
232 |         cross.entropy.hclust <- hclust( as.dist( cross.entropy.dist ) )
233 |         
234 |         if ( ! is.null( dendrogram.order.weight ) )
235 |             cross.entropy.hclust <- as.hclust( reorder( 
236 |                 as.dendrogram( cross.entropy.hclust ), 
237 |                 dendrogram.order.weight, 
238 |                 agglo.FUN = mean
239 |             ) )
240 |         
241 |         if ( is.null( partition.label ) )
242 |             partition.label = partition
243 |         
244 |         png( filename = dendrogram.figure, 
245 |             width = fcs.ce.diff.figure.dendrogram.width, 
246 |             height = fcs.ce.diff.figure.dendrogram.height )
247 |         
248 |         par( mar = c( 5, 5.6, 4, 1.4 ) )
249 |         
250 |         plot( cross.entropy.hclust, 
251 |             labels = partition.label, hang = -1, 
252 |             xlab = "", ylab = "", main = "", sub = "", cex.axis = 3, 
253 |             cex = fcs.ce.diff.figure.font.size )
254 |         
255 |         dev.off()
256 |     }
257 |     
258 |     test.res
259 | }
260 | 
261 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/00_src/ce_diff_test_tsne.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # calculates cross-entropy for tsne plots and calls ce.diff.test
  3 | 
  4 | 
  5 | ce.diff.test.tsne <- function( 
  6 |     orig.data, tsne.data, 
  7 |     event.partition, 
  8 |     partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 
  9 |     base.test = "ks", base.dist = "ks", 
 10 |     prob.sample.n = NULL, dendrogram.order.weight = NULL, 
 11 |     result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 
 12 | )
 13 | {
 14 |     stopifnot( nrow( orig.data ) == nrow( tsne.data ) && 
 15 |         nrow( orig.data ) == length( event.partition ) )
 16 |     
 17 |     data.n <- nrow( orig.data )
 18 |     
 19 |     if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n )
 20 |         prob.sample.idx <- sample( data.n, prob.sample.n )
 21 |     else
 22 |         prob.sample.idx <- 1 : data.n
 23 |     
 24 |     if ( fcs.use.cached.results && 
 25 |         file.exists( fcs.ce.diff.tsne.cache.file.path ) )
 26 |     {
 27 |         cat( "Using cached results for probability\n" )
 28 |         
 29 |         load( fcs.ce.diff.tsne.cache.file.path )
 30 |     }
 31 |     else
 32 |     {
 33 |         cat( "Calculating probability\n" )
 34 |         
 35 |         # sampling here temporary, until optimizing dist( tsne.dat ) below
 36 |         orig.tsne.prob <- calculate.probability.tsne( 
 37 |             orig.data[ prob.sample.idx, ], 
 38 |             tsne.data[ prob.sample.idx, ] 
 39 |         )
 40 |         
 41 |         save( orig.tsne.prob, file = fcs.ce.diff.tsne.cache.file.path )
 42 |     }
 43 |     
 44 |     cross.entropy.all <- calculate.cross.entropy( orig.tsne.prob$orig, 
 45 |         orig.tsne.prob$tsne )
 46 |     
 47 |     event.partition.all <- event.partition[ prob.sample.idx ]
 48 |     
 49 |     ce.diff.test( 
 50 |         cross.entropy.all, 
 51 |         event.partition.all, 
 52 |         partition.label, partition.color, partition.line.type, 
 53 |         base.test, base.dist, 
 54 |         dendrogram.order.weight, 
 55 |         result, cdf.figure, dendrogram.figure
 56 |     )
 57 | }
 58 | 
 59 | 
 60 | calculate.probability.tsne <- function( orig.dat, tsne.dat )
 61 | {
 62 |     orig.dat.n <- nrow( orig.dat )
 63 |     tsne.dat.n <- nrow( tsne.dat )
 64 |     
 65 |     stopifnot( orig.dat.n == tsne.dat.n )
 66 |     
 67 |     # find nearest neighbors in original space and their distances
 68 |     
 69 |     orig.dat.nn2 <- nn2( normalize_input( orig.dat ), 
 70 |         k = fcs.ce.diff.tsne.perplexity.factor * fcs.tsne.perplexity + 1 )
 71 |     
 72 |     orig.dat.self.idx <- sapply( 1 : orig.dat.n, function( ri ) {
 73 |         ri.idx <- which( orig.dat.nn2$nn.idx[ ri, ] == ri )
 74 |         ifelse( length( ri.idx ) == 1, ri.idx, NA )
 75 |     } )
 76 |     
 77 |     stopifnot( ! is.na( orig.dat.self.idx ) )
 78 |     
 79 |     orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 
 80 |         orig.dat.nn2$nn.idx[ ri, - orig.dat.self.idx[ ri ] ] ) )
 81 |     
 82 |     orig.dist2 <- t( sapply( 1 : orig.dat.n, function( ri ) 
 83 |         orig.dat.nn2$nn.dists[ ri, - orig.dat.self.idx[ ri ] ]^2 ) )
 84 |     
 85 |     # calculate probabilities associated to distances in original space
 86 |     
 87 |     orig.stdev <- apply( orig.dist2, 1, function( dd2 ) {
 88 |         tsne.perplexity.error <- function( ss, dd2 ) {
 89 |             p <- exp( - dd2 / (2*ss^2) )
 90 |             if ( sum( p ) < .Machine$double.eps )
 91 |                 p <- 1
 92 |             p <- p / sum( p )
 93 |             p <- p[ p > 0 ]
 94 |             2^( - sum( p * log2( p ) ) ) - fcs.tsne.perplexity
 95 |         }
 96 |         
 97 |         dd2.min.idx <- 1
 98 |         dd2.ascen <- sort( dd2 )
 99 |         while( dd2.ascen[ dd2.min.idx ] == 0 )
100 |             dd2.min.idx <- dd2.min.idx + 1
101 |         ss.lower <- dd2.ascen[ dd2.min.idx ]
102 |         
103 |         dd2.max.idx <- 1
104 |         dd2.descen <- sort( dd2, decreasing = TRUE )
105 |         while( is.infinite( dd2.descen[ dd2.max.idx ] ) )
106 |             dd2.max.idx <- dd2.max.idx + 1
107 |         ss.upper <- dd2.descen[ dd2.max.idx ]
108 |         
109 |         while( tsne.perplexity.error( ss.upper, dd2 ) < 0 )
110 |         {
111 |             ss.lower <- ss.upper
112 |             ss.upper <- 2 * ss.upper
113 |         }
114 |         
115 |         while( tsne.perplexity.error( ss.lower, dd2 ) > 0 )
116 |         {
117 |             ss.upper <- ss.lower
118 |             ss.lower <- ss.lower / 2
119 |         }
120 |         
121 |         uniroot( tsne.perplexity.error, dd2, 
122 |             interval = c( ss.lower, ss.upper ), 
123 |             tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root
124 |     } )
125 |     
126 |     orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) {
127 |         p <- exp( - orig.dist2[ i, ] /  ( 2 * orig.stdev[ i ]^2 ) )
128 |         p / sum( p )
129 |     } ) )
130 |     
131 |     # symmetrize probabilities in original space
132 |     
133 |     for ( i in 1 : orig.dat.n )
134 |         for ( j2 in 1 : length( orig.neigh[ i, ] ) )
135 |         {
136 |             j <- orig.neigh[ i, j2 ]
137 |             
138 |             i2 <- match( i, orig.neigh[ j, ] )
139 |             
140 |             if ( ! is.na( i2 ) )
141 |             {
142 |                 if ( j > i )
143 |                 {
144 |                     sym.prob <- ( orig.prob[ i, j2 ] + orig.prob[ j, i2 ] ) / 2
145 |                     orig.prob[ i, j2 ] <- sym.prob
146 |                     orig.prob[ j, i2 ] <- sym.prob
147 |                 }
148 |             }
149 |             else
150 |                 orig.prob[ i, j2 ] <- orig.prob[ i, j2 ] / 2
151 |         }
152 |     
153 |     orig.prob <- sweep( orig.prob, 1, rowSums( orig.prob ), "/" )
154 |     
155 |     # get distances in tsne space for closest neighbors in original space
156 |     
157 |     tsne.dist2 <- t( sapply( 1 : tsne.dat.n, function( i )
158 |         sapply( orig.neigh[ i, ], function( j )
159 |             sum( ( tsne.dat[ i, ] - tsne.dat[ j, ] )^2 )
160 |         )
161 |     ) )
162 |     
163 |     # calculate probabilities associated to distances in tsne representation
164 |     
165 |     tsne.prob.factor <- tsne.dat.n / 
166 |         ( 2 * sum( 1 / ( 1 + dist( tsne.dat )^2 ) ) )
167 |     
168 |     tsne.prob <- t( apply( tsne.dist2, 1, function( dd2 ) 
169 |         p <- tsne.prob.factor / ( 1 + dd2 )
170 |     ) )
171 |     
172 |     list( orig = orig.prob, tsne = tsne.prob )
173 | }
174 | 
175 | 
176 | calculate.cross.entropy <- function( prim.prob, secd.prob )
177 | {
178 |     prim.prob.n <- nrow( prim.prob )
179 |     secd.prob.n <- nrow( secd.prob )
180 |     
181 |     prim.prob.m <- ncol( prim.prob )
182 |     secd.prob.m <- ncol( secd.prob )
183 |     
184 |     stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m )
185 |     
186 |     sapply( 1 : prim.prob.n, function( i ) 
187 |         - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) )
188 |     )
189 | }
190 | 
191 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/00_src/ce_diff_test_umap.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # calculates cross-entropy for umap plots and calls ce.diff.test
  3 | 
  4 | 
  5 | ce.diff.test.umap <- function( 
  6 |     orig.dist, orig.knn, umap.data, umap.param, 
  7 |     event.partition, 
  8 |     partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 
  9 |     base.test = "ks", base.dist = "ks", 
 10 |     prob.sample.n = NULL, dendrogram.order.weight = NULL, 
 11 |     result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 
 12 | )
 13 | {
 14 |     stopifnot( nrow( orig.dist ) == nrow( umap.data ) && 
 15 |         nrow( orig.dist ) == length( event.partition ) )
 16 |     
 17 |     data.n <- nrow( orig.dist )
 18 |     
 19 |     if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n )
 20 |         prob.sample.idx <- sample( data.n, prob.sample.n )
 21 |     else
 22 |         prob.sample.idx <- 1 : data.n
 23 |     
 24 |     if ( fcs.use.cached.results && 
 25 |             file.exists( fcs.ce.diff.umap.cache.file.path ) )
 26 |     {
 27 |         cat( "Using cached results for probability\n" )
 28 |         
 29 |         load( fcs.ce.diff.umap.cache.file.path )
 30 |     }
 31 |     else
 32 |     {
 33 |         cat( "Calculating probability\n" )
 34 |         
 35 |         orig.umap.prob <- calculate.probability.umap( orig.dist, orig.knn, 
 36 |             umap.data, umap.param )
 37 |         
 38 |         save( orig.umap.prob, file = fcs.ce.diff.umap.cache.file.path )
 39 |     }
 40 |     
 41 |     cross.entropy.all <- calculate.fuzzy.cross.entropy( 
 42 |         orig.umap.prob$orig[ prob.sample.idx, ], 
 43 |         orig.umap.prob$umap[ prob.sample.idx, ] 
 44 |     )
 45 |     
 46 |     event.partition.all <- event.partition[ prob.sample.idx ]
 47 |     
 48 |     ce.diff.test( 
 49 |         cross.entropy.all, 
 50 |         event.partition.all, 
 51 |         partition.label, partition.color, partition.line.type, 
 52 |         base.test, base.dist, 
 53 |         dendrogram.order.weight, 
 54 |         result, cdf.figure, dendrogram.figure
 55 |     )
 56 | }
 57 | 
 58 | 
 59 | calculate.probability.umap <- function( orig.dis, orig.kn, umap.dat, 
 60 |     umap.param )
 61 | {
 62 |     orig.dat.n <- nrow( orig.dis )
 63 |     umap.dat.n <- nrow( umap.dat )
 64 |     
 65 |     stopifnot( orig.dat.n == umap.dat.n )
 66 |     
 67 |     # get nearest neighbors in original space and their distances
 68 |     
 69 |     orig.self.idx <- sapply( 1 : orig.dat.n, function( ri ) {
 70 |         ri.idx <- which( orig.kn[ ri, ] == ri )
 71 |         ifelse( length( ri.idx ) == 1, ri.idx, NA )
 72 |     } )
 73 |     
 74 |     stopifnot( ! is.na( orig.self.idx ) )
 75 |     
 76 |     orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 
 77 |         orig.kn[ ri, - orig.self.idx[ ri ] ] ) )
 78 |     
 79 |     orig.dis.reduc <- t( sapply( 1 : orig.dat.n, function( ri ) 
 80 |         orig.dis[ ri, - orig.self.idx[ ri ] ] ) )
 81 |     
 82 |     # calculate probabilities associated to distances in original space
 83 |     
 84 |     umap.sigma <- apply( orig.dis.reduc, 1, function( dd ) {
 85 |         umap.sigma.error <- function( ss, dd ) {
 86 |             p <- exp( - pmax( 0, dd - min( dd ) ) / ss )
 87 |             sum( p ) - log2( length( p ) )
 88 |         }
 89 |         
 90 |         dd.ascen <- sort( dd )
 91 |         dd.ascen <- dd.ascen[ dd.ascen > 0 ]
 92 |         ss.lower <- dd.ascen[ 1 ]
 93 |         
 94 |         dd.descen <- sort( dd, decreasing = TRUE )
 95 |         dd.descen <- dd.descen[ ! is.infinite( dd.descen ) ]
 96 |         ss.upper <- dd.descen[ 1 ]
 97 |         
 98 |         while( umap.sigma.error( ss.upper, dd ) < 0 )
 99 |         {
100 |             ss.lower <- ss.upper
101 |             ss.upper <- 2 * ss.upper
102 |         }
103 |         
104 |         while( umap.sigma.error( ss.lower, dd ) > 0 )
105 |         {
106 |             ss.upper <- ss.lower
107 |             ss.lower <- ss.lower / 2
108 |         }
109 |         
110 |         uniroot( umap.sigma.error, dd, 
111 |             interval = c( ss.lower, ss.upper ), 
112 |             tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root
113 |     } )
114 |     
115 |     orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) 
116 |         exp( - pmax( 0, orig.dis.reduc[ i, ] - min( orig.dis.reduc[ i, ] ) ) / 
117 |             umap.sigma[ i ] )
118 |     ) )
119 |     
120 |     # symmetrize probabilities in original space
121 |     
122 |     for ( i in 1 : orig.dat.n )
123 |         for ( j2 in 1 : length( orig.neigh[ i, ] ) )
124 |         {
125 |             j <- orig.neigh[ i, j2 ]
126 |             
127 |             i2 <- match( i, orig.neigh[ j, ] )
128 |             
129 |             if ( ! is.na( i2 ) )
130 |             {
131 |                 if ( j > i )
132 |                 {
133 |                     sym.prob <- orig.prob[ i, j2 ] + orig.prob[ j, i2 ] - 
134 |                         orig.prob[ i, j2 ] * orig.prob[ j, i2 ]
135 |                     orig.prob[ i, j2 ] <- sym.prob
136 |                     orig.prob[ j, i2 ] <- sym.prob
137 |                 }
138 |             }
139 |         }
140 |     
141 |     # get distances in umap space for closest neighbors in original space
142 |     
143 |     umap.dist2 <- t( sapply( 1 : umap.dat.n, function( i )
144 |         sapply( orig.neigh[ i, ], function( j )
145 |             sum( ( umap.dat[ i, ] - umap.dat[ j, ] )^2 )
146 |         )
147 |     ) )
148 |     
149 |     # calculate probabilities associated to distances in umap representation
150 |     
151 |     umap.a <- umap.param$a
152 |     umap.b <- umap.param$b
153 |     
154 |     umap.prob <- t( apply( umap.dist2, 1, function( dd2 ) 
155 |         p <- 1 / ( 1 + umap.a * dd2 ^ umap.b )
156 |     ) )
157 |     
158 |     list( orig = orig.prob, umap = umap.prob )
159 | }
160 | 
161 | 
162 | calculate.fuzzy.cross.entropy <- function( prim.prob, secd.prob )
163 | {
164 |     prim.prob.n <- nrow( prim.prob )
165 |     secd.prob.n <- nrow( secd.prob )
166 |     
167 |     prim.prob.m <- ncol( prim.prob )
168 |     secd.prob.m <- ncol( secd.prob )
169 |     
170 |     stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m )
171 |     
172 |     sapply( 1 : prim.prob.n, function( i ) 
173 |         - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) + 
174 |             ( 1 - prim.prob[ i, ] ) * log( 1 - secd.prob[ i, ] ) )
175 |     )
176 | }
177 | 
178 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/00_src/plot_all_dmrd_figures.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # plots all dimensionality reduction figures
  3 | 
  4 | 
  5 | plot.all.dmrd.figures <- function( 
  6 |     redu.data, redu.data.max, 
  7 |     redu.figure.lims.factor, redu.figure.point.size, 
  8 |     redu.figure.dir, redu.figure.plot, 
  9 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
 10 | )
 11 | {
 12 |     redu.lims <- redu.figure.lims.factor * redu.data.max * c( -1, 1 )
 13 |     
 14 |     the.dmrd.figure.width <- fcs.dmrd.figure.width + 
 15 |         fcs.dmrd.label.factor.width * 
 16 |             max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 
 17 |                 nchar( fcs.cluster.label ) )
 18 |     
 19 |     the.dmrd.figure.width.multi <- 
 20 |         fcs.dmrd.figure.ncol * fcs.dmrd.figure.width + 
 21 |         fcs.dmrd.label.factor.width * 
 22 |             max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 
 23 |                 nchar( fcs.cluster.label ) )
 24 |     
 25 |     the.dmrd.figure.height <- fcs.dmrd.figure.height
 26 |     
 27 |     the.dmrd.figure.height.multi <- 
 28 |         fcs.dmrd.figure.nrow * fcs.dmrd.figure.height
 29 |     
 30 |     # plot all events colored by cluster
 31 |     
 32 |     redu.plot <- plot.dimensionality.reduction( 
 33 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 34 |         event.partition = dmrd.event.cluster, 
 35 |         partition.label = fcs.cluster.label, 
 36 |         partition.color = fcs.cluster.color, 
 37 |         dmrd.lims = redu.lims, 
 38 |         point.size = redu.figure.point.size, 
 39 |         show.guide = TRUE 
 40 |     )
 41 |     
 42 |     ggsave( 
 43 |         file.path( redu.figure.dir, 
 44 |             sprintf( "%s_all_events__cluster.png", redu.figure.plot ) ), 
 45 |         redu.plot, 
 46 |         width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 47 |     )
 48 |     
 49 |     # plot all events colored by condition
 50 |     
 51 |     redu.plot <- plot.dimensionality.reduction( 
 52 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 53 |         event.partition = dmrd.event.condition, 
 54 |         partition.label = fcs.condition.label, 
 55 |         partition.color = adjustcolor( fcs.condition.color, 
 56 |             alpha.f = fcs.dmrd.color.alpha ), 
 57 |         dmrd.lims = redu.lims, 
 58 |         point.size = redu.figure.point.size, 
 59 |         show.guide = TRUE 
 60 |     )
 61 |     
 62 |     ggsave( 
 63 |         file.path( redu.figure.dir, 
 64 |             sprintf( "%s_all_events__condition.png", redu.figure.plot ) ), 
 65 |         redu.plot, 
 66 |         width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 67 |     )
 68 |     
 69 |     # plot all events colored by each marker level
 70 |     
 71 |     for ( fch in fcs.channel )
 72 |     {
 73 |         dmrd.event.level <- dmrd.data[ , fch ]
 74 |         
 75 |         redu.plot <- plot.dimensionality.reduction( 
 76 |             redu.data[ , 1 ], redu.data[ , 2 ], 
 77 |             event.level = dmrd.event.level, 
 78 |             dmrd.lims = redu.lims, 
 79 |             point.size = redu.figure.point.size, 
 80 |             show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 
 81 |         )
 82 |         
 83 |         ggsave( 
 84 |             file.path( redu.figure.dir, 
 85 |                 sprintf( "%s_all_events__%s.png", redu.figure.plot, 
 86 |                     fcs.channel.label[ fch ] ) ), 
 87 |             redu.plot, 
 88 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 89 |         )
 90 |     }
 91 |     
 92 |     # plot all conditions colored by cluster
 93 |     
 94 |     redu.plot <- plot.dimensionality.reduction( 
 95 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 96 |         event.group = dmrd.event.condition, 
 97 |         group.label = fcs.condition.label, 
 98 |         event.partition = dmrd.event.cluster, 
 99 |         partition.label = fcs.cluster.label, 
100 |         partition.color = fcs.cluster.color, 
101 |         dmrd.lims = redu.lims, 
102 |         dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 
103 |         point.size = redu.figure.point.size, 
104 |         show.guide = TRUE 
105 |     )
106 |     
107 |     ggsave( 
108 |         file.path( redu.figure.dir, 
109 |             sprintf( "%s_all_conditions__cluster.png", redu.figure.plot ) ), 
110 |         redu.plot, 
111 |         width = the.dmrd.figure.width.multi, 
112 |         height = the.dmrd.figure.height.multi 
113 |     )
114 |     
115 |     # plot all conditions colored by each marker level
116 |     
117 |     for ( fch in fcs.channel )
118 |     {
119 |         dmrd.event.level <- dmrd.data[ , fch ]
120 |         
121 |         redu.plot <- plot.dimensionality.reduction( 
122 |             redu.data[ , 1 ], redu.data[ , 2 ], 
123 |             event.group = dmrd.event.condition, 
124 |             group.label = fcs.condition.label, 
125 |             event.level = dmrd.event.level, 
126 |             dmrd.lims = redu.lims, 
127 |             dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 
128 |             point.size = redu.figure.point.size, 
129 |             show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 
130 |         )
131 |         
132 |         ggsave( 
133 |             file.path( redu.figure.dir, 
134 |                 sprintf( "%s_all_conditions__%s.png", redu.figure.plot, 
135 |                     fcs.channel.label[ fch ] ) ), 
136 |             redu.plot, 
137 |             width = the.dmrd.figure.width.multi, 
138 |             height = the.dmrd.figure.height.multi 
139 |         )
140 |     }
141 |     
142 |     # plot each condition colored by cluster
143 |     
144 |     for ( cond in fcs.condition )
145 |     {
146 |         dmrd.cond.idx <- which( dmrd.event.condition == cond )
147 |         
148 |         redu.cond.data <- redu.data[ dmrd.cond.idx, ]
149 |         dmrd.cond.event.cluster <- dmrd.event.cluster[ dmrd.cond.idx ]
150 |         
151 |         redu.plot <- plot.dimensionality.reduction( 
152 |             redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 
153 |             event.partition = dmrd.cond.event.cluster, 
154 |             partition.label = fcs.cluster.label, 
155 |             partition.color = fcs.cluster.color, 
156 |             dmrd.lims = redu.lims, 
157 |             point.size = redu.figure.point.size, 
158 |             show.guide = TRUE 
159 |         )
160 |         
161 |         ggsave( 
162 |             file.path( redu.figure.dir, 
163 |                 sprintf( "%s_%s__cluster.png", redu.figure.plot, 
164 |                     fcs.condition.label[ cond ] ) ), 
165 |             redu.plot, 
166 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
167 |         )
168 |     }
169 |     
170 |     # plot each condition colored by sample
171 |     
172 |     for ( cond in fcs.condition )
173 |     {
174 |         dmrd.cond.idx <- which( dmrd.event.condition == cond )
175 |         
176 |         redu.cond.data <- redu.data[ dmrd.cond.idx, ]
177 |         dmrd.cond.event.sample <- dmrd.event.sample[ dmrd.cond.idx ]
178 |         
179 |         redu.plot <- plot.dimensionality.reduction( 
180 |             redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 
181 |             event.partition = dmrd.cond.event.sample, 
182 |             partition.label = flow.sample.label, 
183 |             partition.color = adjustcolor( flow.sample.color.single, 
184 |                 alpha.f = fcs.dmrd.color.alpha ), 
185 |             dmrd.lims = redu.lims, 
186 |             point.size = redu.figure.point.size, 
187 |             show.guide = TRUE 
188 |         )
189 |         
190 |         ggsave( 
191 |             file.path( redu.figure.dir, 
192 |                 sprintf( "%s_%s__sample.png", redu.figure.plot, 
193 |                     fcs.condition.label[ cond ] ) ), 
194 |             redu.plot, 
195 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
196 |         )
197 |     }
198 | }
199 | 
200 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/00_src/plot_all_embedsom_figures.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # plots all dimensionality reduction figures
  3 | 
  4 | 
  5 | plot.all.embedsom.figures <- function( 
  6 |     redu.data, redu.data.max, 
  7 |     redu.figure.lims.factor, redu.figure.point.size, 
  8 |     redu.figure.dir, redu.figure.plot, 
  9 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
 10 | )
 11 | {
 12 |     redu.lims <- redu.figure.lims.factor * redu.data.max * c( 0, 1)
 13 |     
 14 |     the.dmrd.figure.width <- fcs.dmrd.figure.width + 
 15 |         fcs.dmrd.label.factor.width * 
 16 |             max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 
 17 |                 nchar( fcs.cluster.label ) )
 18 |     
 19 |     the.dmrd.figure.width.multi <- 
 20 |         fcs.dmrd.figure.ncol * fcs.dmrd.figure.width + 
 21 |         fcs.dmrd.label.factor.width * 
 22 |             max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 
 23 |                 nchar( fcs.cluster.label ) )
 24 |     
 25 |     the.dmrd.figure.height <- fcs.dmrd.figure.height
 26 |     
 27 |     the.dmrd.figure.height.multi <- 
 28 |         fcs.dmrd.figure.nrow * fcs.dmrd.figure.height
 29 |     
 30 |     # plot all events colored by cluster
 31 |     
 32 |     redu.plot <- plot.dimensionality.reduction( 
 33 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 34 |         event.partition = dmrd.event.cluster, 
 35 |         partition.label = fcs.cluster.label, 
 36 |         partition.color = adjustcolor( fcs.cluster.color, 
 37 |                                        alpha.f = fcs.dmrd.color.alpha ),
 38 |         dmrd.lims = redu.lims, 
 39 |         point.size = redu.figure.point.size, 
 40 |         show.guide = TRUE 
 41 |     )
 42 |     
 43 |     ggsave( 
 44 |         file.path( redu.figure.dir, 
 45 |             sprintf( "%s_all_events__cluster.png", redu.figure.plot ) ), 
 46 |         redu.plot, 
 47 |         width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 48 |     )
 49 |     
 50 |     # plot all events colored by condition
 51 |     
 52 |     redu.plot <- plot.dimensionality.reduction( 
 53 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 54 |         event.partition = dmrd.event.condition, 
 55 |         partition.label = fcs.condition.label, 
 56 |         partition.color = adjustcolor( fcs.condition.color, 
 57 |             alpha.f = fcs.dmrd.color.alpha ), 
 58 |         dmrd.lims = redu.lims, 
 59 |         point.size = redu.figure.point.size, 
 60 |         show.guide = TRUE 
 61 |     )
 62 |     
 63 |     ggsave( 
 64 |         file.path( redu.figure.dir, 
 65 |             sprintf( "%s_all_events__condition.png", redu.figure.plot ) ), 
 66 |         redu.plot, 
 67 |         width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 68 |     )
 69 |     
 70 |     # plot all events colored by each marker level
 71 |     
 72 |     for ( fch in fcs.channel )
 73 |     {
 74 |         dmrd.event.level <- dmrd.data[ , fch ]
 75 |         
 76 |         redu.plot <- plot.dimensionality.reduction( 
 77 |             redu.data[ , 1 ], redu.data[ , 2 ], 
 78 |             event.level = dmrd.event.level, 
 79 |             dmrd.lims = redu.lims, 
 80 |             point.size = redu.figure.point.size, 
 81 |             show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 
 82 |         )
 83 |         
 84 |         ggsave( 
 85 |             file.path( redu.figure.dir, 
 86 |                 sprintf( "%s_all_events__%s.png", redu.figure.plot, 
 87 |                     fcs.channel.label[ fch ] ) ), 
 88 |             redu.plot, 
 89 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 90 |         )
 91 |     }
 92 |     
 93 |     # plot all conditions colored by cluster
 94 |     
 95 |     redu.plot <- plot.dimensionality.reduction( 
 96 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 97 |         event.group = dmrd.event.condition, 
 98 |         group.label = fcs.condition.label, 
 99 |         event.partition = dmrd.event.cluster, 
100 |         partition.label = fcs.cluster.label, 
101 |         partition.color = fcs.cluster.color, 
102 |         dmrd.lims = redu.lims, 
103 |         dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 
104 |         point.size = redu.figure.point.size, 
105 |         show.guide = TRUE 
106 |     )
107 |     
108 |     ggsave( 
109 |         file.path( redu.figure.dir, 
110 |             sprintf( "%s_all_conditions__cluster.png", redu.figure.plot ) ), 
111 |         redu.plot, 
112 |         width = the.dmrd.figure.width.multi, 
113 |         height = the.dmrd.figure.height.multi 
114 |     )
115 |     
116 |     # plot all conditions colored by each marker level
117 |     
118 |     for ( fch in fcs.channel )
119 |     {
120 |         dmrd.event.level <- dmrd.data[ , fch ]
121 |         
122 |         redu.plot <- plot.dimensionality.reduction( 
123 |             redu.data[ , 1 ], redu.data[ , 2 ], 
124 |             event.group = dmrd.event.condition, 
125 |             group.label = fcs.condition.label, 
126 |             event.level = dmrd.event.level, 
127 |             dmrd.lims = redu.lims, 
128 |             dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 
129 |             point.size = redu.figure.point.size, 
130 |             show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 
131 |         )
132 |         
133 |         ggsave( 
134 |             file.path( redu.figure.dir, 
135 |                 sprintf( "%s_all_conditions__%s.png", redu.figure.plot, 
136 |                     fcs.channel.label[ fch ] ) ), 
137 |             redu.plot, 
138 |             width = the.dmrd.figure.width.multi, 
139 |             height = the.dmrd.figure.height.multi 
140 |         )
141 |     }
142 |     
143 |     # plot each condition colored by cluster
144 |     
145 |     for ( cond in fcs.condition )
146 |     {
147 |         dmrd.cond.idx <- which( dmrd.event.condition == cond )
148 |         
149 |         redu.cond.data <- redu.data[ dmrd.cond.idx, ]
150 |         dmrd.cond.event.cluster <- dmrd.event.cluster[ dmrd.cond.idx ]
151 |         
152 |         redu.plot <- plot.dimensionality.reduction( 
153 |             redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 
154 |             event.partition = dmrd.cond.event.cluster, 
155 |             partition.label = fcs.cluster.label, 
156 |             partition.color = fcs.cluster.color, 
157 |             dmrd.lims = redu.lims, 
158 |             point.size = redu.figure.point.size, 
159 |             show.guide = TRUE 
160 |         )
161 |         
162 |         ggsave( 
163 |             file.path( redu.figure.dir, 
164 |                 sprintf( "%s_%s__cluster.png", redu.figure.plot, 
165 |                     fcs.condition.label[ cond ] ) ), 
166 |             redu.plot, 
167 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
168 |         )
169 |     }
170 |     
171 |     # plot each condition colored by sample
172 |     
173 |     for ( cond in fcs.condition )
174 |     {
175 |         dmrd.cond.idx <- which( dmrd.event.condition == cond )
176 |         
177 |         redu.cond.data <- redu.data[ dmrd.cond.idx, ]
178 |         dmrd.cond.event.sample <- dmrd.event.sample[ dmrd.cond.idx ]
179 |         
180 |         redu.plot <- plot.dimensionality.reduction( 
181 |             redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 
182 |             event.partition = dmrd.cond.event.sample, 
183 |             partition.label = flow.sample.label, 
184 |             partition.color = adjustcolor( flow.sample.color.single, 
185 |                 alpha.f = fcs.dmrd.color.alpha ), 
186 |             dmrd.lims = redu.lims, 
187 |             point.size = redu.figure.point.size, 
188 |             show.guide = TRUE 
189 |         )
190 |         
191 |         ggsave( 
192 |             file.path( redu.figure.dir, 
193 |                 sprintf( "%s_%s__sample.png", redu.figure.plot, 
194 |                     fcs.condition.label[ cond ] ) ), 
195 |             redu.plot, 
196 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
197 |         )
198 |     }
199 | }
200 | 
201 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/00_src/plot_dimensionality_reduction.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # plots one dimensionality reduction figure
  3 | 
  4 | 
  5 | plot.dimensionality.reduction <- function( 
  6 |     dmrd.x, dmrd.y, 
  7 |     event.group = NULL, group.label = NULL, 
  8 |     event.partition = NULL, partition.label = NULL, partition.color = NULL, 
  9 |     event.level = NULL, 
 10 |     dmrd.lims = NULL, dmrd.nrow = NULL, dmrd.ncol = NULL, 
 11 |     point.size = NULL, show.guide = FALSE, guide.name = NULL 
 12 | )
 13 | {
 14 |     if ( ! is.null( event.partition ) )
 15 |     {
 16 |         if ( is.null( event.group ) )
 17 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.partition )
 18 |         else
 19 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.partition, event.group )
 20 |         
 21 |         if ( is.null( partition.label ) )
 22 |             plot.label <- waiver()
 23 |         else
 24 |             plot.label <- partition.label
 25 |         
 26 |         if ( show.guide )
 27 |             plot.guide <- guide_legend( keyheight = 0.8, 
 28 |                 override.aes = list( size = fcs.dmrd.legend.point.size ), 
 29 |                 label.theme = element_text( size = fcs.dmrd.legend.label.size ), 
 30 |                 title = guide.name, title.position = "top", 
 31 |                 title.theme = element_text( size = fcs.dmrd.legend.title.size ) )
 32 |         else
 33 |             plot.guide <- FALSE
 34 |         
 35 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, 
 36 |                 y = dmrd.y, color = event.partition ) ) + 
 37 |             scale_color_manual( values = partition.color, labels = plot.label, 
 38 |                 guide = plot.guide )
 39 |     }
 40 |     else if ( ! is.null( event.level ) )
 41 |     {
 42 |         if ( is.null( event.group ) )
 43 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.level )
 44 |         else
 45 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.level, event.group )
 46 |         
 47 |         if ( show.guide )
 48 |             plot.guide <- guide_colorbar( barwidth = 0.8, barheight = 10, 
 49 |                 title = guide.name, title.position = "top", 
 50 |                 title.theme = element_text( size = fcs.dmrd.legend.title.size ) )
 51 |         else
 52 |             plot.guide <- FALSE
 53 |         
 54 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y, 
 55 |                 color = event.level ) ) + 
 56 |             scale_color_gradientn( colors = fcs.dmrd.density.palette, 
 57 |                 labels = NULL, guide = plot.guide )
 58 |     }
 59 |     else
 60 |     {
 61 |         if ( is.null( event.group ) )
 62 |             ggdf <- data.frame( dmrd.x, dmrd.y )
 63 |         else
 64 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.group )
 65 |         
 66 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y ) )
 67 |     }
 68 |     
 69 |     if ( is.null( dmrd.lims ) ) {
 70 |         dmrd.xy.max <- max( abs( c( dmrd.x, dmrd.y ) ) )
 71 |         dmrd.lims <- dmrd.xy.max * c( -1, 1 )
 72 |     }
 73 |     
 74 |     if ( is.null( point.size ) )
 75 |         point.size <- 0.5
 76 |     
 77 |     dmrd.plot <- dmrd.plot + 
 78 |         coord_fixed() + 
 79 |         lims( x = dmrd.lims, y = dmrd.lims ) + 
 80 |         geom_point( shape = 20, stroke = 0, size = point.size ) + 
 81 |         theme_bw() + 
 82 |         theme( axis.title = element_blank(), 
 83 |             axis.text  = element_blank(), 
 84 |             axis.ticks = element_blank(), 
 85 |             panel.grid.major = element_blank(), 
 86 |             panel.grid.minor = element_blank() )
 87 |     
 88 |     if ( ! is.null( event.group ) )
 89 |     {
 90 |         dmrd.plot <- dmrd.plot + 
 91 |             facet_wrap( vars( event.group ), 
 92 |                 labeller = as_labeller( group.label ), 
 93 |                 nrow = dmrd.nrow, ncol = dmrd.ncol ) + 
 94 |             theme( strip.background = element_rect( fill = "white" ), 
 95 |                 strip.text = element_text( size = fcs.dmrd.group.title.size ) )
 96 |     }
 97 |     
 98 |     dmrd.plot
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/00_src/plot_dimensionality_reduction_embedSOM.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # plots one dimensionality reduction figure
  3 | 
  4 | 
  5 | plot.dimensionality.reduction.embedsom <- function( 
  6 |     dmrd.x, dmrd.y, 
  7 |     event.group = NULL, group.label = NULL, 
  8 |     event.partition = NULL, partition.label = NULL, partition.color = NULL, 
  9 |     event.level = NULL, 
 10 |     dmrd.lims = NULL, dmrd.nrow = NULL, dmrd.ncol = NULL, 
 11 |     point.size = NULL, show.guide = FALSE, guide.name = NULL 
 12 | )
 13 | {
 14 |     if ( ! is.null( event.partition ) )
 15 |     {
 16 |         if ( is.null( event.group ) )
 17 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.partition )
 18 |         else
 19 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.partition, event.group )
 20 |         
 21 |         if ( is.null( partition.label ) )
 22 |             plot.label <- waiver()
 23 |         else
 24 |             plot.label <- partition.label
 25 |         
 26 |         if ( show.guide )
 27 |             plot.guide <- guide_legend( keyheight = 0.8, 
 28 |                 override.aes = list( size = fcs.dmrd.legend.point.size ), 
 29 |                 label.theme = element_text( size = fcs.dmrd.legend.label.size ), 
 30 |                 title = guide.name, title.position = "top", 
 31 |                 title.theme = element_text( size = fcs.dmrd.legend.title.size ) )
 32 |         else
 33 |             plot.guide <- FALSE
 34 |         
 35 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, 
 36 |                 y = dmrd.y, color = event.partition ) ) + 
 37 |             scale_color_manual( values = partition.color, labels = plot.label, 
 38 |                 guide = plot.guide )
 39 |     }
 40 |     else if ( ! is.null( event.level ) )
 41 |     {
 42 |         if ( is.null( event.group ) )
 43 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.level )
 44 |         else
 45 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.level, event.group )
 46 |         
 47 |         if ( show.guide )
 48 |             plot.guide <- guide_colorbar( barwidth = 0.8, barheight = 10, 
 49 |                 title = guide.name, title.position = "top", 
 50 |                 title.theme = element_text( size = fcs.dmrd.legend.title.size ) )
 51 |         else
 52 |             plot.guide <- FALSE
 53 |         
 54 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y, 
 55 |                 color = event.level ) ) + 
 56 |             scale_color_gradientn( colors = fcs.dmrd.density.palette, 
 57 |                 labels = NULL, guide = plot.guide )
 58 |     }
 59 |     else
 60 |     {
 61 |         if ( is.null( event.group ) )
 62 |             ggdf <- data.frame( dmrd.x, dmrd.y )
 63 |         else
 64 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.group )
 65 |         
 66 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y ) )
 67 |     }
 68 |     
 69 |     if ( is.null( dmrd.lims ) ) {
 70 |         dmrd.xy.max <- max( abs( c( dmrd.x, dmrd.y ) ) )
 71 |         dmrd.lims <- dmrd.xy.max * c( -1, 1 )
 72 |     }
 73 |     
 74 |     if ( is.null( point.size ) )
 75 |         point.size <- 0.5
 76 |     
 77 |     dmrd.plot <- dmrd.plot + 
 78 |         coord_fixed() + 
 79 |         lims( x = dmrd.lims, y = dmrd.lims ) + 
 80 |         geom_point( shape = 20, stroke = 0, size = point.size, alpha = fcs.dmrd.color.alpha ) + 
 81 |         theme_bw() + 
 82 |         theme( axis.title = element_blank(), 
 83 |             axis.text  = element_blank(), 
 84 |             axis.ticks = element_blank(), 
 85 |             panel.grid.major = element_blank(), 
 86 |             panel.grid.minor = element_blank() )
 87 |     
 88 |     if ( ! is.null( event.group ) )
 89 |     {
 90 |         dmrd.plot <- dmrd.plot + 
 91 |             facet_wrap( vars( event.group ), 
 92 |                 labeller = as_labeller( group.label ), 
 93 |                 nrow = dmrd.nrow, ncol = dmrd.ncol ) + 
 94 |             theme( strip.background = element_rect( fill = "white" ), 
 95 |                 strip.text = element_text( size = fcs.dmrd.group.title.size ) )
 96 |     }
 97 |     
 98 |     dmrd.plot
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/Exporting data in csv format.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AdrianListon/Cross-Entropy-test/8652d6a98a7ba9ee1bb81c736e45ce9ad191abc6/CSV-based flow analysis/Exporting data in csv format.PNG


--------------------------------------------------------------------------------
/CSV-based flow analysis/Setting axis in FlowJo for Aurora data.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AdrianListon/Cross-Entropy-test/8652d6a98a7ba9ee1bb81c736e45ce9ad191abc6/CSV-based flow analysis/Setting axis in FlowJo for Aurora data.pptx


--------------------------------------------------------------------------------
/CSV-based flow analysis/analyze_flow_cytometry_csv.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # script for the analysis of flow cytometry data---------------
  3 | 
  4 | library( digest )
  5 | require( dunn.test )
  6 | library( ggplot2 )
  7 | library( ggridges )
  8 | require( RANN )
  9 | library( RColorBrewer )
 10 | library( reshape2 )
 11 | library( Rtsne )
 12 | library( umap )
 13 | library( dplyr )
 14 | library( EmbedSOM )
 15 | 
 16 | # function to set random seed depending on base number and string---------------
 17 | 
 18 | set.seed.here <- function( seed.base, seed.char )
 19 | {
 20 |     seed.add <- strtoi( substr( digest( seed.char, "xxhash32" ), 2, 8 ), 16 )
 21 |     seed.new <- seed.base + seed.add    
 22 |     set.seed( seed.new )
 23 |     invisible( seed.new )
 24 | }
 25 | 
 26 | # source parameters---------------
 27 | 
 28 | param.filename <- "./analyze_flow_cytometry_parameter_csv.r"
 29 | 
 30 | source( param.filename )
 31 | 
 32 | # select channels for analysis---------------
 33 | 
 34 | channel.selection.data <- read.csv( list.files(fcs.data.dir, "\\.csv$", full.names = TRUE)[1] )
 35 | channels <- data.frame( desc = unname( colnames( channel.selection.data ) ))
 36 | for( i in 1:dim(channels)[1] ){
 37 |     channels$out[i] = paste0("\"", channels$desc[i], "\" = \"", channels$desc[i], "\",\n") 
 38 | }
 39 | descs.filtered <- channels$desc[!is.na(channels$desc) & channels$desc != '-'] 
 40 | channels.filtered <- filter(channels, desc %in% descs.filtered)
 41 | 
 42 | # paste the output below into fcs.channel in the parameter file, deleting unwanted channels
 43 | cat('"', paste0(channels.filtered$desc, collapse = '","'), '"', sep = "")
 44 | 
 45 | source( param.filename )
 46 | 
 47 | # add the output to fcs.channel.label in the parameter file
 48 | temp <- channels$out[channels$desc %in% fcs.channel]
 49 | temp[length(temp)] <- gsub(',', '', temp[length(temp)])
 50 | cat(paste(temp, collapse = ""))
 51 | 
 52 | source( param.filename )
 53 | 
 54 | 
 55 | # check consistency in parameters---------------
 56 | 
 57 | stopifnot( names( fcs.channel.label ) == fcs.channel )
 58 | 
 59 | stopifnot( names( fcs.condition.label ) == fcs.condition )
 60 | stopifnot( names( fcs.condition.color ) == fcs.condition )
 61 | stopifnot( names( fcs.condition.line.type ) == fcs.condition )
 62 | 
 63 | stopifnot( names( fcs.cluster.label ) == fcs.cluster )
 64 | stopifnot( names( fcs.cluster.color ) == fcs.cluster )
 65 | stopifnot( names( fcs.cluster.line.type ) == fcs.cluster )
 66 | 
 67 | stopifnot( sum( is.null( fcs.dmrd.data.sample.n ), 
 68 |     is.null( fcs.dmrd.data.sample.n.per.condition ), 
 69 |     is.null( fcs.dmrd.data.sample.n.per.sample ) ) >= 2 )
 70 | 
 71 | stopifnot( unlist( fcs.cluster.group ) >= 1 & 
 72 |     unlist( fcs.cluster.group ) <= fcs.cluster.n )
 73 | 
 74 | 
 75 | # source functions---------------
 76 | 
 77 | source( file.path( fcs.src.dir, "ce_diff_test.r" ) )
 78 | source( file.path( fcs.src.dir, "ce_diff_test_tsne.r" ) )
 79 | source( file.path( fcs.src.dir, "ce_diff_test_umap.r" ) )
 80 | source( file.path( fcs.src.dir, "plot_all_dmrd_figures.r" ) )
 81 | source( file.path( fcs.src.dir, "plot_all_embedsom_figures.r" ) )
 82 | source( file.path( fcs.src.dir, "plot_dimensionality_reduction.r" ) )
 83 | source( file.path( fcs.src.dir, "plot_dimensionality_reduction_embedSOM.r" ) )
 84 | 
 85 | 
 86 | # create dirs---------------
 87 | 
 88 | figure.dir <- c( 
 89 |     fcs.ce.diff.tsne.figure.dir, 
 90 |     fcs.ce.diff.umap.figure.dir, 
 91 |     fcs.density.figure.dir, 
 92 |     fcs.heatmap.figure.dir, 
 93 |     fcs.histogram.figure.dir, 
 94 |     fcs.tsne.figure.dir, 
 95 |     fcs.umap.figure.dir,
 96 |     fcs.embedsom.figure.dir,
 97 |     fcs.ce.diff.embedsom.figure.dir
 98 | )
 99 | 
100 | table.dir <- fcs.cluster.table.dir
101 | 
102 | data.dir <- sapply( names( fcs.cluster.group ), function( fcg.name ) 
103 |     sprintf( "%s/%s_%s", fcs.cluster.data.dir, fcs.cluster.data, fcg.name ) )
104 | 
105 | for ( the.dir in c( figure.dir, table.dir, data.dir ) )
106 |     if ( ! file.exists( the.dir ) )
107 |         dir.create( the.dir, recursive = TRUE )
108 | 
109 | 
110 | # read csv data---------------
111 | 
112 | flow.data.filename.all <- list.files( fcs.data.dir, "\\.csv$" )
113 | 
114 | flow.data.filename <- grep( paste0( fcs.condition, collapse = "|" ), 
115 |     flow.data.filename.all, value = TRUE )
116 | 
117 | sample.name.format <- paste0( "%s.%0", fcs.sample.number.width, "d" )
118 | event.name.format <- paste0( "%s.%0", fcs.event.number.width, "d" )
119 | 
120 | flow.data.filename.sample <- rep( "", length( flow.data.filename ) )
121 | names( flow.data.filename.sample ) <- flow.data.filename
122 | 
123 | sample.idx.next <- rep( 1, fcs.condition.n )
124 | names( sample.idx.next ) <- fcs.condition
125 | 
126 | flow.data <- lapply( flow.data.filename, function( flow.data.fn ) {
127 |  #   cat( flow.data.fn, "\n" )
128 |     sample.data <- as.matrix(read.csv( file.path( fcs.data.dir, flow.data.fn ) ))
129 |     
130 |     condition <- fcs.condition[ sapply( fcs.condition, grepl, flow.data.fn ) ]
131 |     stopifnot( length( condition ) == 1 )
132 |     
133 |     if ( ! all( fcs.channel %in% colnames( sample.data ) ) )
134 |     {
135 |         cat( sprintf( "File: %s\n", flow.data.fn ) )
136 |         print( sort( fcs.channel[ 
137 |             ! fcs.channel %in% colnames( sample.data ) ] ) )
138 |         print( sort( colnames( sample.data ) ) )
139 |         stop( "mismatch in names of channels" )
140 |     }
141 | 
142 |     sample.name <- sprintf( sample.name.format, condition, 
143 |         sample.idx.next[ condition ] )
144 |     
145 |     sample.data <- sample.data[ , fcs.channel, drop = FALSE ]
146 |     
147 |     event.n <- nrow( sample.data )
148 |     if ( event.n > 0 ) {
149 |         event.name <- sprintf( event.name.format, sample.name, 1 : event.n )
150 |         rownames( sample.data ) <- event.name
151 |     }
152 |     
153 |     flow.data.filename.sample[ flow.data.fn ] <<- sample.name
154 |     sample.idx.next[ condition ] <<- sample.idx.next[ condition ] + 1
155 |     
156 |     sample.data
157 | } )
158 | 
159 | flow.data <- do.call( rbind, flow.data )
160 | 
161 | # define samples---------------
162 | flow.sample <- flow.data.filename.sample
163 | names( flow.sample ) <- NULL
164 | 
165 | stopifnot( flow.sample == 
166 |     unique( sub( "\\.[0-9]+$", "", rownames( flow.data ) ) ) )
167 | 
168 | flow.sample.n <- length ( flow.sample )
169 | 
170 | flow.sample.condition <- factor( sub( "\\.[0-9]+$", "", flow.sample ), 
171 |     levels = fcs.condition )
172 | names( flow.sample.condition ) <- flow.sample
173 | 
174 | # reorder samples to follow order of conditions
175 | flow.sample <- flow.sample[ order( flow.sample.condition ) ]
176 | flow.sample.condition <- flow.sample.condition[ flow.sample ]
177 | 
178 | flow.sample.label <- sapply( flow.sample, function( fs ) {
179 |     sample.cond <- sub( "^(.*)\\.[0-9]+$", "\\1", fs )
180 |     sample.num <- sub( "^.*\\.([0-9]+)$", "\\1", fs )
181 |     sprintf( "%s-%s", fcs.condition.label[ sample.cond ], sample.num )
182 | } )
183 | 
184 | flow.sample.filename <- sapply( flow.sample, function( fs  ) 
185 |     names( which( flow.data.filename.sample == fs ) ) )
186 | 
187 | # define events
188 | flow.event <- rownames( flow.data )
189 | flow.event.n <- length( flow.event )
190 | 
191 | flow.event.sample <- factor( sub( "\\.[0-9]+$", "", flow.event ), 
192 |     levels = flow.sample )
193 | names( flow.event.sample ) <- flow.event
194 | 
195 | flow.event.condition <- factor( sub( "\\.[0-9]+$", "", flow.event.sample ), 
196 |     levels = fcs.condition )
197 | names( flow.event.condition ) <- flow.event
198 | 
199 | # reorder events to follow order of samples
200 | flow.event.order <- order( flow.event.sample )
201 | 
202 | flow.data <- flow.data[ flow.event.order, ]
203 | flow.event <- flow.event[ flow.event.order ]
204 | flow.event.sample <- flow.event.sample[ flow.event.order ]
205 | flow.event.condition <- flow.event.condition[ flow.event.order ]
206 | 
207 | flow.event.sample.n <- as.vector( table( flow.event.sample ) )
208 | names( flow.event.sample.n ) <- flow.sample
209 | 
210 | flow.event.condition.n <- as.vector( table( flow.event.condition ) )
211 | names( flow.event.condition.n ) <- fcs.condition
212 | 
213 | flow.data.filename
214 | 
215 | table( flow.sample.condition )
216 | 
217 | str( flow.data )
218 | flow.event.condition.n
219 | flow.event.sample.n
220 | 
221 | 
222 | # define figure parameters for samples---------------
223 | 
224 | flow.sample.color <- fcs.condition.color[ flow.sample.condition ]
225 | names( flow.sample.color ) <- flow.sample
226 | 
227 | flow.sample.color.single <- unlist( lapply( fcs.condition, function( fc ) {
228 |     cond.sample.n <- sum( flow.sample.condition == fc )
229 |     rep( 
230 |         fcs.color.pool, 
231 |         ceiling( cond.sample.n / fcs.color.pool.n ) 
232 |     )[ 1 : cond.sample.n ]
233 | } ) )
234 | names( flow.sample.color.single ) <- flow.sample
235 | 
236 | flow.sample.line.type <- fcs.condition.line.type[ flow.sample.condition ]
237 | names( flow.sample.line.type ) <- flow.sample
238 | 
239 | flow.sample.line.type.single <- unlist( lapply( fcs.condition, function( fc ) {
240 |     cond.sample.n <- sum( flow.sample.condition == fc )
241 |     rep( 
242 |         fcs.line.type.pool, 
243 |         ceiling( cond.sample.n / fcs.line.type.pool.n ) 
244 |     )[ 1 : cond.sample.n ]
245 | } ) )
246 | names( flow.sample.line.type.single ) <- flow.sample
247 | 
248 | flow.ce.diff.figure.dendrogram.weight.sample <- 
249 |     fcs.ce.diff.figure.dendrogram.weight.condition[ flow.sample.condition ]
250 | names( flow.ce.diff.figure.dendrogram.weight.sample ) <- flow.sample
251 | 
252 | 
253 | # plot density distributions of transformed data---------------
254 | 
255 | set.seed.here( fcs.seed.base, "plot density distributions of transformed data" )
256 | 
257 | {
258 | if ( ! is.null( fcs.density.data.sample.n ) && 
259 |     fcs.density.data.sample.n < flow.event.n )
260 |     density.data.idx <- sort( sample( flow.event.n, 
261 |         fcs.density.data.sample.n ) )
262 | else
263 |     density.data.idx <- 1 : flow.event.n
264 | }
265 | 
266 | density.data <- flow.data[ density.data.idx, ]
267 | 
268 | density.data.ggdf.all <- melt( density.data, 
269 |     varnames = c( "partition", "channel" ), value.name = "density.value" )
270 | density.data.ggdf.all$partition <- fcs.density.partition.all
271 | density.data.ggdf.all$channel <- as.character( density.data.ggdf.all$channel )
272 | 
273 | density.data.ggdf.condition <- lapply( fcs.condition, function( fc ) {
274 |     ggdf.condition <- melt( 
275 |         density.data[ flow.event.condition[ density.data.idx ] == fc, , 
276 |             drop = FALSE], 
277 |         varnames = c( "partition", "channel" ), 
278 |         value.name = "density.value" )
279 |     if ( nrow( ggdf.condition ) > 0 ) {
280 |         ggdf.condition$partition <- fc
281 |         ggdf.condition$channel <- as.character( ggdf.condition$channel )
282 |     }
283 |     ggdf.condition
284 | } )
285 | density.data.ggdf.condition <- do.call( rbind, density.data.ggdf.condition )
286 | 
287 | density.data.ggdf.sample <- lapply( flow.sample, function( fs ) {
288 |     ggdf.sample <- melt( 
289 |         density.data[ flow.event.sample[ density.data.idx ] == fs, , 
290 |             drop = FALSE ], 
291 |         varnames = c( "partition", "channel" ), 
292 |         value.name = "density.value" )
293 |     if ( nrow( ggdf.sample ) > 0 ) {
294 |         ggdf.sample$partition <- fs
295 |         ggdf.sample$channel <- as.character( ggdf.sample$channel )
296 |     }
297 |     ggdf.sample
298 | } )
299 | density.data.ggdf.sample <- do.call( rbind, density.data.ggdf.sample )
300 | 
301 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.condition, 
302 |     density.data.ggdf.sample )
303 | 
304 | density.data.partition <- c( fcs.density.partition.all, fcs.condition, 
305 |     flow.sample )
306 | density.data.partition.n <- length( density.data.partition )
307 | 
308 | density.data.partition.label <- c( fcs.density.partition.all.label, 
309 |     fcs.condition.label, flow.sample.label )
310 | 
311 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 
312 |     levels = density.data.partition )
313 | 
314 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 
315 |     levels = fcs.channel )
316 | 
317 | density.plot.color <- c( fcs.density.partition.all.color, fcs.condition.color, 
318 |     flow.sample.color )
319 | 
320 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 
321 |         y = partition, color = partition, fill = partition ) ) + 
322 |     geom_density_ridges( size = fcs.density.line.size, 
323 |         alpha = fcs.density.line.alpha, show.legend = FALSE ) + 
324 |     labs( x = NULL, y = NULL ) + 
325 |     scale_y_discrete( limits = rev( density.data.partition ), 
326 |         breaks = rev( density.data.partition ), 
327 |         labels = rev( density.data.partition.label ) ) + 
328 |     scale_color_manual( values = density.plot.color ) + 
329 |     scale_fill_manual( values = density.plot.color ) + 
330 |     facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 
331 |         channel = fcs.channel.label ) ) + 
332 |     theme_ridges( line_size = fcs.density.line.size, 
333 |         font_size = fcs.density.font.size ) + 
334 |     theme( strip.background = element_rect( fill = "white" ) )
335 | 
336 | ggsave( 
337 |     file.path( fcs.density.figure.dir, 
338 |         sprintf( "%s.png", fcs.density.figure.sample ) ), 
339 |     density.plot, 
340 |     width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 
341 |     height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 
342 | )
343 | 
344 | 
345 | # select data for dimensionality reduction---------------
346 | 
347 | set.seed.here( fcs.seed.base, "select data for dimensionality reduction" )
348 | 
349 | {
350 | if ( ! is.null( fcs.dmrd.data.sample.n ) )
351 | {
352 |     if ( fcs.dmrd.data.sample.n < flow.event.n )
353 |         dmrd.data.idx <- sort( sample( flow.event.n, fcs.dmrd.data.sample.n ) )
354 |     else
355 |         dmrd.data.idx <- 1 : flow.event.n
356 | }
357 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.condition ) )
358 | {
359 |     dmrd.data.idx <- unlist( sapply( fcs.condition, function( fc ) {
360 |         fc.idx <- which( flow.event.condition == fc )
361 |         if ( fcs.dmrd.data.sample.n.per.condition < length( fc.idx ) )
362 |             sort( sample( fc.idx, fcs.dmrd.data.sample.n.per.condition ) )
363 |         else
364 |             fc.idx
365 |     } ) )
366 |     names( dmrd.data.idx ) <- NULL
367 | }
368 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.sample ) )
369 | {
370 |     dmrd.data.idx <- unlist( sapply( flow.sample, function( fs ) {
371 |         fs.idx <- which( flow.event.sample == fs )
372 |         if ( fcs.dmrd.data.sample.n.per.sample < length( fs.idx ) )
373 |             sort( sample( fs.idx, fcs.dmrd.data.sample.n.per.sample ) )
374 |         else
375 |             fs.idx
376 |     } ) )
377 |     names( dmrd.data.idx ) <- NULL
378 | }
379 | else
380 |     dmrd.data.idx <- 1 : flow.event.n
381 | }
382 | 
383 | dmrd.data <- flow.data[ dmrd.data.idx, ]
384 | 
385 | dmrd.event.sample <- flow.event.sample[ dmrd.data.idx ]
386 | dmrd.event.condition <- flow.event.condition[ dmrd.data.idx ]
387 | 
388 | str( dmrd.data )
389 | table( dmrd.event.condition )
390 | table( dmrd.event.sample )
391 | 
392 | 
393 | # get flowsom clusters---------------
394 | 
395 | set.seed.here( fcs.seed.base, "get flowsom clusters" )
396 | 
397 | # build som objects--embedsom method
398 | flow.som <- EmbedSOM::SOM(flow.data, xdim = fcs.flow.som.dim, 
399 |                           ydim = fcs.flow.som.dim, batch = TRUE,
400 |                           parallel = TRUE, threads = fcs.tsne.thread.n )
401 | 
402 | # get clusters
403 | embedsom.cluster <- hclust( dist( flow.som$codes ) )
404 | flow.som.event.cluster <- cutree( embedsom.cluster, fcs.cluster.n )[flow.som$mapping[ , 1] ]
405 | 
406 | # check quality of flowSOM clustering (optional)---------------
407 | embed.som <- EmbedSOM::EmbedSOM(data=flow.data, map=flow.som, 
408 |                                 parallel = TRUE, threads = fcs.tsne.thread.n)
409 | 
410 | # reorder clusters from bigger to smaller---------------
411 | flow.som.cluster.rank <- 1 + fcs.cluster.n - 
412 |     rank( table( flow.som.event.cluster ), ties.method = "last" )
413 | flow.som.event.cluster <- flow.som.cluster.rank[ flow.som.event.cluster ]
414 | names( flow.som.event.cluster ) <- NULL
415 | 
416 | # set clusters as a factor
417 | flow.som.event.cluster <- factor( flow.som.event.cluster, 
418 |     levels = 1 : fcs.cluster.n )
419 | levels( flow.som.event.cluster ) <- fcs.cluster
420 | 
421 | # reorder events
422 | flow.event.cluster <- flow.som.event.cluster
423 | dmrd.event.cluster <- flow.event.cluster[ dmrd.data.idx ]
424 | 
425 | flow.event.cluster.n <- as.vector( table( flow.event.cluster ) )
426 | names( flow.event.cluster.n ) <- fcs.cluster
427 | 
428 | dmrd.event.cluster.n <- as.vector( table( dmrd.event.cluster ) )
429 | names( dmrd.event.cluster.n ) <- fcs.cluster
430 | 
431 | length( flow.event.cluster )
432 | table( flow.event.cluster )
433 | 
434 | length( dmrd.event.cluster )
435 | table( dmrd.event.cluster )
436 | 
437 | 
438 | # save cluster counts---------------
439 | 
440 | flow.cluster.count <- sapply( fcs.cluster, function( fc )
441 |     table( flow.event.sample[ flow.event.cluster == fc ] ) )
442 | 
443 | stopifnot( rownames( flow.cluster.count ) == flow.sample )
444 | stopifnot( colnames( flow.cluster.count ) == fcs.cluster )
445 | 
446 | rownames( flow.cluster.count ) <- flow.sample.label
447 | colnames( flow.cluster.count ) <- fcs.cluster.label
448 | 
449 | write.csv( flow.cluster.count, file = file.path( fcs.cluster.table.dir, 
450 |     sprintf( "%s.csv", fcs.cluster.table.counts ) ) )
451 | 
452 | flow.cluster.percent <- flow.cluster.count/rowSums(flow.cluster.count)*100
453 | 
454 | write.csv( flow.cluster.percent, file = file.path( fcs.cluster.table.dir, 
455 |     sprintf( "%s.csv", "cluster_proportions" ) ) )
456 | 
457 | 
458 | # plot density distributions by cluster---------------
459 | 
460 | density.data.ggdf.cluster <- lapply( fcs.cluster, function( fc ) { 
461 |     ggdf.cluster <- melt( 
462 |         density.data[ flow.event.cluster[ density.data.idx ] == fc, , 
463 |             drop = FALSE ], 
464 |         varnames = c( "partition", "channel" ), 
465 |         value.name = "density.value" 
466 |     )
467 |     if ( nrow( ggdf.cluster ) > 0 ) {
468 |         ggdf.cluster$partition <- fc
469 |         ggdf.cluster$channel <- as.character( ggdf.cluster$channel )
470 |     }
471 |     ggdf.cluster
472 | } )
473 | density.data.ggdf.cluster <- do.call( rbind, density.data.ggdf.cluster )
474 | 
475 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.cluster )
476 | 
477 | density.data.partition <- c( fcs.density.partition.all, fcs.cluster )
478 | density.data.partition.n <- length( density.data.partition )
479 | 
480 | density.data.partition.label <- c( fcs.density.partition.all.label, 
481 |     fcs.cluster.label )
482 | 
483 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 
484 |     levels = density.data.partition )
485 | 
486 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 
487 |     levels = fcs.channel )
488 | 
489 | density.plot.color <- c( fcs.density.partition.all.color, fcs.cluster.color )
490 | 
491 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 
492 |         y = partition, color = partition, fill = partition ) ) + 
493 |     geom_density_ridges( size = fcs.density.line.size, 
494 |         alpha = fcs.density.line.alpha, show.legend = FALSE ) + 
495 |     labs( x = NULL, y = NULL ) + 
496 |     scale_y_discrete( limits = rev( density.data.partition ), 
497 |         breaks = rev( density.data.partition ), 
498 |         labels = rev( density.data.partition.label ) ) + 
499 |     scale_color_manual( values = density.plot.color ) + 
500 |     scale_fill_manual( values = density.plot.color ) + 
501 |     facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 
502 |         channel = fcs.channel.label ) ) + 
503 |     theme_ridges( line_size = fcs.density.line.size, 
504 |         font_size = fcs.density.font.size ) + 
505 |     theme( strip.background = element_rect( fill = "white" ) )
506 | 
507 | ggsave( 
508 |     file.path( fcs.density.figure.dir, 
509 |         sprintf( "%s.png", fcs.density.figure.cluster ) ), 
510 |     density.plot, 
511 |     width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 
512 |     height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 
513 | )
514 | 
515 | 
516 | # plot embedsom figures---------------
517 | 
518 | embedsom.data.max <- max( embed.som )
519 | 
520 | plot.all.embedsom.figures( 
521 |     embed.som, embedsom.data.max, 
522 |     fcs.embedsom.figure.lims.factor, fcs.embedsom.figure.point.size, 
523 |     fcs.embedsom.figure.dir, "embedSOM_plot", 
524 |     flow.data, flow.event.cluster, flow.event.condition 
525 | )
526 | 
527 | 
528 | # plot heatmaps---------------
529 | 
530 | heatmap.type <- c( "by_condition", "by_sample", "by_cluster" )
531 | 
532 | for ( ht in heatmap.type )
533 | {
534 |     if ( ht == "by_condition" ) {
535 |         flow.data.group.median <- apply( flow.data, 2, tapply, 
536 |             flow.event.condition, median )
537 |         margin.col <- fcs.heatmap.label.factor.col * 
538 |             max( nchar( fcs.condition.label ) )
539 |         group.label <- fcs.condition.label
540 |         group.color <- fcs.condition.color
541 |     }
542 |     else if ( ht == "by_sample" ) {
543 |         flow.data.group.median <- apply( flow.data, 2, tapply, 
544 |             flow.event.sample, median )
545 |         margin.col <- fcs.heatmap.label.factor.col * 
546 |             max( nchar( flow.sample.label ) )
547 |         group.label <- flow.sample.label
548 |         group.color <- flow.sample.color
549 |     }
550 |     else if ( ht == "by_cluster" ) {
551 |         flow.data.group.median <- apply( flow.data, 2, tapply, 
552 |             flow.event.cluster, median )
553 |         margin.col <- fcs.heatmap.label.factor.col * 
554 |             max( nchar( fcs.cluster.label ) )
555 |         group.label <- fcs.cluster.label
556 |         group.color <- fcs.cluster.color
557 |     }
558 |     else
559 |         stop( "wrong heatmap type" )
560 |     
561 |     margin.row <- 
562 |         fcs.heatmap.label.factor.row * max( nchar( fcs.channel.label ) )
563 |     
564 |     if ( margin.row < 5 )
565 |         margin.row <- 5
566 |     if ( margin.col < 5 )
567 |         margin.col <- 5
568 |     
569 |     png( filename = file.path( fcs.heatmap.figure.dir, 
570 |             sprintf( "%s_%s.png", fcs.heatmap.figure, ht ) ), 
571 |         width = fcs.heatmap.width, height = fcs.heatmap.height )
572 |     heatmap( t( flow.data.group.median ), scale = "row", 
573 |         labRow = fcs.channel.label,  labCol = group.label, 
574 |         col = fcs.heatmap.palette, ColSideColors = group.color, 
575 |         cexRow = fcs.heatmap.font.size, cexCol = fcs.heatmap.font.size, 
576 |         margins = c( margin.col, margin.row ) )
577 |     dev.off()
578 | }
579 | 
580 | 
581 | # plot histograms---------------
582 | 
583 | histogram.type <- c( "by_cluster", "by_sample", "by_assay" )
584 | 
585 | condition.sample.n <- as.vector( table( flow.sample.condition ) )
586 | 
587 | for ( ht in histogram.type )
588 | {
589 |     flow.data.cluster.fraction <- lapply( fcs.cluster, function( fc ) {
590 |         sample.event.n <- as.vector( table( 
591 |             flow.event.sample[ flow.event.cluster == fc ] ) )
592 |         
593 |         if ( ht == "by_cluster" )
594 |         {
595 |             sample.fraction <- log2( 
596 |                 ( sample.event.n / flow.event.cluster.n[ fc ] ) / 
597 |                 ( flow.event.sample.n / flow.event.n ) 
598 |             )
599 |             sample.fraction[ is.infinite( sample.fraction ) ] <- NA
600 |             fraction <- tapply( sample.fraction, flow.sample.condition, mean, 
601 |                 na.rm = TRUE )
602 |             std.err <- tapply( sample.fraction, flow.sample.condition, sd, 
603 |                 na.rm = TRUE ) / sqrt( condition.sample.n )
604 |         }
605 |         else if ( ht == "by_sample" )
606 |         {
607 |             sample.fraction <- 100 * sample.event.n / flow.event.sample.n
608 |             fraction <- tapply( sample.fraction, flow.sample.condition, mean, 
609 |                 na.rm = TRUE )
610 |             std.err <- tapply( sample.fraction, flow.sample.condition, sd, 
611 |                 na.rm = TRUE ) / sqrt( condition.sample.n )
612 |         }
613 |         else if ( ht == "by_assay" )
614 |         {
615 |             sample.fraction <- 100 * sample.event.n / flow.event.n
616 |             fraction <- tapply( sample.fraction, flow.sample.condition, sum, 
617 |                 na.rm = TRUE )
618 |             std.err <- NA
619 |         }
620 |         else
621 |             stop( "wrong histogram type" )
622 |         
623 |         data.frame( cluster = fc, condition = fcs.condition, fraction, std.err )
624 |     } )
625 |     
626 |     flow.data.cluster.fraction <- do.call( rbind, flow.data.cluster.fraction )
627 |     flow.data.cluster.fraction$condition <- factor( 
628 |         flow.data.cluster.fraction$condition, levels = fcs.condition )
629 |     
630 |     histogram.plot <- ggplot( flow.data.cluster.fraction, 
631 |         aes( x = cluster, y = fraction, fill = condition ) ) + 
632 |         geom_bar( stat = "identity", position = position_dodge2() ) + 
633 |         geom_errorbar( aes( ymin = fraction - std.err, 
634 |                 ymax = fraction + std.err ), 
635 |             size = fcs.histogram.error.bar.size,
636 |             position = position_dodge2() ) + 
637 |         scale_x_discrete( labels = fcs.cluster.label, name = "" ) + 
638 |         scale_fill_manual( values = fcs.condition.color, 
639 |             limits = fcs.condition, breaks = fcs.condition, 
640 |             labels = fcs.condition.label ) + 
641 |         theme_bw() + 
642 |         theme( 
643 |             axis.text = element_text( size = fcs.histogram.font.size, 
644 |                 angle = 90 ), 
645 |             axis.title = element_text( size = fcs.histogram.font.size + 1 ), 
646 |             legend.title = element_blank(), 
647 |             legend.text = element_text( size = fcs.histogram.font.size + 1 ), 
648 |             legend.key.size = unit( fcs.histogram.legend.key.size, "lines" ), 
649 |             panel.grid.major = element_blank(), 
650 |             panel.grid.minor = element_blank() 
651 |         )
652 |     
653 |     if ( ht == "by_cluster" )
654 |         histogram.plot <- histogram.plot + 
655 |             ylab( "Average log2( frequency ratio )" )
656 |     else if ( ht == "by_sample" )
657 |         histogram.plot <- histogram.plot + ylab( "Average frequency" )
658 |     else if ( ht == "by_assay" )
659 |         histogram.plot <- histogram.plot + ylab( "Frequency" )
660 |     
661 |     ggsave( 
662 |         file.path( fcs.histogram.figure.dir, 
663 |             sprintf( "%s_%s.png", fcs.histogram.figure, ht ) ), 
664 |         histogram.plot, 
665 |         width = fcs.histogram.width, 
666 |         height = fcs.histogram.height + 
667 |             fcs.histogram.label.factor.height * 
668 |                 max( nchar( fcs.cluster.label ) ) 
669 |     )
670 | }
671 | 
672 | 
673 | # calculate tsne representation---------------
674 | 
675 | {
676 |     if ( fcs.use.cached.results && file.exists( fcs.tsne.cache.file.path ) )
677 |     {
678 |         cat( "Using cached results for tsne\n" )
679 |         
680 |         load( fcs.tsne.cache.file.path )
681 |     }
682 |     else
683 |     {
684 |         set.seed.here( fcs.seed.base, "calculate tsne representation" )
685 |         
686 |         cat( "Calculating tsne\n" )
687 |         
688 |         tsne.result <- Rtsne( dmrd.data, perplexity = fcs.tsne.perplexity, 
689 |             theta = fcs.tsne.theta, 
690 |             exaggeration_factor = fcs.tsne.exaggeration.factor, 
691 |             max_iter = fcs.tsne.iter.n, check_duplicates = FALSE, pca = FALSE, 
692 |             num_threads = fcs.tsne.thread.n )
693 |         
694 |         save( tsne.result, file = fcs.tsne.cache.file.path )
695 |     }
696 | }
697 | 
698 | tsne.data <- tsne.result$Y
699 | 
700 | str( tsne.result )
701 | 
702 | 
703 | # plot tsne convergence---------------
704 | 
705 | tsne.iter <- 1 + 50 * ( 1 : length( tsne.result$itercosts ) - 1 )
706 | tsne.cost <- tsne.result$itercosts
707 | 
708 | png( filename = file.path( fcs.tsne.figure.dir, 
709 |         sprintf( "%s.png", fcs.tsne.figure.convergence ) ), 
710 |     width = fcs.tsne.figure.convergence.width, 
711 |     height = fcs.tsne.figure.convergence.height )
712 | par( mar = c( 5, 5.8, 2, 1.4 ) )
713 | plot( tsne.iter, tsne.cost, log = "y", xlab = "Iteration", 
714 |     ylab = "Total cost", xlim = c( 0, fcs.tsne.iter.n ), 
715 |     ylim = c( 1, max( tsne.cost ) ), col = "blue3", 
716 |     pch = 20, cex = 1, cex.lab = 2.5, cex.axis = 2 )
717 | abline( h = tsne.cost[ length( tsne.cost ) ], lty = 2, lwd = 1.5, 
718 |     col = "blue3" )
719 | dev.off()
720 | 
721 | 
722 | # plot tsne representations---------------
723 | 
724 | tsne.data.max <- max( abs( tsne.data ) )
725 | 
726 | plot.all.dmrd.figures( 
727 |     tsne.data, tsne.data.max, 
728 |     fcs.tsne.figure.lims.factor, fcs.tsne.figure.point.size, 
729 |     fcs.tsne.figure.dir, fcs.tsne.figure.plot, 
730 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
731 | )
732 | 
733 | 
734 | # calculate umap representation---------------
735 | 
736 | {
737 |     if ( fcs.use.cached.results && file.exists( fcs.umap.cache.file.path ) )
738 |     {
739 |         cat( "Using cached results for umap\n" )
740 |         
741 |         load( fcs.umap.cache.file.path )
742 |     }
743 |     else
744 |     {
745 |         set.seed.here( fcs.seed.base, "calculate umap representation" )
746 |         
747 |         cat( "Calculating umap\n" )
748 |         
749 |         umap.config <- umap.defaults
750 |         umap.config$n_epochs <- fcs.umap.iter.n
751 |         umap.config$verbose <- TRUE
752 |         
753 |         umap.result <- umap( dmrd.data, config = umap.config )
754 |         
755 |         save( umap.result, file = fcs.umap.cache.file.path )
756 |     }
757 | }
758 | 
759 | umap.data <- umap.result$layout
760 | dimnames( umap.data ) <- NULL
761 | 
762 | str( umap.result )
763 | 
764 | 
765 | # plot umap representations---------------
766 | 
767 | umap.data.max <- max( abs( apply( umap.data, 2, function( x ) 
768 |     quantile( x, c( 0.25, 0.75 ) ) + c( -1, 1 ) * IQR( x ) ) ) )
769 | 
770 | plot.all.dmrd.figures( 
771 |     umap.data, umap.data.max, 
772 |     fcs.umap.figure.lims.factor, fcs.umap.figure.point.size, 
773 |     fcs.umap.figure.dir, fcs.umap.figure.plot, 
774 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
775 | )
776 | 
777 | 
778 | # calculate cross-entropy test for tsne by condition---------------
779 | 
780 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" )
781 | 
782 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 
783 |     dmrd.data, tsne.data, 
784 |     dmrd.event.condition, 
785 |     partition.label = fcs.condition.label, 
786 |     partition.color = fcs.condition.color, 
787 |     partition.line.type = fcs.condition.line.type, 
788 |     base.test = fcs.ce.diff.base.test, 
789 |     base.dist = fcs.ce.diff.base.dist, 
790 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
791 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 
792 |     result = file.path( fcs.ce.diff.tsne.figure.dir, 
793 |         sprintf( "%s_condition.txt", fcs.ce.diff.tsne.result ) ), 
794 |     cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
795 |         sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.cdf ) ), 
796 |     dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
797 |         sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.dendrogram ) ) )
798 | 
799 | print( ce.diff.test.tsne.res )
800 | 
801 | 
802 | # calculate cross-entropy test for tsne by sample---------------
803 | 
804 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" )
805 | 
806 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 
807 |     dmrd.data, tsne.data, 
808 |     dmrd.event.sample, 
809 |     partition.label = flow.sample.label, 
810 |     partition.color = flow.sample.color, 
811 |     partition.line.type = flow.sample.line.type.single, 
812 |     base.test = fcs.ce.diff.base.test, 
813 |     base.dist = fcs.ce.diff.base.dist, 
814 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
815 |     dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 
816 |     result = file.path( fcs.ce.diff.tsne.figure.dir, 
817 |         sprintf( "%s_sample.txt", fcs.ce.diff.tsne.result ) ), 
818 |     cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
819 |         sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.cdf ) ), 
820 |     dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
821 |         sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.dendrogram ) ) )
822 | 
823 | print( ce.diff.test.tsne.res )
824 | 
825 | 
826 | # calculate cross-entropy test for tsne by cluster---------------
827 | 
828 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" )
829 | 
830 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 
831 |     dmrd.data, tsne.data, 
832 |     dmrd.event.cluster, 
833 |     partition.label = fcs.cluster.label, 
834 |     partition.color = fcs.cluster.color, 
835 |     partition.line.type = fcs.cluster.line.type, 
836 |     base.test = fcs.ce.diff.base.test, 
837 |     base.dist = fcs.ce.diff.base.dist, 
838 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
839 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 
840 |     result = file.path( fcs.ce.diff.tsne.figure.dir, 
841 |         sprintf( "%s_cluster.txt", fcs.ce.diff.tsne.result ) ), 
842 |     cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
843 |         sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.cdf ) ), 
844 |     dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
845 |         sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.dendrogram ) ) )
846 | 
847 | print( ce.diff.test.tsne.res )
848 | 
849 | 
850 | # calculate cross-entropy test for umap by condition---------------
851 | 
852 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" )
853 | 
854 | ce.diff.test.umap.res <- ce.diff.test.umap( 
855 |     umap.result$knn$distances, umap.result$knn$indexes, 
856 |     umap.data, umap.result$config, 
857 |     dmrd.event.condition, 
858 |     partition.label = fcs.condition.label, 
859 |     partition.color = fcs.condition.color, 
860 |     partition.line.type = fcs.condition.line.type, 
861 |     base.test = fcs.ce.diff.base.test, 
862 |     base.dist = fcs.ce.diff.base.dist, 
863 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
864 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 
865 |     result = file.path( fcs.ce.diff.umap.figure.dir, 
866 |         sprintf( "%s_condition.txt", fcs.ce.diff.umap.result ) ), 
867 |     cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 
868 |         sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.cdf ) ), 
869 |     dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 
870 |         sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.dendrogram ) ) )
871 | 
872 | print( ce.diff.test.umap.res )
873 | 
874 | 
875 | # calculate cross-entropy test for umap by sample---------------
876 | 
877 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" )
878 | 
879 | ce.diff.test.umap.res <- ce.diff.test.umap( 
880 |     umap.result$knn$distances, umap.result$knn$indexes, 
881 |     umap.data, umap.result$config, 
882 |     dmrd.event.sample, 
883 |     partition.label = flow.sample.label, 
884 |     partition.color = flow.sample.color, 
885 |     partition.line.type = flow.sample.line.type.single, 
886 |     base.test = fcs.ce.diff.base.test, 
887 |     base.dist = fcs.ce.diff.base.dist, 
888 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
889 |     dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 
890 |     result = file.path( fcs.ce.diff.umap.figure.dir, 
891 |         sprintf( "%s_sample.txt", fcs.ce.diff.umap.result ) ), 
892 |     cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 
893 |         sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.cdf ) ), 
894 |     dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 
895 |         sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.dendrogram ) ) )
896 | 
897 | print( ce.diff.test.umap.res )
898 | 
899 | 
900 | # calculate cross-entropy test for umap by cluster---------------
901 | 
902 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" )
903 | 
904 | ce.diff.test.umap.res <- ce.diff.test.umap( 
905 |     umap.result$knn$distances, umap.result$knn$indexes, 
906 |     umap.data, umap.result$config, 
907 |     dmrd.event.cluster, 
908 |     partition.label = fcs.cluster.label, 
909 |     partition.color = fcs.cluster.color, 
910 |     partition.line.type = fcs.cluster.line.type, 
911 |     base.test = fcs.ce.diff.base.test, 
912 |     base.dist = fcs.ce.diff.base.dist, 
913 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
914 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 
915 |     result = file.path( fcs.ce.diff.umap.figure.dir, 
916 |         sprintf( "%s_cluster.txt", fcs.ce.diff.umap.result ) ), 
917 |     cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 
918 |         sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.cdf ) ), 
919 |     dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 
920 |         sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.dendrogram ) ) )
921 | 
922 | print( ce.diff.test.umap.res )
923 | 
924 | 
925 | # calculate cross-entropy test for embedsom by condition---------------
926 | 
927 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for embedSOM" )
928 | 
929 | ce.diff.test.embedsom.res <- ce.diff.test.tsne( 
930 |     dmrd.data, embed.som, 
931 |     dmrd.event.condition, 
932 |     partition.label = fcs.condition.label, 
933 |     partition.color = fcs.condition.color, 
934 |     partition.line.type = fcs.condition.line.type, 
935 |     base.test = fcs.ce.diff.base.test, 
936 |     base.dist = fcs.ce.diff.base.dist, 
937 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
938 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 
939 |     result = file.path( fcs.ce.diff.embedsom.figure.dir, 
940 |                         sprintf( "%s_condition.txt", fcs.ce.diff.embedsom.result ) ), 
941 |     cdf.figure = file.path( fcs.ce.diff.embedsom.figure.dir, 
942 |                             sprintf( "%s_condition.png", fcs.ce.diff.embedsom.figure.cdf ) ), 
943 |     dendrogram.figure = file.path( fcs.ce.diff.embedsom.figure.dir, 
944 |                                    sprintf( "%s_condition.png", fcs.ce.diff.embedsom.figure.dendrogram ) ) )
945 | 
946 | print( ce.diff.test.embedsom.res )
947 | 
948 | 
949 | # output session info---------------
950 | 
951 | sessionInfo()
952 | 
953 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/analyze_flow_cytometry_parameter_csv.R:
--------------------------------------------------------------------------------
  1 | # Analysis of 20210608 tSNE-diff data
  2 | 
  3 | 
  4 | # data parameters---------------
  5 | 
  6 | # Tip: create two folders inside your analysis directory called "Data" and "Output".
  7 | # Put the fcs files in the Data folder.
  8 | # Put the parameter file in the Output folder. Double click on it to start Rstudio.
  9 | # Opening in this way will set your working directory to the Output folder.
 10 | # Type getwd(). Copy the result into the line below, swapping "Data" for "Output"
 11 | fcs.data.dir <- "D:/tSNE-diff paper/Analysis/20210608 tSNE-diff flow/Data/csv 20k events"
 12 | 
 13 | fcs.condition <- c( "LN", "Spleen",
 14 |                     "siLPL" )
 15 | 
 16 | fcs.condition.label <- c( 
 17 |     "LN" = "LN", "Spleen" = "Spleen",
 18 |     "siLPL" = "siLPL"
 19 | )
 20 | 
 21 | # Run the analyze_flow_cytometry_csv script through line 50 and insert the output below.
 22 | # Remove unwanted channels.
 23 | 
 24 | fcs.channel <- c( 
 25 |   "RORgT","CD44","Gr.1","IgM","F4_80","Ki67","CD90.2","CCR9",
 26 |   "TCRgd","PDCA.1","CD11c","Ly.6C","CD103","IgD","NK1.1",
 27 |   "CTLA.4","c.Kit","CD62L","GITR","CD150","CXCR3","Siglec.F",
 28 |   "TCRb","PD.1","XCR1","CD127","CCR2","CD45","CD4","CD8","CD3",
 29 |   "CD19","CD11b","CD38","GATA.3","CD86","Foxp3","CD172a","CD64",
 30 |   "Helios","CD24",
 31 |   "MHCII","NKp46","CD69","B220","CD25","ICOS","KLRG1","T.bet"  
 32 | )
 33 | 
 34 | fcs.channel.label <- c( 
 35 |   "RORgT" = "RORgT",
 36 |   "CD44" = "CD44",
 37 |   "Gr.1" = "Gr.1",
 38 |   "IgM" = "IgM",
 39 |   "F4_80" = "F4_80",
 40 |   "Ki67" = "Ki67",
 41 |   "CD90.2" = "CD90.2",
 42 |   "CCR9" = "CCR9",
 43 |   "TCRgd" = "TCRgd",
 44 |   "PDCA.1" = "PDCA.1",
 45 |   "CD11c" = "CD11c",
 46 |   "Ly.6C" = "Ly.6C",
 47 |   "CD103" = "CD103",
 48 |   "IgD" = "IgD",
 49 |   "NK1.1" = "NK1.1",
 50 |   "CTLA.4" = "CTLA.4",
 51 |   "c.Kit" = "c.Kit",
 52 |   "CD62L" = "CD62L",
 53 |   "GITR" = "GITR",
 54 |   "CD150" = "CD150",
 55 |   "CXCR3" = "CXCR3",
 56 |   "Siglec.F" = "Siglec.F",
 57 |   "TCRb" = "TCRb",
 58 |   "PD.1" = "PD.1",
 59 |   "XCR1" = "XCR1",
 60 |   "CD127" = "CD127",
 61 |   "CCR2" = "CCR2",
 62 |   "CD45" = "CD45",
 63 |   "CD4" = "CD4",
 64 |   "CD8" = "CD8",
 65 |   "CD3" = "CD3",
 66 |   "CD19" = "CD19",
 67 |   "CD11b" = "CD11b",
 68 |   "CD38" = "CD38",
 69 |   "GATA.3" = "GATA.3",
 70 |   "CD86" = "CD86",
 71 |   "Foxp3" = "Foxp3",
 72 |   "CD172a" = "CD172a",
 73 |   "CD64" = "CD64",
 74 |   "Helios" = "Helios",
 75 |   "CD24" = "CD24",
 76 |   "MHCII" = "MHCII",
 77 |   "NKp46" = "NKp46",
 78 |   "CD69" = "CD69",
 79 |   "B220" = "B220",
 80 |   "CD25" = "CD25",
 81 |   "ICOS" = "ICOS",
 82 |   "KLRG1" = "KLRG1",
 83 |   "T.bet" = "T.bet"
 84 | )
 85 | 
 86 | fcs.condition.n <- length( fcs.condition )
 87 | fcs.channel.n <- length( fcs.channel )
 88 | 
 89 | 
 90 | # general parameters---------------
 91 | # Tip: edit the directory to match the location of the script files. 
 92 | fcs.src.dir <- "D:/Flowcytoscript/Modifications/CSV version/00_src"
 93 | 
 94 | # Tip: use today's date
 95 | fcs.seed.base <- 20220623
 96 | 
 97 | 
 98 | # Tip: Set to FALSE while optimizing the tSNE and FlowSOM. 
 99 | # Once you have a good run, set to TRUE and then change labels, colors, etc.
100 | fcs.use.cached.results <- TRUE
101 | 
102 | fcs.sample.number.width <- 2
103 | fcs.event.number.width <- 6
104 | 
105 | 
106 | # graphics parameters---------------
107 | 
108 | fcs.color.pool <- c( 
109 |     brewer.pal( 8, "Set1" )[ -6 ], 
110 |     brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
111 |     adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 
112 |         red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 
113 |     adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
114 |         red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 
115 |     adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 
116 |         red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 
117 |     adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
118 |         red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 
119 |     adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 
120 |         red.f = 0.3, green.f = 0.3, blue.f = 0.3 ), 
121 |     adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
122 |         red.f = 0.3, green.f = 0.3, blue.f = 0.3 ) )
123 | fcs.color.pool.n <- length( fcs.color.pool )
124 | 
125 | fcs.line.type.pool <- 1:6
126 | fcs.line.type.pool.n <- length( fcs.line.type.pool )
127 | 
128 | fcs.condition.color <- rep( 
129 |     fcs.color.pool, 
130 |     ceiling( fcs.condition.n / fcs.color.pool.n ) 
131 | )[ 1 : fcs.condition.n ]
132 | names( fcs.condition.color ) <- fcs.condition
133 | 
134 | fcs.condition.line.type <- rep( 
135 |     fcs.line.type.pool, 
136 |     ceiling( fcs.condition.n / fcs.line.type.pool.n ) 
137 | )[ 1 : fcs.condition.n ]
138 | names( fcs.condition.line.type ) <- fcs.condition
139 | 
140 | 
141 | # density parameters---------------
142 | # Sampling largely irrelevant for this method
143 | 
144 | fcs.density.data.sample.n <- NULL
145 | 
146 | fcs.density.partition.all <- "all"
147 | fcs.density.partition.all.label <- c( "all" = "All" )
148 | fcs.density.partition.all.color <- c( "all" = "grey" )
149 | 
150 | fcs.density.font.size <- 4.5
151 | 
152 | fcs.density.line.size <- 0.2
153 | fcs.density.line.alpha <- 0.3
154 | 
155 | fcs.density.figure.width.base <- 0.4
156 | fcs.density.figure.height.base <- 0.1
157 | 
158 | fcs.density.figure.dir <- "./figure_density"
159 | 
160 | fcs.density.figure.sample <- "density_sample"
161 | fcs.density.figure.cluster <- "density_cluster"
162 | 
163 | 
164 | # cluster parameters---------------
165 | # Tip: use more clusters for more diverse cell collections. Use as few as possible to speed up processing.
166 | # For example, for a broad immune phenotyping panel use 40-50 clusters.
167 | # For pre-gated cell types (e.g., CD8s), use 10-12 clusters.
168 | 
169 | fcs.cluster.n <- 40
170 | 
171 | # Tip: naming clusters can be done automatically as clusters 1:n via the first command below.
172 | # To rename your clusters based on marker expression, insert a # before the first command,
173 | # remove the # before the second command,
174 | # and rename the clusters appropriately.
175 | 
176 | fcs.cluster <- sprintf( "%02d", 1 : fcs.cluster.n )
177 | #fcs.cluster <- c("B cells", "CD4 T cells", "CD8 T cells", "Monocytes",
178 | #                "Neutrophils", "NK cells", "Eosinophils",
179 | #                "cDC", "Macrophages")
180 | 
181 | fcs.cluster.label <- fcs.cluster
182 | names( fcs.cluster.label ) <- fcs.cluster
183 | 
184 | # Tip: this controls the grouping of the fcs files that are generated based on the flowSOM clustering.
185 | # If some of the clusters are related (e.g., CD14+ and CD16+ monocytes), 
186 | # you might wish to group these into a single output folder.
187 | # If so, you may use a command such as "a" = c(1, 5, 7),
188 | # where the numbers are the numbered flowSOM clusters.
189 | fcs.cluster.group <- as.list(1:fcs.cluster.n)
190 | names(fcs.cluster.group) <- make.unique(rep(letters, length.out = fcs.cluster.n), sep='')
191 | 
192 | fcs.cluster.color <- rep( 
193 |     fcs.color.pool, 
194 |     ceiling( fcs.cluster.n / fcs.color.pool.n ) 
195 | )[ 1 : fcs.cluster.n ]
196 | names( fcs.cluster.color ) <- fcs.cluster
197 | 
198 | fcs.cluster.line.type <- rep( 
199 |     fcs.line.type.pool, 
200 |     ceiling( fcs.cluster.n / fcs.line.type.pool.n ) 
201 | )[ 1 : fcs.cluster.n ]
202 | names( fcs.cluster.line.type ) <- fcs.cluster
203 | 
204 | # 24 is recommended for EmbedSOM
205 | fcs.flow.som.dim <- 24
206 | 
207 | fcs.cluster.table.dir <- "./table_cluster"
208 | fcs.cluster.data.dir <- "./data_cluster"
209 | 
210 | fcs.cluster.table.counts <- "cluster_counts"
211 | 
212 | fcs.cluster.data <- "cluster_group"
213 | 
214 | 
215 | # heatmap parameters---------------
216 | 
217 | fcs.heatmap.palette.n <- 100
218 | fcs.heatmap.palette <- colorRampPalette( brewer.pal( 9, "YlOrRd" ) )( 
219 |     fcs.heatmap.palette.n )
220 | 
221 | fcs.heatmap.font.size <- 2.5
222 | 
223 | fcs.heatmap.label.factor.row <- 1.4
224 | fcs.heatmap.label.factor.col <- 1.4
225 | 
226 | fcs.heatmap.width <- 2000
227 | fcs.heatmap.height <- 2000
228 | 
229 | fcs.heatmap.figure.dir <- "./figure_heatmap"
230 | 
231 | fcs.heatmap.figure <- "heatmap"
232 | 
233 | 
234 | # histogram parameters---------------
235 | 
236 | fcs.histogram.font.size <- 7
237 | fcs.histogram.error.bar.size <- 0.2
238 | fcs.histogram.legend.key.size <- 0.7
239 | 
240 | fcs.histogram.label.factor.height <- 0.05
241 | 
242 | fcs.histogram.width <- 4.5
243 | fcs.histogram.height <- 3
244 | 
245 | fcs.histogram.figure.dir <- "./figure_histogram"
246 | 
247 | fcs.histogram.figure <- "histogram"
248 | 
249 | 
250 | # dimensionality reduction parameters---------------
251 | # Tip: Use the third option in most cases.
252 | # More cells will take longer. 
253 | # You might downsample to 50-100k cells at first, then run again with more cells if you like the result.
254 | 
255 | fcs.dmrd.data.sample.n <- NULL
256 | fcs.dmrd.data.sample.n.per.condition <- NULL
257 | fcs.dmrd.data.sample.n.per.sample <- 2000
258 | 
259 | fcs.dmrd.gradient.color <- c( "black", "blue", "green", "yellow", "red" )
260 | fcs.dmrd.gradient.palette.n <- 100
261 | fcs.dmrd.density.palette <- colorRampPalette( fcs.dmrd.gradient.color )( 
262 |     fcs.dmrd.gradient.palette.n )
263 | 
264 | fcs.dmrd.color.alpha <- 0.3
265 | 
266 | fcs.dmrd.group.title.size <- 8
267 | 
268 | fcs.dmrd.legend.title.size <- 7
269 | fcs.dmrd.legend.label.size <- 7
270 | fcs.dmrd.legend.point.size <- 3
271 | 
272 | fcs.dmrd.label.factor.width <- 0.1
273 | 
274 | # Tip: set the number of rows in the output figures here.
275 | fcs.dmrd.figure.nrow <- 1
276 | fcs.dmrd.figure.ncol <- ceiling( fcs.condition.n / fcs.dmrd.figure.nrow )
277 | 
278 | # Tip: set the figure size here.
279 | fcs.dmrd.figure.width <- 2
280 | fcs.dmrd.figure.height <- 2
281 | 
282 | 
283 | # tsne parameters---------------
284 | # Tip: more iterations take longer.
285 | # More iterations are needed for more cells
286 | # For a first look, try 1000.
287 | # For a final figure on ~100k cells, use 5000.
288 | # Tip 2: Set fcs.tsne.thread.n to 0 for max processing speed.
289 | # To enable you to do something else meanwhile, 
290 | # set it to one or two less than the number of threads on your processor (check specs online).
291 | 
292 | fcs.tsne.iter.n <- 5000
293 | fcs.tsne.thread.n <- 0
294 | 
295 | # Tip: don't change this unless you know why
296 | fcs.tsne.perplexity <- 30
297 | fcs.tsne.theta <- 0.5
298 | fcs.tsne.exaggeration.factor <- 12
299 | 
300 | fcs.tsne.figure.lims.factor <- 1.0
301 | fcs.tsne.figure.point.size <- 1.2
302 | 
303 | fcs.tsne.figure.convergence.width <- 1200
304 | fcs.tsne.figure.convergence.height <- 800
305 | 
306 | fcs.tsne.figure.dir <- "./figure_tsne"
307 | 
308 | fcs.tsne.figure.convergence <- "tsne_convergence"
309 | fcs.tsne.figure.plot <- "tsne_plot"
310 | 
311 | fcs.tsne.cache.file.path <- "./tsne_cache.dat"
312 | 
313 | 
314 | # umap parameters---------------
315 | # Tip: you don't generally need to change the UMAP iterations
316 | 
317 | fcs.umap.iter.n <- 1000
318 | 
319 | fcs.umap.figure.lims.factor <- 0.8
320 | fcs.umap.figure.point.size <- 1.2
321 | 
322 | fcs.umap.figure.dir <- "./figure_umap"
323 | 
324 | fcs.umap.figure.plot <- "umap_plot"
325 | 
326 | fcs.umap.cache.file.path <- "./umap_cache.dat"
327 | 
328 | # embedsom parameters---------------
329 | fcs.embedsom.figure.lims.factor <- 1.0
330 | fcs.embedsom.figure.point.size <- 0.05
331 | fcs.ce.diff.embedsom.figure.dir <- "./figure_embedsom_ce_diff"
332 | fcs.embedsom.figure.dir <- "./figure_embedsom"
333 | fcs.ce.diff.embedsom.result <- "embedsom_ce_diff_result"
334 | fcs.ce.diff.embedsom.cache.file.path <- "./embedsom_ce_diff_cache.dat"
335 | fcs.ce.diff.embedsom.figure.cdf <- "embedsom_ce_diff_cdf"
336 | fcs.ce.diff.embedsom.figure.dendrogram <- "embedsom_ce_diff_dendrogram"
337 | 
338 | # cross-entropy test parameters---------------
339 | # Tips: Set this depending on your RAM and number of groups.
340 | # You won't be able to analyze more than about 100k cells unless you have >32GB RAM.
341 | # The crossentropy test works best with at least 10k cells per group.
342 | # Multiple hypothesis testing will greatly reduce your ability to distinguish statistical differences.
343 | 
344 | 
345 | fcs.ce.diff.prob.sample.n <- 120000
346 | 
347 | # Tip: set to "ks" unless you have a good statistical reason for using rank testing.
348 | # In that case, use "rank" and "median".
349 | 
350 | fcs.ce.diff.base.test <- "ks"
351 | fcs.ce.diff.base.dist <- "ks"
352 | 
353 | fcs.ce.diff.test.alpha <- 0.05
354 | 
355 | fcs.ce.diff.figure.font.size <- 2
356 | fcs.ce.diff.figure.line.width <- 3
357 | 
358 | fcs.ce.diff.figure.cdf.resolution <- 500
359 | fcs.ce.diff.figure.cdf.all.color <- "black"
360 | fcs.ce.diff.figure.cdf.all.label <- "All"
361 | 
362 | fcs.ce.diff.figure.dendrogram.weight.condition <- 1 : fcs.condition.n
363 | names( fcs.ce.diff.figure.dendrogram.weight.condition ) <- fcs.condition
364 | 
365 | fcs.ce.diff.figure.dendrogram.weight.cluster <- 1 : fcs.cluster.n
366 | names( fcs.ce.diff.figure.dendrogram.weight.cluster ) <- fcs.cluster
367 | 
368 | fcs.ce.diff.figure.cdf.width <- 1200
369 | fcs.ce.diff.figure.cdf.height <- 800
370 | 
371 | fcs.ce.diff.figure.dendrogram.width <- 2000
372 | fcs.ce.diff.figure.dendrogram.height <- 800
373 | 
374 | 
375 | # cross-entropy test parameters for tsne--------------
376 | 
377 | fcs.ce.diff.tsne.perplexity.factor <- 3
378 | 
379 | fcs.ce.diff.tsne.figure.dir <- "./figure_tsne_ce_diff"
380 | 
381 | fcs.ce.diff.tsne.figure.cdf <- "tsne_ce_diff_cdf"
382 | fcs.ce.diff.tsne.figure.dendrogram <- "tsne_ce_diff_dendrogram"
383 | fcs.ce.diff.tsne.result <- "tsne_ce_diff_result"
384 | 
385 | fcs.ce.diff.tsne.cache.file.path <- "./tsne_ce_diff_cache.dat"
386 | 
387 | 
388 | # cross-entropy test parameters for umap---------------
389 | 
390 | fcs.ce.diff.umap.figure.dir <- "./figure_umap_ce_diff"
391 | 
392 | fcs.ce.diff.umap.figure.cdf <- "umap_ce_diff_cdf"
393 | fcs.ce.diff.umap.figure.dendrogram <- "umap_ce_diff_dendrogram"
394 | fcs.ce.diff.umap.result <- "umap_ce_diff_result"
395 | 
396 | fcs.ce.diff.umap.cache.file.path <- "./umap_ce_diff_cache.dat"
397 | 
398 | 


--------------------------------------------------------------------------------
/CSV-based flow analysis/flowcytoscript_setup.R:
--------------------------------------------------------------------------------
 1 | # Installation of required packages for csv version of flowcytoscript
 2 | # Run this once to set things up.
 3 | # Before running, you may wish to update R and/or RStudio.
 4 | 
 5 | install.packages("digest")
 6 | install.packages("dunn.test")
 7 | install.packages("ggplot2")
 8 | install.packages("ggridges")
 9 | install.packages("RANN")
10 | install.packages("RColorBrewer")
11 | install.packages("reshape2")
12 | install.packages("Rtsne")
13 | install.packages("umap")
14 | install.packages("dplyr")
15 | install.packages("FNN")
16 | install.packages("devtools")
17 | 
18 | if (!require("BiocManager", quietly = TRUE))
19 |   install.packages("BiocManager")
20 | 
21 | BiocManager::install("FlowSOM")
22 | 
23 | library(devtools)
24 | devtools::install_github('exaexa/EmbedSOM')


--------------------------------------------------------------------------------
/CSV-based flow analysis/highlight_changed_regions.R:
--------------------------------------------------------------------------------
  1 | # Optional add-on to flowcytoscript analysis
  2 | # Use T-REX approach from Cytolab to determine changing knn regions
  3 | 
  4 | # first run tSNE, UMAP and/or EmbedSOM using flowcytoscript
  5 | 
  6 | library(FNN)
  7 | 
  8 | ## Set things up-------------
  9 | # pick two groups for identifying differences from the output below
 10 | # copy into trex.condition <- c() between parenthesis
 11 | print(fcs.condition)
 12 | trex.condition <- c("Spleen", "siLPL")
 13 | 
 14 | 
 15 | # subset data based on those groups
 16 | trex.dmrd.data <- subset(dmrd.data, dmrd.event.condition %in% trex.condition)
 17 | trex.umap.data <- umap.data
 18 | rownames(trex.umap.data) <- rownames(dmrd.data)
 19 | trex.umap.data <- subset(trex.umap.data, dmrd.event.condition %in% trex.condition)
 20 | trex.tsne.data <- tsne.data
 21 | rownames(trex.tsne.data) <- rownames(dmrd.data)
 22 | trex.tsne.data <- subset(trex.tsne.data, dmrd.event.condition %in% trex.condition)
 23 | trex.embed.som <- embed.som
 24 | rownames(trex.embed.som) <- rownames(flow.data)
 25 | trex.embed.som <- subset(trex.embed.som, flow.event.condition %in% trex.condition)
 26 | kvalue <- 60
 27 | 
 28 | 
 29 | ## Now you may generate plots showing regions that are disproportionately in one condition or the other.
 30 | ## Use the three sections below to generate these with either tSNE, UMAP or EmbedSOM plots
 31 | 
 32 | 
 33 | ## Using tSNE as the plot--------------------------
 34 | # KNN search per cell 
 35 | tsne.neighbor.index <- knnx.index(trex.tsne.data,trex.tsne.data,k=kvalue)
 36 | first.condition.length <- nrow(subset(dmrd.data, dmrd.event.condition == trex.condition[1]))
 37 | tsne.neighbor.index[tsne.neighbor.index <= first.condition.length] <- 0
 38 | tsne.neighbor.index[tsne.neighbor.index > first.condition.length] <- 1
 39 | 
 40 | # calculate percent change in each KNN region
 41 | percent.change.tsne <- (rowSums(tsne.neighbor.index) / kvalue * 100)
 42 | 
 43 | # binning and plot info
 44 | range <- apply(apply( trex.tsne.data, 2, range), 2, diff)
 45 | graphical.ratio <- (range[1] / range[2])
 46 | test.round <- round(percent.change.tsne)
 47 | trex.plot.tsne <-
 48 |   data.frame(x = trex.tsne.data[, 1], y = trex.tsne.data[, 2], col = test.round)
 49 | trex.plot.tsne$cuts <- cut(trex.plot.tsne$col, c(0, 5, 15, 85, 95, 100), include.lowest = TRUE, right = FALSE)
 50 | trex.plot.tsne$cuts <- factor(trex.plot.tsne$cuts,
 51 |                          levels = c("[15,85)", "[5,15)", "[0,5)", "[85,95)", "[95,100]"))
 52 | ordered.plot.tsne <- trex.plot.tsne[order(trex.plot.tsne$cuts), ]
 53 | 
 54 | 
 55 | # create T-REX plot
 56 | 
 57 | trex_comparison <- paste(trex.condition[1], " vs ", trex.condition[2], sep = "") 
 58 | dir.create("figure_trex_tsne")
 59 | png(
 60 |   paste(
 61 |     "./figure_trex_tsne/",
 62 |     strftime(Sys.time(), "%Y-%m-%d_%H%M%S"),
 63 |     " tSNE TREX plot.png",
 64 |     sep = ""
 65 |   ),
 66 |   res = 200,
 67 |   width = 1500,
 68 |   height = 1500
 69 | )
 70 | final.trex.plot.tsne <-
 71 |   ggplot(ordered.plot.tsne) + geom_point(aes(x = x, y = y, colour = cuts), cex = 1) +
 72 |   scale_color_manual(
 73 |     name = "Ratio",
 74 |     values = c(
 75 |       "[15,85)" = "lightgray",
 76 |       "[5,15)" = "lightskyblue",
 77 |       "[0,5)" = "navyblue",
 78 |       "[85,95)" = "lightcoral",
 79 |       "[95,100]" = "darkred"
 80 |     )
 81 |   ) +
 82 |   theme_bw() + theme(panel.grid.major = element_blank(),
 83 |                      panel.grid.minor = element_blank()) +
 84 |   labs (x = "tSNE x", y = "tSNE y", title = paste( trex_comparison," - Percent Change",sep = "")) + 
 85 |   coord_fixed(ratio = graphical.ratio)+
 86 |   guides(color = guide_legend(override.aes = list(size=3)))
 87 | print(final.trex.plot.tsne)
 88 | dev.off()
 89 | 
 90 | print(final.trex.plot.tsne)
 91 | 
 92 | 
 93 | ## Using UMAP as the plot--------------------------
 94 | # KNN search per cell 
 95 | umap.neighbor.index <- knnx.index(trex.umap.data,trex.umap.data,k=kvalue)
 96 | first.condition.length <- nrow(subset(dmrd.data, dmrd.event.condition == trex.condition[1]))
 97 | umap.neighbor.index[umap.neighbor.index <= first.condition.length] <- 0
 98 | umap.neighbor.index[umap.neighbor.index > first.condition.length] <- 1
 99 | 
100 | # calculate percent change in each KNN region
101 | percent.change.umap <- (rowSums(umap.neighbor.index) / kvalue * 100)
102 | 
103 | # binning and plot info
104 | range <- apply(apply( trex.umap.data, 2, range), 2, diff)
105 | graphical.ratio <- (range[1] / range[2])
106 | test.round <- round(percent.change.umap)
107 | trex.plot.umap <-
108 |   data.frame(x = trex.umap.data[, 1], y = trex.umap.data[, 2], col = test.round)
109 | trex.plot.umap$cuts <- cut(trex.plot.umap$col, c(0, 5, 15, 85, 95, 100), include.lowest = TRUE, right = FALSE)
110 | trex.plot.umap$cuts <- factor(trex.plot.umap$cuts,
111 |                               levels = c("[15,85)", "[5,15)", "[0,5)", "[85,95)", "[95,100]"))
112 | ordered.plot.umap <- trex.plot.umap[order(trex.plot.umap$cuts), ]
113 | range <- apply(apply(trex.umap.data, 2, range), 2, diff)
114 | graphical.ratio <- (range[1] / range[2])
115 | 
116 | # create T-REX plot
117 | 
118 | trex_comparison <- paste(trex.condition[1], " vs ", trex.condition[2], sep = "") 
119 | dir.create("figure_trex_umap")
120 | png(
121 |   paste(
122 |     "./figure_trex_umap/",
123 |     strftime(Sys.time(), "%Y-%m-%d_%H%M%S"),
124 |     " UMAP TREX plot.png",
125 |     sep = ""
126 |   ),
127 |   res = 200,
128 |   width = 1500,
129 |   height = 1500
130 | )
131 | final.trex.plot.umap <-
132 |   ggplot(ordered.plot.umap) + geom_point(aes(x = x, y = y, colour = cuts), cex = 1) +
133 |   scale_color_manual(
134 |     name = "Ratio",
135 |     values = c(
136 |       "[15,85)" = "lightgray",
137 |       "[5,15)" = "lightskyblue",
138 |       "[0,5)" = "navyblue",
139 |       "[85,95)" = "lightcoral",
140 |       "[95,100]" = "darkred"
141 |     )
142 |   ) +
143 |   theme_bw() + theme(panel.grid.major = element_blank(),
144 |                      panel.grid.minor = element_blank()) +
145 |   labs (x = "UMAP1", y = "UMAP2", title = paste( trex_comparison," - Percent Change",sep = "")) + 
146 |   coord_fixed(ratio = graphical.ratio)+
147 |   guides(color = guide_legend(override.aes = list(size=3)))
148 | print(final.trex.plot.umap)
149 | dev.off()
150 | 
151 | print(final.trex.plot.umap)
152 | 
153 | 
154 | 
155 | ## Using EmbedSOM as the plot--------------------------
156 | # KNN search per cell 
157 | som.neighbor.index <- knnx.index(trex.embed.som,trex.embed.som,k=kvalue)
158 | first.condition.length <- nrow(subset(flow.data, flow.event.condition == trex.condition[1]))
159 | som.neighbor.index[som.neighbor.index <= first.condition.length] <- 0
160 | som.neighbor.index[som.neighbor.index > first.condition.length] <- 1
161 | 
162 | # calculate percent change in each KNN region
163 | percent.change.som <- (rowSums(som.neighbor.index) / kvalue * 100)
164 | 
165 | # binning and plot info
166 | range <- apply(apply( trex.embed.som, 2, range), 2, diff)
167 | graphical.ratio <- (range[1] / range[2])
168 | test.round <- round(percent.change.som)
169 | trex.plot.som <-
170 |   data.frame(x = trex.embed.som[, 1], y = trex.embed.som[, 2], col = test.round)
171 | trex.plot.som$cuts <- cut(trex.plot.som$col, c(0, 5, 15, 85, 95, 100), include.lowest = TRUE, right = FALSE)
172 | trex.plot.som$cuts <- factor(trex.plot.som$cuts,
173 |                               levels = c("[15,85)", "[5,15)", "[0,5)", "[85,95)", "[95,100]"))
174 | ordered.plot.som <- trex.plot.som[order(trex.plot.som$cuts), ]
175 | range <- apply(apply(trex.embed.som, 2, range), 2, diff)
176 | graphical.ratio <- (range[1] / range[2])
177 | 
178 | # create T-REX plot
179 | 
180 | trex_comparison <- paste(trex.condition[1], " vs ", trex.condition[2], sep = "") 
181 | dir.create("figure_trex_embedsom")
182 | png(
183 |   paste(
184 |     "./figure_trex_embedsom/",
185 |     strftime(Sys.time(), "%Y-%m-%d_%H%M%S"),
186 |     " EmbedSOM TREX plot.png",
187 |     sep = ""
188 |   ),
189 |   res = 200,
190 |   width = 1500,
191 |   height = 1500
192 | )
193 | final.trex.plot.som <-
194 |   ggplot(ordered.plot.som) + geom_point(aes(x = x, y = y, colour = cuts), cex = 1) +
195 |   scale_color_manual(
196 |     name = "Ratio",
197 |     values = c(
198 |       "[15,85)" = "lightgray",
199 |       "[5,15)" = "lightskyblue",
200 |       "[0,5)" = "navyblue",
201 |       "[85,95)" = "lightcoral",
202 |       "[95,100]" = "darkred"
203 |     )
204 |   ) +
205 |   theme_bw() + theme(panel.grid.major = element_blank(),
206 |                      panel.grid.minor = element_blank()) +
207 |   labs (x = "EmbedSOM1", y = "EmbedSOM2", title = paste( trex_comparison," - Percent Change",sep = "")) + 
208 |   coord_fixed(ratio = graphical.ratio)+
209 |   guides(color = guide_legend(override.aes = list(size=3)))
210 | print(final.trex.plot.som)
211 | dev.off()
212 | 
213 | print(final.trex.plot.som)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Cross-Entropy-test
 2 | Unified script for applying the t-SNE diff method on flow cytometry or single cell RNA-seq data
 3 | 
 4 | Code is available and free for academic users. Commercial users should contact Adrian Liston to discuss licensing options.
 5 | 
 6 | For flow cytometry analysis, we recommend using the CSV-based method. This allows users to set the data scales (transformations) 
 7 | in FlowJo or any other standard flow cytometry data analysis software. Scaling of the data is critical for optimal visualization and clustering. 
 8 | To understand how to set scales in FlowJo, see the instruction file "Setting axis in FlowJo for Aurora data.pptx". Use the channel values format.
 9 | To export your data in CSV format, preserving the transformations from FlowJo, see the instructions in "Exporting data in csv format.PNG". For more 
10 | detail, see https://docs.flowjo.com/flowjo/graphs-and-gating/gw-transform-overview/
11 | 
12 | The CSV-based version incorporates EmbedSOM for fast parallelized SOM clustering and visualization. There is also an option to use the Irish lab's 
13 | T-REX method as an add-on for visualizing and identifying changed regions in the tSNE, UMAP or EmbedSOM plots.
14 | https://elifesciences.org/articles/64653
15 | 
16 | Alternatively, you may use the legacy version "flow analysis", which starts with FCS files.
17 | 
18 | For all flow cytometry analysis, we recommend pre-gating and exporting the populations of cells you are interested in. While exporting, 
19 | adding group or variable tags to the file names will help you sort the files with the script. For a tutorial on using the script, see the lab's website:
20 | http://www.listonlab.uk/cross-entropy-test/
21 | 
22 | Publication here:
23 | https://www.cell.com/cell-reports-methods/pdfExtended/S2667-2375(22)00295-8
24 | 
25 | To install all the packages and dependencies for running the script, run flowcytoscript_setup.R
26 | 
27 | The parameter file (analyze_flow_cytometry_parameter_csv.R) contains all the variables you may wish to modify, 
28 | so no changes should be necessary in the main script (analyze_flow_cytometry_csv.R) or source (00_src) files.
29 | 


--------------------------------------------------------------------------------
/flow analysis/analyze_flow_cytometry.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # script for the analysis of flow cytometry data
  3 | 
  4 | 
  5 | library( ConsensusClusterPlus )
  6 | library( digest )
  7 | require( dunn.test )
  8 | library( flowCore )
  9 | library( FlowSOM )
 10 | library( ggplot2 )
 11 | library( ggridges )
 12 | require( RANN )
 13 | library( RColorBrewer )
 14 | library( reshape2 )
 15 | library( Rtsne )
 16 | library( umap )
 17 | 
 18 | 
 19 | # function to set random seed depending on base number and string
 20 | 
 21 | set.seed.here <- function( seed.base, seed.char )
 22 | {
 23 |     seed.add <- strtoi( substr( digest( seed.char, "xxhash32" ), 2, 8 ), 16 )
 24 |     seed.new <- seed.base + seed.add    
 25 |     set.seed( seed.new )
 26 |     invisible( seed.new )
 27 | }
 28 | 
 29 | 
 30 | # source parameters
 31 | 
 32 | param.filename <- "./analyze_flow_cytometry_parameter.r"
 33 | 
 34 | source( param.filename )
 35 | 
 36 | 
 37 | # check consistency in parameters
 38 | 
 39 | stopifnot( names( fcs.channel.label ) == fcs.channel )
 40 | stopifnot( names( fcs.channel.asinh.scale ) == fcs.channel )
 41 | 
 42 | stopifnot( names( fcs.condition.label ) == fcs.condition )
 43 | stopifnot( names( fcs.condition.color ) == fcs.condition )
 44 | stopifnot( names( fcs.condition.line.type ) == fcs.condition )
 45 | 
 46 | stopifnot( names( fcs.cluster.label ) == fcs.cluster )
 47 | stopifnot( names( fcs.cluster.color ) == fcs.cluster )
 48 | stopifnot( names( fcs.cluster.line.type ) == fcs.cluster )
 49 | 
 50 | stopifnot( sum( is.null( fcs.dmrd.data.sample.n ), 
 51 |     is.null( fcs.dmrd.data.sample.n.per.condition ), 
 52 |     is.null( fcs.dmrd.data.sample.n.per.sample ) ) >= 2 )
 53 | 
 54 | stopifnot( unlist( fcs.cluster.group ) >= 1 & 
 55 |     unlist( fcs.cluster.group ) <= fcs.cluster.n )
 56 | 
 57 | 
 58 | # source functions
 59 | 
 60 | source( file.path( fcs.src.dir, "ce_diff_test.r" ) )
 61 | source( file.path( fcs.src.dir, "ce_diff_test_tsne.r" ) )
 62 | source( file.path( fcs.src.dir, "ce_diff_test_umap.r" ) )
 63 | source( file.path( fcs.src.dir, "plot_all_dmrd_figures.r" ) )
 64 | source( file.path( fcs.src.dir, "plot_dimensionality_reduction.r" ) )
 65 | 
 66 | 
 67 | # create dirs
 68 | 
 69 | figure.dir <- c( 
 70 |     fcs.ce.diff.tsne.figure.dir, 
 71 |     fcs.ce.diff.umap.figure.dir, 
 72 |     fcs.density.figure.dir, 
 73 |     fcs.heatmap.figure.dir, 
 74 |     fcs.histogram.figure.dir, 
 75 |     fcs.tsne.figure.dir, 
 76 |     fcs.umap.figure.dir 
 77 | )
 78 | 
 79 | table.dir <- fcs.cluster.table.dir
 80 | 
 81 | data.dir <- sapply( names( fcs.cluster.group ), function( fcg.name ) 
 82 |     sprintf( "%s/%s_%s", fcs.cluster.data.dir, fcs.cluster.data, fcg.name ) )
 83 | 
 84 | for ( the.dir in c( figure.dir, table.dir, data.dir ) )
 85 |     if ( ! file.exists( the.dir ) )
 86 |         dir.create( the.dir, recursive = TRUE )
 87 | 
 88 | 
 89 | # read fcs data
 90 | 
 91 | flow.data.filename.all <- list.files( fcs.data.dir, "\\.fcs$" )
 92 | 
 93 | flow.data.filename <- grep( paste0( fcs.condition, collapse = "|" ), 
 94 |     flow.data.filename.all, value = TRUE )
 95 | 
 96 | sample.name.format <- paste0( "%s.%0", fcs.sample.number.width, "d" )
 97 | event.name.format <- paste0( "%s.%0", fcs.event.number.width, "d" )
 98 | 
 99 | flow.data.filename.sample <- rep( "", length( flow.data.filename ) )
100 | names( flow.data.filename.sample ) <- flow.data.filename
101 | 
102 | sample.idx.next <- rep( 1, fcs.condition.n )
103 | names( sample.idx.next ) <- fcs.condition
104 | 
105 | flow.data <- lapply( flow.data.filename, function( flow.data.fn ) {
106 |     #   cat( flow.data.fn, "\n" )
107 |         sample.flow.frame <- read.FCS( file.path( fcs.data.dir, flow.data.fn ), 
108 |         transformation = NULL, truncate_max_range = FALSE )
109 |     
110 |     condition <- fcs.condition[ sapply( fcs.condition, grepl, flow.data.fn ) ]
111 |     stopifnot( length( condition ) == 1 )
112 |     
113 |     sample.data <- exprs( sample.flow.frame )
114 |     
115 |     if ( ! all( fcs.channel %in% colnames( sample.data ) ) )
116 |     {
117 |         cat( sprintf( "File: %s\n", flow.data.fn ) )
118 |         print( sort( fcs.channel[ 
119 |             ! fcs.channel %in% colnames( sample.data ) ] ) )
120 |         print( sort( colnames( sample.data ) ) )
121 |         stop( "mismatch in names of fcs channels" )
122 |     }
123 | 
124 |     sample.name <- sprintf( sample.name.format, condition, 
125 |         sample.idx.next[ condition ] )
126 |     
127 |     sample.data <- sample.data[ , fcs.channel, drop = FALSE ]
128 |     
129 |     event.n <- nrow( sample.data )
130 |     if ( event.n > 0 ) {
131 |         event.name <- sprintf( event.name.format, sample.name, 1 : event.n )
132 |         rownames( sample.data ) <- event.name
133 |     }
134 |     
135 |     flow.data.filename.sample[ flow.data.fn ] <<- sample.name
136 |     sample.idx.next[ condition ] <<- sample.idx.next[ condition ] + 1
137 |     
138 |     sample.data
139 | } )
140 | 
141 | flow.data <- do.call( rbind, flow.data )
142 | 
143 | # define samples
144 | flow.sample <- flow.data.filename.sample
145 | names( flow.sample ) <- NULL
146 | 
147 | stopifnot( flow.sample == 
148 |     unique( sub( "\\.[0-9]+$", "", rownames( flow.data ) ) ) )
149 | 
150 | flow.sample.n <- length ( flow.sample )
151 | 
152 | flow.sample.condition <- factor( sub( "\\.[0-9]+$", "", flow.sample ), 
153 |     levels = fcs.condition )
154 | names( flow.sample.condition ) <- flow.sample
155 | 
156 | # reorder samples to follow order of conditions
157 | flow.sample <- flow.sample[ order( flow.sample.condition ) ]
158 | flow.sample.condition <- flow.sample.condition[ flow.sample ]
159 | 
160 | flow.sample.label <- sapply( flow.sample, function( fs ) {
161 |     sample.cond <- sub( "^(.*)\\.[0-9]+$", "\\1", fs )
162 |     sample.num <- sub( "^.*\\.([0-9]+)$", "\\1", fs )
163 |     sprintf( "%s-%s", fcs.condition.label[ sample.cond ], sample.num )
164 | } )
165 | 
166 | flow.sample.filename <- sapply( flow.sample, function( fs  ) 
167 |     names( which( flow.data.filename.sample == fs ) ) )
168 | 
169 | # define events
170 | flow.event <- rownames( flow.data )
171 | flow.event.n <- length( flow.event )
172 | 
173 | flow.event.sample <- factor( sub( "\\.[0-9]+$", "", flow.event ), 
174 |     levels = flow.sample )
175 | names( flow.event.sample ) <- flow.event
176 | 
177 | flow.event.condition <- factor( sub( "\\.[0-9]+$", "", flow.event.sample ), 
178 |     levels = fcs.condition )
179 | names( flow.event.condition ) <- flow.event
180 | 
181 | # reorder events to follow order of samples
182 | flow.event.order <- order( flow.event.sample )
183 | 
184 | flow.data <- flow.data[ flow.event.order, ]
185 | flow.event <- flow.event[ flow.event.order ]
186 | flow.event.sample <- flow.event.sample[ flow.event.order ]
187 | flow.event.condition <- flow.event.condition[ flow.event.order ]
188 | 
189 | flow.event.sample.n <- as.vector( table( flow.event.sample ) )
190 | names( flow.event.sample.n ) <- flow.sample
191 | 
192 | flow.event.condition.n <- as.vector( table( flow.event.condition ) )
193 | names( flow.event.condition.n ) <- fcs.condition
194 | 
195 | flow.data.filename
196 | 
197 | table( flow.sample.condition )
198 | 
199 | str( flow.data )
200 | flow.event.condition.n
201 | flow.event.sample.n
202 | 
203 | 
204 | # define figure parameters for samples
205 | 
206 | flow.sample.color <- fcs.condition.color[ flow.sample.condition ]
207 | names( flow.sample.color ) <- flow.sample
208 | 
209 | flow.sample.color.single <- unlist( lapply( fcs.condition, function( fc ) {
210 |     cond.sample.n <- sum( flow.sample.condition == fc )
211 |     rep( 
212 |         fcs.color.pool, 
213 |         ceiling( cond.sample.n / fcs.color.pool.n ) 
214 |     )[ 1 : cond.sample.n ]
215 | } ) )
216 | names( flow.sample.color.single ) <- flow.sample
217 | 
218 | flow.sample.line.type <- fcs.condition.line.type[ flow.sample.condition ]
219 | names( flow.sample.line.type ) <- flow.sample
220 | 
221 | flow.sample.line.type.single <- unlist( lapply( fcs.condition, function( fc ) {
222 |     cond.sample.n <- sum( flow.sample.condition == fc )
223 |     rep( 
224 |         fcs.line.type.pool, 
225 |         ceiling( cond.sample.n / fcs.line.type.pool.n ) 
226 |     )[ 1 : cond.sample.n ]
227 | } ) )
228 | names( flow.sample.line.type.single ) <- flow.sample
229 | 
230 | flow.ce.diff.figure.dendrogram.weight.sample <- 
231 |     fcs.ce.diff.figure.dendrogram.weight.condition[ flow.sample.condition ]
232 | names( flow.ce.diff.figure.dendrogram.weight.sample ) <- flow.sample
233 | 
234 | 
235 | # transform data
236 | 
237 | for ( fch in fcs.channel )
238 |     flow.data[ , fch ] <- asinh( flow.data[ , fch ] / 
239 |         fcs.channel.asinh.scale[ fch ] )
240 | 
241 | 
242 | # plot density distributions of transformed data
243 | 
244 | set.seed.here( fcs.seed.base, "plot density distributions of transformed data" )
245 | 
246 | {
247 | if ( ! is.null( fcs.density.data.sample.n ) && 
248 |     fcs.density.data.sample.n < flow.event.n )
249 |     density.data.idx <- sort( sample( flow.event.n, 
250 |         fcs.density.data.sample.n ) )
251 | else
252 |     density.data.idx <- 1 : flow.event.n
253 | }
254 | 
255 | density.data <- flow.data[ density.data.idx, ]
256 | 
257 | density.data.ggdf.all <- melt( density.data, 
258 |     varnames = c( "partition", "channel" ), value.name = "density.value" )
259 | density.data.ggdf.all$partition <- fcs.density.partition.all
260 | density.data.ggdf.all$channel <- as.character( density.data.ggdf.all$channel )
261 | 
262 | density.data.ggdf.condition <- lapply( fcs.condition, function( fc ) {
263 |     ggdf.condition <- melt( 
264 |         density.data[ flow.event.condition[ density.data.idx ] == fc, , 
265 |             drop = FALSE], 
266 |         varnames = c( "partition", "channel" ), 
267 |         value.name = "density.value" )
268 |     if ( nrow( ggdf.condition ) > 0 ) {
269 |         ggdf.condition$partition <- fc
270 |         ggdf.condition$channel <- as.character( ggdf.condition$channel )
271 |     }
272 |     ggdf.condition
273 | } )
274 | density.data.ggdf.condition <- do.call( rbind, density.data.ggdf.condition )
275 | 
276 | density.data.ggdf.sample <- lapply( flow.sample, function( fs ) {
277 |     ggdf.sample <- melt( 
278 |         density.data[ flow.event.sample[ density.data.idx ] == fs, , 
279 |             drop = FALSE ], 
280 |         varnames = c( "partition", "channel" ), 
281 |         value.name = "density.value" )
282 |     if ( nrow( ggdf.sample ) > 0 ) {
283 |         ggdf.sample$partition <- fs
284 |         ggdf.sample$channel <- as.character( ggdf.sample$channel )
285 |     }
286 |     ggdf.sample
287 | } )
288 | density.data.ggdf.sample <- do.call( rbind, density.data.ggdf.sample )
289 | 
290 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.condition, 
291 |     density.data.ggdf.sample )
292 | 
293 | density.data.partition <- c( fcs.density.partition.all, fcs.condition, 
294 |     flow.sample )
295 | density.data.partition.n <- length( density.data.partition )
296 | 
297 | density.data.partition.label <- c( fcs.density.partition.all.label, 
298 |     fcs.condition.label, flow.sample.label )
299 | 
300 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 
301 |     levels = density.data.partition )
302 | 
303 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 
304 |     levels = fcs.channel )
305 | 
306 | density.plot.color <- c( fcs.density.partition.all.color, fcs.condition.color, 
307 |     flow.sample.color )
308 | 
309 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 
310 |         y = partition, color = partition, fill = partition ) ) + 
311 |     geom_density_ridges( size = fcs.density.line.size, 
312 |         alpha = fcs.density.line.alpha, show.legend = FALSE ) + 
313 |     labs( x = NULL, y = NULL ) + 
314 |     scale_y_discrete( limits = rev( density.data.partition ), 
315 |         breaks = rev( density.data.partition ), 
316 |         labels = rev( density.data.partition.label ) ) + 
317 |     scale_color_manual( values = density.plot.color ) + 
318 |     scale_fill_manual( values = density.plot.color ) + 
319 |     facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 
320 |         channel = fcs.channel.label ) ) + 
321 |     theme_ridges( line_size = fcs.density.line.size, 
322 |         font_size = fcs.density.font.size ) + 
323 |     theme( strip.background = element_rect( fill = "white" ) )
324 | 
325 | ggsave( 
326 |     file.path( fcs.density.figure.dir, 
327 |         sprintf( "%s.png", fcs.density.figure.sample ) ), 
328 |     density.plot, 
329 |     width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 
330 |     height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 
331 | )
332 | 
333 | 
334 | # select data for dimensionality reduction
335 | 
336 | set.seed.here( fcs.seed.base, "select data for dimensionality reduction" )
337 | 
338 | {
339 | if ( ! is.null( fcs.dmrd.data.sample.n ) )
340 | {
341 |     if ( fcs.dmrd.data.sample.n < flow.event.n )
342 |         dmrd.data.idx <- sort( sample( flow.event.n, fcs.dmrd.data.sample.n ) )
343 |     else
344 |         dmrd.data.idx <- 1 : flow.event.n
345 | }
346 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.condition ) )
347 | {
348 |     dmrd.data.idx <- unlist( sapply( fcs.condition, function( fc ) {
349 |         fc.idx <- which( flow.event.condition == fc )
350 |         if ( fcs.dmrd.data.sample.n.per.condition < length( fc.idx ) )
351 |             sort( sample( fc.idx, fcs.dmrd.data.sample.n.per.condition ) )
352 |         else
353 |             fc.idx
354 |     } ) )
355 |     names( dmrd.data.idx ) <- NULL
356 | }
357 | else if ( ! is.null( fcs.dmrd.data.sample.n.per.sample ) )
358 | {
359 |     dmrd.data.idx <- unlist( sapply( flow.sample, function( fs ) {
360 |         fs.idx <- which( flow.event.sample == fs )
361 |         if ( fcs.dmrd.data.sample.n.per.sample < length( fs.idx ) )
362 |             sort( sample( fs.idx, fcs.dmrd.data.sample.n.per.sample ) )
363 |         else
364 |             fs.idx
365 |     } ) )
366 |     names( dmrd.data.idx ) <- NULL
367 | }
368 | else
369 |     dmrd.data.idx <- 1 : flow.event.n
370 | }
371 | 
372 | dmrd.data <- flow.data[ dmrd.data.idx, ]
373 | 
374 | dmrd.event.sample <- flow.event.sample[ dmrd.data.idx ]
375 | dmrd.event.condition <- flow.event.condition[ dmrd.data.idx ]
376 | 
377 | str( dmrd.data )
378 | table( dmrd.event.condition )
379 | table( dmrd.event.sample )
380 | 
381 | 
382 | # get flowsom clusters
383 | 
384 | set.seed.here( fcs.seed.base, "get flowsom clusters" )
385 | 
386 | # read fcs data as a flowSet
387 | flow.set <- read.flowSet( flow.data.filename, fcs.data.dir, 
388 |     column.pattern = paste0( fcs.channel, collapse = "|" ), 
389 |     transformation = NULL, truncate_max_range = FALSE )
390 | 
391 | stopifnot( sort( fcs.channel ) == sort( colnames( flow.set ) ) )
392 | 
393 | flow.set <- flow.set[ , fcs.channel ]
394 | 
395 | # transform data
396 | for ( idx in 1 : length( flow.set ) )
397 |     for ( fch in fcs.channel )
398 |         exprs( flow.set[[ idx ]] )[ , fch ] <- 
399 |             asinh( exprs( flow.set[[ idx ]] )[ , fch ] / 
400 |                 fcs.channel.asinh.scale[ fch ] )
401 | 
402 | # build som objects
403 | flow.som <- ReadInput( flow.set, transform = FALSE, scale = FALSE )
404 | flow.som <- BuildSOM( flow.som, xdim = fcs.flow.som.dim, 
405 |     ydim = fcs.flow.som.dim )
406 | flow.som.mapping <- flow.som$map$mapping[ , 1 ]
407 | flow.som.codes <- flow.som$map$codes
408 | 
409 | # get clusters from som mapping
410 | consensus.cluster <- ConsensusClusterPlus( t( flow.som.codes ),
411 |     maxK = fcs.cluster.n, reps = 100, pItem = 0.9, pFeature = 1,
412 |     clusterAlg = "hc", innerLinkage = "average", finalLinkage = "average",
413 |     distance = "euclidean", 
414 |     seed = set.seed.here( fcs.seed.base, "get clusters from som mapping" ) )
415 | 
416 | flow.som.event.cluster <- consensus.cluster[[ fcs.cluster.n ]]$
417 |     consensusClass[ flow.som.mapping ]
418 | 
419 | # reorder clusters from bigger to smaller
420 | flow.som.cluster.rank <- 1 + fcs.cluster.n - 
421 |     rank( table( flow.som.event.cluster ), ties.method = "last" )
422 | flow.som.event.cluster <- flow.som.cluster.rank[ flow.som.event.cluster ]
423 | names( flow.som.event.cluster ) <- NULL
424 | 
425 | # set clusters as a factor
426 | flow.som.event.cluster <- factor( flow.som.event.cluster, 
427 |     levels = 1 : fcs.cluster.n )
428 | levels( flow.som.event.cluster ) <- fcs.cluster
429 | 
430 | # reorder events
431 | flow.event.cluster <- flow.som.event.cluster[ flow.event.order ]
432 | dmrd.event.cluster <- flow.event.cluster[ dmrd.data.idx ]
433 | 
434 | flow.event.cluster.n <- as.vector( table( flow.event.cluster ) )
435 | names( flow.event.cluster.n ) <- fcs.cluster
436 | 
437 | dmrd.event.cluster.n <- as.vector( table( dmrd.event.cluster ) )
438 | names( dmrd.event.cluster.n ) <- fcs.cluster
439 | 
440 | length( flow.event.cluster )
441 | table( flow.event.cluster )
442 | 
443 | length( dmrd.event.cluster )
444 | table( dmrd.event.cluster )
445 | 
446 | 
447 | # save cluster counts
448 | 
449 | flow.cluster.count <- sapply( fcs.cluster, function( fc )
450 |     table( flow.event.sample[ flow.event.cluster == fc ] ) )
451 | 
452 | stopifnot( rownames( flow.cluster.count ) == flow.sample )
453 | stopifnot( colnames( flow.cluster.count ) == fcs.cluster )
454 | 
455 | rownames( flow.cluster.count ) <- flow.sample.label
456 | colnames( flow.cluster.count ) <- fcs.cluster.label
457 | 
458 | write.csv( flow.cluster.count, file = file.path( fcs.cluster.table.dir, 
459 |     sprintf( "%s.csv", fcs.cluster.table.counts ) ) )
460 | 
461 | 
462 | # plot density distributions by cluster
463 | 
464 | density.data.ggdf.cluster <- lapply( fcs.cluster, function( fc ) { 
465 |     ggdf.cluster <- melt( 
466 |         density.data[ flow.event.cluster[ density.data.idx ] == fc, , 
467 |             drop = FALSE ], 
468 |         varnames = c( "partition", "channel" ), 
469 |         value.name = "density.value" 
470 |     )
471 |     if ( nrow( ggdf.cluster ) > 0 ) {
472 |         ggdf.cluster$partition <- fc
473 |         ggdf.cluster$channel <- as.character( ggdf.cluster$channel )
474 |     }
475 |     ggdf.cluster
476 | } )
477 | density.data.ggdf.cluster <- do.call( rbind, density.data.ggdf.cluster )
478 | 
479 | density.data.ggdf <- rbind( density.data.ggdf.all, density.data.ggdf.cluster )
480 | 
481 | density.data.partition <- c( fcs.density.partition.all, fcs.cluster )
482 | density.data.partition.n <- length( density.data.partition )
483 | 
484 | density.data.partition.label <- c( fcs.density.partition.all.label, 
485 |     fcs.cluster.label )
486 | 
487 | density.data.ggdf$partition <- factor( density.data.ggdf$partition, 
488 |     levels = density.data.partition )
489 | 
490 | density.data.ggdf$channel <- factor( density.data.ggdf$channel, 
491 |     levels = fcs.channel )
492 | 
493 | density.plot.color <- c( fcs.density.partition.all.color, fcs.cluster.color )
494 | 
495 | density.plot <- ggplot( density.data.ggdf, aes( x = density.value, 
496 |         y = partition, color = partition, fill = partition ) ) + 
497 |     geom_density_ridges( size = fcs.density.line.size, 
498 |         alpha = fcs.density.line.alpha, show.legend = FALSE ) + 
499 |     labs( x = NULL, y = NULL ) + 
500 |     scale_y_discrete( limits = rev( density.data.partition ), 
501 |         breaks = rev( density.data.partition ), 
502 |         labels = rev( density.data.partition.label ) ) + 
503 |     scale_color_manual( values = density.plot.color ) + 
504 |     scale_fill_manual( values = density.plot.color ) + 
505 |     facet_wrap( vars( channel ), nrow = 1, labeller = labeller( 
506 |         channel = fcs.channel.label ) ) + 
507 |     theme_ridges( line_size = fcs.density.line.size, 
508 |         font_size = fcs.density.font.size ) + 
509 |     theme( strip.background = element_rect( fill = "white" ) )
510 | 
511 | ggsave( 
512 |     file.path( fcs.density.figure.dir, 
513 |         sprintf( "%s.png", fcs.density.figure.cluster ) ), 
514 |     density.plot, 
515 |     width = fcs.density.figure.width.base * ( fcs.channel.n + 1 ), 
516 |     height = fcs.density.figure.height.base * ( density.data.partition.n + 1 ) 
517 | )
518 | 
519 | 
520 | # save cluster data
521 | 
522 | for ( fs in flow.sample )
523 | {
524 |     sample.filename <- flow.sample.filename[ fs ]
525 |     sample.flow.frame <- read.FCS( file.path( fcs.data.dir, sample.filename ), 
526 |         transformation = NULL, truncate_max_range = FALSE )
527 |     sample.event.idx <- which( flow.event.sample == fs )
528 |     
529 |     for ( fcg.name in names( fcs.cluster.group ) )
530 |     {
531 |         cluster.group <- fcs.cluster[ fcs.cluster.group[[ fcg.name ]] ]
532 |         cluster.group.event.idx <- which( 
533 |             ( flow.event.cluster %in% cluster.group )[ sample.event.idx ] )
534 |         
535 |         if ( length( cluster.group.event.idx ) > 0 )
536 |         {
537 |             cluster.flow.frame <- sample.flow.frame[ cluster.group.event.idx, ]
538 |             cluster.data.dir <- sprintf( "%s/%s_%s", fcs.cluster.data.dir, 
539 |                 fcs.cluster.data, fcg.name )
540 |             cluster.data.filename <- sprintf( "%s_%s_%s.fcs", fs, 
541 |                 fcs.cluster.data, fcg.name )
542 |             invisible( write.FCS( cluster.flow.frame, 
543 |                 file.path( cluster.data.dir, cluster.data.filename ) ) )
544 |         }
545 |     }
546 | }
547 | 
548 | 
549 | # plot heatmaps
550 | 
551 | heatmap.type <- c( "by_condition", "by_sample", "by_cluster" )
552 | 
553 | for ( ht in heatmap.type )
554 | {
555 |     if ( ht == "by_condition" ) {
556 |         flow.data.group.median <- apply( flow.data, 2, tapply, 
557 |             flow.event.condition, median )
558 |         margin.col <- fcs.heatmap.label.factor.col * 
559 |             max( nchar( fcs.condition.label ) )
560 |         group.label <- fcs.condition.label
561 |         group.color <- fcs.condition.color
562 |     }
563 |     else if ( ht == "by_sample" ) {
564 |         flow.data.group.median <- apply( flow.data, 2, tapply, 
565 |             flow.event.sample, median )
566 |         margin.col <- fcs.heatmap.label.factor.col * 
567 |             max( nchar( flow.sample.label ) )
568 |         group.label <- flow.sample.label
569 |         group.color <- flow.sample.color
570 |     }
571 |     else if ( ht == "by_cluster" ) {
572 |         flow.data.group.median <- apply( flow.data, 2, tapply, 
573 |             flow.event.cluster, median )
574 |         margin.col <- fcs.heatmap.label.factor.col * 
575 |             max( nchar( fcs.cluster.label ) )
576 |         group.label <- fcs.cluster.label
577 |         group.color <- fcs.cluster.color
578 |     }
579 |     else
580 |         stop( "wrong heatmap type" )
581 |     
582 |     margin.row <- 
583 |         fcs.heatmap.label.factor.row * max( nchar( fcs.channel.label ) )
584 |     
585 |     if ( margin.row < 5 )
586 |         margin.row <- 5
587 |     if ( margin.col < 5 )
588 |         margin.col <- 5
589 |     
590 |     png( filename = file.path( fcs.heatmap.figure.dir, 
591 |             sprintf( "%s_%s.png", fcs.heatmap.figure, ht ) ), 
592 |         width = fcs.heatmap.width, height = fcs.heatmap.height )
593 |     heatmap( t( flow.data.group.median ), scale = "row", 
594 |         labRow = fcs.channel.label,  labCol = group.label, 
595 |         col = fcs.heatmap.palette, ColSideColors = group.color, 
596 |         cexRow = fcs.heatmap.font.size, cexCol = fcs.heatmap.font.size, 
597 |         margins = c( margin.col, margin.row ) )
598 |     dev.off()
599 | }
600 | 
601 | 
602 | # plot histograms
603 | 
604 | histogram.type <- c( "by_cluster", "by_sample", "by_assay" )
605 | 
606 | condition.sample.n <- as.vector( table( flow.sample.condition ) )
607 | 
608 | for ( ht in histogram.type )
609 | {
610 |     flow.data.cluster.fraction <- lapply( fcs.cluster, function( fc ) {
611 |         sample.event.n <- as.vector( table( 
612 |             flow.event.sample[ flow.event.cluster == fc ] ) )
613 |         
614 |         if ( ht == "by_cluster" )
615 |         {
616 |             sample.fraction <- log2( 
617 |                 ( sample.event.n / flow.event.cluster.n[ fc ] ) / 
618 |                 ( flow.event.sample.n / flow.event.n ) 
619 |             )
620 |             sample.fraction[ is.infinite( sample.fraction ) ] <- NA
621 |             fraction <- tapply( sample.fraction, flow.sample.condition, mean, 
622 |                 na.rm = TRUE )
623 |             std.err <- tapply( sample.fraction, flow.sample.condition, sd, 
624 |                 na.rm = TRUE ) / sqrt( condition.sample.n )
625 |         }
626 |         else if ( ht == "by_sample" )
627 |         {
628 |             sample.fraction <- 100 * sample.event.n / flow.event.sample.n
629 |             fraction <- tapply( sample.fraction, flow.sample.condition, mean, 
630 |                 na.rm = TRUE )
631 |             std.err <- tapply( sample.fraction, flow.sample.condition, sd, 
632 |                 na.rm = TRUE ) / sqrt( condition.sample.n )
633 |         }
634 |         else if ( ht == "by_assay" )
635 |         {
636 |             sample.fraction <- 100 * sample.event.n / flow.event.n
637 |             fraction <- tapply( sample.fraction, flow.sample.condition, sum, 
638 |                 na.rm = TRUE )
639 |             std.err <- NA
640 |         }
641 |         else
642 |             stop( "wrong histogram type" )
643 |         
644 |         data.frame( cluster = fc, condition = fcs.condition, fraction, std.err )
645 |     } )
646 |     
647 |     flow.data.cluster.fraction <- do.call( rbind, flow.data.cluster.fraction )
648 |     flow.data.cluster.fraction$condition <- factor( 
649 |         flow.data.cluster.fraction$condition, levels = fcs.condition )
650 |     
651 |     histogram.plot <- ggplot( flow.data.cluster.fraction, 
652 |         aes( x = cluster, y = fraction, fill = condition ) ) + 
653 |         geom_bar( stat = "identity", position = position_dodge2() ) + 
654 |         geom_errorbar( aes( ymin = fraction - std.err, 
655 |                 ymax = fraction + std.err ), 
656 |             size = fcs.histogram.error.bar.size,
657 |             position = position_dodge2() ) + 
658 |         scale_x_discrete( labels = fcs.cluster.label, name = "" ) + 
659 |         scale_fill_manual( values = fcs.condition.color, 
660 |             limits = fcs.condition, breaks = fcs.condition, 
661 |             labels = fcs.condition.label ) + 
662 |         theme_bw() + 
663 |         theme( 
664 |             axis.text = element_text( size = fcs.histogram.font.size, 
665 |                 angle = 90 ), 
666 |             axis.title = element_text( size = fcs.histogram.font.size + 1 ), 
667 |             legend.title = element_blank(), 
668 |             legend.text = element_text( size = fcs.histogram.font.size + 1 ), 
669 |             legend.key.size = unit( fcs.histogram.legend.key.size, "lines" ), 
670 |             panel.grid.major = element_blank(), 
671 |             panel.grid.minor = element_blank() 
672 |         )
673 |     
674 |     if ( ht == "by_cluster" )
675 |         histogram.plot <- histogram.plot + 
676 |             ylab( "Average log2( frequency ratio )" )
677 |     else if ( ht == "by_sample" )
678 |         histogram.plot <- histogram.plot + ylab( "Average frequency" )
679 |     else if ( ht == "by_assay" )
680 |         histogram.plot <- histogram.plot + ylab( "Frequency" )
681 |     
682 |     ggsave( 
683 |         file.path( fcs.histogram.figure.dir, 
684 |             sprintf( "%s_%s.png", fcs.histogram.figure, ht ) ), 
685 |         histogram.plot, 
686 |         width = fcs.histogram.width, 
687 |         height = fcs.histogram.height + 
688 |             fcs.histogram.label.factor.height * 
689 |                 max( nchar( fcs.cluster.label ) ) 
690 |     )
691 | }
692 | 
693 | 
694 | # calculate tsne representation
695 | 
696 | {
697 |     if ( fcs.use.cached.results && file.exists( fcs.tsne.cache.file.path ) )
698 |     {
699 |         cat( "Using cached results for tsne\n" )
700 |         
701 |         load( fcs.tsne.cache.file.path )
702 |     }
703 |     else
704 |     {
705 |         set.seed.here( fcs.seed.base, "calculate tsne representation" )
706 |         
707 |         cat( "Calculating tsne\n" )
708 |         
709 |         tsne.result <- Rtsne( dmrd.data, perplexity = fcs.tsne.perplexity, 
710 |             theta = fcs.tsne.theta, 
711 |             exaggeration_factor = fcs.tsne.exaggeration.factor, 
712 |             max_iter = fcs.tsne.iter.n, check_duplicates = FALSE, pca = FALSE, 
713 |             num_threads = fcs.tsne.thread.n )
714 |         
715 |         save( tsne.result, file = fcs.tsne.cache.file.path )
716 |     }
717 | }
718 | 
719 | tsne.data <- tsne.result$Y
720 | 
721 | str( tsne.result )
722 | 
723 | 
724 | # plot tsne convergence
725 | 
726 | tsne.iter <- 1 + 50 * ( 1 : length( tsne.result$itercosts ) - 1 )
727 | tsne.cost <- tsne.result$itercosts
728 | 
729 | png( filename = file.path( fcs.tsne.figure.dir, 
730 |         sprintf( "%s.png", fcs.tsne.figure.convergence ) ), 
731 |     width = fcs.tsne.figure.convergence.width, 
732 |     height = fcs.tsne.figure.convergence.height )
733 | par( mar = c( 5, 5.8, 2, 1.4 ) )
734 | plot( tsne.iter, tsne.cost, log = "y", xlab = "Iteration", 
735 |     ylab = "Total cost", xlim = c( 0, fcs.tsne.iter.n ), 
736 |     ylim = c( 1, max( tsne.cost ) ), col = "blue3", 
737 |     pch = 20, cex = 1, cex.lab = 2.5, cex.axis = 2 )
738 | abline( h = tsne.cost[ length( tsne.cost ) ], lty = 2, lwd = 1.5, 
739 |     col = "blue3" )
740 | dev.off()
741 | 
742 | 
743 | # plot tsne representations
744 | 
745 | tsne.data.max <- max( abs( tsne.data ) )
746 | 
747 | plot.all.dmrd.figures( 
748 |     tsne.data, tsne.data.max, 
749 |     fcs.tsne.figure.lims.factor, fcs.tsne.figure.point.size, 
750 |     fcs.tsne.figure.dir, fcs.tsne.figure.plot, 
751 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
752 | )
753 | 
754 | 
755 | # calculate umap representation
756 | 
757 | {
758 |     if ( fcs.use.cached.results && file.exists( fcs.umap.cache.file.path ) )
759 |     {
760 |         cat( "Using cached results for umap\n" )
761 |         
762 |         load( fcs.umap.cache.file.path )
763 |     }
764 |     else
765 |     {
766 |         set.seed.here( fcs.seed.base, "calculate umap representation" )
767 |         
768 |         cat( "Calculating umap\n" )
769 |         
770 |         umap.config <- umap.defaults
771 |         umap.config$n_epochs <- fcs.umap.iter.n
772 |         umap.config$verbose <- TRUE
773 |         
774 |         umap.result <- umap( dmrd.data, config = umap.config )
775 |         
776 |         save( umap.result, file = fcs.umap.cache.file.path )
777 |     }
778 | }
779 | 
780 | umap.data <- umap.result$layout
781 | dimnames( umap.data ) <- NULL
782 | 
783 | str( umap.result )
784 | 
785 | 
786 | # plot umap representations
787 | 
788 | umap.data.max <- max( abs( apply( umap.data, 2, function( x ) 
789 |     quantile( x, c( 0.25, 0.75 ) ) + c( -1, 1 ) * IQR( x ) ) ) )
790 | 
791 | plot.all.dmrd.figures( 
792 |     umap.data, umap.data.max, 
793 |     fcs.umap.figure.lims.factor, fcs.umap.figure.point.size, 
794 |     fcs.umap.figure.dir, fcs.umap.figure.plot, 
795 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
796 | )
797 | 
798 | 
799 | # calculate cross-entropy test for tsne by condition
800 | 
801 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" )
802 | 
803 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 
804 |     dmrd.data, tsne.data, 
805 |     dmrd.event.condition, 
806 |     partition.label = fcs.condition.label, 
807 |     partition.color = fcs.condition.color, 
808 |     partition.line.type = fcs.condition.line.type, 
809 |     base.test = fcs.ce.diff.base.test, 
810 |     base.dist = fcs.ce.diff.base.dist, 
811 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
812 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 
813 |     result = file.path( fcs.ce.diff.tsne.figure.dir, 
814 |         sprintf( "%s_condition.txt", fcs.ce.diff.tsne.result ) ), 
815 |     cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
816 |         sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.cdf ) ), 
817 |     dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
818 |         sprintf( "%s_condition.png", fcs.ce.diff.tsne.figure.dendrogram ) ) )
819 | 
820 | print( ce.diff.test.tsne.res )
821 | 
822 | 
823 | # calculate cross-entropy test for tsne by sample
824 | 
825 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" )
826 | 
827 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 
828 |     dmrd.data, tsne.data, 
829 |     dmrd.event.sample, 
830 |     partition.label = flow.sample.label, 
831 |     partition.color = flow.sample.color, 
832 |     partition.line.type = flow.sample.line.type.single, 
833 |     base.test = fcs.ce.diff.base.test, 
834 |     base.dist = fcs.ce.diff.base.dist, 
835 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
836 |     dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 
837 |     result = file.path( fcs.ce.diff.tsne.figure.dir, 
838 |         sprintf( "%s_sample.txt", fcs.ce.diff.tsne.result ) ), 
839 |     cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
840 |         sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.cdf ) ), 
841 |     dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
842 |         sprintf( "%s_sample.png", fcs.ce.diff.tsne.figure.dendrogram ) ) )
843 | 
844 | print( ce.diff.test.tsne.res )
845 | 
846 | 
847 | # calculate cross-entropy test for tsne by cluster
848 | 
849 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for tsne" )
850 | 
851 | ce.diff.test.tsne.res <- ce.diff.test.tsne( 
852 |     dmrd.data, tsne.data, 
853 |     dmrd.event.cluster, 
854 |     partition.label = fcs.cluster.label, 
855 |     partition.color = fcs.cluster.color, 
856 |     partition.line.type = fcs.cluster.line.type, 
857 |     base.test = fcs.ce.diff.base.test, 
858 |     base.dist = fcs.ce.diff.base.dist, 
859 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
860 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 
861 |     result = file.path( fcs.ce.diff.tsne.figure.dir, 
862 |         sprintf( "%s_cluster.txt", fcs.ce.diff.tsne.result ) ), 
863 |     cdf.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
864 |         sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.cdf ) ), 
865 |     dendrogram.figure = file.path( fcs.ce.diff.tsne.figure.dir, 
866 |         sprintf( "%s_cluster.png", fcs.ce.diff.tsne.figure.dendrogram ) ) )
867 | 
868 | print( ce.diff.test.tsne.res )
869 | 
870 | 
871 | # calculate cross-entropy test for umap by condition
872 | 
873 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" )
874 | 
875 | ce.diff.test.umap.res <- ce.diff.test.umap( 
876 |     umap.result$knn$distances, umap.result$knn$indexes, 
877 |     umap.data, umap.result$config, 
878 |     dmrd.event.condition, 
879 |     partition.label = fcs.condition.label, 
880 |     partition.color = fcs.condition.color, 
881 |     partition.line.type = fcs.condition.line.type, 
882 |     base.test = fcs.ce.diff.base.test, 
883 |     base.dist = fcs.ce.diff.base.dist, 
884 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
885 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.condition, 
886 |     result = file.path( fcs.ce.diff.umap.figure.dir, 
887 |         sprintf( "%s_condition.txt", fcs.ce.diff.umap.result ) ), 
888 |     cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 
889 |         sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.cdf ) ), 
890 |     dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 
891 |         sprintf( "%s_condition.png", fcs.ce.diff.umap.figure.dendrogram ) ) )
892 | 
893 | print( ce.diff.test.umap.res )
894 | 
895 | 
896 | # calculate cross-entropy test for umap by sample
897 | 
898 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" )
899 | 
900 | ce.diff.test.umap.res <- ce.diff.test.umap( 
901 |     umap.result$knn$distances, umap.result$knn$indexes, 
902 |     umap.data, umap.result$config, 
903 |     dmrd.event.sample, 
904 |     partition.label = flow.sample.label, 
905 |     partition.color = flow.sample.color, 
906 |     partition.line.type = flow.sample.line.type.single, 
907 |     base.test = fcs.ce.diff.base.test, 
908 |     base.dist = fcs.ce.diff.base.dist, 
909 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
910 |     dendrogram.order.weight = flow.ce.diff.figure.dendrogram.weight.sample, 
911 |     result = file.path( fcs.ce.diff.umap.figure.dir, 
912 |         sprintf( "%s_sample.txt", fcs.ce.diff.umap.result ) ), 
913 |     cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 
914 |         sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.cdf ) ), 
915 |     dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 
916 |         sprintf( "%s_sample.png", fcs.ce.diff.umap.figure.dendrogram ) ) )
917 | 
918 | print( ce.diff.test.umap.res )
919 | 
920 | 
921 | # calculate cross-entropy test for umap by cluster
922 | 
923 | set.seed.here( fcs.seed.base, "calculate cross-entropy test for umap" )
924 | 
925 | ce.diff.test.umap.res <- ce.diff.test.umap( 
926 |     umap.result$knn$distances, umap.result$knn$indexes, 
927 |     umap.data, umap.result$config, 
928 |     dmrd.event.cluster, 
929 |     partition.label = fcs.cluster.label, 
930 |     partition.color = fcs.cluster.color, 
931 |     partition.line.type = fcs.cluster.line.type, 
932 |     base.test = fcs.ce.diff.base.test, 
933 |     base.dist = fcs.ce.diff.base.dist, 
934 |     prob.sample.n = fcs.ce.diff.prob.sample.n, 
935 |     dendrogram.order.weight = fcs.ce.diff.figure.dendrogram.weight.cluster, 
936 |     result = file.path( fcs.ce.diff.umap.figure.dir, 
937 |         sprintf( "%s_cluster.txt", fcs.ce.diff.umap.result ) ), 
938 |     cdf.figure = file.path( fcs.ce.diff.umap.figure.dir, 
939 |         sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.cdf ) ), 
940 |     dendrogram.figure = file.path( fcs.ce.diff.umap.figure.dir, 
941 |         sprintf( "%s_cluster.png", fcs.ce.diff.umap.figure.dendrogram ) ) )
942 | 
943 | print( ce.diff.test.umap.res )
944 | 
945 | 
946 | # output session info
947 | 
948 | sessionInfo()
949 | 
950 | 


--------------------------------------------------------------------------------
/flow analysis/analyze_flow_cytometry.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | src_dir=../00_src
4 | 
5 | R CMD BATCH --quiet --vanilla $src_dir/analyze_flow_cytometry.r \
6 |     ./analyze_flow_cytometry.out
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/flow analysis/analyze_flow_cytometry_parameter.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # tSNE diff paper--20210608 tSNE-diff flow--Figure 5E SpleenPercentLymph LymphPercentSpleen
  3 | 
  4 | 
  5 | # data parameters
  6 | 
  7 | # Tip: create two folders inside your analysis directory called "Data" and "Output".
  8 | # Put the fcs files in the Data folder.
  9 | # Put the parameter file in the Output folder. Double click on it to start Rstudio.
 10 | # Opening in this way will set your working directory to the Output folder.
 11 | # Type getwd(). Copy the result into the line below, swapping "Data" for "Output"
 12 | fcs.data.dir <- "D://tSNE-diff paper/Analysis/20210608 tSNE-diff flow/Data/LymphPercentSpleen/"
 13 | 
 14 | fcs.condition <- c( "LymphpercentSpleen", "Sp_", "SpleenpercentLymph" )
 15 | 
 16 | fcs.condition.label <- c( 
 17 |     "LymphpercentSpleen" = "LymphpercentSpleen",
 18 |     "Sp_" = "Spleen",
 19 |     "SpleenpercentLymph" = "SpleenpercentLymph"
 20 | )
 21 | 
 22 | # Run the get_channels script and insert the output below.
 23 | fcs.channel <- c( 
 24 |     "FJComp-APC-A", 
 25 |     "FJComp-APC-Fire 750-A",
 26 |     "FJComp-APC-Fire810-A",
 27 |     "FJComp-Alexa Fluor 532-A", 
 28 |     "FJComp-Alexa Fluor 561-A", 
 29 |     "FJComp-Alexa Fluor 700-A",  
 30 |     "FJComp-Alexa Fluor 790-A", 
 31 |     "FJComp-BB515-A", 
 32 |     "FJComp-BB660-P2-A", 
 33 |     "FJComp-BB700-A", 
 34 |     "FJComp-BB755-P-A", 
 35 |     "FJComp-BB790-P-A", 
 36 |     "FJComp-BUV395-A", 
 37 |     "FJComp-BUV496-A",
 38 |     "FJComp-BUV563-A",
 39 |     "FJComp-BUV615-A",
 40 |     "FJComp-BUV661-A",
 41 |     "FJComp-BUV737-A",
 42 |     "FJComp-BUV805-A",
 43 |     "FJComp-BV421-A", 
 44 |     "FJComp-BV480-A", 
 45 |     "FJComp-BV510-A",
 46 |     "FJComp-BV570-A",
 47 |     "FJComp-BV605-A",
 48 |     "FJComp-BV650-A",
 49 |     "FJComp-BV711-A", 
 50 |     "FJComp-BV750-A",
 51 |     "FJComp-Nova Blue 530-A",
 52 |     "FJComp-Nova Blue 585-A",
 53 |     "FJComp-Nova Blue 610-A",
 54 |     "FJComp-Nova Red 685-A",
 55 |     "FJComp-Nova Yellow 690-A",
 56 |     "FJComp-PE Fire 640-A",
 57 |     "FJComp-PE Fire 810-A",
 58 |     "FJComp-PE-A",
 59 |     "FJComp-PE-Cy5-A",
 60 |     "FJComp-PE-Cy5.5-A",
 61 |     "FJComp-PE-Cy7-A",
 62 |     "FJComp-PE-Dazzle594-A",
 63 |     "FJComp-Pacific Blue-A",
 64 |     "FJComp-Pacific Orange-A",
 65 |     "FJComp-PerCP-A",
 66 |     "FJComp-PerCP-eFluor 710-A",
 67 |     "FJComp-Qdot 705-A",
 68 |     "FJComp-SBUV445-A",
 69 |     "FJComp-SBV515-A",
 70 |     "FJComp-Super Bright 436-A",
 71 |     "FJComp-Super Bright 780-A",
 72 |     "FJComp-eFluor 660-A"
 73 | )
 74 | 
 75 | fcs.channel.label <- c( 
 76 |     "FJComp-APC-A" = "RORgT", 
 77 |     "FJComp-APC-Fire 750-A" = "CD44",
 78 |     "FJComp-APC-Fire810-A" = "Gr-1",
 79 |     "FJComp-Alexa Fluor 532-A" = "IgM", 
 80 |     "FJComp-Alexa Fluor 561-A" = "F4-80", 
 81 |     "FJComp-Alexa Fluor 700-A" = "Ki67",  
 82 |     "FJComp-Alexa Fluor 790-A" = "CD90.2", 
 83 |     "FJComp-BB515-A" = "CCR9", 
 84 |     "FJComp-BB660-P2-A" = "TCRgd", 
 85 |     "FJComp-BB700-A" = "PDCA-1", 
 86 |     "FJComp-BB755-P-A" = "CD11c", 
 87 |     "FJComp-BB790-P-A" = "Ly-6C", 
 88 |     "FJComp-BUV395-A" = "CD103", 
 89 |     "FJComp-BUV496-A" = "IgD",
 90 |     "FJComp-BUV563-A" = "NK1.1",
 91 |     "FJComp-BUV615-A" = "CTLA-4",
 92 |     "FJComp-BUV661-A" = "c-Kit",
 93 |     "FJComp-BUV737-A" = "CD62L",
 94 |     "FJComp-BUV805-A" = "GITR",
 95 |     "FJComp-BV421-A" = "CD150", 
 96 |     "FJComp-BV480-A" = "CXCR3", 
 97 |     "FJComp-BV510-A" = "Siglec F",
 98 |     "FJComp-BV570-A" = "TCRb",
 99 |     "FJComp-BV605-A" = "PD-1",
100 |     "FJComp-BV650-A" = "XCR1",
101 |     "FJComp-BV711-A" = "CD127", 
102 |     "FJComp-BV750-A" = "CCR2",
103 |     "FJComp-Nova Blue 530-A" = "CD45",
104 |     "FJComp-Nova Blue 585-A" = "CD4",
105 |     "FJComp-Nova Blue 610-A" = "CD8",
106 |     "FJComp-Nova Red 685-A" = "CD3",
107 |     "FJComp-Nova Yellow 690-A" = "CD19",
108 |     "FJComp-PE Fire 640-A" = "CD11b",
109 |     "FJComp-PE Fire 810-A" = "CD38",
110 |     "FJComp-PE-A" = "GATA-3",
111 |     "FJComp-PE-Cy5-A" = "CD86",
112 |     "FJComp-PE-Cy5.5-A" = "Foxp3",
113 |     "FJComp-PE-Cy7-A" = "CD172a",
114 |     "FJComp-PE-Dazzle594-A" = "CD64",
115 |     "FJComp-Pacific Blue-A" = "Helios",
116 |     "FJComp-Pacific Orange-A" = "CD24",
117 |     "FJComp-PerCP-A" = "MHCII",
118 |     "FJComp-PerCP-eFluor 710-A" = "NKp46",
119 |     "FJComp-Qdot 705-A" = "CD69",
120 |     "FJComp-SBUV445-A" = "B220",
121 |     "FJComp-SBV515-A" = "CD25",
122 |     "FJComp-Super Bright 436-A" = "ICOS",
123 |     "FJComp-Super Bright 780-A" = "KLRG1",
124 |     "FJComp-eFluor 660-A" = "T-bet"
125 | )
126 | 
127 | # Tip: set each to 200 to start.
128 | # Modify (increasing) until the density plots resemble the distributions you are familiar with.
129 | fcs.channel.asinh.scale <- c( 
130 |     "FJComp-APC-A" = 2000, 
131 |     "FJComp-APC-Fire 750-A" = 4000, 
132 |     "FJComp-APC-Fire810-A" = 5000, 
133 |     "FJComp-Alexa Fluor 532-A" = 2000, 
134 |     "FJComp-Alexa Fluor 561-A" = 8000, 
135 |     "FJComp-Alexa Fluor 700-A" = 3000, 
136 |     "FJComp-Alexa Fluor 790-A" = 2000, 
137 |     "FJComp-BB515-A" = 4000,  
138 |     "FJComp-BB660-P2-A" = 2000,  
139 |     "FJComp-BB700-A" = 4000, 
140 |     "FJComp-BB755-P-A" = 2000, 
141 |     "FJComp-BB790-P-A" = 4000, 
142 |     "FJComp-BUV395-A" = 1000, 
143 |     "FJComp-BUV496-A" = 2000, 
144 |     "FJComp-BUV563-A" = 3000, 
145 |     "FJComp-BUV615-A" = 1000, 
146 |     "FJComp-BUV661-A" = 2000, 
147 |     "FJComp-BUV737-A" = 2000, 
148 |     "FJComp-BUV805-A" = 2000, 
149 |     "FJComp-BV421-A" = 6000, 
150 |     "FJComp-BV480-A" = 3000, 
151 |     "FJComp-BV510-A" = 20000, 
152 |     "FJComp-BV570-A" = 3000, 
153 |     "FJComp-BV605-A" = 2000,
154 |     "FJComp-BV650-A" = 2000, 
155 |     "FJComp-BV711-A" = 1000, 
156 |     "FJComp-BV750-A" = 3000, 
157 |     "FJComp-Nova Blue 530-A" = 1000, 
158 |     "FJComp-Nova Blue 585-A" = 4000, 
159 |     "FJComp-Nova Blue 610-A" = 3000, 
160 |     "FJComp-Nova Red 685-A" = 2000, 
161 |     "FJComp-Nova Yellow 690-A" = 5000, 
162 |     "FJComp-PE Fire 640-A" = 10000, 
163 |     "FJComp-PE Fire 810-A" = 2500, 
164 |     "FJComp-PE-A" = 12000, 
165 |     "FJComp-PE-Cy5-A" = 8000, 
166 |     "FJComp-PE-Cy5.5-A" = 8000, 
167 |     "FJComp-PE-Cy7-A" = 3000, 
168 |     "FJComp-PE-Dazzle594-A" = 3000, 
169 |     "FJComp-Pacific Blue-A" = 4000, 
170 |     "FJComp-Pacific Orange-A" = 5000, 
171 |     "FJComp-PerCP-A" = 6000,
172 |     "FJComp-PerCP-eFluor 710-A" = 4000, 
173 |     "FJComp-Qdot 705-A" = 5000, 
174 |     "FJComp-SBUV445-A" = 1500,
175 |     "FJComp-SBV515-A" = 2000, 
176 |     "FJComp-Super Bright 436-A" = 8000, 
177 |     "FJComp-Super Bright 780-A" = 8000,
178 |     "FJComp-eFluor 660-A" = 3000
179 | )
180 | 
181 | fcs.condition.n <- length( fcs.condition )
182 | fcs.channel.n <- length( fcs.channel )
183 | 
184 | 
185 | # general parameters
186 | # Tip: edit the directory to match the location of the script files. 
187 | fcs.src.dir <- "D:/Carlos tSNE script/00_src"
188 | 
189 | # Tip: use today's date
190 | fcs.seed.base <- 20210608
191 | 
192 | # Tip: Set to FALSE while optimizing the tSNE and FlowSOM. 
193 | # Once you have a good run, set to TRUE and then change labels, colors, etc.
194 | fcs.use.cached.results <- FALSE
195 | 
196 | fcs.sample.number.width <- 2
197 | fcs.event.number.width <- 6
198 | 
199 | 
200 | # graphics parameters
201 | 
202 | fcs.color.pool <- c( 
203 |     brewer.pal( 8, "Set1" )[ -6 ], 
204 |     brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
205 |     adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 
206 |         red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 
207 |     adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
208 |         red.f = 0.9, green.f = 0.8, blue.f = 0.7 ), 
209 |     adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 
210 |         red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 
211 |     adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
212 |         red.f = 0.8, green.f = 0.6, blue.f = 0.5 ), 
213 |     adjustcolor( brewer.pal( 8, "Set1" )[ -6 ], 
214 |         red.f = 0.3, green.f = 0.3, blue.f = 0.3 ), 
215 |     adjustcolor( brewer.pal( 7, "Set2" )[ c( 1, 3, 6 ) ], 
216 |         red.f = 0.3, green.f = 0.3, blue.f = 0.3 ) )
217 | fcs.color.pool.n <- length( fcs.color.pool )
218 | 
219 | fcs.line.type.pool <- 1:6
220 | fcs.line.type.pool.n <- length( fcs.line.type.pool )
221 | 
222 | fcs.condition.color <- rep( 
223 |     fcs.color.pool, 
224 |     ceiling( fcs.condition.n / fcs.color.pool.n ) 
225 | )[ 1 : fcs.condition.n ]
226 | names( fcs.condition.color ) <- fcs.condition
227 | 
228 | fcs.condition.line.type <- rep( 
229 |     fcs.line.type.pool, 
230 |     ceiling( fcs.condition.n / fcs.line.type.pool.n ) 
231 | )[ 1 : fcs.condition.n ]
232 | names( fcs.condition.line.type ) <- fcs.condition
233 | 
234 | 
235 | # density parameters
236 | # Tip: set to 50000 while you are setting the asinh scaling.
237 | # Once done, set to NULL and re-run the script on all the data.
238 | 
239 | fcs.density.data.sample.n <- NULL
240 | 
241 | fcs.density.partition.all <- "all"
242 | fcs.density.partition.all.label <- c( "all" = "All" )
243 | fcs.density.partition.all.color <- c( "all" = "grey" )
244 | 
245 | fcs.density.font.size <- 4.5
246 | 
247 | fcs.density.line.size <- 0.2
248 | fcs.density.line.alpha <- 0.3
249 | 
250 | fcs.density.figure.width.base <- 0.4
251 | fcs.density.figure.height.base <- 0.1
252 | 
253 | fcs.density.figure.dir <- "./figure_density"
254 | 
255 | fcs.density.figure.sample <- "density_sample"
256 | fcs.density.figure.cluster <- "density_cluster"
257 | 
258 | 
259 | # cluster parameters
260 | # Tip: In general use the same number for fcs.cluster.n and fcs.flow.som.dim unless you know why to do otherwise.
261 | # Tip: use more clusters for more diverse cell collections. Use as few as possible to speed up processing.
262 | # For example, for a broad immune phenotyping panel use 40-50 clusters.
263 | # For pre-gated cell types (e.g., CD8s), use 10-12 clusters.
264 | fcs.cluster.n <- 40
265 | 
266 | # Tip: naming clusters can be done automatically as clusters 1:n via the first command below.
267 | # To rename your clusters based on marker expression, insert a # before the first command,
268 | # remove the # before the second command,
269 | # and rename the clusters appropriately.
270 | fcs.cluster <- sprintf( "%02d", 1 : fcs.cluster.n )
271 | #fcs.cluster <- c("B cells", "Naive CD4 T cells", "Activated CD4 T cells", 
272 | #                 "Naive CD8 T cells", "Activated CD8 T cells", "Macrophages",
273 | #                 "Monocytes", "Neutrophils", "NK cells", 
274 | #                 "cDCs", "pDCs", "Eosinophils")
275 | 
276 | 
277 | fcs.cluster.label <- sprintf( "Cluster-%s", fcs.cluster )
278 | names( fcs.cluster.label ) <- fcs.cluster
279 | 
280 | # Tip: this controls the grouping of the fcs files that are generated based on the flowSOM clustering.
281 | # If some of the clusters are related (e.g., CD14+ and CD16+ monocytes), 
282 | # you might wish to group these into a single output folder.
283 | # If so, you may use a command such as "a" = c(1, 5, 7),
284 | # where the numbers are the numbered flowSOM clusters.
285 | fcs.cluster.group <- as.list(1:fcs.cluster.n)
286 | names(fcs.cluster.group) <- make.unique(rep(letters, length.out = fcs.cluster.n), sep='')
287 | 
288 | fcs.cluster.color <- rep( 
289 |     fcs.color.pool, 
290 |     ceiling( fcs.cluster.n / fcs.color.pool.n ) 
291 | )[ 1 : fcs.cluster.n ]
292 | names( fcs.cluster.color ) <- fcs.cluster
293 | 
294 | fcs.cluster.line.type <- rep( 
295 |     fcs.line.type.pool, 
296 |     ceiling( fcs.cluster.n / fcs.line.type.pool.n ) 
297 | )[ 1 : fcs.cluster.n ]
298 | names( fcs.cluster.line.type ) <- fcs.cluster
299 | 
300 | fcs.flow.som.dim <- 40
301 | 
302 | fcs.cluster.table.dir <- "./table_cluster"
303 | fcs.cluster.data.dir <- "./data_cluster"
304 | 
305 | fcs.cluster.table.counts <- "cluster_counts"
306 | 
307 | fcs.cluster.data <- "cluster_group"
308 | 
309 | 
310 | # heatmap parameters
311 | 
312 | fcs.heatmap.palette.n <- 100
313 | fcs.heatmap.palette <- colorRampPalette( brewer.pal( 9, "YlOrRd" ) )( 
314 |     fcs.heatmap.palette.n )
315 | 
316 | fcs.heatmap.font.size <- 2.5
317 | 
318 | fcs.heatmap.label.factor.row <- 1.4
319 | fcs.heatmap.label.factor.col <- 1.4
320 | 
321 | fcs.heatmap.width <- 2500
322 | fcs.heatmap.height <- 2500
323 | 
324 | fcs.heatmap.figure.dir <- "./figure_heatmap"
325 | 
326 | fcs.heatmap.figure <- "heatmap"
327 | 
328 | 
329 | # histogram parameters
330 | 
331 | fcs.histogram.font.size <- 7
332 | fcs.histogram.error.bar.size <- 0.2
333 | fcs.histogram.legend.key.size <- 0.7
334 | 
335 | fcs.histogram.label.factor.height <- 0.05
336 | 
337 | fcs.histogram.width <- 4.5
338 | fcs.histogram.height <- 3
339 | 
340 | fcs.histogram.figure.dir <- "./figure_histogram"
341 | 
342 | fcs.histogram.figure <- "histogram"
343 | 
344 | 
345 | # dimensionality reduction parameters
346 | # Tip: Use the third option in most cases.
347 | # More cells will take longer. 
348 | # You might downsample to 50-100k cells at first, then run again with more cells if you like the result.
349 | fcs.dmrd.data.sample.n <- NULL
350 | fcs.dmrd.data.sample.n.per.condition <- 8297
351 | fcs.dmrd.data.sample.n.per.sample <- NULL
352 | 
353 | fcs.dmrd.gradient.color <- c( "black", "blue", "green", "yellow", "red" )
354 | fcs.dmrd.gradient.palette.n <- 100
355 | fcs.dmrd.density.palette <- colorRampPalette( fcs.dmrd.gradient.color )( 
356 |     fcs.dmrd.gradient.palette.n )
357 | 
358 | fcs.dmrd.color.alpha <- 0.3
359 | 
360 | fcs.dmrd.group.title.size <- 8
361 | 
362 | fcs.dmrd.legend.title.size <- 7
363 | fcs.dmrd.legend.label.size <- 7
364 | fcs.dmrd.legend.point.size <- 3
365 | 
366 | fcs.dmrd.label.factor.width <- 0.1
367 | 
368 | # Tip: set the number of rows in the output figures here.
369 | fcs.dmrd.figure.nrow <- 2
370 | fcs.dmrd.figure.ncol <- ceiling( fcs.condition.n / fcs.dmrd.figure.nrow )
371 | 
372 | fcs.dmrd.figure.width <- 3
373 | fcs.dmrd.figure.height <- 3
374 | 
375 | 
376 | # tsne parameters
377 | # Tip: more iterations take longer.
378 | # More iterations are needed for more cells
379 | # For a first look, try 1000.
380 | # For a final figure on ~100k cells, use 5000.
381 | # Tip 2: Set fcs.tsne.thread.n to 0 for max processing speed.
382 | # To enable you to do something else meanwhile, 
383 | # set it to one or two less than the number of threads on your processor (check specs online).
384 | fcs.tsne.iter.n <- 5000
385 | fcs.tsne.thread.n <- 0
386 | 
387 | # Tip: don't change this unless you know why
388 | fcs.tsne.perplexity <- 30
389 | fcs.tsne.theta <- 0.5
390 | fcs.tsne.exaggeration.factor <- 12
391 | 
392 | fcs.tsne.figure.lims.factor <- 1.0
393 | fcs.tsne.figure.point.size <- 1.2
394 | 
395 | fcs.tsne.figure.convergence.width <- 1200
396 | fcs.tsne.figure.convergence.height <- 800
397 | 
398 | fcs.tsne.figure.dir <- "./figure_tsne"
399 | 
400 | fcs.tsne.figure.convergence <- "tsne_convergence"
401 | fcs.tsne.figure.plot <- "tsne_plot"
402 | 
403 | fcs.tsne.cache.file.path <- "./tsne_cache.dat"
404 | 
405 | 
406 | # umap parameters
407 | # Tip: you don't generally need to change the UMAP iterations
408 | fcs.umap.iter.n <- 1000
409 | 
410 | fcs.umap.figure.lims.factor <- 0.8
411 | fcs.umap.figure.point.size <- 1.2
412 | 
413 | fcs.umap.figure.dir <- "./figure_umap"
414 | 
415 | fcs.umap.figure.plot <- "umap_plot"
416 | 
417 | fcs.umap.cache.file.path <- "./umap_cache.dat"
418 | 
419 | 
420 | # cross-entropy test parameters
421 | # Tips: Set this depending on your RAM and number of groups.
422 | # You won't be able to analyze more than about 100k cells unless you have >32GB RAM.
423 | # The crossentropy test works best with at least 10k cells per group.
424 | # Multiple hypothesis testing will greatly reduce your ability to distinguish statistical differences.
425 | fcs.ce.diff.prob.sample.n <- NULL
426 | 
427 | # Tip: set to "ks" unless you have a good statistical reason for using rank testing.
428 | # In that case, use "rank" and "median".
429 | fcs.ce.diff.base.test <- "ks"
430 | fcs.ce.diff.base.dist <- "ks"
431 | 
432 | fcs.ce.diff.test.alpha <- 0.05
433 | 
434 | fcs.ce.diff.figure.font.size <- 2
435 | fcs.ce.diff.figure.line.width <- 3
436 | 
437 | fcs.ce.diff.figure.cdf.resolution <- 500
438 | fcs.ce.diff.figure.cdf.all.color <- "black"
439 | fcs.ce.diff.figure.cdf.all.label <- "All"
440 | 
441 | fcs.ce.diff.figure.dendrogram.weight.condition <- 1 : fcs.condition.n
442 | names( fcs.ce.diff.figure.dendrogram.weight.condition ) <- fcs.condition
443 | 
444 | fcs.ce.diff.figure.dendrogram.weight.cluster <- 1 : fcs.cluster.n
445 | names( fcs.ce.diff.figure.dendrogram.weight.cluster ) <- fcs.cluster
446 | 
447 | fcs.ce.diff.figure.cdf.width <- 1200
448 | fcs.ce.diff.figure.cdf.height <- 800
449 | 
450 | fcs.ce.diff.figure.dendrogram.width <- 2000
451 | fcs.ce.diff.figure.dendrogram.height <- 800
452 | 
453 | 
454 | # cross-entropy test parameters for tsne
455 | 
456 | fcs.ce.diff.tsne.perplexity.factor <- 3
457 | 
458 | fcs.ce.diff.tsne.figure.dir <- "./figure_tsne_ce_diff"
459 | 
460 | fcs.ce.diff.tsne.figure.cdf <- "tsne_ce_diff_cdf"
461 | fcs.ce.diff.tsne.figure.dendrogram <- "tsne_ce_diff_dendrogram"
462 | fcs.ce.diff.tsne.result <- "tsne_ce_diff_result"
463 | 
464 | fcs.ce.diff.tsne.cache.file.path <- "./tsne_ce_diff_cache.dat"
465 | 
466 | 
467 | # cross-entropy test parameters for umap
468 | 
469 | fcs.ce.diff.umap.figure.dir <- "./figure_umap_ce_diff"
470 | 
471 | fcs.ce.diff.umap.figure.cdf <- "umap_ce_diff_cdf"
472 | fcs.ce.diff.umap.figure.dendrogram <- "umap_ce_diff_dendrogram"
473 | fcs.ce.diff.umap.result <- "umap_ce_diff_result"
474 | 
475 | fcs.ce.diff.umap.cache.file.path <- "./umap_ce_diff_cache.dat"
476 | 
477 | 


--------------------------------------------------------------------------------
/flow analysis/ce_diff_test.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # calculates test on differences of cross-entropy
  3 | 
  4 | ce.diff.test <- function( 
  5 |     cross.entropy, 
  6 |     event.partition, 
  7 |     partition.label, partition.color, partition.line.type, 
  8 |     base.test, base.dist, 
  9 |     dendrogram.order.weight, 
 10 |     result, cdf.figure, dendrogram.figure 
 11 | )
 12 | {
 13 |     partition <- levels( event.partition )
 14 |     partition.n <- length( partition )
 15 |     
 16 |     cross.entropy.split <- split( cross.entropy, event.partition )
 17 |     
 18 |     if ( base.test == "ks" )
 19 |     {
 20 |         if ( partition.n == 2 )
 21 |         {
 22 |             ks.test.res <- ks.test( cross.entropy.split[[ 1 ]], 
 23 |                 cross.entropy.split[[ 2 ]] )
 24 |             
 25 |             test.res <- list( ks.single = ks.test.res )
 26 |         }
 27 |         else if ( partition.n > 2 )
 28 |         {
 29 |             comparison.n <- partition.n * ( partition.n - 1 ) / 2
 30 |             
 31 |             ks.pair <- vector( "list", comparison.n )
 32 |             comparison <- character( comparison.n )
 33 |             D.stat <- numeric( comparison.n )
 34 |             p.value <- numeric( comparison.n )
 35 | 
 36 |             k <- 1
 37 |             
 38 |             for ( i in 1 : ( partition.n - 1 ) )
 39 |                 for ( j in (i+1) : partition.n )
 40 |                 {
 41 |                     ks.pair[[ k ]] <- ks.test( cross.entropy.split[[ i ]], 
 42 |                         cross.entropy.split[[ j ]] )
 43 |                     
 44 |                     comparison[ k ] <- sprintf( "%s - %s", 
 45 |                         partition.label[ partition[ i ] ], 
 46 |                         partition.label[ partition[ j ] ] )
 47 |                     
 48 |                     D.stat[ k ] <- ks.pair[[ k ]]$statistic
 49 |                     
 50 |                     p.value[ k ] <- ks.pair[[ k ]]$p.value
 51 |                     
 52 |                     k <- k + 1
 53 |                 }
 54 |             
 55 |             p.value.adj <- p.adjust( p.value, "holm" )
 56 |             
 57 |             test.res <- list( ks.multiple = list( ks.pair = ks.pair, 
 58 |                 comparison = comparison, D.stat = D.stat, 
 59 |                 p.value = p.value, p.value.adj = p.value.adj ) )
 60 |         }
 61 |         else
 62 |             stop( "no partitions for testing cross-entropy differences" )
 63 |     }
 64 |     else if ( base.test == "rank" )
 65 |     {
 66 |         if ( partition.n == 2 )
 67 |         {
 68 |             wilcox.test.res <- wilcox.test( cross.entropy.split[[ 1 ]], 
 69 |                 cross.entropy.split[[ 2 ]] )
 70 |             
 71 |             test.res <- list( rank.single = wilcox.test.res )
 72 |         }
 73 |         else if ( partition.n > 2 )
 74 |         {
 75 |             kruskal.test.res <- kruskal.test( cross.entropy, 
 76 |                 event.partition )
 77 |             
 78 |             dunn.test.res <- dunn.test( cross.entropy, event.partition, 
 79 |                 method = "holm", alpha = fcs.ce.diff.test.alpha, altp = TRUE, 
 80 |                 kw = FALSE, table = FALSE, list = TRUE )
 81 |             
 82 |             test.res <- list( rank.multiple = list( 
 83 |                 kruskal = kruskal.test.res, dunn = dunn.test.res ) )
 84 |         }
 85 |         else
 86 |             stop( "no partitions for testing cross-entropy differences" )
 87 |     }
 88 |     else
 89 |         stop( "wrong base test for cross-entropy differences" )
 90 |     
 91 |     if ( ! is.null( result ) )
 92 |     {
 93 |         result.file <- file( result, "w" )
 94 |         sink( result.file )
 95 |         
 96 |         tr.name <- names( test.res )
 97 |         stopifnot( length( tr.name ) == 1 )
 98 |         
 99 |         if ( tr.name == "ks.single" )
100 |         {
101 |             cat( "\n**  Kolmogorov-Smirnov test\n")
102 |             
103 |             print( test.res$ks.single )
104 |         }
105 |         else if ( tr.name == "ks.multiple" )
106 |         {
107 |             cat( "\n**  Multiple Kolmogorov-Smirnov tests with Holm correction\n\n")
108 |             
109 |             comparison.width <- max( nchar( test.res$ks.multiple$comparison ) )
110 |             
111 |             for ( i in 1 : length( test.res$ks.multiple$comparison ) )
112 |                 cat( sprintf( "%-*s\t\tD = %g\t\tpv = %g\t\tadj-pv = %g\n", 
113 |                     comparison.width, 
114 |                     test.res$ks.multiple$comparison[ i ], 
115 |                     test.res$ks.multiple$D.stat[ i ], 
116 |                     test.res$ks.multiple$p.value[ i ], 
117 |                     test.res$ks.multiple$p.value.adj[ i ] ) )
118 |         }
119 |         else if ( tr.name == "rank.single" )
120 |         {
121 |             cat( "\n**  Wilcoxon rank sum test\n")
122 |             
123 |             print( test.res$rank.single )
124 |         }
125 |         else if ( tr.name == "rank.multiple" )
126 |         {
127 |             cat( "\n**  Kruskal-Wallis rank sum test\n")
128 |             
129 |             print( test.res$rank.multiple$kruskal )
130 |             
131 |             cat( "\n**  Dunn post-hoc test with Holm correction\n\n")
132 |             
133 |             comparison.width <- max( nchar( 
134 |                 test.res$rank.multiple$dunn$comparison ) )
135 |             
136 |             for ( i in 1 : length( test.res$rank.multiple$dunn$comparisons ) )
137 |                 cat( sprintf( "%-*s\t\tZ = %g\t\tpv = %g\t\tadj-pv = %g\n", 
138 |                     comparison.width, 
139 |                     test.res$rank.multiple$dunn$comparisons[ i ], 
140 |                     test.res$rank.multiple$dunn$Z[ i ], 
141 |                     test.res$rank.multiple$dunn$altP[ i ], 
142 |                     test.res$rank.multiple$dunn$altP.adjusted[ i ] ) )
143 |         }
144 |         else
145 |         {
146 |             sink()
147 |             close( result.file )
148 |             stop( "unknown test in ce-diff result" )
149 |         }
150 |         
151 |         sink()
152 |         close( result.file )
153 |     }
154 |     
155 |     if ( ! is.null( cdf.figure ) )
156 |     {
157 |         if ( is.null( partition.label ) )
158 |             partition.label = partition
159 |         
160 |         if ( is.null( partition.color ) )
161 |             partition.color <- rainbow( partition.n )
162 |         
163 |         if ( is.null( partition.line.type ) )
164 |             partition.line.type <- rep( 1, partition.n )
165 |         
166 |         png( filename = cdf.figure, width = fcs.ce.diff.figure.cdf.width, 
167 |             height = fcs.ce.diff.figure.cdf.height )
168 |         
169 |         par( mar = c( 5.5, 6, 2, 1.5 ) )
170 |         
171 |         plot( ecdf( cross.entropy ), ylim = c( 0, 1 ), 
172 |             xlab = "Cross-entropy", ylab = "CDF", main = "", 
173 |             cex.lab = 3, cex.axis = 2.5, 
174 |             col = fcs.ce.diff.figure.cdf.all.color, 
175 |             lwd = fcs.ce.diff.figure.line.width - 1, do.points = FALSE )
176 |         
177 |         for ( pall in partition )
178 |         {
179 |             ces <- cross.entropy.split[[ pall ]]
180 |             ces.n <- length( ces )
181 |             
182 |             if ( ces.n < fcs.ce.diff.figure.cdf.resolution ) {
183 |                 plot( ecdf( ces ), col = partition.color[ pall ], 
184 |                     lty = partition.line.type[ pall ], 
185 |                     lwd = fcs.ce.diff.figure.line.width, 
186 |                     do.points = FALSE, add = TRUE )
187 |             }
188 |             else {
189 |                 ecdf.x <- sort( ces )
190 |                 ecdf.y <- 1 : ces.n / ces.n
191 |                 
192 |                 lines( ecdf.x, ecdf.y, col = partition.color[ pall ], 
193 |                     lty = partition.line.type[ pall ], 
194 |                     lwd = fcs.ce.diff.figure.line.width )
195 |             }
196 |         }
197 |         
198 |         legend( "bottomright", 
199 |             legend = c( fcs.ce.diff.figure.cdf.all.label, partition.label ), 
200 |             col = c( fcs.ce.diff.figure.cdf.all.color, partition.color ), 
201 |             lty = partition.line.type, 
202 |             lwd = fcs.ce.diff.figure.line.width, 
203 |             cex = fcs.ce.diff.figure.font.size )
204 |         
205 |         dev.off()
206 |     }
207 |     
208 |     if ( ! is.null( dendrogram.figure ) && partition.n > 2 )
209 |     {
210 |         cross.entropy.dist <- matrix( 0, nrow = partition.n, 
211 |             ncol = partition.n )
212 |         
213 |         for ( i in 1 : ( partition.n - 1 ) )
214 |             for ( j in (i+1) : partition.n )
215 |             {
216 |                 if ( base.dist == "ks" )
217 |                     cross.entropy.dist[ i, j ] <- ks.test( 
218 |                         cross.entropy.split[[ i ]], 
219 |                         cross.entropy.split[[ j ]] 
220 |                     )$statistic
221 |                 else if ( base.dist == "median" )
222 |                     cross.entropy.dist[ i, j ] <- abs( 
223 |                         median( cross.entropy.split[[ i ]] ) - 
224 |                         median( cross.entropy.split[[ j ]] )
225 |                     )
226 |                 else
227 |                     stop( "wrong base dist for cross-entropy differences" )
228 |                 
229 |                 cross.entropy.dist[ j, i ] <- cross.entropy.dist[ i, j ]
230 |             }
231 |         
232 |         cross.entropy.hclust <- hclust( as.dist( cross.entropy.dist ) )
233 |         
234 |         if ( ! is.null( dendrogram.order.weight ) )
235 |             cross.entropy.hclust <- as.hclust( reorder( 
236 |                 as.dendrogram( cross.entropy.hclust ), 
237 |                 dendrogram.order.weight, 
238 |                 agglo.FUN = mean
239 |             ) )
240 |         
241 |         if ( is.null( partition.label ) )
242 |             partition.label = partition
243 |         
244 |         png( filename = dendrogram.figure, 
245 |             width = fcs.ce.diff.figure.dendrogram.width, 
246 |             height = fcs.ce.diff.figure.dendrogram.height )
247 |         
248 |         par( mar = c( 5, 5.6, 4, 1.4 ) )
249 |         
250 |         plot( cross.entropy.hclust, 
251 |             labels = partition.label, hang = -1, 
252 |             xlab = "", ylab = "", main = "", sub = "", cex.axis = 3, 
253 |             cex = fcs.ce.diff.figure.font.size )
254 |         
255 |         dev.off()
256 |     }
257 |     
258 |     test.res
259 | }
260 | 
261 | 


--------------------------------------------------------------------------------
/flow analysis/ce_diff_test_tsne.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # calculates cross-entropy for tsne plots and calls ce.diff.test
  3 | 
  4 | 
  5 | ce.diff.test.tsne <- function( 
  6 |     orig.data, tsne.data, 
  7 |     event.partition, 
  8 |     partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 
  9 |     base.test = "ks", base.dist = "ks", 
 10 |     prob.sample.n = NULL, dendrogram.order.weight = NULL, 
 11 |     result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 
 12 | )
 13 | {
 14 |     stopifnot( nrow( orig.data ) == nrow( tsne.data ) && 
 15 |         nrow( orig.data ) == length( event.partition ) )
 16 |     
 17 |     data.n <- nrow( orig.data )
 18 |     
 19 |     if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n )
 20 |         prob.sample.idx <- sample( data.n, prob.sample.n )
 21 |     else
 22 |         prob.sample.idx <- 1 : data.n
 23 |     
 24 |     if ( fcs.use.cached.results && 
 25 |         file.exists( fcs.ce.diff.tsne.cache.file.path ) )
 26 |     {
 27 |         cat( "Using cached results for probability\n" )
 28 |         
 29 |         load( fcs.ce.diff.tsne.cache.file.path )
 30 |     }
 31 |     else
 32 |     {
 33 |         cat( "Calculating probability\n" )
 34 |         
 35 |         # sampling here temporary, until optimizing dist( tsne.dat ) below
 36 |         orig.tsne.prob <- calculate.probability.tsne( 
 37 |             orig.data[ prob.sample.idx, ], 
 38 |             tsne.data[ prob.sample.idx, ] 
 39 |         )
 40 |         
 41 |         save( orig.tsne.prob, file = fcs.ce.diff.tsne.cache.file.path )
 42 |     }
 43 |     
 44 |     cross.entropy.all <- calculate.cross.entropy( orig.tsne.prob$orig, 
 45 |         orig.tsne.prob$tsne )
 46 |     
 47 |     event.partition.all <- event.partition[ prob.sample.idx ]
 48 |     
 49 |     ce.diff.test( 
 50 |         cross.entropy.all, 
 51 |         event.partition.all, 
 52 |         partition.label, partition.color, partition.line.type, 
 53 |         base.test, base.dist, 
 54 |         dendrogram.order.weight, 
 55 |         result, cdf.figure, dendrogram.figure
 56 |     )
 57 | }
 58 | 
 59 | 
 60 | calculate.probability.tsne <- function( orig.dat, tsne.dat )
 61 | {
 62 |     orig.dat.n <- nrow( orig.dat )
 63 |     tsne.dat.n <- nrow( tsne.dat )
 64 |     
 65 |     stopifnot( orig.dat.n == tsne.dat.n )
 66 |     
 67 |     # find nearest neighbors in original space and their distances
 68 |     
 69 |     orig.dat.nn2 <- nn2( normalize_input( orig.dat ), 
 70 |         k = fcs.ce.diff.tsne.perplexity.factor * fcs.tsne.perplexity + 1 )
 71 |     
 72 |     orig.dat.self.idx <- sapply( 1 : orig.dat.n, function( ri ) {
 73 |         ri.idx <- which( orig.dat.nn2$nn.idx[ ri, ] == ri )
 74 |         ifelse( length( ri.idx ) == 1, ri.idx, NA )
 75 |     } )
 76 |     
 77 |     stopifnot( ! is.na( orig.dat.self.idx ) )
 78 |     
 79 |     orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 
 80 |         orig.dat.nn2$nn.idx[ ri, - orig.dat.self.idx[ ri ] ] ) )
 81 |     
 82 |     orig.dist2 <- t( sapply( 1 : orig.dat.n, function( ri ) 
 83 |         orig.dat.nn2$nn.dists[ ri, - orig.dat.self.idx[ ri ] ]^2 ) )
 84 |     
 85 |     # calculate probabilities associated to distances in original space
 86 |     
 87 |     orig.stdev <- apply( orig.dist2, 1, function( dd2 ) {
 88 |         tsne.perplexity.error <- function( ss, dd2 ) {
 89 |             p <- exp( - dd2 / (2*ss^2) )
 90 |             if ( sum( p ) < .Machine$double.eps )
 91 |                 p <- 1
 92 |             p <- p / sum( p )
 93 |             p <- p[ p > 0 ]
 94 |             2^( - sum( p * log2( p ) ) ) - fcs.tsne.perplexity
 95 |         }
 96 |         
 97 |         dd2.min.idx <- 1
 98 |         dd2.ascen <- sort( dd2 )
 99 |         while( dd2.ascen[ dd2.min.idx ] == 0 )
100 |             dd2.min.idx <- dd2.min.idx + 1
101 |         ss.lower <- dd2.ascen[ dd2.min.idx ]
102 |         
103 |         dd2.max.idx <- 1
104 |         dd2.descen <- sort( dd2, decreasing = TRUE )
105 |         while( is.infinite( dd2.descen[ dd2.max.idx ] ) )
106 |             dd2.max.idx <- dd2.max.idx + 1
107 |         ss.upper <- dd2.descen[ dd2.max.idx ]
108 |         
109 |         while( tsne.perplexity.error( ss.upper, dd2 ) < 0 )
110 |         {
111 |             ss.lower <- ss.upper
112 |             ss.upper <- 2 * ss.upper
113 |         }
114 |         
115 |         while( tsne.perplexity.error( ss.lower, dd2 ) > 0 )
116 |         {
117 |             ss.upper <- ss.lower
118 |             ss.lower <- ss.lower / 2
119 |         }
120 |         
121 |         uniroot( tsne.perplexity.error, dd2, 
122 |             interval = c( ss.lower, ss.upper ), 
123 |             tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root
124 |     } )
125 |     
126 |     orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) {
127 |         p <- exp( - orig.dist2[ i, ] /  ( 2 * orig.stdev[ i ]^2 ) )
128 |         p / sum( p )
129 |     } ) )
130 |     
131 |     # symmetrize probabilities in original space
132 |     
133 |     for ( i in 1 : orig.dat.n )
134 |         for ( j2 in 1 : length( orig.neigh[ i, ] ) )
135 |         {
136 |             j <- orig.neigh[ i, j2 ]
137 |             
138 |             i2 <- match( i, orig.neigh[ j, ] )
139 |             
140 |             if ( ! is.na( i2 ) )
141 |             {
142 |                 if ( j > i )
143 |                 {
144 |                     sym.prob <- ( orig.prob[ i, j2 ] + orig.prob[ j, i2 ] ) / 2
145 |                     orig.prob[ i, j2 ] <- sym.prob
146 |                     orig.prob[ j, i2 ] <- sym.prob
147 |                 }
148 |             }
149 |             else
150 |                 orig.prob[ i, j2 ] <- orig.prob[ i, j2 ] / 2
151 |         }
152 |     
153 |     orig.prob <- sweep( orig.prob, 1, rowSums( orig.prob ), "/" )
154 |     
155 |     # get distances in tsne space for closest neighbors in original space
156 |     
157 |     tsne.dist2 <- t( sapply( 1 : tsne.dat.n, function( i )
158 |         sapply( orig.neigh[ i, ], function( j )
159 |             sum( ( tsne.dat[ i, ] - tsne.dat[ j, ] )^2 )
160 |         )
161 |     ) )
162 |     
163 |     # calculate probabilities associated to distances in tsne representation
164 |     
165 |     tsne.prob.factor <- tsne.dat.n / 
166 |         ( 2 * sum( 1 / ( 1 + dist( tsne.dat )^2 ) ) )
167 |     
168 |     tsne.prob <- t( apply( tsne.dist2, 1, function( dd2 ) 
169 |         p <- tsne.prob.factor / ( 1 + dd2 )
170 |     ) )
171 |     
172 |     list( orig = orig.prob, tsne = tsne.prob )
173 | }
174 | 
175 | 
176 | calculate.cross.entropy <- function( prim.prob, secd.prob )
177 | {
178 |     prim.prob.n <- nrow( prim.prob )
179 |     secd.prob.n <- nrow( secd.prob )
180 |     
181 |     prim.prob.m <- ncol( prim.prob )
182 |     secd.prob.m <- ncol( secd.prob )
183 |     
184 |     stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m )
185 |     
186 |     sapply( 1 : prim.prob.n, function( i ) 
187 |         - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) )
188 |     )
189 | }
190 | 
191 | 


--------------------------------------------------------------------------------
/flow analysis/ce_diff_test_umap.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # calculates cross-entropy for umap plots and calls ce.diff.test
  3 | 
  4 | 
  5 | ce.diff.test.umap <- function( 
  6 |     orig.dist, orig.knn, umap.data, umap.param, 
  7 |     event.partition, 
  8 |     partition.label = NULL, partition.color = NULL, partition.line.type = NULL, 
  9 |     base.test = "ks", base.dist = "ks", 
 10 |     prob.sample.n = NULL, dendrogram.order.weight = NULL, 
 11 |     result = NULL, cdf.figure = NULL, dendrogram.figure = NULL 
 12 | )
 13 | {
 14 |     stopifnot( nrow( orig.dist ) == nrow( umap.data ) && 
 15 |         nrow( orig.dist ) == length( event.partition ) )
 16 |     
 17 |     data.n <- nrow( orig.dist )
 18 |     
 19 |     if ( ! is.null( prob.sample.n ) && prob.sample.n < data.n )
 20 |         prob.sample.idx <- sample( data.n, prob.sample.n )
 21 |     else
 22 |         prob.sample.idx <- 1 : data.n
 23 |     
 24 |     if ( fcs.use.cached.results && 
 25 |             file.exists( fcs.ce.diff.umap.cache.file.path ) )
 26 |     {
 27 |         cat( "Using cached results for probability\n" )
 28 |         
 29 |         load( fcs.ce.diff.umap.cache.file.path )
 30 |     }
 31 |     else
 32 |     {
 33 |         cat( "Calculating probability\n" )
 34 |         
 35 |         orig.umap.prob <- calculate.probability.umap( orig.dist, orig.knn, 
 36 |             umap.data, umap.param )
 37 |         
 38 |         save( orig.umap.prob, file = fcs.ce.diff.umap.cache.file.path )
 39 |     }
 40 |     
 41 |     cross.entropy.all <- calculate.fuzzy.cross.entropy( 
 42 |         orig.umap.prob$orig[ prob.sample.idx, ], 
 43 |         orig.umap.prob$umap[ prob.sample.idx, ] 
 44 |     )
 45 |     
 46 |     event.partition.all <- event.partition[ prob.sample.idx ]
 47 |     
 48 |     ce.diff.test( 
 49 |         cross.entropy.all, 
 50 |         event.partition.all, 
 51 |         partition.label, partition.color, partition.line.type, 
 52 |         base.test, base.dist, 
 53 |         dendrogram.order.weight, 
 54 |         result, cdf.figure, dendrogram.figure
 55 |     )
 56 | }
 57 | 
 58 | 
 59 | calculate.probability.umap <- function( orig.dis, orig.kn, umap.dat, 
 60 |     umap.param )
 61 | {
 62 |     orig.dat.n <- nrow( orig.dis )
 63 |     umap.dat.n <- nrow( umap.dat )
 64 |     
 65 |     stopifnot( orig.dat.n == umap.dat.n )
 66 |     
 67 |     # get nearest neighbors in original space and their distances
 68 |     
 69 |     orig.self.idx <- sapply( 1 : orig.dat.n, function( ri ) {
 70 |         ri.idx <- which( orig.kn[ ri, ] == ri )
 71 |         ifelse( length( ri.idx ) == 1, ri.idx, NA )
 72 |     } )
 73 |     
 74 |     stopifnot( ! is.na( orig.self.idx ) )
 75 |     
 76 |     orig.neigh <- t( sapply( 1 : orig.dat.n, function( ri ) 
 77 |         orig.kn[ ri, - orig.self.idx[ ri ] ] ) )
 78 |     
 79 |     orig.dis.reduc <- t( sapply( 1 : orig.dat.n, function( ri ) 
 80 |         orig.dis[ ri, - orig.self.idx[ ri ] ] ) )
 81 |     
 82 |     # calculate probabilities associated to distances in original space
 83 |     
 84 |     umap.sigma <- apply( orig.dis.reduc, 1, function( dd ) {
 85 |         umap.sigma.error <- function( ss, dd ) {
 86 |             p <- exp( - pmax( 0, dd - min( dd ) ) / ss )
 87 |             sum( p ) - log2( length( p ) )
 88 |         }
 89 |         
 90 |         dd.ascen <- sort( dd )
 91 |         dd.ascen <- dd.ascen[ dd.ascen > 0 ]
 92 |         ss.lower <- dd.ascen[ 1 ]
 93 |         
 94 |         dd.descen <- sort( dd, decreasing = TRUE )
 95 |         dd.descen <- dd.descen[ ! is.infinite( dd.descen ) ]
 96 |         ss.upper <- dd.descen[ 1 ]
 97 |         
 98 |         while( umap.sigma.error( ss.upper, dd ) < 0 )
 99 |         {
100 |             ss.lower <- ss.upper
101 |             ss.upper <- 2 * ss.upper
102 |         }
103 |         
104 |         while( umap.sigma.error( ss.lower, dd ) > 0 )
105 |         {
106 |             ss.upper <- ss.lower
107 |             ss.lower <- ss.lower / 2
108 |         }
109 |         
110 |         uniroot( umap.sigma.error, dd, 
111 |             interval = c( ss.lower, ss.upper ), 
112 |             tol = ( ss.upper - ss.lower ) * .Machine$double.eps^0.25 )$root
113 |     } )
114 |     
115 |     orig.prob <- t( sapply( 1 : orig.dat.n, function( i ) 
116 |         exp( - pmax( 0, orig.dis.reduc[ i, ] - min( orig.dis.reduc[ i, ] ) ) / 
117 |             umap.sigma[ i ] )
118 |     ) )
119 |     
120 |     # symmetrize probabilities in original space
121 |     
122 |     for ( i in 1 : orig.dat.n )
123 |         for ( j2 in 1 : length( orig.neigh[ i, ] ) )
124 |         {
125 |             j <- orig.neigh[ i, j2 ]
126 |             
127 |             i2 <- match( i, orig.neigh[ j, ] )
128 |             
129 |             if ( ! is.na( i2 ) )
130 |             {
131 |                 if ( j > i )
132 |                 {
133 |                     sym.prob <- orig.prob[ i, j2 ] + orig.prob[ j, i2 ] - 
134 |                         orig.prob[ i, j2 ] * orig.prob[ j, i2 ]
135 |                     orig.prob[ i, j2 ] <- sym.prob
136 |                     orig.prob[ j, i2 ] <- sym.prob
137 |                 }
138 |             }
139 |         }
140 |     
141 |     # get distances in umap space for closest neighbors in original space
142 |     
143 |     umap.dist2 <- t( sapply( 1 : umap.dat.n, function( i )
144 |         sapply( orig.neigh[ i, ], function( j )
145 |             sum( ( umap.dat[ i, ] - umap.dat[ j, ] )^2 )
146 |         )
147 |     ) )
148 |     
149 |     # calculate probabilities associated to distances in umap representation
150 |     
151 |     umap.a <- umap.param$a
152 |     umap.b <- umap.param$b
153 |     
154 |     umap.prob <- t( apply( umap.dist2, 1, function( dd2 ) 
155 |         p <- 1 / ( 1 + umap.a * dd2 ^ umap.b )
156 |     ) )
157 |     
158 |     list( orig = orig.prob, umap = umap.prob )
159 | }
160 | 
161 | 
162 | calculate.fuzzy.cross.entropy <- function( prim.prob, secd.prob )
163 | {
164 |     prim.prob.n <- nrow( prim.prob )
165 |     secd.prob.n <- nrow( secd.prob )
166 |     
167 |     prim.prob.m <- ncol( prim.prob )
168 |     secd.prob.m <- ncol( secd.prob )
169 |     
170 |     stopifnot( prim.prob.n == secd.prob.n && prim.prob.m == secd.prob.m )
171 |     
172 |     sapply( 1 : prim.prob.n, function( i ) 
173 |         - sum( prim.prob[ i, ] * log( secd.prob[ i, ] ) + 
174 |             ( 1 - prim.prob[ i, ] ) * log( 1 - secd.prob[ i, ] ) )
175 |     )
176 | }
177 | 
178 | 


--------------------------------------------------------------------------------
/flow analysis/get_channels.R:
--------------------------------------------------------------------------------
 1 | # get channels script
 2 | # run this to get the channel names for the flowcytoscript parameter file
 3 | 
 4 | library(flowCore)
 5 | library(dplyr)
 6 | library( RColorBrewer )
 7 | 
 8 | # source parameters
 9 | 
10 | param.filename <- "./analyze_flow_cytometry_parameter.r"
11 | 
12 | source( param.filename )
13 | 
14 | flowFrame <- read.FCS(list.files(fcs.data.dir, "\\.fcs$", full.names = TRUE)[1], truncate_max_range = FALSE)
15 | 
16 | channels <- data.frame(name = unname(pData(parameters(flowFrame))$name), 
17 |                       desc = unname(pData(parameters(flowFrame))$desc))
18 | 
19 | for(i in 1:dim(channels)[1] ){
20 |   channels$out1[i] = paste0("\"", channels$name[i], "\", #", channels$desc[i], "\n")
21 |   channels$out2[i] = paste0("\"", channels$name[i], "\" = \"", channels$desc[i], "\",\n") 
22 |   channels$out3[i] = paste0("\"", channels$name[i], "\" = 200, #", channels$desc[i], "\n") 
23 | }
24 | 
25 | descs.filtered <- channels$desc[!is.na(channels$desc) & channels$desc != '-'] 
26 | channels.filtered <- filter(channels, desc %in% descs.filtered)
27 | cat('"', paste0(channels.filtered$desc, collapse = '","'), '"', sep = "")
28 | 
29 | # copy the previous output into the channels.of.interest and delete unnecessary markers
30 | 
31 | channels.of.interest = c("Bcl-6","Ki67","CD95",
32 |                          "Ly-6C","CD127","GATA-3",
33 |                          "CD62L","CXCR5","CD25","CD44","ICOS","RORgT","PD-1","CXCR3",
34 |                          "CXCR4","CD86",
35 |                          "T-bet","IRF4","GL7","CD69")
36 | 
37 | 
38 | # add the output to fcs.channel in the parameter file
39 | temp <- channels$out1[channels$desc %in% channels.of.interest]
40 | temp[length(temp)] <- gsub(',', '', temp[length(temp)])
41 | cat(paste(temp, collapse = ""))
42 | 
43 | # add the output to fcs.channel.label in the parameter file
44 | temp <- channels$out2[channels$desc %in% channels.of.interest]
45 | temp[length(temp)] <- gsub(',', '', temp[length(temp)])
46 | cat(paste(temp, collapse = ""))
47 | 
48 | # add the output to fcs.channel.asinh.scale in the parameter file
49 | temp <- channels$out3[channels$desc %in% channels.of.interest]
50 | temp[length(temp)] <- gsub(',', '', temp[length(temp)])
51 | cat(paste(temp, collapse = ""))
52 | 
53 | # save the analyze_flow_cytometry_parameter.r file
54 | # and run the analyze_flow_cytometry.r script


--------------------------------------------------------------------------------
/flow analysis/plot_all_dmrd_figures.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # plots all dimensionality reduction figures
  3 | 
  4 | 
  5 | plot.all.dmrd.figures <- function( 
  6 |     redu.data, redu.data.max, 
  7 |     redu.figure.lims.factor, redu.figure.point.size, 
  8 |     redu.figure.dir, redu.figure.plot, 
  9 |     dmrd.data, dmrd.event.cluster, dmrd.event.condition 
 10 | )
 11 | {
 12 |     redu.lims <- redu.figure.lims.factor * redu.data.max * c( -1, 1 )
 13 |     
 14 |     the.dmrd.figure.width <- fcs.dmrd.figure.width + 
 15 |         fcs.dmrd.label.factor.width * 
 16 |             max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 
 17 |                 nchar( fcs.cluster.label ) )
 18 |     
 19 |     the.dmrd.figure.width.multi <- 
 20 |         fcs.dmrd.figure.ncol * fcs.dmrd.figure.width + 
 21 |         fcs.dmrd.label.factor.width * 
 22 |             max( nchar( fcs.condition.label ), nchar( flow.sample.label ), 
 23 |                 nchar( fcs.cluster.label ) )
 24 |     
 25 |     the.dmrd.figure.height <- fcs.dmrd.figure.height
 26 |     
 27 |     the.dmrd.figure.height.multi <- 
 28 |         fcs.dmrd.figure.nrow * fcs.dmrd.figure.height
 29 |     
 30 |     # plot all events colored by cluster
 31 |     
 32 |     redu.plot <- plot.dimensionality.reduction( 
 33 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 34 |         event.partition = dmrd.event.cluster, 
 35 |         partition.label = fcs.cluster.label, 
 36 |         partition.color = fcs.cluster.color, 
 37 |         dmrd.lims = redu.lims, 
 38 |         point.size = redu.figure.point.size, 
 39 |         show.guide = TRUE 
 40 |     )
 41 |     
 42 |     ggsave( 
 43 |         file.path( redu.figure.dir, 
 44 |             sprintf( "%s_all_events__cluster.png", redu.figure.plot ) ), 
 45 |         redu.plot, 
 46 |         width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 47 |     )
 48 |     
 49 |     # plot all events colored by condition
 50 |     
 51 |     redu.plot <- plot.dimensionality.reduction( 
 52 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 53 |         event.partition = dmrd.event.condition, 
 54 |         partition.label = fcs.condition.label, 
 55 |         partition.color = adjustcolor( fcs.condition.color, 
 56 |             alpha.f = fcs.dmrd.color.alpha ), 
 57 |         dmrd.lims = redu.lims, 
 58 |         point.size = redu.figure.point.size, 
 59 |         show.guide = TRUE 
 60 |     )
 61 |     
 62 |     ggsave( 
 63 |         file.path( redu.figure.dir, 
 64 |             sprintf( "%s_all_events__condition.png", redu.figure.plot ) ), 
 65 |         redu.plot, 
 66 |         width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 67 |     )
 68 |     
 69 |     # plot all events colored by each marker level
 70 |     
 71 |     for ( fch in fcs.channel )
 72 |     {
 73 |         dmrd.event.level <- dmrd.data[ , fch ]
 74 |         
 75 |         redu.plot <- plot.dimensionality.reduction( 
 76 |             redu.data[ , 1 ], redu.data[ , 2 ], 
 77 |             event.level = dmrd.event.level, 
 78 |             dmrd.lims = redu.lims, 
 79 |             point.size = redu.figure.point.size, 
 80 |             show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 
 81 |         )
 82 |         
 83 |         ggsave( 
 84 |             file.path( redu.figure.dir, 
 85 |                 sprintf( "%s_all_events__%s.png", redu.figure.plot, 
 86 |                     fcs.channel.label[ fch ] ) ), 
 87 |             redu.plot, 
 88 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
 89 |         )
 90 |     }
 91 |     
 92 |     # plot all conditions colored by cluster
 93 |     
 94 |     redu.plot <- plot.dimensionality.reduction( 
 95 |         redu.data[ , 1 ], redu.data[ , 2 ], 
 96 |         event.group = dmrd.event.condition, 
 97 |         group.label = fcs.condition.label, 
 98 |         event.partition = dmrd.event.cluster, 
 99 |         partition.label = fcs.cluster.label, 
100 |         partition.color = fcs.cluster.color, 
101 |         dmrd.lims = redu.lims, 
102 |         dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 
103 |         point.size = redu.figure.point.size, 
104 |         show.guide = TRUE 
105 |     )
106 |     
107 |     ggsave( 
108 |         file.path( redu.figure.dir, 
109 |             sprintf( "%s_all_conditions__cluster.png", redu.figure.plot ) ), 
110 |         redu.plot, 
111 |         width = the.dmrd.figure.width.multi, 
112 |         height = the.dmrd.figure.height.multi 
113 |     )
114 |     
115 |     # plot all conditions colored by each marker level
116 |     
117 |     for ( fch in fcs.channel )
118 |     {
119 |         dmrd.event.level <- dmrd.data[ , fch ]
120 |         
121 |         redu.plot <- plot.dimensionality.reduction( 
122 |             redu.data[ , 1 ], redu.data[ , 2 ], 
123 |             event.group = dmrd.event.condition, 
124 |             group.label = fcs.condition.label, 
125 |             event.level = dmrd.event.level, 
126 |             dmrd.lims = redu.lims, 
127 |             dmrd.nrow = fcs.dmrd.figure.nrow, dmrd.ncol = fcs.dmrd.figure.ncol, 
128 |             point.size = redu.figure.point.size, 
129 |             show.guide = TRUE, guide.name = fcs.channel.label[ fch ] 
130 |         )
131 |         
132 |         ggsave( 
133 |             file.path( redu.figure.dir, 
134 |                 sprintf( "%s_all_conditions__%s.png", redu.figure.plot, 
135 |                     fcs.channel.label[ fch ] ) ), 
136 |             redu.plot, 
137 |             width = the.dmrd.figure.width.multi, 
138 |             height = the.dmrd.figure.height.multi 
139 |         )
140 |     }
141 |     
142 |     # plot each condition colored by cluster
143 |     
144 |     for ( cond in fcs.condition )
145 |     {
146 |         dmrd.cond.idx <- which( dmrd.event.condition == cond )
147 |         
148 |         redu.cond.data <- redu.data[ dmrd.cond.idx, ]
149 |         dmrd.cond.event.cluster <- dmrd.event.cluster[ dmrd.cond.idx ]
150 |         
151 |         redu.plot <- plot.dimensionality.reduction( 
152 |             redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 
153 |             event.partition = dmrd.cond.event.cluster, 
154 |             partition.label = fcs.cluster.label, 
155 |             partition.color = fcs.cluster.color, 
156 |             dmrd.lims = redu.lims, 
157 |             point.size = redu.figure.point.size, 
158 |             show.guide = TRUE 
159 |         )
160 |         
161 |         ggsave( 
162 |             file.path( redu.figure.dir, 
163 |                 sprintf( "%s_%s__cluster.png", redu.figure.plot, 
164 |                     fcs.condition.label[ cond ] ) ), 
165 |             redu.plot, 
166 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
167 |         )
168 |     }
169 |     
170 |     # plot each condition colored by sample
171 |     
172 |     for ( cond in fcs.condition )
173 |     {
174 |         dmrd.cond.idx <- which( dmrd.event.condition == cond )
175 |         
176 |         redu.cond.data <- redu.data[ dmrd.cond.idx, ]
177 |         dmrd.cond.event.sample <- dmrd.event.sample[ dmrd.cond.idx ]
178 |         
179 |         redu.plot <- plot.dimensionality.reduction( 
180 |             redu.cond.data[ , 1 ], redu.cond.data[ , 2 ], 
181 |             event.partition = dmrd.cond.event.sample, 
182 |             partition.label = flow.sample.label, 
183 |             partition.color = adjustcolor( flow.sample.color.single, 
184 |                 alpha.f = fcs.dmrd.color.alpha ), 
185 |             dmrd.lims = redu.lims, 
186 |             point.size = redu.figure.point.size, 
187 |             show.guide = TRUE 
188 |         )
189 |         
190 |         ggsave( 
191 |             file.path( redu.figure.dir, 
192 |                 sprintf( "%s_%s__sample.png", redu.figure.plot, 
193 |                     fcs.condition.label[ cond ] ) ), 
194 |             redu.plot, 
195 |             width = the.dmrd.figure.width, height = the.dmrd.figure.height 
196 |         )
197 |     }
198 | }
199 | 
200 | 


--------------------------------------------------------------------------------
/flow analysis/plot_dimensionality_reduction.r:
--------------------------------------------------------------------------------
  1 | 
  2 | # plots one dimensionality reduction figure
  3 | 
  4 | 
  5 | plot.dimensionality.reduction <- function( 
  6 |     dmrd.x, dmrd.y, 
  7 |     event.group = NULL, group.label = NULL, 
  8 |     event.partition = NULL, partition.label = NULL, partition.color = NULL, 
  9 |     event.level = NULL, 
 10 |     dmrd.lims = NULL, dmrd.nrow = NULL, dmrd.ncol = NULL, 
 11 |     point.size = NULL, show.guide = FALSE, guide.name = NULL 
 12 | )
 13 | {
 14 |     if ( ! is.null( event.partition ) )
 15 |     {
 16 |         if ( is.null( event.group ) )
 17 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.partition )
 18 |         else
 19 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.partition, event.group )
 20 |         
 21 |         if ( is.null( partition.label ) )
 22 |             plot.label <- waiver()
 23 |         else
 24 |             plot.label <- partition.label
 25 |         
 26 |         if ( show.guide )
 27 |             plot.guide <- guide_legend( keyheight = 0.8, 
 28 |                 override.aes = list( size = fcs.dmrd.legend.point.size ), 
 29 |                 label.theme = element_text( size = fcs.dmrd.legend.label.size ), 
 30 |                 title = guide.name, title.position = "top", 
 31 |                 title.theme = element_text( size = fcs.dmrd.legend.title.size ) )
 32 |         else
 33 |             plot.guide <- FALSE
 34 |         
 35 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, 
 36 |                 y = dmrd.y, color = event.partition ) ) + 
 37 |             scale_color_manual( values = partition.color, labels = plot.label, 
 38 |                 guide = plot.guide )
 39 |     }
 40 |     else if ( ! is.null( event.level ) )
 41 |     {
 42 |         if ( is.null( event.group ) )
 43 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.level )
 44 |         else
 45 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.level, event.group )
 46 |         
 47 |         if ( show.guide )
 48 |             plot.guide <- guide_colorbar( barwidth = 0.8, barheight = 10, 
 49 |                 title = guide.name, title.position = "top", 
 50 |                 title.theme = element_text( size = fcs.dmrd.legend.title.size ) )
 51 |         else
 52 |             plot.guide <- FALSE
 53 |         
 54 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y, 
 55 |                 color = event.level ) ) + 
 56 |             scale_color_gradientn( colors = fcs.dmrd.density.palette, 
 57 |                 labels = NULL, guide = plot.guide )
 58 |     }
 59 |     else
 60 |     {
 61 |         if ( is.null( event.group ) )
 62 |             ggdf <- data.frame( dmrd.x, dmrd.y )
 63 |         else
 64 |             ggdf <- data.frame( dmrd.x, dmrd.y, event.group )
 65 |         
 66 |         dmrd.plot <- ggplot( ggdf, aes( x = dmrd.x, y = dmrd.y ) )
 67 |     }
 68 |     
 69 |     if ( is.null( dmrd.lims ) ) {
 70 |         dmrd.xy.max <- max( abs( c( dmrd.x, dmrd.y ) ) )
 71 |         dmrd.lims <- dmrd.xy.max * c( -1, 1 )
 72 |     }
 73 |     
 74 |     if ( is.null( point.size ) )
 75 |         point.size <- 0.5
 76 |     
 77 |     dmrd.plot <- dmrd.plot + 
 78 |         coord_fixed() + 
 79 |         lims( x = dmrd.lims, y = dmrd.lims ) + 
 80 |         geom_point( shape = 20, stroke = 0, size = point.size ) + 
 81 |         theme_bw() + 
 82 |         theme( axis.title = element_blank(), 
 83 |             axis.text  = element_blank(), 
 84 |             axis.ticks = element_blank(), 
 85 |             panel.grid.major = element_blank(), 
 86 |             panel.grid.minor = element_blank() )
 87 |     
 88 |     if ( ! is.null( event.group ) )
 89 |     {
 90 |         dmrd.plot <- dmrd.plot + 
 91 |             facet_wrap( vars( event.group ), 
 92 |                 labeller = as_labeller( group.label ), 
 93 |                 nrow = dmrd.nrow, ncol = dmrd.ncol ) + 
 94 |             theme( strip.background = element_rect( fill = "white" ), 
 95 |                 strip.text = element_text( size = fcs.dmrd.group.title.size ) )
 96 |     }
 97 |     
 98 |     dmrd.plot
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------