├── DESCRIPTION ├── NAMESPACE ├── R └── hypergate.R ├── README.md ├── data └── Samusik_01_subset.RData ├── figure ├── unnamed-chunk-10-1.png ├── unnamed-chunk-17-1.png ├── unnamed-chunk-17-2.png ├── unnamed-chunk-18-1.png ├── unnamed-chunk-19-1.png ├── unnamed-chunk-21-1.png └── unnamed-chunk-7-1.png ├── man ├── FNTN_matrix.recycle.Rd ├── F_beta.Rd ├── Samusik_01_subset.Rd ├── boolmat.Rd ├── channels_contributions.Rd ├── color_biplot_by_discrete.Rd ├── contract.Rd ├── contract.update.Rd ├── coreloop.Rd ├── en.locator.Rd ├── expand.Rd ├── expand.update.Rd ├── f.Rd ├── fill_FNTN_matrix.Rd ├── gate_from_biplot.Rd ├── hgate_info.Rd ├── hgate_pheno.Rd ├── hgate_rule.Rd ├── hgate_sample.Rd ├── hypergate.Rd ├── plot_gating_strategy.Rd ├── polygon.clean.Rd ├── reoptimize_strategy.Rd ├── subset_matrix_hg.Rd └── update_gate.Rd └── vignettes ├── Hypergate.Rmd ├── unnamed-chunk-10-1.png ├── unnamed-chunk-17-1.png ├── unnamed-chunk-17-2.png ├── unnamed-chunk-18-1.png ├── unnamed-chunk-19-1.png ├── unnamed-chunk-21-1.png └── unnamed-chunk-7-1.png /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: hypergate 2 | Title: Machine Learning of Hyperrectangular Gating Strategies for High-Dimensional Cytometry 3 | Version: 0.8.5 4 | Authors@R: c( 5 | person("Etienne","Becht",email="etienne.becht@protonmail.com",role=c("cre","aut")), 6 | person("Samuel","Granjeaud",email="samuel.granjeaud@inserm.fr",role=c("ctb")) 7 | ) 8 | Description: Given a high-dimensional dataset that typically represents a cytometry dataset, and a subset of the datapoints, this algorithm outputs an hyperrectangle so that datapoints within the hyperrectangle best correspond to the specified subset. In essence, this allows the conversion of clustering algorithms' outputs to gating strategies outputs. 9 | Depends: 10 | R (>= 3.5.0) 11 | License: GPL-3 12 | Encoding: UTF-8 13 | LazyData: true 14 | Imports: stats, 15 | grDevices, 16 | utils, 17 | graphics 18 | Suggests: knitr, 19 | rmarkdown, 20 | flowCore, 21 | sp, 22 | sf 23 | VignetteBuilder: knitr 24 | RoxygenNote: 7.3.0 25 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(F_beta) 4 | export(boolmat) 5 | export(channels_contributions) 6 | export(color_biplot_by_discrete) 7 | export(gate_from_biplot) 8 | export(hgate_info) 9 | export(hgate_pheno) 10 | export(hgate_rule) 11 | export(hgate_sample) 12 | export(hypergate) 13 | export(plot_gating_strategy) 14 | export(reoptimize_strategy) 15 | export(subset_matrix_hg) 16 | importFrom(grDevices,col2rgb) 17 | importFrom(grDevices,dev.off) 18 | importFrom(grDevices,png) 19 | importFrom(grDevices,rainbow) 20 | importFrom(grDevices,rgb) 21 | importFrom(graphics,abline) 22 | importFrom(graphics,locator) 23 | importFrom(graphics,plot) 24 | importFrom(graphics,segments) 25 | importFrom(graphics,text) 26 | importFrom(graphics,title) 27 | importFrom(stats,setNames) 28 | importFrom(utils,tail) 29 | -------------------------------------------------------------------------------- /R/hypergate.R: -------------------------------------------------------------------------------- 1 | #' @title f 2 | #' @description Computes the F_beta score given an intenger number of True Positives (TP), True Negatives (TN). It is optimized for speed and n is thus not the total number of events 3 | #' @param TP Number of true positive events 4 | #' @param TN Number of true negative events 5 | #' @param n beta^2*(TP+FN)+TN+FP 6 | #' @param beta2 squared-beta to weight precision (low beta) or recall (high beta) more 7 | f<-function(TP,TN,n,beta2=1){ 8 | (1+beta2)*TP/(n+TP-TN) ##n equals beta2*(TP+FN)+(TN+FP)=beta2*nT+nF 9 | } 10 | 11 | #' @title fill_FNTN_matrix 12 | #' @description fill_FNTN_matrix Used for assessing whether an expansion move is possible 13 | #' @param xp_FN Expression matrix of False Negative events 14 | #' @param xp_TN Expression matrix of True Negative events 15 | #' @param B_FN Boolean matrix of FN events 16 | #' @param B_TN Boolean matrix of TN events 17 | #' @param par Current hyper-rectangle parametrization 18 | fill_FNTN_matrix<-function(xp_FN,xp_TN,B_FN,B_TN,par){ 19 | FN=nrow(xp_FN) 20 | TN=nrow(xp_TN) 21 | N=FN+TN 22 | xp=rbind(xp_FN,xp_TN) 23 | B=rbind(B_FN,B_TN) 24 | 25 | res=matrix(nrow=N,ncol=FN,data=FALSE) 26 | 27 | if(ncol(B)<=.Machine$double.digits){ 28 | summer=matrix(nrow=ncol(B),ncol=1,data=2^(1:ncol(B)-1)) 29 | hashes=(!B)%*%summer 30 | ##Split events according to the state for which channels they are FALSE 31 | B_hash=split(1:N,hashes) 32 | B_FN_hash=split(1:FN,hashes[1:FN]) 33 | 34 | hash_table=matrix(nrow=length(B_hash),ncol=ncol(B),dimnames=list(names(B_hash),colnames(B)),data=NA) 35 | for(hash in names(B_hash)){ 36 | hash_table[hash,]=B[B_hash[[hash]][1],] 37 | } 38 | } else { 39 | 40 | hashes=apply(!B,1,function(x)paste(x,collapse="/")) ##For every active channel, identify which are in FALSE state. 41 | unique_hashes=unique(hashes) 42 | hash_table=matrix(nrow=length(unique_hashes),ncol=ncol(B),dimnames=list(unique_hashes,colnames(B)),data=TRUE) 43 | for(hash in unique_hashes){ 44 | hash_table[hash,as.logical(strsplit(hash,split="/")[[1]])]=FALSE 45 | } 46 | ##hash_table=unique(B) 47 | ##rownames(hash_table)=apply(hash_table,1,function(x)paste(which(!x),collapse="/")) 48 | 49 | ##Split events according to the state for which channels they are FALSE 50 | B_hash=split(1:N,hashes) 51 | B_FN_hash=split(1:FN,hashes[1:FN]) 52 | } 53 | 54 | for(hash in names(B_FN_hash)){ 55 | true_for_hash=hash_table[hash,] ##Check for a given hash which channels are TRUE 56 | compatible_candidates=rownames(hash_table)[rowSums(hash_table[,true_for_hash,drop=FALSE])==sum(true_for_hash)] ##Only other hashes for which only a subset of FALSE channels are FALSE are compatible (for the others there are guaranteed to not be implicated 57 | 58 | for(other_hash in compatible_candidates){ 59 | others=B_hash[[other_hash]] ##Compatible events 60 | xp.tmp=xp[others,,drop=FALSE] ##Subsetting the matrix of events to only keep compatible events 61 | res.tmp=matrix(nrow=length(others),ncol=length(B_FN_hash[[hash]])) 62 | i=0 63 | for(event in B_FN_hash[[hash]]){ 64 | i=i+1 65 | affected_parameters=xp[event,]=matrix(nrow=length(others),ncol=sum_affected,data=affected_parameters.values,byrow=TRUE))==rep(sum_affected,nrow(xp.tmp))) ##If all affected parameters are higher than the values for the event screened, the other event is implicated by the event screened 69 | } 70 | res[others,B_FN_hash[[hash]]]=res.tmp 71 | } 72 | } 73 | res 74 | } 75 | 76 | #' @title plot_gating_strategy 77 | #' @description Plot a hypergate return 78 | #' @param gate A hypergate object (produced by hypergate()) 79 | #' @param xp The expression matrix from which the 'gate' parameter originates 80 | #' @param gate_vector Categorical data from which the 'gate' parameter originates 81 | #' @param level Level of gate_vector identifying the population of interest 82 | #' @param highlight color of the positive population when plotting 83 | #' @param path Where png files will be produced 84 | #' @param cex size of dots 85 | #' @param ... passed to png 86 | #' @examples 87 | #' data(Samusik_01_subset) 88 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 89 | #' gate_vector=Samusik_01_subset$labels 90 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 91 | #' par(mfrow=c(1,ceiling(length(hg$active_channels)/2))) 92 | #' plot_gating_strategy(gate=hg,xp=xp,gate_vector=gate_vector,level=23,highlight="red") 93 | #' @export 94 | 95 | plot_gating_strategy<-function(gate,xp,gate_vector,level,cex=0.5,highlight="black",path="./",...){ 96 | if(missing(path)){ 97 | warning("path argument is missing, output won't be saved to file") 98 | } 99 | w=!is.na(gate_vector) 100 | xp=xp[w,,drop=F] 101 | gate_vector=gate_vector[w] 102 | truce=gate_vector==level 103 | 104 | parameters=gate$pars.history 105 | active_parameters=gate$active_channels##apply(parameters,2,function(x){x[length(x)]!=x[1]}) 106 | parameters=parameters[,active_parameters,drop=FALSE] 107 | if (nrow(parameters) > 1) { 108 | parameters_order= 109 | apply(parameters,2,function(x)min(which(x!=x[1]))) 110 | parameters=parameters[,order(parameters_order,decreasing=FALSE),drop=FALSE] 111 | } 112 | parameters=setNames(parameters[nrow(parameters),,drop=TRUE],colnames(parameters)) 113 | channels=sub("_max","",names(parameters)) 114 | channels=sub("_min","",channels) 115 | 116 | ranges.global=apply(xp[,channels,drop=F],2,range) 117 | rownames(ranges.global)=c("min","max") 118 | 119 | cols=rep("black",nrow(xp)) 120 | cols[gate_vector==level]=highlight 121 | 122 | n=length(parameters) 123 | 124 | active_events=rep(T,nrow(xp)) 125 | iter=0 126 | 127 | ##All parameters that are "_min" take value 2, the "_max" take value 1 128 | direction=rep(2,length(parameters)) 129 | direction[grep("_max",names(parameters))]=1 130 | 131 | ##Loop over pairs of consecutive parameters 132 | for(i in seq(1,n,by=2)){ 133 | if((i+1)<=n){ 134 | iter=iter+1 135 | if(!missing(path)){ 136 | png(paste(path,iter,".png",sep=""),...) 137 | } 138 | chan1=channels[i] 139 | chan2=channels[i+1] 140 | plot( 141 | xp[active_events,chan1], 142 | xp[active_events,chan2], 143 | xlab=chan1, 144 | ylab=chan2, 145 | xlim=ranges.global[,chan1], 146 | ylim=ranges.global[,chan2], 147 | bty="l", 148 | pch=16, 149 | cex=cex, 150 | col=cols[active_events] 151 | ) 152 | segments( 153 | x0=parameters[i], 154 | y0=parameters[i+1], 155 | x1=ranges.global[direction[i],chan1], 156 | col="red" 157 | ) 158 | segments( 159 | x0=parameters[i], 160 | y0=parameters[i+1], 161 | y1=ranges.global[direction[i+1],chan2], 162 | col="red" 163 | ) 164 | 165 | ##Updating active_events 166 | if(direction[i]==2){ 167 | test1=xp[,chan1]>=parameters[i] ##If _min, events above parameter are selected 168 | } else { 169 | test1=xp[,chan1]<=parameters[i] ##Else events above parameter below 170 | } 171 | if(direction[i+1]==2){ 172 | test2=xp[,chan2]>=parameters[i+1] 173 | } else { 174 | test2=xp[,chan2]<=parameters[i+1] 175 | } 176 | active_events=active_events&test1&test2 177 | title(main=paste(paste(channels[1:(i+1)],ifelse(direction[1:(i+1)]==2,"+","-"),sep=""),collapse=", ")) 178 | title(sub=paste("F=",signif(F_beta(truce,active_events),4),sep="")) 179 | if(!missing(path)){ 180 | dev.off() 181 | } 182 | } 183 | } 184 | ##Single last parameter if n is odd 185 | if(n%%2==1){ 186 | iter=iter+1 187 | chan1=channels[i] 188 | if(!missing(path)){ 189 | png(paste(path,iter,".png",sep=""),...) 190 | } 191 | plot(xp[active_events,chan1],main="", 192 | ylab=channels[i], 193 | xlab="Events index", 194 | ylim=ranges.global[,chan1], 195 | bty="l", 196 | pch=16, 197 | cex=cex, 198 | col=cols[active_events] 199 | ) 200 | abline(h=parameters[i],col="red") 201 | if(direction[i]==2){ 202 | test1=xp[,chan1]>=parameters[i] ##If _min, events above parameter are selected 203 | } else { 204 | test1=xp[,chan1]<=parameters[i] ##Else events above parameter below 205 | } 206 | active_events=active_events&test1 207 | title(main=paste(paste(channels[1:(i)],ifelse(direction[1:(i)]==2,"+","-"),sep=""),collapse=", ")) 208 | title(sub=paste("F=",signif(F_beta(truce,active_events),4),sep="")) 209 | if(!missing(path)){ 210 | dev.off() 211 | } 212 | } 213 | } 214 | 215 | #' @title hgate_info 216 | #' @description Extract information about a hypergate return: the channels of 217 | #' the phenotype, the sign of the channels, the sign of the comparison, the 218 | #' thresholds. The function could also compute the Fscores if the xp, 219 | #' gate_vector and level parameters are given. 220 | #' @param hgate A hypergate object (produced by hypergate()) 221 | #' @param xp The expression matrix from which the 'hgate' parameter originates, 222 | #' needed for Fscore computation 223 | #' @param gate_vector Categorical data from which the 'hgate' parameter 224 | #' originates, needed for Fscore computation 225 | #' @param level Level of gate_vector identifying the population of interest, 226 | #' needed for Fscore computation 227 | #' @param beta Beta to weight purity (low beta) or yield (high beta) more, 228 | #' needed for Fscore computation 229 | #' @return A data.frame with channel, sign, comp and threshold columns, and 230 | #' optionnally deltaF (score deterioration when parameter is ignored),Fscore1d (F_value when using only this parameter) and Fscore (F score when all parameters up to this one are included). Fscores are computed if xp, gate_vector 231 | #' and level are passed to the function. 232 | #' @seealso \code{hg_pheno}, \code{hg_rule} 233 | #' @examples 234 | #' data(Samusik_01_subset) 235 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 236 | #' gate_vector=Samusik_01_subset$labels 237 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 238 | #' hgate_info(hgate=hg) 239 | #' hgate_pheno(hgate=hg) 240 | #' hgate_rule(hgate=hg) 241 | #' @export 242 | 243 | hgate_info <- function(hgate, xp, gate_vector, level, beta = 1) { 244 | miss = missing(xp) + missing(level) + missing(level) 245 | if (miss == 1 || miss == 2) { 246 | warning("at least one parameter is missing in order to compute scores.") 247 | } 248 | # retrieve threshold 249 | pars = hgate$pars.history 250 | active_pars = hgate$active_channels 251 | pars = pars[, active_pars, drop = FALSE] 252 | if (nrow(pars) > 1) { 253 | pars_order = apply(pars, 2, function(x) min(which(x != x[1]))) 254 | pars = pars[, order(pars_order, decreasing = FALSE), drop = FALSE] 255 | } 256 | pars = setNames(pars[nrow(pars), , drop = TRUE], colnames(pars)) 257 | # get channel names 258 | channels = sub("_max", "", names(pars)) 259 | channels = sub("_min", "", channels) 260 | # phenotype sign 261 | dir.sign = rep('+', length(pars)) 262 | dir.sign[grep("_max", names(pars))] = '-' 263 | # comparison sign 264 | dir.comp = rep(' >= ', length(pars)) 265 | dir.comp[grep("_max", names(pars))] = ' <= ' 266 | # all together 267 | res = data.frame( 268 | channels, sign = dir.sign, comp = dir.comp, threshold = pars 269 | ) 270 | # scores 271 | if (miss == 0) { 272 | 273 | w = !is.na(gate_vector) 274 | xp = xp[w,,drop=F] 275 | gate_vector = gate_vector[w] 276 | truce = gate_vector==level 277 | 278 | ##Loop over parameters 279 | active_events = rep(TRUE, nrow(xp)) 280 | Fscore = Fscore1D = c() 281 | for(i in seq(length(pars))) { 282 | # events for the current 1D gate 283 | if(dir.comp[i] == ' >= '){ 284 | test1D = xp[,channels[i]] >= pars[i] 285 | } else { 286 | test1D = xp[,channels[i]] <= pars[i] 287 | } 288 | Fscore1D = c(Fscore1D, signif(F_beta(truce, test1D, beta = beta), 4)) 289 | # update active events 290 | active_events = active_events & test1D 291 | Fscore = c(Fscore, signif(F_beta(truce, active_events, beta = beta), 4)) 292 | } 293 | res = cbind(res, deltaF=channels_contributions(hgate, xp, gate_vector, level, beta),Fscore1D, Fscore) 294 | } 295 | res 296 | } 297 | 298 | #' @title hgate_pheno 299 | #' @description Build a human readable phenotype, i.e. a combination of channels 300 | #' and sign (+ or -) from a hypergate return. 301 | #' @param hgate A hypergate object (produced by hypergate()) 302 | #' @param collapse A character string to separate the markers. 303 | #' @return A string representing the phenotype. 304 | #' @seealso \code{hg_rule}, \code{hg_info} 305 | #' @examples 306 | #' ## See hgate_info 307 | #' @export 308 | 309 | hgate_pheno <- function(hgate, collapse = ", ") { 310 | with(hgate_info(hgate), paste0(channels, sign, collapse = collapse)) 311 | } 312 | 313 | #' @title hgate_rule 314 | #' @description Build a human readable rule i.e. a combination of channels, sign 315 | #' of comparison and threshold. 316 | #' @param hgate A hypergate object (produced by hypergate()) 317 | #' @param collapse A character string to separate the markers. 318 | #' @param digits An integer that specifies the decimal part when rounding. 319 | #' @return A data.frame with channel, sign, comp and threshold columns 320 | #' @seealso \code{hg_pheno}, \code{hg_rule} 321 | #' @examples 322 | #' ## See hgate_info 323 | #' @export 324 | 325 | hgate_rule <- function(hgate, collapse = ", ", digits = 2) { 326 | with(hgate_info(hgate), paste0(channels, comp, round(threshold, digits), collapse = collapse)) 327 | } 328 | 329 | #' @title hgate_sample 330 | #' @description Downsample the data in order to fasten the computation and 331 | #' reduce the memory usage. 332 | #' @param gate_vector A Categorical vector whose length equals the number of 333 | #' rows of the matrix to sample (nrow(xp)) 334 | #' @param level A level of gate_vector so that gate_vector == level will produce 335 | #' a boolean vector identifying events of interest 336 | #' @param size An integer specifying the maximum number of events of interest to 337 | #' retain. If the count of events of interest is lower than \code{size}, than 338 | #' \code{size} will be set to that count. 339 | #' @param method A string specifying the method to balance the count of events. 340 | #' \code{"prop"} means proportionnality: if events of interest are sampled in 341 | #' a 1/10 ratio, then all others events are sampled by the same ratio. 342 | #' \code{"10x"} means a balance of 10 between the count events of interest and 343 | #' the count all others events. \code{"ceil"} means a uniform sampling no more 344 | #' than the specified size for each level of the gate_vector. \code{level} is 345 | #' unused in that method. 346 | #' @return A logical vector with TRUE correspond to the events being sampled, ie 347 | #' kept to further analysis 348 | #' @note No replacement is applied. If there are less events in one group or the 349 | #' alternate than the algorithm requires, then all available events are 350 | #' returned. NA values in gate_vector are not sampled, ie ignored. 351 | #' @examples 352 | #' # Standard procedure with downsampling 353 | #' data(Samusik_01_subset) 354 | #' xp <- Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 355 | #' gate_vector <- Samusik_01_subset$labels 356 | #' sampled <- hgate_sample(gate_vector, level=8, 100) 357 | #' table(sampled) 358 | #' table(gate_vector[sampled]) 359 | #' xp_sampled <- xp[sampled, ] 360 | #' gate_vector_sampled <- gate_vector[sampled] 361 | #' hg <- hypergate(xp_sampled, gate_vector_sampled, level=8, delta_add=0.01) 362 | #' # cluster 8 consists in 122 events 363 | #' table(gate_vector) 364 | #' # Downsampling 365 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 100)]) 366 | #' # Downsampling reduces the alternate events 367 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 100, "10x")]) 368 | #' # Downsampling is limited to the maximum number of events of interest 369 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 150)]) 370 | #' # Downsampling is limited to the maximum number of events of interest, and 371 | #' # the alternate events are downsampled to a total of 10 times 372 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 150, "10x")]) 373 | #' # More details about sampling 374 | #' # Convert -1 to NA, NA are not sampled 375 | #' gate_vector[gate_vector==-1] = NA 376 | #' gate_vector = factor(gate_vector) 377 | #' table(gate_vector, useNA = "alw") 378 | #' # 379 | #' # target size = 100 whereas initial freq is 122 for pop 8 380 | #' smp.prop = hgate_sample(gate_vector, level = 8, size = 100, method = "prop") 381 | #' smp.10x = hgate_sample(gate_vector, level = 8, size = 100, method = "10x") 382 | #' smp.ceil = hgate_sample(gate_vector, size = 10, method = "ceil") 383 | #' table(smp.prop) 384 | #' table(smp.10x) 385 | #' table(smp.ceil) 386 | #' rbind(raw = table(gate_vector), 387 | #' prop = table(gate_vector[smp.prop]), 388 | #' `10x` = table(gate_vector[smp.10x]), 389 | #' ceil = table(gate_vector[smp.ceil])) 390 | #' # 391 | #' # target size = 30 whereas initial freq is 25 for pop 14 392 | #' smp.prop = hgate_sample(gate_vector, level = 14, size = 30, method = "prop") 393 | #' smp.10x = hgate_sample(gate_vector, level = 14, size = 30, method = "10x") 394 | #' table(smp.prop) 395 | #' table(smp.10x) 396 | #' rbind(raw = table(gate_vector), 397 | #' prop = table(gate_vector[smp.prop]), 398 | #' `10x` = table(gate_vector[smp.10x])) 399 | #' # prop returns original data, because target size ids larger than initial freq 400 | #' # 10x returns sampled data according to initial freq, such as the total amount 401 | #' # of other events equals 10x initial freq of pop 14 402 | #' @export 403 | 404 | hgate_sample <- function(gate_vector, level, size = 1000, method = "prop") { 405 | ## Where gate_vector is the vector of clusters and level the population of interest) 406 | subsample <- rep(FALSE, length(gate_vector)) 407 | # multi-class methods 408 | if (method == "ceil") { 409 | if (!missing(level)) 410 | warning(sprintf("level is ignored when method is %s", method)) 411 | for (level in unique(gate_vector)) { 412 | if (is.na(level)) next() 413 | nna_pop <- !is.na(gate_vector) 414 | pos_pop <- nna_pop & (gate_vector==level) 415 | sum_pos <- sum(pos_pop, na.rm = TRUE) 416 | if (sum_pos <= size) { 417 | subsample[pos_pop] = TRUE 418 | } else { 419 | subsample[pos_pop][sample.int(sum_pos, size)] = TRUE 420 | } 421 | } 422 | return(subsample) 423 | } 424 | # pos vs neg methods 425 | nna_pop <- !is.na(gate_vector) 426 | pos_pop <- nna_pop & (gate_vector==level) 427 | sum_pos <- sum(pos_pop) 428 | # downsample positive population 429 | if (sum_pos <= size) { 430 | subsample[pos_pop] = TRUE 431 | pos_size = sum_pos 432 | } else { 433 | subsample[pos_pop][sample.int(sum_pos, size)] = TRUE 434 | pos_size = size 435 | } 436 | # downsample positive population 437 | sum_neg <- sum(nna_pop) - sum_pos 438 | if (method == "prop") { 439 | neg_size = round(pos_size / sum_pos * sum_neg) 440 | } else if (method == "10x") { 441 | neg_size = 10 * pos_size 442 | } else { 443 | stop(sprintf("method \"\" is not implemented.", method)) 444 | } 445 | neg_pop <- nna_pop & (gate_vector!=level) 446 | if (neg_size < sum_neg) { 447 | idx <- sample.int(sum_neg, neg_size) 448 | subsample[neg_pop][idx] <- TRUE 449 | } else { 450 | subsample[neg_pop] <- TRUE 451 | } 452 | subsample 453 | } 454 | 455 | #' @title subset_matrix_hg 456 | #' @description Returns a boolean vector whose TRUE elements correspond to events inside the hyperrectangle 457 | #' @param gate a return from hypergate 458 | #' @param xp Expression matrix used for gate 459 | #' @examples 460 | #' data(Samusik_01_subset) 461 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 462 | #' gate_vector=Samusik_01_subset$labels 463 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 464 | #' gating_state=subset_matrix_hg(hg,xp) 465 | #' gating_state=ifelse(gating_state,"Gated in","Gated out") 466 | #' target=ifelse(gate_vector==23,"Target events","Others") 467 | #' table(gating_state,target) 468 | #' @export 469 | subset_matrix_hg<-function(gate,xp){ 470 | if(length(gate$active_channels)>0){ 471 | state=rep(TRUE,nrow(xp)) 472 | pars=tail(gate$pars.history,1)[1,] 473 | for(chan in gate$active_channels){ 474 | chan_real=substr(chan,1,nchar(chan)-4) 475 | if(substr(chan,nchar(chan)-3,nchar(chan))=="_min"){ 476 | state[state][xp[state,chan_real]pars[chan]]=FALSE 479 | } 480 | } 481 | return(state) 482 | } else { 483 | return(rep(TRUE,nrow(xp))) 484 | } 485 | } 486 | 487 | #' @title contract 488 | #' @description Test (some) possible contractions of the hyperrectangle 489 | #' @param par Current parametrization of the hyperrectangle 490 | #' @param xp_pos Expression matrix for positive events 491 | #' @param state_pos State vector of the positive events 492 | #' @param xp_neg Expression matrix for negative events 493 | #' @param state_neg State vector of the negative events 494 | #' @param n passed to f 495 | #' @param TP integer: current number of TP 496 | #' @param TN integer: current number of TN 497 | #' @param beta Passed from the top-level function 498 | #' @param envir Current environment of the optimization 499 | contract<-function( 500 | par=par, 501 | xp_pos=envir$xp_pos, 502 | state_pos=envir$state_pos, 503 | xp_neg=envir$xp_neg, 504 | state_neg=envir$state_neg, 505 | n=envir$n, 506 | TP=envir$TP, 507 | TN=envir$TN, 508 | beta=envir$beta2, 509 | envir=parent.frame() 510 | ){ 511 | ##Evaluate what happens when we increase (strictly) the parameter to fit a TP 512 | TP_par=sort(xp_pos[state_pos,par]) ##Sorted values, possibly duplicates 513 | FP_par=sort(xp_neg[state_neg,par]) ##Sorted values, possibly duplicates 514 | 515 | cc=unique(TP_par) #Contraction candidates. 516 | if(length(cc)<1){ 517 | return( 518 | list( 519 | newF=envir$f_current, 520 | par_best=setNames(min(envir$hg.env$xp[,par]),par), 521 | flag_add=FALSE, 522 | dTP_max=0, 523 | dTN_max=0 524 | ) 525 | ) 526 | } 527 | i_TP=1L ##Iterator for TP vector 528 | i_FP=1L ##Iterator for FP vector 529 | dTP=0L ##Keep track of changes in TP for every TP cutoff value 530 | dTN=0L ##Keep track of changes in TN for every TP cutoff value 531 | 532 | ncc=length(cc) 533 | dTPvec=rep(0L,ncc) 534 | dTNvec=rep(0L,ncc) 535 | j=1L 536 | for(level in cc){ 537 | ##How many TP we lose if we cut just below this level 538 | while(TP_par[i_TP]=pars[p] 709 | b_neg[,p]=xp_neg[,p]>=pars[p] 710 | } 711 | 712 | w_FN=expand.object$FNTN_matrix[1:expand.object$FN,expand.object$which_expansion] 713 | w_TN=expand.object$FNTN_matrix[(expand.object$FN+1):nrow(expand.object$FNTN_matrix),expand.object$which_expansion] 714 | 715 | state_pos[!state_pos][w_FN]=TRUE ##FN converted to TP 716 | state_neg[!state_neg][w_TN]=TRUE ##TN converted to FP 717 | 718 | ##For recycling 719 | B_FN_old=B_FN_old[!state_pos,,drop=FALSE] 720 | B_TN_old=B_TN_old[!state_neg,,drop=FALSE] 721 | B_FN_new=b_pos[!state_pos,,drop=FALSE] 722 | B_TN_new=b_neg[!state_neg,,drop=FALSE] 723 | xp_FN=xp_pos[!state_pos,,drop=FALSE] 724 | xp_TN=xp_neg[!state_neg,,drop=FALSE] 725 | 726 | TN=TN+expand.object$dTN_max 727 | TP=TP+expand.object$dTP_max 728 | 729 | ##f_current=f(TP,TN,n,beta2=beta) 730 | 731 | flag_expansion=TP0 799 | w_FN_changed=rep(TRUE,ncol(FNTN_matrix)) ##We have to include every FN whether they changed or not 800 | w_TN_changed=rowSums(xor(B_TN_old[,names(par)],B_TN_new[,names(par)]))>0 801 | 802 | if(any(w_FN_changed)){ 803 | FNTN_matrix[c(w_FN_changed,w_TN_changed),w_FN_changed]=fill_FNTN_matrix(xp_FN[w_FN_changed,names(par),drop=FALSE],xp_TN[w_TN_changed,names(par),drop=FALSE],B_FN_new[w_FN_changed,names(par),drop=FALSE],B_TN_new[w_TN_changed,names(par),drop=FALSE],par) 804 | } 805 | FNTN_matrix 806 | } 807 | 808 | #' @title coreloop 809 | #' @param par Current parametrization of the hyperrectangle 810 | #' @param hg.env Environment where the main execution of hypergate takes place 811 | #' @description Core optimization loop of hypergate 812 | coreloop<-function(par,hg.env=hg.env$hg.env){ 813 | loop.env=environment() 814 | pars=hg.env$par 815 | active_channels=hg.env$active_channels 816 | TP=hg.env$TP 817 | state_pos=hg.env$state_pos 818 | state_neg=hg.env$state_neg 819 | b_pos=hg.env$b_pos 820 | b_neg=hg.env$b_neg 821 | 822 | 823 | ##Step 1 ADD NEW CHANNEL 824 | contractions=contract(loop.env$par,envir=loop.env$hg.env) 825 | ##best_contraction=which.max(sapply(contractions,function(x)x$newF)) 826 | if(contractions$newF>loop.env$hg.env$f_current){ 827 | contraction=contract.update(contractions,envir=loop.env$hg.env) 828 | 829 | sapply(names(contraction),function(x)assign(x,contraction[[x]],envir=loop.env)) 830 | f_current=contractions$newF 831 | 832 | ##Update add channel 833 | if(hg.env$verbose){ 834 | print(paste("Trying to add",names(contractions$par_best),":",contractions$par_best,",F=",signif(f_current,4L))) 835 | } 836 | f_res=c(loop.env$hg.env$f_res,f_current) 837 | pars.history.rank=rbind(loop.env$hg.env$pars.history.rank,pars) 838 | flag_expansion=TRUE 839 | flag_contraction=TRUE 840 | } else { 841 | if(hg.env$verbose){ 842 | print("No improvement") 843 | } 844 | assign("f_current",hg.env$f_current,envir=loop.env) 845 | return(loop.env) 846 | } 847 | 848 | ##Once we have added a channel 849 | ##We expand as much as possible, and then try to contract 850 | ##We stop only if we haven't done anything (so no contraction or expansion is possible) 851 | while(flag_expansion|flag_contraction){ 852 | ##EXPANSION 853 | flag_expansion=length(active_channels)>1&TP1 855 | flag_recycling=FALSE 856 | while(flag_expansion){ 857 | FN=length(state_pos)-TP 858 | ##Expanding only concerns events that are currently !state. Events for which "state" is positive are guaranteed to stay "state==TRUE". 859 | ##FNTN_matrix tries to create expansion that match a given event, and flag !state events that will be included along with it 860 | if(!flag_recycling){ 861 | FNTN_matrix=fill_FNTN_matrix(loop.env$hg.env$xp_pos[!state_pos,active_channels,drop=FALSE],loop.env$hg.env$xp_neg[!state_neg,active_channels,drop=FALSE],b_pos[!state_pos,active_channels,drop=FALSE],b_neg[!state_neg,active_channels,drop=FALSE],pars[active_channels]) 862 | } else { 863 | FNTN_matrix=expansion$FNTN_matrix 864 | ## FNTN_matrix.raw=fill_FNTN_matrix(xp_pos[!state_pos,active_channels,drop=FALSE],xp_neg[!state_neg,active_channels,drop=FALSE],b_pos[!state_pos,active_channels,drop=FALSE],b_neg[!state_neg,active_channels,drop=FALSE],pars[active_channels]) 865 | ## if(any(FNTN_matrix.raw!=expansion$FNTN_matrix)){ 866 | ## recover() 867 | ## } 868 | } 869 | expansions=expand(envir=loop.env,n=hg.env$n,beta=hg.env$beta2) 870 | if(expansions$newF>f_current){ 871 | which_expansion=expansions$which_expansion 872 | expansion=expand.update(expansions,envir=loop.env,xp_pos=loop.env$hg.env$xp_pos,xp_neg=loop.env$hg.env$xp_neg) 873 | flag_expansion=TPcycle$f_current){ 988 | cycle=current_cycle 989 | rm(current_cycle) 990 | } 991 | } 992 | 993 | if(cycle$f_current==f_previous){ 994 | if(verbose){ 995 | print("Found no channel to add. Exiting") 996 | } 997 | break 998 | } 999 | 1000 | sapply(ls(envir=cycle),function(x){ 1001 | assign(x,envir=hg.env,value=get(x,envir=cycle)) 1002 | }) 1003 | 1004 | ##Ending condition: last channel did not bring much. 1005 | if((f_current-f_previous)1){ ##Unless its the only channel 1010 | pop=which(pars.history.rank[,tail(active_channels,1)]!=pars.history.rank[1,tail(active_channels,1)])[1] ##Flag history from when we added this last channel... 1011 | pars.history.rank=pars.history.rank[1:(pop-1),] ##... and remove it 1012 | f_res=f_res[1:(pop-1)] 1013 | active_channels=active_channels[-length(active_channels)] 1014 | } 1015 | break 1016 | } 1017 | } 1018 | 1019 | ##RETURN 1020 | pars.history=pars.history.rank 1021 | pars.history=matrix(nrow=nrow(pars.history.rank),ncol=ncol(pars.history.rank),data=NA,dimnames=dimnames(pars.history.rank)) 1022 | for(j in 1:ncol(pars.history)){ 1023 | pars.history[,j]=xp_src[match(pars.history.rank[,j],xp[,j]),j%%2+j%/%2] 1024 | } 1025 | return(list(pars.history=pars.history,pars.history.rank=pars.history.rank,f=f_res,active_channels=active_channels)) 1026 | } 1027 | 1028 | #' @title channels_contributions 1029 | #' @description Gives scores for the contribution of individual channels to a gating strategy 1030 | #' @param gate A return from hypergate 1031 | #' @param xp Expression matrix as in the hypergate call 1032 | #' @param gate_vector Categorical vector of length nrow(xp) 1033 | #' @param level A level of gate_vector that identifies the population of interest 1034 | #' @param beta, should be the same as for the hypergate object 1035 | #' @examples 1036 | #' data(Samusik_01_subset) 1037 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 1038 | #' gate_vector=Samusik_01_subset$labels 1039 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0) 1040 | #' contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1) 1041 | #' contribs 1042 | #' @export 1043 | channels_contributions<-function(gate,xp,gate_vector,level,beta=1){ 1044 | truce=gate_vector==level 1045 | F_beta(subset_matrix_hg(gate,xp),truce,beta)-sapply(gate$active_channels,function(chan){ 1046 | subchans=setdiff(gate$active_channels,chan) 1047 | gate$active_channels=subchans 1048 | pred=subset_matrix_hg(gate,xp) 1049 | F_beta(pred,truce,beta) 1050 | }) 1051 | } 1052 | 1053 | #' @title boolmat 1054 | #' @description Convert an expression matrix and a gating strategy to a boolean matrix (whether each event is gated out by each channel) 1055 | #' @param gate A return from hypergate 1056 | #' @param xp Expression matrix as in the hypergate callxp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 1057 | #' @examples 1058 | #' data(Samusik_01_subset) 1059 | #' xp=Samusik_01_subset$xp_src 1060 | #' gate_vector=Samusik_01_subset$labels 1061 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 1062 | #' head(boolmat(hg,xp)) 1063 | #' @export 1064 | boolmat<-function(gate,xp){ 1065 | chans=gate$active_channels 1066 | if(length(chans)>0){ 1067 | cols=colnames(xp) 1068 | signs=rep(0,2*ncol(xp)) 1069 | signs[2*which(paste(cols,"_min",sep="")%in%chans)-1]=1 1070 | signs[2*which(paste(cols,"_max",sep="")%in%chans)]=-1 1071 | ui=matrix(nrow=2*ncol(xp),ncol=ncol(xp),data=0) 1072 | for(i in 1:length(signs)){ 1073 | ui[i,ceiling(i/2)]=signs[i] 1074 | } 1075 | ci=tail(gate$pars.history,1)[1,]*-signs 1076 | ci=matrix(nrow=ncol(xp)*2,ncol=nrow(xp),data=rep(ci,nrow(xp)),byrow=FALSE) 1077 | res=t((ui%*%t(xp)+ci)>=0) 1078 | colnames(res)=colnames(gate$pars.history) 1079 | return(res[,chans]) 1080 | } 1081 | } 1082 | 1083 | #' @title reoptimize_strategy 1084 | #' @description Optimize a gating strategy given a manual selection of channels 1085 | #' @param gate A return from hypergate 1086 | #' @param channels_subset Character vector identifying the channels that will be retained (others are ignored). The form is e.g. c("CD4_min","CD8_max") 1087 | #' @param xp Expression matrix as in the hypergate call 1088 | #' @param gate_vector Categorical vector as in the hypergate call 1089 | #' @param level Level of gate_vector identifying the population of interest 1090 | #' @param beta Yield / purity trade-off 1091 | #' @param verbose Whether to print information about optimization status 1092 | #' @examples 1093 | #' data(Samusik_01_subset) 1094 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 1095 | #' gate_vector=Samusik_01_subset$labels 1096 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0) 1097 | #' contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1) 1098 | #' significant_channels=names(contribs)[contribs>=0.01] 1099 | #' hg_reoptimized=reoptimize_strategy(gate=hg,channels_subset=significant_channels,xp,gate_vector,23) 1100 | #' @export 1101 | reoptimize_strategy<-function(gate,channels_subset,xp,gate_vector,level,beta=1,verbose=FALSE){ 1102 | beta2=beta^2 1103 | gate$active_channels=channels_subset 1104 | if(is.null(rownames(xp))){ 1105 | rownames(xp)=1:nrow(xp) 1106 | } 1107 | r=setNames(rownames(xp),1:nrow(xp)) 1108 | rownames(xp)=1:nrow(xp) 1109 | 1110 | xp_src=xp 1111 | 1112 | ##Sorting values so that cutoffs are very easy to compute. Rank = 1 is small, rank = n is big 1113 | xp=apply(xp,2,rank,ties.method="min") 1114 | rownames(xp)=1:nrow(xp) 1115 | 1116 | ##Each parameter is duplicated (for upper and lower cutoffs) 1117 | ##n=nrow(xp) 1118 | colnames=paste(rep(colnames(xp),each=2),c("min","max"),sep="_") 1119 | 1120 | xp.2=matrix(nrow=nrow(xp),ncol=2*ncol(xp),data=0,dimnames=list(rownames(xp),colnames)) 1121 | storage.mode(xp.2)="integer" 1122 | 1123 | ##For both mins (odd columns) and maxs (even columns), make sure than low rank = "extreme" values (likely to pop). We can then use min to select 1124 | xp.2[,seq(1,ncol(xp.2),by=2)]=xp 1125 | xp.2[,seq(2,ncol(xp.2),by=2)]=nrow(xp)-xp 1126 | xp=xp.2 1127 | rm(xp.2) 1128 | 1129 | b_src=matrix(nrow=nrow(xp),ncol=ncol(xp),data=TRUE,dimnames=dimnames(xp)) 1130 | b_src.tmp=boolmat(gate,xp_src) 1131 | b_src[,colnames(b_src.tmp)]=b_src.tmp 1132 | rm(b_src.tmp) 1133 | 1134 | truce=gate_vector==level 1135 | xp_pos=xp[truce,,drop=FALSE] ##xp for positive elements 1136 | b_pos=b_src[truce,,drop=FALSE] 1137 | state_pos=rowSums(b_pos)==ncol(b_pos) 1138 | 1139 | xp_neg=xp[!truce,,drop=FALSE] ##xp for negative elements 1140 | b_neg=b_src[!truce,,drop=FALSE] 1141 | state_neg=rowSums(b_neg)==ncol(b_neg) 1142 | 1143 | 1144 | ##Number of TN and TP to compute F 1145 | TN=sum(!state_neg) 1146 | TP=sum(state_pos) 1147 | n=beta2*length(state_pos)+length(state_neg) ##This is only used in the function that computes F_beta. 1148 | 1149 | pars.history=gate$pars.history 1150 | pars.history.tmp=tail(pars.history,1) 1151 | pars.history.tmp[,setdiff(colnames(pars.history.tmp),gate$active_channels)]=pars.history[1,setdiff(colnames(pars.history.tmp),gate$active_channels)] 1152 | pars.history=pars.history.tmp 1153 | rm(pars.history.tmp) 1154 | 1155 | pars.history.rank=gate$pars.history.rank 1156 | pars.history.rank.tmp=tail(pars.history.rank,1) 1157 | pars.history.rank.tmp[,setdiff(colnames(pars.history.rank.tmp),gate$active_channels)]=pars.history.rank[1,setdiff(colnames(pars.history.rank.tmp),gate$active_channels)] 1158 | pars.history.rank=pars.history.rank.tmp 1159 | rm(pars.history.rank.tmp) 1160 | storage.mode(pars.history.rank)="integer" 1161 | 1162 | pars=pars.history.rank[1,] 1163 | active_channels=gate$active_channels 1164 | 1165 | ##Optimization loop 1166 | ##f value 1167 | f_current=f(TP,TN,n,beta2) 1168 | f_res=f_current 1169 | 1170 | hg.env=environment() 1171 | loop.env=environment() 1172 | 1173 | flag_expansion=TRUE 1174 | flag_contraction=TRUE 1175 | while(flag_expansion|flag_contraction){ 1176 | ##EXPANSION 1177 | flag_recycling=FALSE 1178 | flag_expansion=length(state_pos)>TP 1179 | while(flag_expansion){ 1180 | ##Expanding only concerns events that are currently !state. Events for which "state" is positive are guaranteed to stay "state==TRUE". 1181 | ##FNTN_matrix tries to create expansion that match a given event, and flag !state events that will be included along with it 1182 | FN=length(state_pos)-TP 1183 | if(!flag_recycling){ 1184 | FNTN_matrix=fill_FNTN_matrix( 1185 | loop.env$hg.env$xp_pos[!state_pos,active_channels,drop=FALSE], 1186 | loop.env$hg.env$xp_neg[!state_neg,active_channels,drop=FALSE], 1187 | b_pos[!state_pos,active_channels,drop=FALSE], 1188 | b_neg[!state_neg,active_channels,drop=FALSE], 1189 | pars[active_channels] 1190 | ) 1191 | } else { 1192 | FNTN_matrix=expansion$FNTN_matrix 1193 | } 1194 | expansions=expand(envir=loop.env,n=hg.env$n,beta=hg.env$beta2) 1195 | if(expansions$newF>f_current){ 1196 | which_expansion=expansions$which_expansion 1197 | expansion=expand.update(expansions,envir=loop.env,xp_pos=loop.env$hg.env$xp_pos,xp_neg=loop.env$hg.env$xp_neg) 1198 | FN=length(state_pos)-TP 1199 | flag_expansion=FN>0 ##If the expansion included all positive events, cannot try to expand more. 1200 | sapply(names(expansion),function(x)assign(x,expansion[[x]],envir=loop.env)) 1201 | f_current=expansions$newF 1202 | f_res=c(f_res,f_current) 1203 | pars.history.rank=rbind(pars.history.rank,pars) 1204 | 1205 | if(hg.env$verbose){ 1206 | print(paste("Expansion of",paste(names(expansion$par_best),":",expansion$par_best,", ",collapse=""),"F=",signif(f_current,4))) 1207 | } 1208 | flag_recycling=TRUE 1209 | } else { 1210 | flag_expansion=FALSE 1211 | } 1212 | } 1213 | 1214 | ##CONTRACTION 1215 | contractions=sapply(active_channels,contract,simplify=FALSE,envir=loop.env,xp_pos=hg.env$xp_pos,xp_neg=hg.env$xp_neg,n=hg.env$n,beta=hg.env$beta2) 1216 | best_contraction=which.max(sapply(contractions,function(x)x$newF)) 1217 | if(contractions[[best_contraction]]$newF>f_current){ 1218 | contraction=contract.update(contractions[[best_contraction]],envir=loop.env,xp_pos=hg.env$xp_pos,xp_neg=hg.env$xp_neg) 1219 | sapply(names(contraction),function(x)assign(x,contraction[[x]],envir=loop.env)) 1220 | f_current=contractions[[best_contraction]]$newF 1221 | if(hg.env$verbose){ 1222 | print(paste("Contracting ",names(contractions[[best_contraction]]$par_best),":",contractions[[best_contraction]]$par_best,",F=",signif(f_current,4L))) 1223 | } 1224 | f_res=c(f_res,f_current) 1225 | pars.history.rank=rbind(pars.history.rank,pars) 1226 | flag_contraction=TRUE 1227 | } else { 1228 | flag_contraction=FALSE 1229 | } 1230 | } 1231 | 1232 | ##RETURN 1233 | pars.history=pars.history.rank 1234 | pars.history=matrix(nrow=nrow(pars.history.rank),ncol=ncol(pars.history.rank),data=NA,dimnames=dimnames(pars.history.rank)) 1235 | for(j in 1:ncol(pars.history)){ 1236 | pars.history[,j]=xp_src[match(pars.history.rank[,j],xp[,j]),j%%2+j%/%2] 1237 | } 1238 | return(list(pars.history=pars.history,pars.history.rank=pars.history.rank,f=f_res,active_channels=active_channels)) 1239 | } 1240 | 1241 | #' @title F_beta 1242 | #' @description Compute a F_beta score comparing two boolean vectors 1243 | #' @param pred boolean vector of predicted values 1244 | #' @param truth boolean vector of true values 1245 | #' @param beta Weighting of yield as compared to precision. Increase beta so that the optimization favors yield, or decrease to favor purity. 1246 | #' @examples 1247 | #' data(Samusik_01_subset) 1248 | #' truth=c(rep(TRUE,40),rep(FALSE,60)) 1249 | #' pred=rep(c(TRUE,FALSE),50) 1250 | #' table(pred,truth) ##40% purity, 50% yield 1251 | #' #' F_beta(pred=pred,truth=truth,beta=2) ##Closer to yield 1252 | #' F_beta(pred=pred,truth=truth,beta=1.5) ##Closer to yield 1253 | #' F_beta(pred=pred,truth=truth,beta=1) ##Harmonic mean 1254 | #' F_beta(pred=pred,truth=truth,beta=0.75) ##Closer to purity 1255 | #' F_beta(pred=pred,truth=truth,beta=0.5) ##Closer to purity 1256 | #' @export 1257 | 1258 | F_beta=function(pred,truth,beta=1){ 1259 | TP=sum(truth&pred) 1260 | if(TP==0){ 1261 | return(0) 1262 | } 1263 | FP=sum(!truth&pred) 1264 | FN=sum(truth&!pred) 1265 | yield=TP/(TP+FN) #aka recall 1266 | purity=TP/(TP+FP) #aka precision 1267 | if(is.nan(purity)|is.nan(yield)){ 1268 | return(0) 1269 | } 1270 | F=(1+beta^2)*(yield*purity)/(yield+beta^2*purity) 1271 | F 1272 | } 1273 | 1274 | #' @title gate_from_biplot 1275 | #' @description From a biplot let the user interactively draw polygons to create a "Gate" vector 1276 | #' @param matrix A matrix 1277 | #' @param x_axis character, colname of matrix used for x-axis in the biplot 1278 | #' @param y_axis character, colname of matrix used for y-axis in the biplot 1279 | #' @param sample Used to downsample the data in case there are too many events to plot quickly 1280 | #' @param bty passed to plot 1281 | #' @param cex passed to plot 1282 | #' @param pch passed to plot 1283 | #' @param ... passed to plot 1284 | #' @examples 1285 | #' if(interactive()){ 1286 | #' ##See the details section to see how this function works 1287 | #' gate_from_biplot(matrix=Samusik_01_subset$tsne,x_axis="tSNE1",y_axis="tSNE2") 1288 | #' } 1289 | #' @export 1290 | #' @return A named vector of length nrow(matrix) and names rownames(matrix). Ungated events are set to NA 1291 | #' @details Data will be displayed as a bi-plot according to user-specified x_axis and y_axis arguments, then a call to locator() is made. The user can draw a polygon around parts of the plot that need gating. When done, 'right-click' or 'escape' (depending on the IDE) escapes locator() and closes the polygon. Then the user can press "n" to draw another polygon (that will define a new population), "c" to cancell and draw the last polygon again, or "s" to exit. When exiting, events that do not fall within any polygon are assigned NA, the others are assigned an integer value corresponding to the last polygon they lie into. 1292 | 1293 | gate_from_biplot<-function(matrix,x_axis,y_axis,...,bty="l",pch=16,cex=0.5,sample=NULL) 1294 | { 1295 | xp=matrix[,c(x_axis,y_axis)] 1296 | if(!is.null(sample)){ 1297 | s=sort(sample(1:nrow(xp),sample)) 1298 | } else { 1299 | s=1:nrow(xp) 1300 | } 1301 | 1302 | gate_updated=rep(0,nrow(xp)) 1303 | color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,...) 1304 | 1305 | input.message=" n : new gate, c : redo last, s : stop gating. " 1306 | cat("\nPlease use the mouse pointer to draw") 1307 | polygons=list() 1308 | i=0 1309 | u="n" 1310 | while(u!="s"){ 1311 | if(!u%in%c("n","s","c")){ 1312 | u=readline(paste("Incorrect input.",input.message,sep="")) 1313 | } 1314 | if(u=="n"){ 1315 | gate=gate_updated 1316 | i=i+1 1317 | col=setNames(c("black",rainbow(i)),0:i) 1318 | 1319 | new.pol=en.locator() 1320 | 1321 | new.pol=polygon.clean(new.pol) 1322 | polygons=c(polygons,list(new.pol)) 1323 | gate_updated=update_gate(xp,polygons[[i]],gate,i) 1324 | 1325 | color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,colors=col,...) 1326 | 1327 | } 1328 | if(u=="c"){ 1329 | gate_updated=gate 1330 | color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,colors=col,...) 1331 | new.pol=en.locator() 1332 | new.pol=polygon.clean(new.pol) 1333 | polygons[[i]]=new.pol 1334 | gate_updated=update_gate(xp,polygons[[i]],gate_updated,i) 1335 | color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,colors=col,...) 1336 | } 1337 | u=readline(paste("Input ?",input.message,"\n",sep="")) 1338 | } 1339 | 1340 | gate=gate_updated 1341 | 1342 | gate[apply(xp,1,function(x)any(is.na(x)))]=NA 1343 | 1344 | setNames(gate,rownames(matrix)) 1345 | } 1346 | 1347 | #' Colors a biplot according to a vector with discrete values 1348 | #' @param matrix a two columns matrix 1349 | #' @param discrete_vector a vector of size nrow(matrix) 1350 | #' @param colors Palette to used named after the unique elements of discrete_vector. Generated from rainbow() if missing. 1351 | #' @param pch passed to plot 1352 | #' @param cex passed to plot 1353 | #' @param bty passed to plot 1354 | #' @param ... passed to plot 1355 | #' @examples 1356 | #' data(Samusik_01_subset) 1357 | #' levels=unique(sort(Samusik_01_subset$labels)) 1358 | #' colors=setNames(colorRampPalette(palette())(length(levels)),sort(levels)) 1359 | #' with(Samusik_01_subset,color_biplot_by_discrete(matrix=tsne,discrete_vector=labels,colors=colors)) 1360 | #' @export 1361 | 1362 | color_biplot_by_discrete<-function(matrix,discrete_vector,...,bty="l",pch=16,cex=0.5,colors=NULL){ 1363 | levels=unique(discrete_vector) 1364 | if(missing(colors)){ 1365 | colors=setNames(c("black",rainbow(length(levels)-1)),levels) 1366 | } 1367 | plot(matrix,bty=bty,pch=pch,cex=cex,col=colors[as.character(discrete_vector)],...) 1368 | } 1369 | 1370 | #' Wrapper to locator that plots segments on the fly 1371 | en.locator<-function(){ 1372 | input=TRUE 1373 | x=vector() 1374 | y=vector() 1375 | while(!is.null(input)){ 1376 | input=locator(1) 1377 | x=c(x,input$x) 1378 | y=c(y,input$y) 1379 | 1380 | if(length(x)>1){ 1381 | segments(x0=x[length(x)-1],x1=x[length(x)],y0=y[length(y)-1],y1=y[length(y)],lty=2) 1382 | } 1383 | } 1384 | segments(x0=x[1],x1=x[length(x)],y0=y[1],y1=y[length(y)],lty=2) 1385 | list(x=x,y=y) 1386 | } 1387 | 1388 | #' Remove self intersection in polygons 1389 | #' @param poly a polygon (list with two components x and y which are equal-length numerical vectors) 1390 | #' @return A polygon without overlapping edges and new vertices corresponding to non-inner points of intersection 1391 | 1392 | polygon.clean<-function(poly){ 1393 | if(requireNamespace("sf", quietly = TRUE)) { 1394 | ## Create a data frame from the input coordinates 1395 | coords_df = data.frame(x = poly$x, y = poly$y) 1396 | 1397 | ## Close polygon 1398 | coords_df = rbind(coords_df, coords_df[1, ]) 1399 | 1400 | ## Convert to an sf object 1401 | coords_sf = sf::st_as_sf(coords_df, coords = c("x", "y")) 1402 | 1403 | ## Convert points to line 1404 | line = sf::st_cast(sf::st_combine(coords_sf), "MULTILINESTRING") 1405 | line = sf::st_node(line) 1406 | 1407 | ## Convert line to polygon 1408 | polygon = sf::st_polygonize(line) 1409 | 1410 | ## Simplify and clean the polygon 1411 | ## Note: st_union is used here for a cleaning effect similar to gUnaryUnion 1412 | cleaned_polygon = sf::st_union(polygon, by_feature = TRUE) 1413 | 1414 | ## Extract coordinates 1415 | coords = sf::st_coordinates(sf::st_cast(cleaned_polygon, "POLYGON")) 1416 | x = coords[, 1] 1417 | y = coords[, 2] 1418 | 1419 | ## Return the cleaned coordinates 1420 | return(list(x = x, y = y)) 1421 | 1422 | } else { 1423 | return(poly) 1424 | } 1425 | } 1426 | 1427 | #' Updates a gate vector 1428 | #' @param xp A two colums matrix 1429 | #' @param polygon A list with two components x and y of equal lenghts and numeric values 1430 | #' @param gate_vector a vector of length nrow(xp) with integer values 1431 | #' @param value The number that will be assigned to gate_vector, corresponding to points that lie in the polygon 1432 | #' @return The updated gate_vector 1433 | 1434 | update_gate=function(xp,polygon,gate_vector=rep(0,nrow(xp)),value=1){ 1435 | if(requireNamespace("sp",quietly=FALSE)){ 1436 | gate_vector[sp::point.in.polygon(xp[,1],xp[,2],polygon$x,polygon$y)!=0]=value 1437 | } 1438 | gate_vector 1439 | } 1440 | 1441 | #' 2000 events randomly sampled from the 'Samusik_01' dataset 1442 | #' @name Samusik_01_subset 1443 | #' @docType data 1444 | #' @format list with four elements: fs_src (a flowSet), xp_src (its expression matrix), labels (manual gates of the events) and tsne (a tSNE projection of the dataset) 1445 | #' @references https://flowrepository.org/id/FR-FCM-ZZPH 1446 | "Samusik_01_subset" 1447 | 1448 | #' @importFrom grDevices col2rgb 1449 | NULL 1450 | #' @importFrom grDevices dev.off 1451 | NULL 1452 | #' @importFrom grDevices png 1453 | NULL 1454 | #' @importFrom grDevices rainbow 1455 | NULL 1456 | #' @importFrom grDevices rgb 1457 | NULL 1458 | #' @importFrom graphics abline 1459 | NULL 1460 | #' @importFrom graphics locator 1461 | NULL 1462 | #' @importFrom graphics plot 1463 | NULL 1464 | #' @importFrom graphics segments 1465 | NULL 1466 | #' @importFrom graphics text 1467 | NULL 1468 | #' @importFrom graphics title 1469 | NULL 1470 | #' @importFrom stats setNames 1471 | NULL 1472 | #' @importFrom utils tail 1473 | NULL 1474 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | This Vignette will walk you through the usage of the Hypergate R package. 7 | 8 | ## Package installation 9 | Installing dependencies: 10 | 11 | ```r 12 | install.packages(c("sp", "polyclip", "rgeos")) 13 | source("https://bioconductor.org/biocLite.R") 14 | biocLite("flowCore") 15 | ``` 16 | Installing the package from github: 17 | 18 | ```r 19 | install.packages("devtools") 20 | library(devtools) 21 | install_github(repo = "ebecht/hypergate") 22 | ``` 23 | 24 | 25 | ```r 26 | library(hypergate) 27 | ``` 28 | 29 | ## Data loading 30 | 31 | 32 | ```r 33 | data(Samusik_01_subset, package = "hypergate") 34 | ``` 35 | 36 | This loads 2000 datapoints randomly sampled from the *Samusik_01* dataset (available from FlowRepository accession number FR-FCM-ZZPH). This object is a list which includes as elements 37 | 38 | 1. *fs_src* a flowSet with 1 flowFrame corresponding to the data subset 39 | 40 | 2. *xp_src* a matrix corresponding to the expression of the data subset. Rownames correspond to event numbers in the unsampled dataset. Colnames correspond to protein targets (or other information e.g. events' manually-annotated labels) 41 | 42 | 3. *labels* numeric vector encoding manually-annotated labels, with the value -1 for ungated events. The text labels for the gated populations are available from FlowRepostiry 43 | 44 | 4. *regular_channels* A subset of colnames(Samusik_01_subset$xp_src) that corresponds to protein targets 45 | 46 | 5. *tsne* A 2D-tSNE ran on the whole dataset and subsampled to 2000 events 47 | 48 | ## Specifying the cell subset of interest 49 | 50 | Hypergate requires in particular as its arguments 51 | 52 | 1. an expression matrix (which we have as *Samusik_01_subset$xp_src*) 53 | 54 | 2. a vector specifying which events to attempt to gate on. This section discusses ways to achieve this point 55 | 56 | #### Selection from low-dimensional plot 57 | We included in the package a function with a rudimentary (hopefully sufficient) interface that allows for the selection of a cell subset of interest from a 2D biplot by drawing a polygon around it using the mouse. Since this function is interactive we cannot execute it in this Vignette but an example call would be as such (feel free to try it): 58 | 59 | 60 | ```r 61 | g = gate_from_biplot(Samusik_01_subset$tsne, "tSNE1", "tSNE2") 62 | ``` 63 | 64 | For this tutorial we define manually the polygon instead 65 | 66 | 67 | ```r 68 | x = c(12.54, 8.08, 7.12, 12.12, 17.32, 20.62, 21.04, 20.83, 18.07, 69 | 15.2) 70 | y = c(-10.61, -14.76, -18.55, -20.33, -21.16, -19.74, -14.4, 71 | -11.08, -10.02, -9.42) 72 | pol = list(x = x, y = y) 73 | library("sp") 74 | gate_vector = sp::point.in.polygon(Samusik_01_subset$tsne[, 1], 75 | Samusik_01_subset$tsne[, 2], pol$x, pol$y) 76 | plot(Samusik_01_subset$tsne, pch = 16, cex = 0.5, col = ifelse(gate_vector == 77 | 1, "firebrick3", "lightsteelblue")) 78 | polygon(pol, lty = 2) 79 | ``` 80 | 81 | ![Manual selection of a cluster on a 2D t-SNE](vignettes/unnamed-chunk-7-1.png) 82 | 83 | #### Clustering 84 | 85 | Another option to define a cell cluster of interest is to use the output of a clustering algorithm. Popular options for cytometry include *FlowSOM* (available from Bioconductor) or *Phenograph* (available from the ```cytofkit``` package from Bioconductor). An example call for Rphenograph is below: 86 | 87 | 88 | ```r 89 | require(Rphenograph) 90 | set.seed(5881215) 91 | clustering = Rphenograph(Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels]) 92 | cluster_labels = membership(clustering[[2]]) 93 | ``` 94 | In this Vignette we use the simpler kmeans option instead: 95 | 96 | 97 | ```r 98 | set.seed(5881215) 99 | cluster_labels = kmeans(Samusik_01_subset$tsne, 20, nstart = 100)$cluster 100 | ``` 101 | 102 | In this example we can see that the kmeans cluster *20* corresponds to the population we manually selected from the t-SNE biplot 103 | 104 | ```r 105 | plot(Samusik_01_subset$tsne, col = ifelse(cluster_labels == 20, 106 | "firebrick3", "lightsteelblue"), pch = 16, cex = 0.5) 107 | ``` 108 | 109 | ![Selection of a cluster from a clustering algorithm output](vignettes/unnamed-chunk-10-1.png) 110 | 111 | ## Running Hypergate 112 | 113 | The function to optimize gating strategies is ```hypergate```. Its main arguments are ```xp``` (a numeric matrix encoding expression), ```gate_vector``` (a vector with few unique values), ```level``` (specificies what value of gate_vector to gate upon, i.e. events satisfying ```gate_vector==level``` will be gated in) 114 | 115 | 116 | ```r 117 | hg_output = hypergate(xp = Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels], 118 | gate_vector = gate_vector, level = 1, verbose = FALSE) 119 | ``` 120 | 121 | ## Interpreting and polishing the results 122 | 123 | ### Gating datapoints 124 | 125 | The following function allows to subset an expression matrix given a return from *Hypergate*. The new matrix needs to have the same column names as the original matrix. 126 | 127 | 128 | ```r 129 | gating_predicted = subset_matrix_hg(hg_output, Samusik_01_subset$xp_src[, 130 | Samusik_01_subset$regular_channels]) 131 | ``` 132 | 133 | 134 | ```r 135 | table(ifelse(gating_predicted, "Gated-in", "Gated-out"), ifelse(gate_vector == 136 | 1, "Events of interest", "Others")) 137 | ``` 138 | 139 | 140 | | | Events of interest| Others| 141 | |:---------|------------------:|------:| 142 | |Gated-in | 116| 0| 143 | |Gated-out | 10| 1874| 144 | 145 | Another option, which offers more low-level control, is to examine for each datapoint whether they pass the threshold for each parameter. The function to obtain such a boolean matrix is ```boolmat```. Here our gating strategy specifies *SiglecF+cKit-Ly6C-*. We would thus obtain a 3-columns x 2000 (the number of events) rows 146 | 147 | 148 | ```r 149 | bm = boolmat(gate = hg_output, xp = Samusik_01_subset$xp_src[, 150 | Samusik_01_subset$regular_channels]) 151 | head(bm) 152 | ``` 153 | 154 | ``` 155 | ## SiglecF_min cKit_max Ly6C_max 156 | ## 20 FALSE TRUE TRUE 157 | ## 28 FALSE TRUE TRUE 158 | ## 70 FALSE FALSE TRUE 159 | ## 110 TRUE FALSE FALSE 160 | ## 120 FALSE TRUE FALSE 161 | ## 159 FALSE TRUE FALSE 162 | ``` 163 | 164 | 165 | | | Events of interest| Others| 166 | |:----------------------------|------------------:|------:| 167 | |Gated-out because of SiglecF | 9| 1829| 168 | |SiglecF above threshold | 117| 45| 169 | 170 | ### Examining the output 171 | 172 | The following function will plot the output of Hypergate. Arguments are 173 | 174 | 1. ```gate``` an object returned by Hypergate 175 | 176 | 2. ```xp``` an expression matrix whose columns are named similarly as the ones used to create the ```gate``` object 177 | 178 | 3. ```gate_vector``` and ```level``` to specify which events are "of interest" 179 | 180 | 4. ```highlight``` a color that will be used to highlight the events of interest 181 | 182 | 183 | ```r 184 | plot_gating_strategy(gate = hg_output, xp = Samusik_01_subset$xp_src[, 185 | Samusik_01_subset$regular_channels], gate_vector = gate_vector, 186 | level = 1, highlight = "firebrick3") 187 | ``` 188 | 189 | ![Gating strategy](vignettes/unnamed-chunk-17-1.png)![Gating strategy](vignettes/unnamed-chunk-17-2.png) 190 | 191 | Another important point to consider is how the F$\beta$-score increases with each added channel. This gives an idea of how many channels are required to reach a close-to-optimal gating strategy. 192 | 193 | This will identify at which steps the parameters were first activated and optimized: 194 | 195 | ```r 196 | f_values_vs_number_of_parameters = c(F_beta(rep(TRUE, nrow(Samusik_01_subset$xp_src)), 197 | gate_vector == 1), hg_output$f[c(apply(hg_output$pars.history[, 198 | hg_output$active_channels], 2, function(x) min(which(x != 199 | x[1]))) - 1, nrow(hg_output$pars.history))][-1]) 200 | barplot(rev(f_values_vs_number_of_parameters), names.arg = rev(c("Initialization", 201 | paste("+ ", sep = "", hg_output$active_channels))), las = 3, 202 | mar = c(10, 4, 1, 1), horiz = TRUE, xlab = "Cumulative F1-score") 203 | ``` 204 | 205 | ![F1-score obtained during optimization when adding parameters](vignettes/unnamed-chunk-18-1.png) 206 | 207 | This graph tells us that the biggest increase is by far due to SiglecF+, while the lowest is due to Ly6C-. 208 | 209 | ### Channels contributions 210 | 211 | The previous graph only shows how the F-value evolved during optimization, but what we really want to know is how much each parameter contributes to the final output (sometimes a parameter will have a big impact at the early steps of the optimization but will become relatively unimportant towards the end, if multiple other parameters collectively account for most of its discriminatory power). We use the following function to assess this, which measures how much performances lower when a parameter is ignored. The more the performances lower, the more important the parameter is. 212 | 213 | 214 | ```r 215 | contributions = channels_contributions(gate = hg_output, xp = Samusik_01_subset$xp_src[, 216 | Samusik_01_subset$regular_channels], gate_vector = gate_vector, 217 | level = 1, beta = 1) 218 | barplot(contributions, las = 3, mar = c(10, 4, 1, 1), horiz = TRUE, 219 | xlab = "F1-score deterioration when the parameter is ignored") 220 | ``` 221 | 222 | ![Contribution of each parameter to the output](vignettes/unnamed-chunk-19-1.png) 223 | 224 | ### Reoptimize strategy 225 | 226 | Since Ly6C contributes very little, we may want to ignore it to obtain a shorter gating strategy. We could keep the current threshold values for the other parameters, but it is best to re-compute the other thresholds to account for the loss of some parameters. 227 | To do that we use the following function: 228 | 229 | 230 | ```r 231 | hg_output_polished = reoptimize_strategy(gate = hg_output, channels_subset = c("SiglecF_min", 232 | "cKit_max"), xp = Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels], 233 | gate_vector = gate_vector, level = 1) 234 | ``` 235 | 236 | Finally, we get to plot our final strategy: 237 | 238 | 239 | ```r 240 | plot_gating_strategy(gate = hg_output_polished, xp = Samusik_01_subset$xp_src[, 241 | Samusik_01_subset$regular_channels], gate_vector = gate_vector, 242 | level = 1, highlight = "firebrick3") 243 | ``` 244 | 245 | ![Final output](vignettes/unnamed-chunk-21-1.png) 246 | 247 | ### Human-readable output 248 | 249 | Thanks to a nice contribution for SamGG on github, there are three functions that make the outputs more readable: 250 | 251 | 252 | ```r 253 | hgate_pheno(hg_output) 254 | ``` 255 | 256 | ``` 257 | ## [1] "SiglecF+, cKit-, Ly6C-" 258 | ``` 259 | 260 | ```r 261 | hgate_rule(hg_output) 262 | ``` 263 | 264 | ``` 265 | ## [1] "SiglecF >= 2.21, cKit <= 1.77, Ly6C <= 2.98" 266 | ``` 267 | 268 | ```r 269 | hgate_info(hg_output) 270 | ``` 271 | 272 | ``` 273 | ## channels sign comp threshold 274 | ## SiglecF_min SiglecF + >= 2.208221 275 | ## cKit_max cKit - <= 1.770901 276 | ## Ly6C_max Ly6C - <= 2.983523 277 | ``` 278 | 279 | ```r 280 | # Fscores can be retrieved when the same parameters given to 281 | # hypergate() are given to hgate_info(): 282 | hg_out_info = hgate_info(hg_output, xp = Samusik_01_subset$xp_src[, 283 | Samusik_01_subset$regular_channels], gate_vector = gate_vector, 284 | level = 1) 285 | hg_out_info 286 | ``` 287 | 288 | ``` 289 | ## channels sign comp threshold deltaF Fscore1D Fscore 290 | ## SiglecF_min SiglecF + >= 2.208221 0.76351765 0.8125 0.8125 291 | ## cKit_max cKit - <= 1.770901 0.07988981 0.1294 0.9360 292 | ## Ly6C_max Ly6C - <= 2.983523 0.02267769 0.1809 0.9587 293 | ``` 294 | 295 | ```r 296 | # and formatted readily 297 | paste0(hg_out_info[, "Fscore"], collapse = ", ") 298 | ``` 299 | 300 | ``` 301 | ## [1] "0.8125, 0.936, 0.9587" 302 | ``` 303 | 304 | ## Final notes 305 | 306 | Some comments about potential questions on your own projects (raised by QBarbier): 307 | 308 | ### Which channels to use as input? 309 | Anything that would be relevant for a gating strategy should be used as an input. So usually any phenotypic channel would be included. If you know that you would not use certain parameters on subsequent experiments (for instance if the staining is intracellular and you plan to sort a live population and thus cannot permeabilize your cells), you should exclude the corresponding channels. I usually do not use channels that were used in pre-gating steps (e.g. CD45 for immune cells). Finally, if you plan to use flow cytometry and use hypergate on a CyTOF dataset, you probably want to discard the Cell_length channel. 310 | 311 | ### How big can the input matrix be? 312 | It depends on how much RAM your computer has. If that is an issue I suggest downsampling to (e.g.) 1000 positive cells and a corresponding number of negative cells. The `hgate_sample` function can help you achieve this: 313 | 314 | 315 | ```r 316 | set.seed(123) ## Makes the subsampling reproducible 317 | gate_vector = Samusik_01_subset$labels 318 | subsample = hgate_sample(gate_vector = gate_vector, level = 5, 319 | size = 100) ## Subsample 100 events from population #5 (Classical monocytes), and a corresponding number of negative events 320 | tab = table(ifelse(subsample, "In", "Out"), ifelse(Samusik_01_subset$labels == 321 | 5, "Positive pop.", "Negative pop.")) 322 | tab[1, ]/colSums(tab) ## Fraction of subsampled events for positive and negative populations 323 | ``` 324 | 325 | ``` 326 | ## Negative pop. Positive pop. 327 | ## 0.3204976 0.3205128 328 | ``` 329 | 330 | ```r 331 | xp = Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels] 332 | hg = hypergate(xp = xp[subsample, ], gate_vector = gate_vector[subsample], 333 | level = 5) ## Runs hypergate on a subsample of the input matrix 334 | gating_heldout = subset_matrix_hg(hg, xp[!subsample, ]) ## Applies the gate to the held-out data 335 | table(ifelse(gating_heldout, "Gated in", "Gated out"), ifelse(Samusik_01_subset$labels[!subsample] == 336 | 5, "Positive pop.", "Negative pop.")) 337 | ``` 338 | 339 | ``` 340 | ## 341 | ## Negative pop. Positive pop. 342 | ## Gated in 37 192 343 | ## Gated out 1110 20 344 | ``` 345 | -------------------------------------------------------------------------------- /data/Samusik_01_subset.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/data/Samusik_01_subset.RData -------------------------------------------------------------------------------- /figure/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-17-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-17-2.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /man/FNTN_matrix.recycle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{FNTN_matrix.recycle} 4 | \alias{FNTN_matrix.recycle} 5 | \title{FNTN_matrix.recycle} 6 | \usage{ 7 | FNTN_matrix.recycle( 8 | FNTN_matrix, 9 | B_FN_old, 10 | B_TN_old, 11 | B_FN_new, 12 | B_TN_new, 13 | xp_FN, 14 | xp_TN, 15 | par 16 | ) 17 | } 18 | \arguments{ 19 | \item{FNTN_matrix}{Expansion matrix to recycle} 20 | 21 | \item{B_FN_old}{Boolean matrix of FN events before the last expansion} 22 | 23 | \item{B_TN_old}{Boolean matrix of TN events before the last expansion} 24 | 25 | \item{B_FN_new}{Boolean matrix of FN events after the last expansion} 26 | 27 | \item{B_TN_new}{Boolean matrix of TN events after the last expansion} 28 | 29 | \item{xp_FN}{Expression matrix of False Negative events} 30 | 31 | \item{xp_TN}{Expression matrix of True Negative events} 32 | 33 | \item{par}{Current hyper-rectangle parametrization} 34 | } 35 | \description{ 36 | Recycle an expansion matrix 37 | } 38 | -------------------------------------------------------------------------------- /man/F_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{F_beta} 4 | \alias{F_beta} 5 | \title{F_beta} 6 | \usage{ 7 | F_beta(pred, truth, beta = 1) 8 | } 9 | \arguments{ 10 | \item{pred}{boolean vector of predicted values} 11 | 12 | \item{truth}{boolean vector of true values} 13 | 14 | \item{beta}{Weighting of yield as compared to precision. Increase beta so that the optimization favors yield, or decrease to favor purity.} 15 | } 16 | \description{ 17 | Compute a F_beta score comparing two boolean vectors 18 | } 19 | \examples{ 20 | data(Samusik_01_subset) 21 | truth=c(rep(TRUE,40),rep(FALSE,60)) 22 | pred=rep(c(TRUE,FALSE),50) 23 | table(pred,truth) ##40\% purity, 50\% yield 24 | #' F_beta(pred=pred,truth=truth,beta=2) ##Closer to yield 25 | F_beta(pred=pred,truth=truth,beta=1.5) ##Closer to yield 26 | F_beta(pred=pred,truth=truth,beta=1) ##Harmonic mean 27 | F_beta(pred=pred,truth=truth,beta=0.75) ##Closer to purity 28 | F_beta(pred=pred,truth=truth,beta=0.5) ##Closer to purity 29 | } 30 | -------------------------------------------------------------------------------- /man/Samusik_01_subset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \docType{data} 4 | \name{Samusik_01_subset} 5 | \alias{Samusik_01_subset} 6 | \title{2000 events randomly sampled from the 'Samusik_01' dataset} 7 | \format{ 8 | list with four elements: fs_src (a flowSet), xp_src (its expression matrix), labels (manual gates of the events) and tsne (a tSNE projection of the dataset) 9 | } 10 | \usage{ 11 | Samusik_01_subset 12 | } 13 | \description{ 14 | 2000 events randomly sampled from the 'Samusik_01' dataset 15 | } 16 | \references{ 17 | https://flowrepository.org/id/FR-FCM-ZZPH 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/boolmat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{boolmat} 4 | \alias{boolmat} 5 | \title{boolmat} 6 | \usage{ 7 | boolmat(gate, xp) 8 | } 9 | \arguments{ 10 | \item{gate}{A return from hypergate} 11 | 12 | \item{xp}{Expression matrix as in the hypergate callxp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]} 13 | } 14 | \description{ 15 | Convert an expression matrix and a gating strategy to a boolean matrix (whether each event is gated out by each channel) 16 | } 17 | \examples{ 18 | data(Samusik_01_subset) 19 | xp=Samusik_01_subset$xp_src 20 | gate_vector=Samusik_01_subset$labels 21 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 22 | head(boolmat(hg,xp)) 23 | } 24 | -------------------------------------------------------------------------------- /man/channels_contributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{channels_contributions} 4 | \alias{channels_contributions} 5 | \title{channels_contributions} 6 | \usage{ 7 | channels_contributions(gate, xp, gate_vector, level, beta = 1) 8 | } 9 | \arguments{ 10 | \item{gate}{A return from hypergate} 11 | 12 | \item{xp}{Expression matrix as in the hypergate call} 13 | 14 | \item{gate_vector}{Categorical vector of length nrow(xp)} 15 | 16 | \item{level}{A level of gate_vector that identifies the population of interest} 17 | 18 | \item{beta, }{should be the same as for the hypergate object} 19 | } 20 | \description{ 21 | Gives scores for the contribution of individual channels to a gating strategy 22 | } 23 | \examples{ 24 | data(Samusik_01_subset) 25 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 26 | gate_vector=Samusik_01_subset$labels 27 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0) 28 | contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1) 29 | contribs 30 | } 31 | -------------------------------------------------------------------------------- /man/color_biplot_by_discrete.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{color_biplot_by_discrete} 4 | \alias{color_biplot_by_discrete} 5 | \title{Colors a biplot according to a vector with discrete values} 6 | \usage{ 7 | color_biplot_by_discrete( 8 | matrix, 9 | discrete_vector, 10 | ..., 11 | bty = "l", 12 | pch = 16, 13 | cex = 0.5, 14 | colors = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{matrix}{a two columns matrix} 19 | 20 | \item{discrete_vector}{a vector of size nrow(matrix)} 21 | 22 | \item{...}{passed to plot} 23 | 24 | \item{bty}{passed to plot} 25 | 26 | \item{pch}{passed to plot} 27 | 28 | \item{cex}{passed to plot} 29 | 30 | \item{colors}{Palette to used named after the unique elements of discrete_vector. Generated from rainbow() if missing.} 31 | } 32 | \description{ 33 | Colors a biplot according to a vector with discrete values 34 | } 35 | \examples{ 36 | data(Samusik_01_subset) 37 | levels=unique(sort(Samusik_01_subset$labels)) 38 | colors=setNames(colorRampPalette(palette())(length(levels)),sort(levels)) 39 | with(Samusik_01_subset,color_biplot_by_discrete(matrix=tsne,discrete_vector=labels,colors=colors)) 40 | } 41 | -------------------------------------------------------------------------------- /man/contract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{contract} 4 | \alias{contract} 5 | \title{contract} 6 | \usage{ 7 | contract( 8 | par = par, 9 | xp_pos = envir$xp_pos, 10 | state_pos = envir$state_pos, 11 | xp_neg = envir$xp_neg, 12 | state_neg = envir$state_neg, 13 | n = envir$n, 14 | TP = envir$TP, 15 | TN = envir$TN, 16 | beta = envir$beta2, 17 | envir = parent.frame() 18 | ) 19 | } 20 | \arguments{ 21 | \item{par}{Current parametrization of the hyperrectangle} 22 | 23 | \item{xp_pos}{Expression matrix for positive events} 24 | 25 | \item{state_pos}{State vector of the positive events} 26 | 27 | \item{xp_neg}{Expression matrix for negative events} 28 | 29 | \item{state_neg}{State vector of the negative events} 30 | 31 | \item{n}{passed to f} 32 | 33 | \item{TP}{integer: current number of TP} 34 | 35 | \item{TN}{integer: current number of TN} 36 | 37 | \item{beta}{Passed from the top-level function} 38 | 39 | \item{envir}{Current environment of the optimization} 40 | } 41 | \description{ 42 | Test (some) possible contractions of the hyperrectangle 43 | } 44 | -------------------------------------------------------------------------------- /man/contract.update.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{contract.update} 4 | \alias{contract.update} 5 | \title{contract.update} 6 | \usage{ 7 | contract.update( 8 | contract_object, 9 | pars = envir$pars, 10 | active_channels = envir$active_channels, 11 | b_pos = envir$b_pos, 12 | b_neg = envir$b_neg, 13 | state_pos = envir$state_pos, 14 | state_neg = envir$state_neg, 15 | TN = envir$TN, 16 | TP = envir$TP, 17 | xp_pos = envir$xp_pos, 18 | xp_neg = envir$xp_neg, 19 | envir = parent.frame() 20 | ) 21 | } 22 | \arguments{ 23 | \item{contract_object}{output of the contract function} 24 | 25 | \item{pars}{Current parametrization of the hyperrectangle} 26 | 27 | \item{active_channels}{vector of currently-used parameters} 28 | 29 | \item{b_pos}{boolean matrix of positive events} 30 | 31 | \item{b_neg}{boolean matrix of negative events} 32 | 33 | \item{state_pos}{State vector of the positive events} 34 | 35 | \item{state_neg}{State vector of the negative events} 36 | 37 | \item{TN}{integer: current number of TN} 38 | 39 | \item{TP}{integer: current number of TP} 40 | 41 | \item{xp_pos}{Expression matrix for positive events} 42 | 43 | \item{xp_neg}{Expression matrix for negative events} 44 | 45 | \item{envir}{Current environment of the optimization} 46 | } 47 | \description{ 48 | Update the hyperrectangle to the best contraction move found 49 | } 50 | -------------------------------------------------------------------------------- /man/coreloop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{coreloop} 4 | \alias{coreloop} 5 | \title{coreloop} 6 | \usage{ 7 | coreloop(par, hg.env = hg.env$hg.env) 8 | } 9 | \arguments{ 10 | \item{par}{Current parametrization of the hyperrectangle} 11 | 12 | \item{hg.env}{Environment where the main execution of hypergate takes place} 13 | } 14 | \description{ 15 | Core optimization loop of hypergate 16 | } 17 | -------------------------------------------------------------------------------- /man/en.locator.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{en.locator} 4 | \alias{en.locator} 5 | \title{Wrapper to locator that plots segments on the fly} 6 | \usage{ 7 | en.locator() 8 | } 9 | \description{ 10 | Wrapper to locator that plots segments on the fly 11 | } 12 | -------------------------------------------------------------------------------- /man/expand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{expand} 4 | \alias{expand} 5 | \title{expand} 6 | \usage{ 7 | expand( 8 | FN = envir$FN, 9 | FNTN_matrix = envir$FNTN_matrix, 10 | TP = envir$TP, 11 | TN = envir$TN, 12 | n = envir$n, 13 | beta = envir$beta2, 14 | envir = parent.frame() 15 | ) 16 | } 17 | \arguments{ 18 | \item{FN}{integer: current number of FP} 19 | 20 | \item{FNTN_matrix}{Boolean matrix of dim (FN, FN + TN), where Mij is TRUE if and only if expanding to include the ith FN in the gate would lead to the inclusion of the jth column event} 21 | 22 | \item{TP}{integer: current number of TP} 23 | 24 | \item{TN}{integer: current number of TN} 25 | 26 | \item{n}{passed to f} 27 | 28 | \item{beta}{Passed from the top-level function} 29 | 30 | \item{envir}{Coreloop environment} 31 | } 32 | \description{ 33 | Test (some) possible expansions of the hyperrectangle 34 | } 35 | -------------------------------------------------------------------------------- /man/expand.update.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{expand.update} 4 | \alias{expand.update} 5 | \title{expand.update} 6 | \usage{ 7 | expand.update( 8 | expand.object, 9 | pars = envir$pars, 10 | xp_pos = envir$xp_pos, 11 | xp_neg = envir$xp_neg, 12 | state_pos = envir$state_pos, 13 | state_neg = envir$state_neg, 14 | b_pos = envir$b_pos, 15 | b_neg = envir$b_neg, 16 | n = envir$n, 17 | TP = envir$TP, 18 | TN = envir$TN, 19 | envir = parent.frame() 20 | ) 21 | } 22 | \arguments{ 23 | \item{expand.object}{output of the expand function} 24 | 25 | \item{pars}{Current parametrization of the hyperrectangle} 26 | 27 | \item{xp_pos}{Expression matrix for positive events} 28 | 29 | \item{xp_neg}{Expression matrix for negative events} 30 | 31 | \item{state_pos}{State vector of the positive events} 32 | 33 | \item{state_neg}{State vector of the negative events} 34 | 35 | \item{b_pos}{boolean matrix of positive events} 36 | 37 | \item{b_neg}{boolean matrix of negative events} 38 | 39 | \item{n}{passed to f} 40 | 41 | \item{TP}{integer: current number of TP} 42 | 43 | \item{TN}{integer: current number of TN} 44 | 45 | \item{envir}{Current environment of the optimization} 46 | } 47 | \description{ 48 | Update the hyperrectangle to the best expansion move found 49 | } 50 | -------------------------------------------------------------------------------- /man/f.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{f} 4 | \alias{f} 5 | \title{f} 6 | \usage{ 7 | f(TP, TN, n, beta2 = 1) 8 | } 9 | \arguments{ 10 | \item{TP}{Number of true positive events} 11 | 12 | \item{TN}{Number of true negative events} 13 | 14 | \item{n}{beta^2*(TP+FN)+TN+FP} 15 | 16 | \item{beta2}{squared-beta to weight precision (low beta) or recall (high beta) more} 17 | } 18 | \description{ 19 | Computes the F_beta score given an intenger number of True Positives (TP), True Negatives (TN). It is optimized for speed and n is thus not the total number of events 20 | } 21 | -------------------------------------------------------------------------------- /man/fill_FNTN_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{fill_FNTN_matrix} 4 | \alias{fill_FNTN_matrix} 5 | \title{fill_FNTN_matrix} 6 | \usage{ 7 | fill_FNTN_matrix(xp_FN, xp_TN, B_FN, B_TN, par) 8 | } 9 | \arguments{ 10 | \item{xp_FN}{Expression matrix of False Negative events} 11 | 12 | \item{xp_TN}{Expression matrix of True Negative events} 13 | 14 | \item{B_FN}{Boolean matrix of FN events} 15 | 16 | \item{B_TN}{Boolean matrix of TN events} 17 | 18 | \item{par}{Current hyper-rectangle parametrization} 19 | } 20 | \description{ 21 | fill_FNTN_matrix Used for assessing whether an expansion move is possible 22 | } 23 | -------------------------------------------------------------------------------- /man/gate_from_biplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{gate_from_biplot} 4 | \alias{gate_from_biplot} 5 | \title{gate_from_biplot} 6 | \usage{ 7 | gate_from_biplot( 8 | matrix, 9 | x_axis, 10 | y_axis, 11 | ..., 12 | bty = "l", 13 | pch = 16, 14 | cex = 0.5, 15 | sample = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{matrix}{A matrix} 20 | 21 | \item{x_axis}{character, colname of matrix used for x-axis in the biplot} 22 | 23 | \item{y_axis}{character, colname of matrix used for y-axis in the biplot} 24 | 25 | \item{...}{passed to plot} 26 | 27 | \item{bty}{passed to plot} 28 | 29 | \item{pch}{passed to plot} 30 | 31 | \item{cex}{passed to plot} 32 | 33 | \item{sample}{Used to downsample the data in case there are too many events to plot quickly} 34 | } 35 | \value{ 36 | A named vector of length nrow(matrix) and names rownames(matrix). Ungated events are set to NA 37 | } 38 | \description{ 39 | From a biplot let the user interactively draw polygons to create a "Gate" vector 40 | } 41 | \details{ 42 | Data will be displayed as a bi-plot according to user-specified x_axis and y_axis arguments, then a call to locator() is made. The user can draw a polygon around parts of the plot that need gating. When done, 'right-click' or 'escape' (depending on the IDE) escapes locator() and closes the polygon. Then the user can press "n" to draw another polygon (that will define a new population), "c" to cancell and draw the last polygon again, or "s" to exit. When exiting, events that do not fall within any polygon are assigned NA, the others are assigned an integer value corresponding to the last polygon they lie into. 43 | } 44 | \examples{ 45 | if(interactive()){ 46 | ##See the details section to see how this function works 47 | gate_from_biplot(matrix=Samusik_01_subset$tsne,x_axis="tSNE1",y_axis="tSNE2") 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /man/hgate_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{hgate_info} 4 | \alias{hgate_info} 5 | \title{hgate_info} 6 | \usage{ 7 | hgate_info(hgate, xp, gate_vector, level, beta = 1) 8 | } 9 | \arguments{ 10 | \item{hgate}{A hypergate object (produced by hypergate())} 11 | 12 | \item{xp}{The expression matrix from which the 'hgate' parameter originates, 13 | needed for Fscore computation} 14 | 15 | \item{gate_vector}{Categorical data from which the 'hgate' parameter 16 | originates, needed for Fscore computation} 17 | 18 | \item{level}{Level of gate_vector identifying the population of interest, 19 | needed for Fscore computation} 20 | 21 | \item{beta}{Beta to weight purity (low beta) or yield (high beta) more, 22 | needed for Fscore computation} 23 | } 24 | \value{ 25 | A data.frame with channel, sign, comp and threshold columns, and 26 | optionnally deltaF (score deterioration when parameter is ignored),Fscore1d (F_value when using only this parameter) and Fscore (F score when all parameters up to this one are included). Fscores are computed if xp, gate_vector 27 | and level are passed to the function. 28 | } 29 | \description{ 30 | Extract information about a hypergate return: the channels of 31 | the phenotype, the sign of the channels, the sign of the comparison, the 32 | thresholds. The function could also compute the Fscores if the xp, 33 | gate_vector and level parameters are given. 34 | } 35 | \examples{ 36 | data(Samusik_01_subset) 37 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 38 | gate_vector=Samusik_01_subset$labels 39 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 40 | hgate_info(hgate=hg) 41 | hgate_pheno(hgate=hg) 42 | hgate_rule(hgate=hg) 43 | } 44 | \seealso{ 45 | \code{hg_pheno}, \code{hg_rule} 46 | } 47 | -------------------------------------------------------------------------------- /man/hgate_pheno.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{hgate_pheno} 4 | \alias{hgate_pheno} 5 | \title{hgate_pheno} 6 | \usage{ 7 | hgate_pheno(hgate, collapse = ", ") 8 | } 9 | \arguments{ 10 | \item{hgate}{A hypergate object (produced by hypergate())} 11 | 12 | \item{collapse}{A character string to separate the markers.} 13 | } 14 | \value{ 15 | A string representing the phenotype. 16 | } 17 | \description{ 18 | Build a human readable phenotype, i.e. a combination of channels 19 | and sign (+ or -) from a hypergate return. 20 | } 21 | \examples{ 22 | ## See hgate_info 23 | } 24 | \seealso{ 25 | \code{hg_rule}, \code{hg_info} 26 | } 27 | -------------------------------------------------------------------------------- /man/hgate_rule.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{hgate_rule} 4 | \alias{hgate_rule} 5 | \title{hgate_rule} 6 | \usage{ 7 | hgate_rule(hgate, collapse = ", ", digits = 2) 8 | } 9 | \arguments{ 10 | \item{hgate}{A hypergate object (produced by hypergate())} 11 | 12 | \item{collapse}{A character string to separate the markers.} 13 | 14 | \item{digits}{An integer that specifies the decimal part when rounding.} 15 | } 16 | \value{ 17 | A data.frame with channel, sign, comp and threshold columns 18 | } 19 | \description{ 20 | Build a human readable rule i.e. a combination of channels, sign 21 | of comparison and threshold. 22 | } 23 | \examples{ 24 | ## See hgate_info 25 | } 26 | \seealso{ 27 | \code{hg_pheno}, \code{hg_rule} 28 | } 29 | -------------------------------------------------------------------------------- /man/hgate_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{hgate_sample} 4 | \alias{hgate_sample} 5 | \title{hgate_sample} 6 | \usage{ 7 | hgate_sample(gate_vector, level, size = 1000, method = "prop") 8 | } 9 | \arguments{ 10 | \item{gate_vector}{A Categorical vector whose length equals the number of 11 | rows of the matrix to sample (nrow(xp))} 12 | 13 | \item{level}{A level of gate_vector so that gate_vector == level will produce 14 | a boolean vector identifying events of interest} 15 | 16 | \item{size}{An integer specifying the maximum number of events of interest to 17 | retain. If the count of events of interest is lower than \code{size}, than 18 | \code{size} will be set to that count.} 19 | 20 | \item{method}{A string specifying the method to balance the count of events. 21 | \code{"prop"} means proportionnality: if events of interest are sampled in 22 | a 1/10 ratio, then all others events are sampled by the same ratio. 23 | \code{"10x"} means a balance of 10 between the count events of interest and 24 | the count all others events. \code{"ceil"} means a uniform sampling no more 25 | than the specified size for each level of the gate_vector. \code{level} is 26 | unused in that method.} 27 | } 28 | \value{ 29 | A logical vector with TRUE correspond to the events being sampled, ie 30 | kept to further analysis 31 | } 32 | \description{ 33 | Downsample the data in order to fasten the computation and 34 | reduce the memory usage. 35 | } 36 | \note{ 37 | No replacement is applied. If there are less events in one group or the 38 | alternate than the algorithm requires, then all available events are 39 | returned. NA values in gate_vector are not sampled, ie ignored. 40 | } 41 | \examples{ 42 | # Standard procedure with downsampling 43 | data(Samusik_01_subset) 44 | xp <- Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 45 | gate_vector <- Samusik_01_subset$labels 46 | sampled <- hgate_sample(gate_vector, level=8, 100) 47 | table(sampled) 48 | table(gate_vector[sampled]) 49 | xp_sampled <- xp[sampled, ] 50 | gate_vector_sampled <- gate_vector[sampled] 51 | hg <- hypergate(xp_sampled, gate_vector_sampled, level=8, delta_add=0.01) 52 | # cluster 8 consists in 122 events 53 | table(gate_vector) 54 | # Downsampling 55 | table(gate_vector[hgate_sample(gate_vector, level=8, 100)]) 56 | # Downsampling reduces the alternate events 57 | table(gate_vector[hgate_sample(gate_vector, level=8, 100, "10x")]) 58 | # Downsampling is limited to the maximum number of events of interest 59 | table(gate_vector[hgate_sample(gate_vector, level=8, 150)]) 60 | # Downsampling is limited to the maximum number of events of interest, and 61 | # the alternate events are downsampled to a total of 10 times 62 | table(gate_vector[hgate_sample(gate_vector, level=8, 150, "10x")]) 63 | # More details about sampling 64 | # Convert -1 to NA, NA are not sampled 65 | gate_vector[gate_vector==-1] = NA 66 | gate_vector = factor(gate_vector) 67 | table(gate_vector, useNA = "alw") 68 | # 69 | # target size = 100 whereas initial freq is 122 for pop 8 70 | smp.prop = hgate_sample(gate_vector, level = 8, size = 100, method = "prop") 71 | smp.10x = hgate_sample(gate_vector, level = 8, size = 100, method = "10x") 72 | smp.ceil = hgate_sample(gate_vector, size = 10, method = "ceil") 73 | table(smp.prop) 74 | table(smp.10x) 75 | table(smp.ceil) 76 | rbind(raw = table(gate_vector), 77 | prop = table(gate_vector[smp.prop]), 78 | `10x` = table(gate_vector[smp.10x]), 79 | ceil = table(gate_vector[smp.ceil])) 80 | # 81 | # target size = 30 whereas initial freq is 25 for pop 14 82 | smp.prop = hgate_sample(gate_vector, level = 14, size = 30, method = "prop") 83 | smp.10x = hgate_sample(gate_vector, level = 14, size = 30, method = "10x") 84 | table(smp.prop) 85 | table(smp.10x) 86 | rbind(raw = table(gate_vector), 87 | prop = table(gate_vector[smp.prop]), 88 | `10x` = table(gate_vector[smp.10x])) 89 | # prop returns original data, because target size ids larger than initial freq 90 | # 10x returns sampled data according to initial freq, such as the total amount 91 | # of other events equals 10x initial freq of pop 14 92 | } 93 | -------------------------------------------------------------------------------- /man/hypergate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{hypergate} 4 | \alias{hypergate} 5 | \title{hypergate} 6 | \usage{ 7 | hypergate(xp, gate_vector, level, delta_add = 0, beta = 1, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{xp}{an Expression matrix} 11 | 12 | \item{gate_vector}{A Categorical vector of length nrow(xp)} 13 | 14 | \item{level}{A level of gate_vector so that gate_vector == level will produce a boolean vector identifying events of interest} 15 | 16 | \item{delta_add}{If the increase in F after an optimization loop is lower than delta_add, the optimization will stop (may save computation time)} 17 | 18 | \item{beta}{Purity / Yield trade-off} 19 | 20 | \item{verbose}{Boolean. Whether to print information about the optimization status.} 21 | } 22 | \description{ 23 | Finds a hyperrectangle gating around a population of interest 24 | } 25 | \examples{ 26 | data(Samusik_01_subset) 27 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 28 | gate_vector=Samusik_01_subset$labels 29 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 30 | } 31 | \seealso{ 32 | \code{\link{channels_contributions}} for ranking parameters within the output, \code{\link{reoptimize_strategy}} for reoptimizing a output on a subset of the markers, \code{\link{plot_gating_strategy}} for plotting an output, \code{\link{subset_matrix_hg}} to apply the output to another input matrix, \code{\link{boolmat}} to obtain a boolean matrix stating which events are filtered out because of which markers 33 | } 34 | -------------------------------------------------------------------------------- /man/plot_gating_strategy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{plot_gating_strategy} 4 | \alias{plot_gating_strategy} 5 | \title{plot_gating_strategy} 6 | \usage{ 7 | plot_gating_strategy( 8 | gate, 9 | xp, 10 | gate_vector, 11 | level, 12 | cex = 0.5, 13 | highlight = "black", 14 | path = "./", 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{gate}{A hypergate object (produced by hypergate())} 20 | 21 | \item{xp}{The expression matrix from which the 'gate' parameter originates} 22 | 23 | \item{gate_vector}{Categorical data from which the 'gate' parameter originates} 24 | 25 | \item{level}{Level of gate_vector identifying the population of interest} 26 | 27 | \item{cex}{size of dots} 28 | 29 | \item{highlight}{color of the positive population when plotting} 30 | 31 | \item{path}{Where png files will be produced} 32 | 33 | \item{...}{passed to png} 34 | } 35 | \description{ 36 | Plot a hypergate return 37 | } 38 | \examples{ 39 | data(Samusik_01_subset) 40 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 41 | gate_vector=Samusik_01_subset$labels 42 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 43 | par(mfrow=c(1,ceiling(length(hg$active_channels)/2))) 44 | plot_gating_strategy(gate=hg,xp=xp,gate_vector=gate_vector,level=23,highlight="red") 45 | } 46 | -------------------------------------------------------------------------------- /man/polygon.clean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{polygon.clean} 4 | \alias{polygon.clean} 5 | \title{Remove self intersection in polygons} 6 | \usage{ 7 | polygon.clean(poly) 8 | } 9 | \arguments{ 10 | \item{poly}{a polygon (list with two components x and y which are equal-length numerical vectors)} 11 | } 12 | \value{ 13 | A polygon without overlapping edges and new vertices corresponding to non-inner points of intersection 14 | } 15 | \description{ 16 | Remove self intersection in polygons 17 | } 18 | -------------------------------------------------------------------------------- /man/reoptimize_strategy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{reoptimize_strategy} 4 | \alias{reoptimize_strategy} 5 | \title{reoptimize_strategy} 6 | \usage{ 7 | reoptimize_strategy( 8 | gate, 9 | channels_subset, 10 | xp, 11 | gate_vector, 12 | level, 13 | beta = 1, 14 | verbose = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{gate}{A return from hypergate} 19 | 20 | \item{channels_subset}{Character vector identifying the channels that will be retained (others are ignored). The form is e.g. c("CD4_min","CD8_max")} 21 | 22 | \item{xp}{Expression matrix as in the hypergate call} 23 | 24 | \item{gate_vector}{Categorical vector as in the hypergate call} 25 | 26 | \item{level}{Level of gate_vector identifying the population of interest} 27 | 28 | \item{beta}{Yield / purity trade-off} 29 | 30 | \item{verbose}{Whether to print information about optimization status} 31 | } 32 | \description{ 33 | Optimize a gating strategy given a manual selection of channels 34 | } 35 | \examples{ 36 | data(Samusik_01_subset) 37 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 38 | gate_vector=Samusik_01_subset$labels 39 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0) 40 | contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1) 41 | significant_channels=names(contribs)[contribs>=0.01] 42 | hg_reoptimized=reoptimize_strategy(gate=hg,channels_subset=significant_channels,xp,gate_vector,23) 43 | } 44 | -------------------------------------------------------------------------------- /man/subset_matrix_hg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{subset_matrix_hg} 4 | \alias{subset_matrix_hg} 5 | \title{subset_matrix_hg} 6 | \usage{ 7 | subset_matrix_hg(gate, xp) 8 | } 9 | \arguments{ 10 | \item{gate}{a return from hypergate} 11 | 12 | \item{xp}{Expression matrix used for gate} 13 | } 14 | \description{ 15 | Returns a boolean vector whose TRUE elements correspond to events inside the hyperrectangle 16 | } 17 | \examples{ 18 | data(Samusik_01_subset) 19 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 20 | gate_vector=Samusik_01_subset$labels 21 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01) 22 | gating_state=subset_matrix_hg(hg,xp) 23 | gating_state=ifelse(gating_state,"Gated in","Gated out") 24 | target=ifelse(gate_vector==23,"Target events","Others") 25 | table(gating_state,target) 26 | } 27 | -------------------------------------------------------------------------------- /man/update_gate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hypergate.R 3 | \name{update_gate} 4 | \alias{update_gate} 5 | \title{Updates a gate vector} 6 | \usage{ 7 | update_gate(xp, polygon, gate_vector = rep(0, nrow(xp)), value = 1) 8 | } 9 | \arguments{ 10 | \item{xp}{A two colums matrix} 11 | 12 | \item{polygon}{A list with two components x and y of equal lenghts and numeric values} 13 | 14 | \item{gate_vector}{a vector of length nrow(xp) with integer values} 15 | 16 | \item{value}{The number that will be assigned to gate_vector, corresponding to points that lie in the polygon} 17 | } 18 | \value{ 19 | The updated gate_vector 20 | } 21 | \description{ 22 | Updates a gate vector 23 | } 24 | -------------------------------------------------------------------------------- /vignettes/Hypergate.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Hypergate" 3 | author: "Etienne Becht" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Hypergate} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r global_options, include=FALSE} 13 | knitr::opts_chunk$set(fig.pos = 'H',tidy.opts=list(width.cutoff=60),tidy=TRUE,fig.path="fig/") 14 | ``` 15 | 16 | ```{r, include=FALSE} 17 | options("warn"=-1) 18 | ``` 19 | 20 | This Vignette will walk you through the usage of the Hypergate R package. 21 | 22 | ## Package installation 23 | Installing dependencies: 24 | ```{r, eval=FALSE} 25 | install.packages(c("sp","polyclip","rgeos")) 26 | source("https://bioconductor.org/biocLite.R") 27 | biocLite("flowCore") 28 | ``` 29 | Installing the package from github: 30 | ```{r, eval=FALSE} 31 | install.packages("devtools") 32 | library(devtools) 33 | install_github(repo="ebecht/hypergate") 34 | ``` 35 | 36 | ```{r, echo = TRUE, message = FALSE} 37 | library(hypergate) 38 | ``` 39 | 40 | ## Data loading 41 | 42 | ```{r, echo= TRUE} 43 | data(Samusik_01_subset,package="hypergate") 44 | ``` 45 | 46 | This loads 2000 datapoints randomly sampled from the *Samusik_01* dataset (available from FlowRepository accession number FR-FCM-ZZPH). This object is a list which includes as elements 47 | 48 | 1. *fs_src* a flowSet with 1 flowFrame corresponding to the data subset 49 | 50 | 2. *xp_src* a matrix corresponding to the expression of the data subset. Rownames correspond to event numbers in the unsampled dataset. Colnames correspond to protein targets (or other information e.g. events' manually-annotated labels) 51 | 52 | 3. *labels* numeric vector encoding manually-annontated labels, with the value -1 for ungated events. The text labels for the gated populations are availble from FlowRepostiry 53 | 54 | 4. *regular_channels* A subset of colnames(Samusik_01_subset$xp_src) that corresponds to protein targets 55 | 56 | 5. *tsne* A 2D-tSNE ran on the whole dataset and subsampled to 2000 events 57 | 58 | ## Specifying the cell subset of interest 59 | 60 | Hypergate requires in particular as its arguments 61 | 62 | 1. an expression matrix (which we have as *Samusik_01_subset$xp_src*) 63 | 64 | 2. a vector specifying which events to attempt to gate on. This section discusses ways to achieve this point 65 | 66 | #### Selection from low-dimensional plot 67 | We included in the package a function with a rudimentary (hopefully sufficient) interface that allows for the selection of a cell subset of interest from a 2D biplot by drawing a polygon around it using the mouse. Since this function is interactive we cannot execute it in this Vignette but an example call would be as such (feel free to try it): 68 | 69 | ```{r, eval = FALSE} 70 | g=gate_from_biplot( 71 | Samusik_01_subset$tsne, 72 | "tSNE1", 73 | "tSNE2" 74 | ) 75 | ``` 76 | 77 | For this tutorial we define manually the polygon instead 78 | 79 | ```{r, echo=TRUE, fig.cap="Manual selection of a cluster on a 2D t-SNE"} 80 | x=c(12.54,8.08,7.12,12.12,17.32,20.62,21.04,20.83,18.07,15.20) 81 | y=c(-10.61,-14.76,-18.55,-20.33,-21.16,-19.74,-14.40,-11.08,-10.02,-9.42) 82 | pol=list(x=x,y=y) 83 | library("sp") 84 | gate_vector=sp::point.in.polygon(Samusik_01_subset$tsne[,1],Samusik_01_subset$tsne[,2],pol$x,pol$y) 85 | plot(Samusik_01_subset$tsne,pch=16,cex=0.5,col=ifelse(gate_vector==1,"firebrick3","lightsteelblue")) 86 | polygon(pol,lty=2) 87 | ``` 88 | 89 | #### Clustering 90 | 91 | Another option to define a cell cluster of interest is to use the output of a clustering algorithm. Popular options for cytometry include *FlowSOM* (available from Bioconductor) or *Phenograph* (available from the ```cytofkit``` package from Bioconductor). An example call for Rphenograph is below: 92 | 93 | ```{r, eval=FALSE} 94 | require(Rphenograph) 95 | set.seed(5881215) 96 | clustering=Rphenograph(Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]) 97 | cluster_labels=membership(clustering[[2]]) 98 | ``` 99 | In this Vignette we use the simpler kmeans option instead: 100 | 101 | ```{r} 102 | set.seed(5881215) 103 | cluster_labels=kmeans(Samusik_01_subset$tsne,20,nstart=100)$cluster 104 | ``` 105 | 106 | In this example we can see that the kmeans cluster *20* corresponds to the population we manually selected from the t-SNE biplot 107 | ```{r, fig.cap="Selection of a cluster from a clustering algorithm output"} 108 | plot(Samusik_01_subset$tsne,col=ifelse(cluster_labels==20,"firebrick3","lightsteelblue"),pch=16,cex=0.5) 109 | ``` 110 | 111 | ## Running Hypergate 112 | 113 | The function to optimize gating strategies is ```hypergate```. Its main arguments are ```xp``` (a numeric matrix encoding expression), ```gate_vector``` (a vector with few unique values), ```level``` (specificies what value of gate_vector to gate upon, i.e. events satisfying ```gate_vector==level``` will be gated in) 114 | 115 | ```{r} 116 | hg_output=hypergate( 117 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels], 118 | gate_vector=gate_vector, 119 | level=1, 120 | verbose=FALSE 121 | ) 122 | ``` 123 | 124 | ## Interpreting and polishing the results 125 | 126 | ### Gating datapoints 127 | 128 | The following function allows to subset an expression matrix given a return from *Hypergate*. The new matrix needs to have the same column names as the original matrix. 129 | 130 | ```{r} 131 | gating_predicted=subset_matrix_hg(hg_output,Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]) 132 | ``` 133 | 134 | ```{r, eval=FALSE} 135 | table(ifelse(gating_predicted,"Gated-in","Gated-out"),ifelse(gate_vector==1,"Events of interest","Others")) 136 | ``` 137 | 138 | ```{r, echo=FALSE} 139 | knitr::kable(table(ifelse(gating_predicted,"Gated-in","Gated-out"),ifelse(gate_vector==1,"Events of interest","Others"))) 140 | ``` 141 | 142 | Another option, which offers more low-level control, is to examine for each datapoint whether they pass the threshold for each parameter. The function to obtain such a boolean matrix is ```boolmat```. Here our gating strategy specifies *SiglecF+cKit-Ly6C-*. We would thus obtain a 3-columns x 2000 (the number of events) rows 143 | 144 | ```{r} 145 | bm=boolmat( 146 | gate=hg_output, 147 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 148 | ) 149 | head(bm) 150 | ``` 151 | 152 | ```{r, echo=FALSE} 153 | knitr::kable(table(ifelse(bm[,"SiglecF_min"],"SiglecF above threshold","Gated-out because of SiglecF"),ifelse(gate_vector==1,"Events of interest","Others"))) 154 | ``` 155 | 156 | ### Examining the output 157 | 158 | The following function will plot the output of Hypergate. Arguments are 159 | 160 | 1. ```gate``` an object returned by Hypergate 161 | 162 | 2. ```xp``` an expression matrix whose columns are named similarly as the ones used to create the ```gate``` object 163 | 164 | 3. ```gate_vector``` and ```level``` to specify which events are "of interest" 165 | 166 | 4. ```highlight``` a color that will be used to highlight the events of interest 167 | 168 | ```{r,echo=TRUE,fig.width=3.5,fig.height=3.5,fig.cap="Gating strategy"} 169 | plot_gating_strategy( 170 | gate=hg_output, 171 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels], 172 | gate_vector=gate_vector, 173 | level=1, 174 | highlight="firebrick3" 175 | ) 176 | ``` 177 | 178 | Another important point to consider is how the F$\beta$-score increases with each added channel. This gives an idea of how many channels are required to reach a close-to-optimal gating strategy. 179 | 180 | This will identify at which steps the parameters were first activated and optimized: 181 | ```{r,fig.cap="F1-score obtained during optimization when adding parameters"} 182 | f_values_vs_number_of_parameters=c( 183 | F_beta(rep(TRUE,nrow(Samusik_01_subset$xp_src)),gate_vector==1), 184 | hg_output$f[c(apply(hg_output$pars.history[,hg_output$active_channels],2,function(x)min(which(x!=x[1])))-1,nrow(hg_output$pars.history))][-1] 185 | ) 186 | barplot(rev(f_values_vs_number_of_parameters),names.arg=rev(c("Initialization",paste("+ ",sep="",hg_output$active_channels))),las=3,mar=c(10,4,1,1),horiz=TRUE,xlab="Cumulative F1-score") 187 | ``` 188 | 189 | This graph tells us that the biggest increase is by far due to SiglecF+, while the lowest is due to Ly6C-. 190 | 191 | ### Channels contributions 192 | 193 | The previous graph only shows how the F-value evolved during optimization, but what we really want to know is how much each parameter contributes to the final output (sometimes a parameter will have a big impact at the early steps of the optimization but will become relatively unimportant towards the end, if multiple other parameters collectively account for most of its discriminatory power). We use the following function to assess this, which measures how much performances lower when a parameter is ignored. The more the performances lower, the more important the parameter is. 194 | 195 | ```{r, fig.cap="Contribution of each parameter to the output"} 196 | contributions=channels_contributions( 197 | gate=hg_output, 198 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels], 199 | gate_vector=gate_vector, 200 | level=1, 201 | beta=1 202 | ) 203 | barplot(contributions,las=3,mar=c(10,4,1,1),horiz=TRUE,xlab="F1-score deterioration when the parameter is ignored") 204 | ``` 205 | 206 | ### Reoptimize strategy 207 | 208 | Since Ly6C contributes very little, we may want to ignore it to obtain a shorter gating strategy. We could keep the current threshold values for the other parameters, but it is best to re-compute the other thresholds to account for the loss of some parameters. 209 | To do that we use the following function: 210 | 211 | ```{r} 212 | hg_output_polished=reoptimize_strategy( 213 | gate=hg_output, 214 | channels_subset=c("SiglecF_min","cKit_max"), 215 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels], 216 | gate_vector=gate_vector, 217 | level=1 218 | ) 219 | ``` 220 | 221 | Finally, we get to plot our final strategy: 222 | 223 | ```{r,echo=TRUE,fig.width=3.5,fig.height=3.5, fig.cap="Final output"} 224 | plot_gating_strategy( 225 | gate=hg_output_polished, 226 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels], 227 | gate_vector=gate_vector, 228 | level=1, 229 | highlight="firebrick3" 230 | ) 231 | ``` 232 | 233 | ### Human-readable output 234 | 235 | Thanks to a nice contribution for SamGG on github, there are three functions that make the outputs more readable: 236 | 237 | ```{r} 238 | hgate_pheno(hg_output) 239 | hgate_rule(hg_output) 240 | hgate_info(hg_output) 241 | # Fscores can be retrieved when the same parameters given to hypergate() are given to hgate_info(): 242 | hg_out_info = hgate_info(hg_output, 243 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels], 244 | gate_vector=gate_vector, 245 | level=1) 246 | hg_out_info 247 | # and formatted readily 248 | paste0(hg_out_info[,"Fscore"], collapse = ", ") 249 | ``` 250 | 251 | ## Final notes 252 | 253 | Some comments about potential questions on your own projects (raised by QBarbier): 254 | 255 | ### Which channels to use as input? 256 | Anything that would be relevant for a gating strategy should be used as an input. So usually any phenotypic channel would be included. If you know that you would not use certain parameters on subsequent experiments (for instance if the staining is intracellular and you plan to sort a live population and thus cannot permeabilize your cells), you should exclude the corresponding channels. I usually do not use channels that were used in pre-gating steps (e.g. CD45 for immune cells). Finally, if you plan to use flow cytometry and use hypergate on a CyTOF dataset, you probably want to discard the Cell_length channel. 257 | 258 | ### How big can the input matrix be? 259 | It depends on how much RAM your computer has. If that is an issue I suggest downsampling to (e.g.) 1000 positive cells and a corresponding number of negative cells. The `hgate_sample` function can help you achieve this: 260 | 261 | ```{r} 262 | set.seed(123) ## Makes the subsampling reproducible 263 | gate_vector=Samusik_01_subset$labels 264 | subsample=hgate_sample(gate_vector=gate_vector,level=5,size=100) ## Subsample 100 events from population #5 (Classical monocytes), and a corresponding number of negative events 265 | tab=table(ifelse(subsample,"In","Out"),ifelse(Samusik_01_subset$labels==5,"Positive pop.","Negative pop.")) 266 | tab[1,]/colSums(tab) ## Fraction of subsampled events for positive and negative populations 267 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels] 268 | hg=hypergate(xp=xp[subsample,],gate_vector=gate_vector[subsample],level=5) ## Runs hypergate on a subsample of the input matrix 269 | gating_heldout=subset_matrix_hg(hg,xp[!subsample,]) ## Applies the gate to the held-out data 270 | table(ifelse(gating_heldout,"Gated in","Gated out"),ifelse(Samusik_01_subset$labels[!subsample]==5,"Positive pop.","Negative pop.")) 271 | ``` 272 | -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-17-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-17-2.png -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /vignettes/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-7-1.png --------------------------------------------------------------------------------