├── DESCRIPTION
├── NAMESPACE
├── R
    └── hypergate.R
├── README.md
├── data
    └── Samusik_01_subset.RData
├── figure
    ├── unnamed-chunk-10-1.png
    ├── unnamed-chunk-17-1.png
    ├── unnamed-chunk-17-2.png
    ├── unnamed-chunk-18-1.png
    ├── unnamed-chunk-19-1.png
    ├── unnamed-chunk-21-1.png
    └── unnamed-chunk-7-1.png
├── man
    ├── FNTN_matrix.recycle.Rd
    ├── F_beta.Rd
    ├── Samusik_01_subset.Rd
    ├── boolmat.Rd
    ├── channels_contributions.Rd
    ├── color_biplot_by_discrete.Rd
    ├── contract.Rd
    ├── contract.update.Rd
    ├── coreloop.Rd
    ├── en.locator.Rd
    ├── expand.Rd
    ├── expand.update.Rd
    ├── f.Rd
    ├── fill_FNTN_matrix.Rd
    ├── gate_from_biplot.Rd
    ├── hgate_info.Rd
    ├── hgate_pheno.Rd
    ├── hgate_rule.Rd
    ├── hgate_sample.Rd
    ├── hypergate.Rd
    ├── plot_gating_strategy.Rd
    ├── polygon.clean.Rd
    ├── reoptimize_strategy.Rd
    ├── subset_matrix_hg.Rd
    └── update_gate.Rd
└── vignettes
    ├── Hypergate.Rmd
    ├── unnamed-chunk-10-1.png
    ├── unnamed-chunk-17-1.png
    ├── unnamed-chunk-17-2.png
    ├── unnamed-chunk-18-1.png
    ├── unnamed-chunk-19-1.png
    ├── unnamed-chunk-21-1.png
    └── unnamed-chunk-7-1.png


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: hypergate
 2 | Title: Machine Learning of Hyperrectangular Gating Strategies for High-Dimensional Cytometry
 3 | Version: 0.8.5
 4 | Authors@R: c(
 5 |                person("Etienne","Becht",email="etienne.becht@protonmail.com",role=c("cre","aut")),
 6 |                person("Samuel","Granjeaud",email="samuel.granjeaud@inserm.fr",role=c("ctb"))
 7 | 	   )
 8 | Description: Given a high-dimensional dataset that typically represents a cytometry dataset, and a subset of the datapoints, this algorithm outputs an hyperrectangle so that datapoints within the hyperrectangle best correspond to the specified subset. In essence, this allows the conversion of clustering algorithms' outputs to gating strategies outputs.
 9 | Depends:
10 | 	R (>= 3.5.0)
11 | License: GPL-3
12 | Encoding: UTF-8
13 | LazyData: true
14 | Imports: stats,
15 | 	grDevices,
16 | 	utils,
17 | 	graphics
18 | Suggests: knitr,
19 | 	  rmarkdown,
20 | 	  flowCore,
21 | 	  sp,
22 | 	  sf
23 | VignetteBuilder: knitr
24 | RoxygenNote: 7.3.0
25 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(F_beta)
 4 | export(boolmat)
 5 | export(channels_contributions)
 6 | export(color_biplot_by_discrete)
 7 | export(gate_from_biplot)
 8 | export(hgate_info)
 9 | export(hgate_pheno)
10 | export(hgate_rule)
11 | export(hgate_sample)
12 | export(hypergate)
13 | export(plot_gating_strategy)
14 | export(reoptimize_strategy)
15 | export(subset_matrix_hg)
16 | importFrom(grDevices,col2rgb)
17 | importFrom(grDevices,dev.off)
18 | importFrom(grDevices,png)
19 | importFrom(grDevices,rainbow)
20 | importFrom(grDevices,rgb)
21 | importFrom(graphics,abline)
22 | importFrom(graphics,locator)
23 | importFrom(graphics,plot)
24 | importFrom(graphics,segments)
25 | importFrom(graphics,text)
26 | importFrom(graphics,title)
27 | importFrom(stats,setNames)
28 | importFrom(utils,tail)
29 | 


--------------------------------------------------------------------------------
/R/hypergate.R:
--------------------------------------------------------------------------------
   1 | #' @title f
   2 | #' @description Computes the F_beta score given an intenger number of True Positives (TP), True Negatives (TN). It is optimized for speed and n is thus not the total number of events
   3 | #' @param TP Number of true positive events
   4 | #' @param TN Number of true negative events
   5 | #' @param n beta^2*(TP+FN)+TN+FP
   6 | #' @param beta2 squared-beta to weight precision (low beta) or recall (high beta) more
   7 | f<-function(TP,TN,n,beta2=1){
   8 |     (1+beta2)*TP/(n+TP-TN) ##n equals beta2*(TP+FN)+(TN+FP)=beta2*nT+nF
   9 | }
  10 | 
  11 | #' @title fill_FNTN_matrix
  12 | #' @description fill_FNTN_matrix Used for assessing whether an expansion move is possible
  13 | #' @param xp_FN Expression matrix of False Negative events
  14 | #' @param xp_TN Expression matrix of True Negative events
  15 | #' @param B_FN Boolean matrix of FN events
  16 | #' @param B_TN Boolean matrix of TN events
  17 | #' @param par Current hyper-rectangle parametrization
  18 | fill_FNTN_matrix<-function(xp_FN,xp_TN,B_FN,B_TN,par){
  19 |     FN=nrow(xp_FN)
  20 |     TN=nrow(xp_TN)
  21 |     N=FN+TN
  22 |     xp=rbind(xp_FN,xp_TN)
  23 |     B=rbind(B_FN,B_TN)
  24 | 
  25 |     res=matrix(nrow=N,ncol=FN,data=FALSE)
  26 | 
  27 |     if(ncol(B)<=.Machine$double.digits){
  28 |         summer=matrix(nrow=ncol(B),ncol=1,data=2^(1:ncol(B)-1))
  29 |         hashes=(!B)%*%summer
  30 |         ##Split events according to the state for which channels they are FALSE
  31 |         B_hash=split(1:N,hashes)
  32 |         B_FN_hash=split(1:FN,hashes[1:FN])
  33 | 
  34 |         hash_table=matrix(nrow=length(B_hash),ncol=ncol(B),dimnames=list(names(B_hash),colnames(B)),data=NA)
  35 |         for(hash in names(B_hash)){
  36 |             hash_table[hash,]=B[B_hash[[hash]][1],]
  37 |         }
  38 |     } else {
  39 | 
  40 |         hashes=apply(!B,1,function(x)paste(x,collapse="/")) ##For every active channel, identify which are in FALSE state.
  41 |         unique_hashes=unique(hashes)
  42 |         hash_table=matrix(nrow=length(unique_hashes),ncol=ncol(B),dimnames=list(unique_hashes,colnames(B)),data=TRUE)
  43 |         for(hash in unique_hashes){
  44 |             hash_table[hash,as.logical(strsplit(hash,split="/")[[1]])]=FALSE
  45 |         }
  46 |         ##hash_table=unique(B)
  47 |         ##rownames(hash_table)=apply(hash_table,1,function(x)paste(which(!x),collapse="/"))
  48 | 
  49 |         ##Split events according to the state for which channels they are FALSE
  50 |         B_hash=split(1:N,hashes)
  51 |         B_FN_hash=split(1:FN,hashes[1:FN])
  52 |     }
  53 | 
  54 |     for(hash in names(B_FN_hash)){
  55 |         true_for_hash=hash_table[hash,] ##Check for a given hash which channels are TRUE
  56 |         compatible_candidates=rownames(hash_table)[rowSums(hash_table[,true_for_hash,drop=FALSE])==sum(true_for_hash)] ##Only other hashes for which only a subset of FALSE channels are FALSE are compatible (for the others there are guaranteed to not be implicated
  57 | 
  58 |         for(other_hash in compatible_candidates){
  59 |             others=B_hash[[other_hash]] ##Compatible events
  60 |             xp.tmp=xp[others,,drop=FALSE] ##Subsetting the matrix of events to only keep compatible events
  61 |             res.tmp=matrix(nrow=length(others),ncol=length(B_FN_hash[[hash]]))
  62 |             i=0
  63 |             for(event in B_FN_hash[[hash]]){
  64 |                 i=i+1
  65 |                 affected_parameters=xp[event,]<par
  66 |                 affected_parameters.values=xp[event,affected_parameters]
  67 |                 sum_affected=sum(affected_parameters) ##Number of affected parameters
  68 |                 res.tmp[,i]=(rowSums(xp.tmp[,affected_parameters,drop=FALSE]>=matrix(nrow=length(others),ncol=sum_affected,data=affected_parameters.values,byrow=TRUE))==rep(sum_affected,nrow(xp.tmp))) ##If all affected parameters are higher than the values for the event screened, the other event is implicated by the event screened
  69 |             }
  70 |             res[others,B_FN_hash[[hash]]]=res.tmp
  71 |         }
  72 |     }
  73 |     res
  74 | }
  75 | 
  76 | #' @title plot_gating_strategy
  77 | #' @description Plot a hypergate return
  78 | #' @param gate A hypergate object (produced by hypergate())
  79 | #' @param xp The expression matrix from which the 'gate' parameter originates
  80 | #' @param gate_vector Categorical data from which the 'gate' parameter originates
  81 | #' @param level Level of gate_vector identifying the population of interest
  82 | #' @param highlight color of the positive population when plotting
  83 | #' @param path Where png files will be produced
  84 | #' @param cex size of dots
  85 | #' @param ... passed to png
  86 | #' @examples
  87 | #' data(Samusik_01_subset)
  88 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
  89 | #' gate_vector=Samusik_01_subset$labels
  90 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
  91 | #' par(mfrow=c(1,ceiling(length(hg$active_channels)/2)))
  92 | #' plot_gating_strategy(gate=hg,xp=xp,gate_vector=gate_vector,level=23,highlight="red")
  93 | #' @export
  94 | 
  95 | plot_gating_strategy<-function(gate,xp,gate_vector,level,cex=0.5,highlight="black",path="./",...){
  96 |     if(missing(path)){
  97 |         warning("path argument is missing, output won't be saved to file")
  98 |     }
  99 |     w=!is.na(gate_vector)
 100 |     xp=xp[w,,drop=F]
 101 |     gate_vector=gate_vector[w]
 102 |     truce=gate_vector==level
 103 | 
 104 |     parameters=gate$pars.history
 105 |     active_parameters=gate$active_channels##apply(parameters,2,function(x){x[length(x)]!=x[1]})
 106 |     parameters=parameters[,active_parameters,drop=FALSE]
 107 |     if (nrow(parameters) > 1) {
 108 |         parameters_order=
 109 |             apply(parameters,2,function(x)min(which(x!=x[1])))
 110 |         parameters=parameters[,order(parameters_order,decreasing=FALSE),drop=FALSE]
 111 |     }
 112 |     parameters=setNames(parameters[nrow(parameters),,drop=TRUE],colnames(parameters))
 113 |     channels=sub("_max","",names(parameters))
 114 |     channels=sub("_min","",channels)
 115 | 
 116 |     ranges.global=apply(xp[,channels,drop=F],2,range)
 117 |     rownames(ranges.global)=c("min","max")
 118 |     
 119 |     cols=rep("black",nrow(xp))
 120 |     cols[gate_vector==level]=highlight
 121 | 
 122 |     n=length(parameters)
 123 | 
 124 |     active_events=rep(T,nrow(xp))
 125 |     iter=0
 126 | 
 127 |     ##All parameters that are "_min" take value 2, the "_max" take value 1
 128 |     direction=rep(2,length(parameters))
 129 |     direction[grep("_max",names(parameters))]=1
 130 | 
 131 |     ##Loop over pairs of consecutive parameters
 132 |     for(i in seq(1,n,by=2)){
 133 |         if((i+1)<=n){
 134 |             iter=iter+1
 135 |             if(!missing(path)){
 136 |                 png(paste(path,iter,".png",sep=""),...)
 137 |             }
 138 |             chan1=channels[i]
 139 |             chan2=channels[i+1]
 140 |             plot(
 141 |                 xp[active_events,chan1],
 142 |                 xp[active_events,chan2],
 143 |                 xlab=chan1,
 144 |                 ylab=chan2,
 145 |                 xlim=ranges.global[,chan1],
 146 |                 ylim=ranges.global[,chan2],
 147 |                 bty="l",
 148 |                 pch=16,
 149 |                 cex=cex,
 150 |                 col=cols[active_events]
 151 |             )
 152 |             segments(
 153 |                 x0=parameters[i],
 154 |                 y0=parameters[i+1],
 155 |                 x1=ranges.global[direction[i],chan1],
 156 |                 col="red"
 157 |             )
 158 |             segments(
 159 |                 x0=parameters[i],
 160 |                 y0=parameters[i+1],
 161 |                 y1=ranges.global[direction[i+1],chan2],
 162 |                 col="red"
 163 |             )
 164 | 
 165 |             ##Updating active_events
 166 |             if(direction[i]==2){
 167 |                 test1=xp[,chan1]>=parameters[i] ##If _min, events above parameter are selected
 168 |             } else {
 169 |                 test1=xp[,chan1]<=parameters[i] ##Else events above parameter below
 170 |             }
 171 |             if(direction[i+1]==2){
 172 |                 test2=xp[,chan2]>=parameters[i+1]
 173 |             } else {
 174 |                 test2=xp[,chan2]<=parameters[i+1]
 175 |             }
 176 |             active_events=active_events&test1&test2
 177 |             title(main=paste(paste(channels[1:(i+1)],ifelse(direction[1:(i+1)]==2,"+","-"),sep=""),collapse=", "))
 178 |             title(sub=paste("F=",signif(F_beta(truce,active_events),4),sep=""))
 179 |             if(!missing(path)){
 180 |                 dev.off()
 181 |             }
 182 |         }
 183 |     }
 184 |     ##Single last parameter if n is odd
 185 |     if(n%%2==1){
 186 |         iter=iter+1
 187 |         chan1=channels[i]
 188 |         if(!missing(path)){
 189 |             png(paste(path,iter,".png",sep=""),...)
 190 |         }
 191 |         plot(xp[active_events,chan1],main="",
 192 |              ylab=channels[i],
 193 |              xlab="Events index",
 194 |              ylim=ranges.global[,chan1],
 195 |              bty="l",
 196 |              pch=16,
 197 |              cex=cex,
 198 |              col=cols[active_events]
 199 |              )
 200 |         abline(h=parameters[i],col="red")
 201 |         if(direction[i]==2){
 202 |             test1=xp[,chan1]>=parameters[i] ##If _min, events above parameter are selected
 203 |         } else {
 204 |             test1=xp[,chan1]<=parameters[i] ##Else events above parameter below
 205 |         }
 206 |         active_events=active_events&test1
 207 |         title(main=paste(paste(channels[1:(i)],ifelse(direction[1:(i)]==2,"+","-"),sep=""),collapse=", "))
 208 |         title(sub=paste("F=",signif(F_beta(truce,active_events),4),sep=""))
 209 |         if(!missing(path)){
 210 |             dev.off()
 211 |         }
 212 |     }
 213 | }
 214 | 
 215 | #' @title hgate_info
 216 | #' @description Extract information about a hypergate return: the channels of
 217 | #'   the phenotype, the sign of the channels, the sign of the comparison, the
 218 | #'   thresholds. The function could also compute the Fscores if the xp,
 219 | #'   gate_vector and level parameters are given.
 220 | #' @param hgate A hypergate object (produced by hypergate())
 221 | #' @param xp The expression matrix from which the 'hgate' parameter originates,
 222 | #'   needed for Fscore computation
 223 | #' @param gate_vector Categorical data from which the 'hgate' parameter
 224 | #'   originates, needed for Fscore computation
 225 | #' @param level Level of gate_vector identifying the population of interest,
 226 | #'   needed for Fscore computation
 227 | #' @param beta Beta to weight purity (low beta) or yield (high beta) more,
 228 | #'   needed for Fscore computation
 229 | #' @return A data.frame with channel, sign, comp and threshold columns, and
 230 | #'   optionnally deltaF (score deterioration when parameter is ignored),Fscore1d (F_value when using only this parameter) and Fscore (F score when all parameters up to this one are included). Fscores are computed if xp, gate_vector
 231 | #'   and level are passed to the function.
 232 | #' @seealso \code{hg_pheno}, \code{hg_rule}
 233 | #' @examples
 234 | #' data(Samusik_01_subset)
 235 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
 236 | #' gate_vector=Samusik_01_subset$labels
 237 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
 238 | #' hgate_info(hgate=hg)
 239 | #' hgate_pheno(hgate=hg)
 240 | #' hgate_rule(hgate=hg)
 241 | #' @export
 242 | 
 243 | hgate_info <- function(hgate, xp, gate_vector, level, beta = 1) {
 244 |     miss = missing(xp) + missing(level) + missing(level)
 245 |     if (miss == 1 || miss == 2) {
 246 |         warning("at least one parameter is missing in order to compute scores.")
 247 |     }
 248 |                                         # retrieve threshold
 249 |     pars = hgate$pars.history
 250 |     active_pars = hgate$active_channels
 251 |     pars = pars[, active_pars, drop = FALSE]
 252 |     if (nrow(pars) > 1) {
 253 |         pars_order = apply(pars, 2, function(x) min(which(x != x[1])))
 254 |         pars = pars[, order(pars_order, decreasing = FALSE), drop = FALSE]
 255 |     }
 256 |     pars = setNames(pars[nrow(pars), , drop = TRUE], colnames(pars))
 257 |                                         # get channel names
 258 |     channels = sub("_max", "", names(pars))
 259 |     channels = sub("_min", "", channels)
 260 |                                         # phenotype sign
 261 |     dir.sign = rep('+', length(pars))
 262 |     dir.sign[grep("_max", names(pars))] = '-'
 263 |                                         # comparison sign
 264 |     dir.comp = rep(' >= ', length(pars))
 265 |     dir.comp[grep("_max", names(pars))] = ' <= '
 266 |                                         # all together
 267 |     res = data.frame(
 268 |         channels, sign = dir.sign, comp = dir.comp, threshold = pars
 269 |     )
 270 |                                         # scores
 271 |     if (miss == 0) {
 272 |         
 273 |         w = !is.na(gate_vector)
 274 |         xp = xp[w,,drop=F]
 275 |         gate_vector = gate_vector[w]
 276 |         truce = gate_vector==level
 277 |         
 278 |         ##Loop over parameters
 279 |         active_events = rep(TRUE, nrow(xp))
 280 |         Fscore = Fscore1D = c()
 281 |         for(i in seq(length(pars))) {
 282 |                                         # events for the current 1D gate
 283 |             if(dir.comp[i] == ' >= '){
 284 |                 test1D = xp[,channels[i]] >= pars[i]
 285 |             } else {
 286 |                 test1D = xp[,channels[i]] <= pars[i]
 287 |             }
 288 |             Fscore1D = c(Fscore1D, signif(F_beta(truce, test1D, beta = beta), 4))
 289 |                                         # update active events
 290 |             active_events = active_events & test1D
 291 |             Fscore = c(Fscore, signif(F_beta(truce, active_events, beta = beta), 4))
 292 |         }
 293 |         res = cbind(res, deltaF=channels_contributions(hgate, xp, gate_vector, level, beta),Fscore1D, Fscore)
 294 |     }
 295 |     res
 296 | }
 297 | 
 298 | #' @title hgate_pheno
 299 | #' @description Build a human readable phenotype, i.e. a combination of channels
 300 | #'   and sign (+ or -) from a hypergate return.
 301 | #' @param hgate A hypergate object (produced by hypergate())
 302 | #' @param collapse A character string to separate the markers.
 303 | #' @return A string representing the phenotype.
 304 | #' @seealso \code{hg_rule}, \code{hg_info}
 305 | #' @examples
 306 | #' ## See hgate_info
 307 | #' @export
 308 | 
 309 | hgate_pheno <- function(hgate, collapse = ", ") {
 310 |     with(hgate_info(hgate), paste0(channels, sign, collapse = collapse))
 311 | }
 312 | 
 313 | #' @title hgate_rule
 314 | #' @description Build a human readable rule i.e. a combination of channels, sign
 315 | #'   of comparison and threshold.
 316 | #' @param hgate A hypergate object (produced by hypergate())
 317 | #' @param collapse A character string to separate the markers.
 318 | #' @param digits An integer that specifies the decimal part when rounding.
 319 | #' @return A data.frame with channel, sign, comp and threshold columns
 320 | #' @seealso \code{hg_pheno}, \code{hg_rule}
 321 | #' @examples
 322 | #' ## See hgate_info
 323 | #' @export
 324 | 
 325 | hgate_rule <- function(hgate, collapse = ", ", digits = 2) {
 326 |     with(hgate_info(hgate), paste0(channels, comp, round(threshold, digits), collapse = collapse))
 327 | }
 328 | 
 329 | #' @title hgate_sample
 330 | #' @description Downsample the data in order to fasten the computation and
 331 | #'   reduce the memory usage.
 332 | #' @param gate_vector A Categorical vector whose length equals the number of
 333 | #'   rows of the matrix to sample (nrow(xp))
 334 | #' @param level A level of gate_vector so that gate_vector == level will produce
 335 | #'   a boolean vector identifying events of interest
 336 | #' @param size An integer specifying the maximum number of events of interest to
 337 | #'   retain. If the count of events of interest is lower than \code{size}, than
 338 | #'   \code{size} will be set to that count.
 339 | #' @param method A string specifying the method to balance the count of events.
 340 | #'   \code{"prop"} means proportionnality: if events of interest are sampled in
 341 | #'   a 1/10 ratio, then all others events are sampled by the same ratio.
 342 | #'   \code{"10x"} means a balance of 10 between the count events of interest and
 343 | #'   the count all others events. \code{"ceil"} means a uniform sampling no more
 344 | #'   than the specified size for each level of the gate_vector. \code{level} is
 345 | #'   unused in that method.
 346 | #' @return A logical vector with TRUE correspond to the events being sampled, ie
 347 | #'   kept to further analysis
 348 | #' @note No replacement is applied. If there are less events in one group or the
 349 | #'   alternate than the algorithm requires, then all available events are
 350 | #'   returned. NA values in gate_vector are not sampled, ie ignored.
 351 | #' @examples
 352 | #' # Standard procedure with downsampling
 353 | #' data(Samusik_01_subset)
 354 | #' xp <- Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
 355 | #' gate_vector <- Samusik_01_subset$labels
 356 | #' sampled <- hgate_sample(gate_vector, level=8, 100)
 357 | #' table(sampled)
 358 | #' table(gate_vector[sampled])
 359 | #' xp_sampled <- xp[sampled, ]
 360 | #' gate_vector_sampled <- gate_vector[sampled]
 361 | #' hg <- hypergate(xp_sampled, gate_vector_sampled, level=8, delta_add=0.01)
 362 | #' # cluster 8 consists in 122 events
 363 | #' table(gate_vector)
 364 | #' # Downsampling
 365 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 100)])
 366 | #' # Downsampling reduces the alternate events
 367 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 100, "10x")])
 368 | #' # Downsampling is limited to the maximum number of events of interest
 369 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 150)])
 370 | #' # Downsampling is limited to the maximum number of events of interest, and
 371 | #' # the alternate events are downsampled to a total of 10 times
 372 | #' table(gate_vector[hgate_sample(gate_vector, level=8, 150, "10x")])
 373 | #' # More details about sampling
 374 | #' # Convert -1 to NA, NA are not sampled
 375 | #' gate_vector[gate_vector==-1] = NA
 376 | #' gate_vector = factor(gate_vector)
 377 | #' table(gate_vector, useNA = "alw")
 378 | #' #
 379 | #' # target size = 100 whereas initial freq is 122 for pop 8
 380 | #' smp.prop = hgate_sample(gate_vector, level = 8, size = 100, method = "prop")
 381 | #' smp.10x  = hgate_sample(gate_vector, level = 8, size = 100, method = "10x")
 382 | #' smp.ceil = hgate_sample(gate_vector, size = 10, method = "ceil")
 383 | #' table(smp.prop)
 384 | #' table(smp.10x)
 385 | #' table(smp.ceil)
 386 | #' rbind(raw = table(gate_vector),
 387 | #'       prop = table(gate_vector[smp.prop]),
 388 | #'       `10x` = table(gate_vector[smp.10x]),
 389 | #'       ceil = table(gate_vector[smp.ceil]))
 390 | #' #
 391 | #' # target size = 30 whereas initial freq is 25 for pop 14
 392 | #' smp.prop = hgate_sample(gate_vector, level = 14, size = 30, method = "prop")
 393 | #' smp.10x  = hgate_sample(gate_vector, level = 14, size = 30, method = "10x")
 394 | #' table(smp.prop)
 395 | #' table(smp.10x)
 396 | #' rbind(raw = table(gate_vector),
 397 | #'       prop = table(gate_vector[smp.prop]),
 398 | #'       `10x` = table(gate_vector[smp.10x]))
 399 | #' # prop returns original data, because target size ids larger than initial freq
 400 | #' # 10x  returns sampled data according to initial freq, such as the total amount
 401 | #' # of other events equals 10x initial freq of pop 14
 402 | #' @export
 403 | 
 404 | hgate_sample <- function(gate_vector, level, size = 1000, method = "prop") {
 405 |     ## Where gate_vector is the vector of clusters and level the population of interest)
 406 |     subsample <- rep(FALSE, length(gate_vector))
 407 |                                         # multi-class methods
 408 |     if (method == "ceil") {
 409 |         if (!missing(level))
 410 |             warning(sprintf("level is ignored when method is %s", method))
 411 |         for (level in unique(gate_vector)) {
 412 |             if (is.na(level)) next()
 413 |             nna_pop <- !is.na(gate_vector)
 414 |             pos_pop <- nna_pop & (gate_vector==level)
 415 |             sum_pos <- sum(pos_pop, na.rm = TRUE)
 416 |             if (sum_pos <= size) {
 417 |                 subsample[pos_pop] = TRUE
 418 |             } else {
 419 |                 subsample[pos_pop][sample.int(sum_pos, size)] = TRUE
 420 |             }
 421 |         }
 422 |         return(subsample)
 423 |     }
 424 |                                         # pos vs neg methods
 425 |     nna_pop <- !is.na(gate_vector)
 426 |     pos_pop <- nna_pop & (gate_vector==level)
 427 |     sum_pos <- sum(pos_pop)
 428 |                                         # downsample positive population
 429 |     if (sum_pos <= size) {
 430 |         subsample[pos_pop] = TRUE
 431 |         pos_size = sum_pos
 432 |     } else {
 433 |         subsample[pos_pop][sample.int(sum_pos, size)] = TRUE
 434 |         pos_size = size
 435 |     }
 436 |                                         # downsample positive population
 437 |     sum_neg <- sum(nna_pop) - sum_pos
 438 |     if (method == "prop") {
 439 |         neg_size = round(pos_size / sum_pos * sum_neg)
 440 |     } else if (method == "10x") {
 441 |         neg_size = 10 * pos_size
 442 |     } else {
 443 |         stop(sprintf("method \"\" is not implemented.", method))
 444 |     }
 445 |     neg_pop <- nna_pop & (gate_vector!=level)
 446 |     if (neg_size < sum_neg) {
 447 |         idx <- sample.int(sum_neg, neg_size)
 448 |         subsample[neg_pop][idx] <- TRUE
 449 |     } else {
 450 |         subsample[neg_pop] <- TRUE
 451 |     }
 452 |     subsample
 453 | }
 454 | 
 455 | #' @title subset_matrix_hg
 456 | #' @description Returns a boolean vector whose TRUE elements correspond to events inside the hyperrectangle
 457 | #' @param gate a return from hypergate
 458 | #' @param xp Expression matrix used for gate
 459 | #' @examples
 460 | #' data(Samusik_01_subset)
 461 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
 462 | #' gate_vector=Samusik_01_subset$labels
 463 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
 464 | #' gating_state=subset_matrix_hg(hg,xp)
 465 | #' gating_state=ifelse(gating_state,"Gated in","Gated out")
 466 | #' target=ifelse(gate_vector==23,"Target events","Others")
 467 | #' table(gating_state,target)
 468 | #' @export
 469 | subset_matrix_hg<-function(gate,xp){
 470 |     if(length(gate$active_channels)>0){
 471 |         state=rep(TRUE,nrow(xp))
 472 |         pars=tail(gate$pars.history,1)[1,]
 473 |         for(chan in gate$active_channels){
 474 |             chan_real=substr(chan,1,nchar(chan)-4)
 475 |             if(substr(chan,nchar(chan)-3,nchar(chan))=="_min"){
 476 |                 state[state][xp[state,chan_real]<pars[chan]]=FALSE
 477 |             } else {
 478 |                 state[state][xp[state,chan_real]>pars[chan]]=FALSE
 479 |             }        
 480 |         }
 481 |         return(state)
 482 |     } else {
 483 |         return(rep(TRUE,nrow(xp)))
 484 |     }
 485 | }
 486 | 
 487 | #' @title contract
 488 | #' @description Test (some) possible contractions of the hyperrectangle
 489 | #' @param par Current parametrization of the hyperrectangle
 490 | #' @param xp_pos Expression matrix for positive events
 491 | #' @param state_pos State vector of the positive events
 492 | #' @param xp_neg Expression matrix for negative events
 493 | #' @param state_neg State vector of the negative events
 494 | #' @param n passed to f
 495 | #' @param TP integer: current number of TP
 496 | #' @param TN integer: current number of TN
 497 | #' @param beta Passed from the top-level function
 498 | #' @param envir Current environment of the optimization
 499 | contract<-function(
 500 |                    par=par,
 501 |                    xp_pos=envir$xp_pos,
 502 |                    state_pos=envir$state_pos,
 503 |                    xp_neg=envir$xp_neg,
 504 |                    state_neg=envir$state_neg,
 505 |                    n=envir$n,
 506 |                    TP=envir$TP,
 507 |                    TN=envir$TN,
 508 |                    beta=envir$beta2,
 509 |                    envir=parent.frame()
 510 |                    ){
 511 |     ##Evaluate what happens when we increase (strictly) the parameter to fit a TP
 512 |     TP_par=sort(xp_pos[state_pos,par]) ##Sorted values, possibly duplicates
 513 |     FP_par=sort(xp_neg[state_neg,par]) ##Sorted values, possibly duplicates
 514 | 
 515 |     cc=unique(TP_par) #Contraction candidates.
 516 |     if(length(cc)<1){
 517 |         return(
 518 |             list(
 519 |                 newF=envir$f_current,
 520 |                 par_best=setNames(min(envir$hg.env$xp[,par]),par),
 521 |                 flag_add=FALSE,
 522 |                 dTP_max=0,
 523 |                 dTN_max=0
 524 |             )
 525 |         )
 526 |     }
 527 |     i_TP=1L ##Iterator for TP vector
 528 |     i_FP=1L ##Iterator for FP vector
 529 |     dTP=0L ##Keep track of changes in TP for every TP cutoff value
 530 |     dTN=0L ##Keep track of changes in TN for every TP cutoff value
 531 | 
 532 |     ncc=length(cc)
 533 |     dTPvec=rep(0L,ncc)
 534 |     dTNvec=rep(0L,ncc)
 535 |     j=1L
 536 |     for(level in cc){
 537 |         ##How many TP we lose if we cut just below this level
 538 |         while(TP_par[i_TP]<level&i_TP<=length(TP_par)){
 539 |             dTP=dTP-1L
 540 |             i_TP=i_TP+1L
 541 |         }
 542 |         ##How many TN we gain if we cut just below this level
 543 |         while(FP_par[i_FP]<level&i_FP<=length(FP_par)){
 544 |             dTN=dTN+1L
 545 |             i_FP=i_FP+1L
 546 |         }
 547 | 
 548 |         dTPvec[j]=dTP
 549 |         dTNvec[j]=dTN
 550 |         j=j+1L
 551 |     }
 552 |     
 553 |     newF=f(rep(TP,ncc)+dTPvec,rep(TN,ncc)+dTNvec,n,beta2=beta)
 554 |     
 555 |     w=which.max(newF)
 556 |     
 557 |     list(
 558 |         newF=newF[w],
 559 |         par_best=setNames(cc[w],par),
 560 |         flag_add=FALSE,
 561 |         dTP_max=dTPvec[w],
 562 |         dTN_max=dTNvec[w]
 563 |     )
 564 | }
 565 | 
 566 | ##From current state and update parameters, returns updated state
 567 | #' @title contract.update
 568 | #' @description Update the hyperrectangle to the best contraction move found
 569 | #' @param contract_object output of the contract function
 570 | #' @param pars Current parametrization of the hyperrectangle
 571 | #' @param active_channels vector of currently-used parameters
 572 | #' @param b_pos boolean matrix of positive events
 573 | #' @param b_neg boolean matrix of negative events
 574 | #' @param xp_pos Expression matrix for positive events
 575 | #' @param state_pos State vector of the positive events
 576 | #' @param xp_neg Expression matrix for negative events
 577 | #' @param state_neg State vector of the negative events
 578 | #' @param envir Current environment of the optimization
 579 | #' @param TP integer: current number of TP
 580 | #' @param TN integer: current number of TN
 581 | contract.update<-function(
 582 |                           contract_object,
 583 |                           pars=envir$pars,
 584 |                           active_channels=envir$active_channels,
 585 |                           b_pos=envir$b_pos,
 586 |                           b_neg=envir$b_neg,
 587 |                           state_pos=envir$state_pos,
 588 |                           state_neg=envir$state_neg,
 589 |                           TN=envir$TN,
 590 |                           TP=envir$TP,
 591 |                           xp_pos=envir$xp_pos,
 592 |                           xp_neg=envir$xp_neg,
 593 |                           envir=parent.frame()
 594 |                           ){
 595 |     pars[names(contract_object$par_best)]=contract_object$par_best
 596 |     if(!names(contract_object$par_best)%in%active_channels){
 597 |         active_channels=c(active_channels,names(contract_object$par_best))
 598 |     }
 599 | 
 600 |     ##Values in B that may be affected (previously TRUE)
 601 |     w_pos=b_pos[,names(contract_object$par_best)]
 602 |     w_neg=b_neg[,names(contract_object$par_best)]
 603 | 
 604 |     ##Those that are indeed affected in B
 605 |     w_state_pos=xp_pos[w_pos,names(contract_object$par_best)]<contract_object$par_best
 606 |     w_state_neg=xp_neg[w_neg,names(contract_object$par_best)]<contract_object$par_best
 607 | 
 608 |     ##We update the state
 609 |     state_pos[w_pos][w_state_pos]=FALSE
 610 |     state_neg[w_neg][w_state_neg]=FALSE
 611 | 
 612 |     ##We perform the changes in B
 613 |     b_pos[w_pos,names(contract_object$par_best)][w_state_pos]=FALSE
 614 |     b_neg[w_neg,names(contract_object$par_best)][w_state_neg]=FALSE
 615 | 
 616 |     list(
 617 |         pars=pars,
 618 |         active_channels=active_channels,
 619 |         state_pos=state_pos,
 620 |         state_neg=state_neg,
 621 |         b_pos=b_pos,
 622 |         b_neg=b_neg,
 623 |         TP=TP+contract_object$dTP,
 624 |         TN=TN+contract_object$dTN
 625 |     )
 626 | }
 627 | 
 628 | #' @title expand
 629 | #' @description Test (some) possible expansions of the hyperrectangle
 630 | #' @param n passed to f
 631 | #' @param TP integer: current number of TP
 632 | #' @param TN integer: current number of TN
 633 | #' @param FN integer: current number of FP
 634 | #' @param beta Passed from the top-level function 
 635 | #' @param envir Coreloop environment
 636 | #' @param FNTN_matrix Boolean matrix of dim (FN, FN + TN), where Mij is TRUE if and only if expanding to include the ith FN in the gate would lead to the inclusion of the jth column event
 637 | expand<-function(
 638 |                  FN=envir$FN,
 639 |                  FNTN_matrix=envir$FNTN_matrix,
 640 |                  TP=envir$TP,
 641 |                  TN=envir$TN,
 642 |                  n=envir$n,
 643 |                  beta=envir$beta2,
 644 |                  envir=parent.frame()
 645 |                  ){
 646 |     ##Expanding only concerns events that are currently !state. Events for which "state" is positive are guaranteed to stay "state==TRUE".
 647 |     ##FNTN_matrix tries to create expansion that match a given event, and flag !state events that will be included along with it
 648 | 
 649 |     dTP=colSums(FNTN_matrix[1:FN,1:FN,drop=FALSE])
 650 |     dTN=-colSums(FNTN_matrix[(FN+1):nrow(FNTN_matrix),1:FN,drop=FALSE])
 651 |     TP.vector=rep(TP,FN)
 652 |     TN.vector=rep(TN,FN)
 653 |     n.vector=rep(n,FN)
 654 |     dF_expansion=f(TP.vector+dTP,TN.vector+dTN,n.vector,beta2=beta)
 655 |     w=which.max(dF_expansion)
 656 |     newF=dF_expansion[w]
 657 |     list(
 658 |         newF=newF,
 659 |         dTP_max=dTP[w],
 660 |         dTN_max=dTN[w],
 661 |         which_expansion=w,
 662 |         FN=FN,
 663 |         FNTN_matrix=FNTN_matrix
 664 |     )
 665 | }
 666 | 
 667 | #' @title expand.update
 668 | #' @description Update the hyperrectangle to the best expansion move found
 669 | #' @param expand.object output of the expand function
 670 | #' @param pars Current parametrization of the hyperrectangle
 671 | #' @param xp_pos Expression matrix for positive events
 672 | #' @param xp_neg Expression matrix for negative events
 673 | #' @param state_pos State vector of the positive events
 674 | #' @param state_neg State vector of the negative events
 675 | #' @param b_pos boolean matrix of positive events
 676 | #' @param b_neg boolean matrix of negative events
 677 | #' @param n passed to f
 678 | #' @param TP integer: current number of TP
 679 | #' @param TN integer: current number of TN
 680 | #' @param envir Current environment of the optimization
 681 | expand.update<-function(
 682 |                         expand.object,
 683 |                         pars=envir$pars,
 684 |                         xp_pos=envir$xp_pos,
 685 |                         xp_neg=envir$xp_neg,
 686 |                         state_pos=envir$state_pos,
 687 |                         state_neg=envir$state_neg,
 688 |                         b_pos=envir$b_pos,
 689 |                         b_neg=envir$b_neg,
 690 |                         n=envir$n,
 691 |                         TP=envir$TP,
 692 |                         TN=envir$TN,
 693 |                         envir=parent.frame()
 694 |                         ){
 695 | 
 696 |     f_best=expand.object$newF
 697 |     expansion_parameters=setNames(xp_pos[!state_pos,,drop=FALSE][expand.object$which_expansion,],names(pars))
 698 |     affected_expansion_parameters=expansion_parameters<pars
 699 |     par_best=expansion_parameters[affected_expansion_parameters]
 700 | 
 701 |     pars[names(par_best)]=par_best
 702 | 
 703 |     ##For recycling
 704 |     B_FN_old=b_pos
 705 |     B_TN_old=b_neg
 706 |     
 707 |     for(p in which(affected_expansion_parameters)){
 708 |         b_pos[,p]=xp_pos[,p]>=pars[p]
 709 |         b_neg[,p]=xp_neg[,p]>=pars[p]
 710 |     }
 711 |     
 712 |     w_FN=expand.object$FNTN_matrix[1:expand.object$FN,expand.object$which_expansion]
 713 |     w_TN=expand.object$FNTN_matrix[(expand.object$FN+1):nrow(expand.object$FNTN_matrix),expand.object$which_expansion]
 714 |     
 715 |     state_pos[!state_pos][w_FN]=TRUE ##FN converted to TP
 716 |     state_neg[!state_neg][w_TN]=TRUE ##TN converted to FP
 717 | 
 718 |     ##For recycling
 719 |     B_FN_old=B_FN_old[!state_pos,,drop=FALSE]
 720 |     B_TN_old=B_TN_old[!state_neg,,drop=FALSE]
 721 |     B_FN_new=b_pos[!state_pos,,drop=FALSE]
 722 |     B_TN_new=b_neg[!state_neg,,drop=FALSE]
 723 |     xp_FN=xp_pos[!state_pos,,drop=FALSE]
 724 |     xp_TN=xp_neg[!state_neg,,drop=FALSE]
 725 |     
 726 |     TN=TN+expand.object$dTN_max
 727 |     TP=TP+expand.object$dTP_max
 728 | 
 729 |     ##f_current=f(TP,TN,n,beta2=beta)
 730 | 
 731 |     flag_expansion=TP<length(state_pos) ##If the expansion included all positive events, cannot try to expand more. Otherwise it's TRUE (so we try expanding again)
 732 | 
 733 |     FNTN_matrix=FNTN_matrix.recycle(
 734 |         expand.object$FNTN_matrix[!c(w_FN,w_TN),!w_FN,drop=FALSE],
 735 |         B_FN_old,
 736 |         B_TN_old,
 737 |         B_FN_new,
 738 |         B_TN_new,
 739 |         xp_FN,
 740 |         xp_TN,
 741 |         pars[envir$active_channels]
 742 |     )
 743 |     ## ##TMP failcheck
 744 |     ## if(TN!=sum(!state_neg)){
 745 |     ##     stop("Problem TN mismatch")
 746 |     ## }
 747 |     ## if(TP!=sum(state_pos)){
 748 |     ##     stop("Problem TP mismatch")
 749 |     ## }
 750 |     ## if(f_current!=f_best){
 751 |     ##     stop("f_current != f_best in contraction")
 752 |     ## }
 753 |     ## if(any(state_pos!=(rowSums(b_pos)==ncol(b_pos)))){
 754 |     ##     stop("Mismatch state_pos b_pos")
 755 |     ## }
 756 |     ## if(any(state_neg!=(rowSums(b_neg)==ncol(b_neg)))){
 757 |     ##     stop("Mismatch state_neg b_neg")
 758 |     ## }
 759 | 
 760 |     return(
 761 |         list(
 762 |             pars=pars,
 763 |             state_pos=state_pos,
 764 |             state_neg=state_neg,
 765 |             b_pos=b_pos,
 766 |             b_neg=b_neg,
 767 |             flag_expansion=flag_expansion,
 768 |             TN=TN,
 769 |             TP=TP,
 770 |             par_best=par_best,
 771 |             FNTN_matrix=FNTN_matrix
 772 |         )
 773 |     )
 774 | }
 775 | 
 776 | #' @title FNTN_matrix.recycle
 777 | #' @description Recycle an expansion matrix
 778 | #' @param FNTN_matrix Expansion matrix to recycle
 779 | #' @param xp_FN Expression matrix of False Negative events
 780 | #' @param xp_TN Expression matrix of True Negative events
 781 | #' @param B_FN_old Boolean matrix of FN events before the last expansion
 782 | #' @param B_TN_old Boolean matrix of TN events before the last expansion
 783 | #' @param B_FN_new Boolean matrix of FN events after the last expansion
 784 | #' @param B_TN_new Boolean matrix of TN events after the last expansion
 785 | #' @param par Current hyper-rectangle parametrization
 786 | FNTN_matrix.recycle<-function(
 787 |                               FNTN_matrix, ##element to recycle
 788 |                               B_FN_old,
 789 |                               B_TN_old,
 790 |                               B_FN_new,
 791 |                               B_TN_new,
 792 |                               xp_FN,
 793 |                               xp_TN,
 794 |                               par
 795 |                               ){
 796 |     
 797 |     ##Identify elements that changed
 798 |     ##w_FN_changed=rowSums(xor(B_FN_old,B_FN_new))>0
 799 |     w_FN_changed=rep(TRUE,ncol(FNTN_matrix)) ##We have to include every FN whether they changed or not
 800 |     w_TN_changed=rowSums(xor(B_TN_old[,names(par)],B_TN_new[,names(par)]))>0
 801 | 
 802 |     if(any(w_FN_changed)){
 803 |         FNTN_matrix[c(w_FN_changed,w_TN_changed),w_FN_changed]=fill_FNTN_matrix(xp_FN[w_FN_changed,names(par),drop=FALSE],xp_TN[w_TN_changed,names(par),drop=FALSE],B_FN_new[w_FN_changed,names(par),drop=FALSE],B_TN_new[w_TN_changed,names(par),drop=FALSE],par)
 804 |     }
 805 |     FNTN_matrix
 806 | }
 807 | 
 808 | #' @title coreloop
 809 | #' @param par Current parametrization of the hyperrectangle
 810 | #' @param hg.env Environment where the main execution of hypergate takes place
 811 | #' @description Core optimization loop of hypergate
 812 | coreloop<-function(par,hg.env=hg.env$hg.env){
 813 |     loop.env=environment()
 814 |     pars=hg.env$par
 815 |     active_channels=hg.env$active_channels
 816 |     TP=hg.env$TP
 817 |     state_pos=hg.env$state_pos
 818 |     state_neg=hg.env$state_neg
 819 |     b_pos=hg.env$b_pos
 820 |     b_neg=hg.env$b_neg
 821 |     
 822 |     
 823 |     ##Step 1 ADD NEW CHANNEL
 824 |     contractions=contract(loop.env$par,envir=loop.env$hg.env)
 825 |     ##best_contraction=which.max(sapply(contractions,function(x)x$newF))
 826 |     if(contractions$newF>loop.env$hg.env$f_current){
 827 |         contraction=contract.update(contractions,envir=loop.env$hg.env)
 828 |         
 829 |         sapply(names(contraction),function(x)assign(x,contraction[[x]],envir=loop.env))
 830 |         f_current=contractions$newF
 831 |         
 832 |         ##Update add channel
 833 |         if(hg.env$verbose){
 834 |             print(paste("Trying to add",names(contractions$par_best),":",contractions$par_best,",F=",signif(f_current,4L)))
 835 |         }
 836 |         f_res=c(loop.env$hg.env$f_res,f_current)
 837 |         pars.history.rank=rbind(loop.env$hg.env$pars.history.rank,pars)
 838 |         flag_expansion=TRUE
 839 |         flag_contraction=TRUE
 840 |     } else {
 841 |         if(hg.env$verbose){
 842 |             print("No improvement")
 843 |         }
 844 |         assign("f_current",hg.env$f_current,envir=loop.env)
 845 |         return(loop.env)
 846 |     }
 847 |     
 848 |     ##Once we have added a channel
 849 |     ##We expand as much as possible, and then try to contract
 850 |     ##We stop only if we haven't done anything (so no contraction or expansion is possible)
 851 |     while(flag_expansion|flag_contraction){
 852 |         ##EXPANSION
 853 |         flag_expansion=length(active_channels)>1&TP<length(state_pos) ##We only try to expand if we already have two channels... and its not possible if we don't have a FN
 854 |         flag_contraction=length(active_channels)>1
 855 |         flag_recycling=FALSE
 856 |         while(flag_expansion){
 857 |             FN=length(state_pos)-TP
 858 |             ##Expanding only concerns events that are currently !state. Events for which "state" is positive are guaranteed to stay "state==TRUE".
 859 |             ##FNTN_matrix tries to create expansion that match a given event, and flag !state events that will be included along with it
 860 |             if(!flag_recycling){
 861 |                 FNTN_matrix=fill_FNTN_matrix(loop.env$hg.env$xp_pos[!state_pos,active_channels,drop=FALSE],loop.env$hg.env$xp_neg[!state_neg,active_channels,drop=FALSE],b_pos[!state_pos,active_channels,drop=FALSE],b_neg[!state_neg,active_channels,drop=FALSE],pars[active_channels])
 862 |             } else {
 863 |                 FNTN_matrix=expansion$FNTN_matrix
 864 |                 ## FNTN_matrix.raw=fill_FNTN_matrix(xp_pos[!state_pos,active_channels,drop=FALSE],xp_neg[!state_neg,active_channels,drop=FALSE],b_pos[!state_pos,active_channels,drop=FALSE],b_neg[!state_neg,active_channels,drop=FALSE],pars[active_channels])
 865 |                 ## if(any(FNTN_matrix.raw!=expansion$FNTN_matrix)){
 866 |                 ##     recover()
 867 |                 ## }
 868 |             }
 869 |             expansions=expand(envir=loop.env,n=hg.env$n,beta=hg.env$beta2)
 870 |             if(expansions$newF>f_current){
 871 |                 which_expansion=expansions$which_expansion
 872 |                 expansion=expand.update(expansions,envir=loop.env,xp_pos=loop.env$hg.env$xp_pos,xp_neg=loop.env$hg.env$xp_neg)
 873 |                 flag_expansion=TP<length(state_pos) ##If the expansion included all positive events, cannot try to expand more.
 874 |                 sapply(names(expansion),function(x)assign(x,expansion[[x]],envir=loop.env))
 875 |                 f_current=expansions$newF
 876 |                 f_res=c(f_res,f_current)
 877 |                 pars.history.rank=rbind(pars.history.rank,pars)
 878 |                 
 879 |                 if(hg.env$verbose){
 880 |                     print(paste("Expansion of",paste(names(expansion$par_best),":",expansion$par_best,", ",collapse=""),"F=",signif(f_current,4)))
 881 |                 }
 882 |                 flag_recycling=TRUE
 883 |             } else {
 884 |                 flag_expansion=FALSE
 885 |             }
 886 |         }
 887 | 
 888 |         ##CONTRACTION
 889 |         contractions=sapply(active_channels,contract,simplify=FALSE,envir=loop.env,xp_pos=hg.env$xp_pos,xp_neg=hg.env$xp_neg,n=hg.env$n,beta=hg.env$beta2)
 890 |         best_contraction=which.max(sapply(contractions,function(x)x$newF))
 891 |         if(contractions[[best_contraction]]$newF>f_current){
 892 |             contraction=contract.update(contractions[[best_contraction]],envir=loop.env,xp_pos=hg.env$xp_pos,xp_neg=hg.env$xp_neg)
 893 |             sapply(names(contraction),function(x)assign(x,contraction[[x]],envir=loop.env))
 894 |             f_current=contractions[[best_contraction]]$newF
 895 |             if(hg.env$verbose){
 896 |                 print(paste("Contracting ",names(contractions[[best_contraction]]$par_best),":",contractions[[best_contraction]]$par_best,",F=",signif(f_current,4L)))
 897 |             }
 898 |             f_res=c(f_res,f_current)
 899 |             pars.history.rank=rbind(pars.history.rank,pars)
 900 |             flag_contraction=TRUE
 901 |         } else {
 902 |             flag_contraction=FALSE
 903 |         }
 904 |     }
 905 |     return(loop.env)
 906 | }
 907 | 
 908 | #' @title hypergate
 909 | #' @description Finds a hyperrectangle gating around a population of interest
 910 | #' @param xp an Expression matrix
 911 | #' @param gate_vector A Categorical vector of length nrow(xp)
 912 | #' @param level A level of gate_vector so that gate_vector == level will produce a boolean vector identifying events of interest
 913 | #' @param delta_add If the increase in F after an optimization loop is lower than delta_add, the optimization will stop (may save computation time)
 914 | #' @param beta Purity / Yield trade-off
 915 | #' @param verbose Boolean. Whether to print information about the optimization status.
 916 | #' @examples
 917 | #' data(Samusik_01_subset)
 918 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
 919 | #' gate_vector=Samusik_01_subset$labels
 920 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
 921 | #' @seealso \code{\link{channels_contributions}} for ranking parameters within the output, \code{\link{reoptimize_strategy}} for reoptimizing a output on a subset of the markers, \code{\link{plot_gating_strategy}} for plotting an output, \code{\link{subset_matrix_hg}} to apply the output to another input matrix, \code{\link{boolmat}} to obtain a boolean matrix stating which events are filtered out because of which markers
 922 | #' @export
 923 | 
 924 | hypergate<-function(xp,gate_vector,level,delta_add=0,beta=1,verbose=FALSE){
 925 |     beta2=beta^2
 926 |     if(is.null(rownames(xp))){
 927 |         rownames(xp)=1:nrow(xp)
 928 |     }
 929 |     r=setNames(rownames(xp),1:nrow(xp))
 930 |     rownames(xp)=1:nrow(xp)
 931 | 
 932 |     xp_src=xp
 933 | 
 934 |     ##Sorting values so that cutoffs are very easy to compute. Rank = 1 is small, rank = n is big
 935 |     xp=apply(xp,2,rank,ties.method="min")
 936 |     rownames(xp)=1:nrow(xp)
 937 | 
 938 |     ##Each parameter is duplicated (for upper and lower cutoffs)
 939 |     ##n=nrow(xp)
 940 |     colnames=paste(rep(colnames(xp),each=2),c("min","max"),sep="_")
 941 | 
 942 |     xp.2=matrix(nrow=nrow(xp),ncol=2*ncol(xp),data=0,dimnames=list(rownames(xp),colnames))
 943 |     storage.mode(xp.2)="integer"
 944 | 
 945 |     ##For both mins (odd columns) and maxs (even columns), make sure than low rank = "extreme" values (likely to pop). We can then use min to select
 946 |     xp.2[,seq(1,ncol(xp.2),by=2)]=xp
 947 |     xp.2[,seq(2,ncol(xp.2),by=2)]=nrow(xp)-xp
 948 |     xp=xp.2
 949 |     rm(xp.2)
 950 | 
 951 |     truce=gate_vector==level
 952 |     xp_pos=xp[truce,,drop=FALSE] ##xp for positive elements
 953 |     state_pos=rep(T,nrow(xp_pos)) ##State vector (event selected or not in current gate)
 954 |     b_pos=matrix(nrow=nrow(xp_pos),ncol=ncol(xp_pos),dimnames=dimnames(xp_pos),data=TRUE) ##State vector for every channel. Keeps track of whether the event is above the cut-off value for this parameter (parameter = (channel,upper/lower bound)
 955 | 
 956 |     xp_neg=xp[!truce,,drop=FALSE] ##xp for negative elements
 957 |     state_neg=rep(T,nrow(xp_neg))
 958 |     b_neg=matrix(nrow=nrow(xp_neg),ncol=ncol(xp_neg),dimnames=dimnames(xp_neg),data=TRUE)
 959 | 
 960 |     ##Number of TN and TP to compute F
 961 |     TN=sum(!state_neg)
 962 |     TP=sum(state_pos)
 963 |     n=beta2*length(state_pos)+length(state_neg) ##This is only used in the function that computes F_beta.
 964 |     
 965 |     pars=setNames(as.integer(apply(xp,2,min)),colnames(xp)) ##Rank-parameters initialization
 966 |     pars.history.rank=matrix(nrow=1,ncol=length(pars),dimnames=list("0",names(pars)),data=pars) ##For reporting purposes we keep track of every optimization step
 967 |     storage.mode(pars.history.rank)="integer"
 968 | 
 969 |     active_channels=character()
 970 | 
 971 |     ##Optimization loop
 972 | 
 973 |     ##f value
 974 |     f_current=0 ##This is wrong but the optimization sometimes gets stuck really early if set otherwise (i.e. big cluster will naturally have "high" F if all==TRUE)
 975 |     f_res=f_current ##Just for reporting purposes
 976 | 
 977 |     hg.env=environment()
 978 | 
 979 |     while(length(active_channels)<=ncol(xp)){ ##This condition just makes sure that we can't add more channels than possible. The true ending condition is at the very end of the loop and enforced using "break"
 980 |         f_previous=f_current
 981 | 
 982 |         ##Core loop
 983 |         cycle=new.env()
 984 |         assign("f_current",f_current,envir=cycle)
 985 |         for(par in setdiff(colnames(xp),active_channels)){
 986 |             current_cycle=coreloop(par,hg.env=hg.env)
 987 |             if(current_cycle$f_current>cycle$f_current){
 988 |                 cycle=current_cycle
 989 |                 rm(current_cycle)
 990 |             }
 991 |         }
 992 |         
 993 |         if(cycle$f_current==f_previous){
 994 |             if(verbose){
 995 |                 print("Found no channel to add. Exiting")
 996 |             }
 997 |             break
 998 |         }
 999 |         
1000 |         sapply(ls(envir=cycle),function(x){
1001 |             assign(x,envir=hg.env,value=get(x,envir=cycle))
1002 |         })
1003 |         
1004 |         ##Ending condition: last channel did not bring much.
1005 |         if((f_current-f_previous)<delta_add){
1006 |             if(verbose){
1007 |                 print("Increase in f lower than delta_add, exiting")
1008 |             }
1009 |             if(length(active_channels)>1){ ##Unless its the only channel
1010 |                 pop=which(pars.history.rank[,tail(active_channels,1)]!=pars.history.rank[1,tail(active_channels,1)])[1] ##Flag history from when we added this last channel...
1011 |                 pars.history.rank=pars.history.rank[1:(pop-1),] ##... and remove it
1012 |                 f_res=f_res[1:(pop-1)]
1013 |                 active_channels=active_channels[-length(active_channels)]
1014 |             }
1015 |             break
1016 |         }
1017 |     }
1018 | 
1019 |     ##RETURN
1020 |     pars.history=pars.history.rank
1021 |     pars.history=matrix(nrow=nrow(pars.history.rank),ncol=ncol(pars.history.rank),data=NA,dimnames=dimnames(pars.history.rank))
1022 |     for(j in 1:ncol(pars.history)){
1023 |         pars.history[,j]=xp_src[match(pars.history.rank[,j],xp[,j]),j%%2+j%/%2]
1024 |     }
1025 |     return(list(pars.history=pars.history,pars.history.rank=pars.history.rank,f=f_res,active_channels=active_channels))
1026 | }
1027 | 
1028 | #' @title channels_contributions
1029 | #' @description Gives scores for the contribution of individual channels to a gating strategy
1030 | #' @param gate A return from hypergate
1031 | #' @param xp Expression matrix as in the hypergate call
1032 | #' @param gate_vector Categorical vector of length nrow(xp)
1033 | #' @param level A level of gate_vector that identifies the population of interest
1034 | #' @param beta, should be the same as for the hypergate object
1035 | #' @examples
1036 | #' data(Samusik_01_subset)
1037 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
1038 | #' gate_vector=Samusik_01_subset$labels
1039 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0)
1040 | #' contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1)
1041 | #' contribs
1042 | #' @export
1043 | channels_contributions<-function(gate,xp,gate_vector,level,beta=1){
1044 |     truce=gate_vector==level
1045 |     F_beta(subset_matrix_hg(gate,xp),truce,beta)-sapply(gate$active_channels,function(chan){
1046 |         subchans=setdiff(gate$active_channels,chan)
1047 |         gate$active_channels=subchans
1048 |         pred=subset_matrix_hg(gate,xp)
1049 |         F_beta(pred,truce,beta)
1050 |     })
1051 | }
1052 | 
1053 | #' @title boolmat
1054 | #' @description Convert an expression matrix and a gating strategy to a boolean matrix (whether each event is gated out by each channel)
1055 | #' @param gate A return from hypergate
1056 | #' @param xp Expression matrix as in the hypergate callxp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
1057 | #' @examples
1058 | #' data(Samusik_01_subset)
1059 | #' xp=Samusik_01_subset$xp_src
1060 | #' gate_vector=Samusik_01_subset$labels
1061 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
1062 | #' head(boolmat(hg,xp))
1063 | #' @export
1064 | boolmat<-function(gate,xp){
1065 |     chans=gate$active_channels
1066 |     if(length(chans)>0){
1067 |         cols=colnames(xp)
1068 |         signs=rep(0,2*ncol(xp))
1069 |         signs[2*which(paste(cols,"_min",sep="")%in%chans)-1]=1
1070 |         signs[2*which(paste(cols,"_max",sep="")%in%chans)]=-1
1071 |         ui=matrix(nrow=2*ncol(xp),ncol=ncol(xp),data=0)
1072 |         for(i in 1:length(signs)){
1073 |             ui[i,ceiling(i/2)]=signs[i]
1074 |         }
1075 |         ci=tail(gate$pars.history,1)[1,]*-signs
1076 |         ci=matrix(nrow=ncol(xp)*2,ncol=nrow(xp),data=rep(ci,nrow(xp)),byrow=FALSE)
1077 |         res=t((ui%*%t(xp)+ci)>=0)
1078 |         colnames(res)=colnames(gate$pars.history)
1079 |         return(res[,chans])
1080 |     }
1081 | }
1082 | 
1083 | #' @title reoptimize_strategy
1084 | #' @description Optimize a gating strategy given a manual selection of channels
1085 | #' @param gate A return from hypergate
1086 | #' @param channels_subset Character vector identifying the channels that will be retained (others are ignored). The form is e.g. c("CD4_min","CD8_max")
1087 | #' @param xp Expression matrix as in the hypergate call
1088 | #' @param gate_vector Categorical vector as in the hypergate call
1089 | #' @param level Level of gate_vector identifying the population of interest
1090 | #' @param beta Yield / purity trade-off
1091 | #' @param verbose Whether to print information about optimization status
1092 | #' @examples
1093 | #' data(Samusik_01_subset)
1094 | #' xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
1095 | #' gate_vector=Samusik_01_subset$labels
1096 | #' hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0)
1097 | #' contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1)
1098 | #' significant_channels=names(contribs)[contribs>=0.01]
1099 | #' hg_reoptimized=reoptimize_strategy(gate=hg,channels_subset=significant_channels,xp,gate_vector,23)
1100 | #' @export
1101 | reoptimize_strategy<-function(gate,channels_subset,xp,gate_vector,level,beta=1,verbose=FALSE){
1102 |     beta2=beta^2
1103 |     gate$active_channels=channels_subset
1104 |     if(is.null(rownames(xp))){
1105 |         rownames(xp)=1:nrow(xp)
1106 |     }
1107 |     r=setNames(rownames(xp),1:nrow(xp))
1108 |     rownames(xp)=1:nrow(xp)
1109 |     
1110 |     xp_src=xp
1111 |     
1112 |     ##Sorting values so that cutoffs are very easy to compute. Rank = 1 is small, rank = n is big
1113 |     xp=apply(xp,2,rank,ties.method="min")
1114 |     rownames(xp)=1:nrow(xp)
1115 | 
1116 |     ##Each parameter is duplicated (for upper and lower cutoffs)
1117 |     ##n=nrow(xp)
1118 |     colnames=paste(rep(colnames(xp),each=2),c("min","max"),sep="_")
1119 |     
1120 |     xp.2=matrix(nrow=nrow(xp),ncol=2*ncol(xp),data=0,dimnames=list(rownames(xp),colnames))
1121 |     storage.mode(xp.2)="integer"
1122 | 
1123 |     ##For both mins (odd columns) and maxs (even columns), make sure than low rank = "extreme" values (likely to pop). We can then use min to select
1124 |     xp.2[,seq(1,ncol(xp.2),by=2)]=xp
1125 |     xp.2[,seq(2,ncol(xp.2),by=2)]=nrow(xp)-xp
1126 |     xp=xp.2
1127 |     rm(xp.2)
1128 | 
1129 |     b_src=matrix(nrow=nrow(xp),ncol=ncol(xp),data=TRUE,dimnames=dimnames(xp))
1130 |     b_src.tmp=boolmat(gate,xp_src)
1131 |     b_src[,colnames(b_src.tmp)]=b_src.tmp
1132 |     rm(b_src.tmp)
1133 |     
1134 |     truce=gate_vector==level
1135 |     xp_pos=xp[truce,,drop=FALSE] ##xp for positive elements
1136 |     b_pos=b_src[truce,,drop=FALSE]
1137 |     state_pos=rowSums(b_pos)==ncol(b_pos)
1138 |     
1139 |     xp_neg=xp[!truce,,drop=FALSE] ##xp for negative elements
1140 |     b_neg=b_src[!truce,,drop=FALSE]
1141 |     state_neg=rowSums(b_neg)==ncol(b_neg)
1142 | 
1143 |     
1144 |     ##Number of TN and TP to compute F
1145 |     TN=sum(!state_neg)
1146 |     TP=sum(state_pos)
1147 |     n=beta2*length(state_pos)+length(state_neg) ##This is only used in the function that computes F_beta.
1148 | 
1149 |     pars.history=gate$pars.history
1150 |     pars.history.tmp=tail(pars.history,1)
1151 |     pars.history.tmp[,setdiff(colnames(pars.history.tmp),gate$active_channels)]=pars.history[1,setdiff(colnames(pars.history.tmp),gate$active_channels)]
1152 |     pars.history=pars.history.tmp
1153 |     rm(pars.history.tmp)
1154 | 
1155 |     pars.history.rank=gate$pars.history.rank
1156 |     pars.history.rank.tmp=tail(pars.history.rank,1)
1157 |     pars.history.rank.tmp[,setdiff(colnames(pars.history.rank.tmp),gate$active_channels)]=pars.history.rank[1,setdiff(colnames(pars.history.rank.tmp),gate$active_channels)]
1158 |     pars.history.rank=pars.history.rank.tmp
1159 |     rm(pars.history.rank.tmp)
1160 |     storage.mode(pars.history.rank)="integer"
1161 | 
1162 |     pars=pars.history.rank[1,]    
1163 |     active_channels=gate$active_channels
1164 | 
1165 |     ##Optimization loop   
1166 |     ##f value
1167 |     f_current=f(TP,TN,n,beta2)
1168 |     f_res=f_current
1169 |     
1170 |     hg.env=environment()
1171 |     loop.env=environment()
1172 | 
1173 |     flag_expansion=TRUE
1174 |     flag_contraction=TRUE
1175 |     while(flag_expansion|flag_contraction){
1176 |         ##EXPANSION
1177 |         flag_recycling=FALSE
1178 |         flag_expansion=length(state_pos)>TP
1179 |         while(flag_expansion){
1180 |             ##Expanding only concerns events that are currently !state. Events for which "state" is positive are guaranteed to stay "state==TRUE".
1181 |             ##FNTN_matrix tries to create expansion that match a given event, and flag !state events that will be included along with it
1182 |             FN=length(state_pos)-TP
1183 |             if(!flag_recycling){
1184 |                 FNTN_matrix=fill_FNTN_matrix(
1185 |                     loop.env$hg.env$xp_pos[!state_pos,active_channels,drop=FALSE],
1186 |                     loop.env$hg.env$xp_neg[!state_neg,active_channels,drop=FALSE],
1187 |                     b_pos[!state_pos,active_channels,drop=FALSE],
1188 |                     b_neg[!state_neg,active_channels,drop=FALSE],
1189 |                     pars[active_channels]
1190 |                 )
1191 |             } else {
1192 |                 FNTN_matrix=expansion$FNTN_matrix
1193 |             }
1194 |             expansions=expand(envir=loop.env,n=hg.env$n,beta=hg.env$beta2)
1195 |             if(expansions$newF>f_current){
1196 |                 which_expansion=expansions$which_expansion
1197 |                 expansion=expand.update(expansions,envir=loop.env,xp_pos=loop.env$hg.env$xp_pos,xp_neg=loop.env$hg.env$xp_neg)
1198 |                 FN=length(state_pos)-TP
1199 |                 flag_expansion=FN>0 ##If the expansion included all positive events, cannot try to expand more.
1200 |                 sapply(names(expansion),function(x)assign(x,expansion[[x]],envir=loop.env))
1201 |                 f_current=expansions$newF
1202 |                 f_res=c(f_res,f_current)
1203 |                 pars.history.rank=rbind(pars.history.rank,pars)
1204 |                 
1205 |                 if(hg.env$verbose){
1206 |                     print(paste("Expansion of",paste(names(expansion$par_best),":",expansion$par_best,", ",collapse=""),"F=",signif(f_current,4)))
1207 |                 }
1208 |                 flag_recycling=TRUE
1209 |             } else {
1210 |                 flag_expansion=FALSE
1211 |             }
1212 |         }
1213 | 
1214 |         ##CONTRACTION
1215 |         contractions=sapply(active_channels,contract,simplify=FALSE,envir=loop.env,xp_pos=hg.env$xp_pos,xp_neg=hg.env$xp_neg,n=hg.env$n,beta=hg.env$beta2)
1216 |         best_contraction=which.max(sapply(contractions,function(x)x$newF))
1217 |         if(contractions[[best_contraction]]$newF>f_current){
1218 |             contraction=contract.update(contractions[[best_contraction]],envir=loop.env,xp_pos=hg.env$xp_pos,xp_neg=hg.env$xp_neg)
1219 |             sapply(names(contraction),function(x)assign(x,contraction[[x]],envir=loop.env))
1220 |             f_current=contractions[[best_contraction]]$newF
1221 |             if(hg.env$verbose){
1222 |                 print(paste("Contracting ",names(contractions[[best_contraction]]$par_best),":",contractions[[best_contraction]]$par_best,",F=",signif(f_current,4L)))
1223 |             }
1224 |             f_res=c(f_res,f_current)
1225 |             pars.history.rank=rbind(pars.history.rank,pars)
1226 |             flag_contraction=TRUE
1227 |         } else {
1228 |             flag_contraction=FALSE
1229 |         }
1230 |     }
1231 | 
1232 |     ##RETURN
1233 |     pars.history=pars.history.rank
1234 |     pars.history=matrix(nrow=nrow(pars.history.rank),ncol=ncol(pars.history.rank),data=NA,dimnames=dimnames(pars.history.rank))
1235 |     for(j in 1:ncol(pars.history)){
1236 |         pars.history[,j]=xp_src[match(pars.history.rank[,j],xp[,j]),j%%2+j%/%2]
1237 |     }
1238 |     return(list(pars.history=pars.history,pars.history.rank=pars.history.rank,f=f_res,active_channels=active_channels))
1239 | }
1240 | 
1241 | #' @title F_beta
1242 | #' @description Compute a F_beta score comparing two boolean vectors
1243 | #' @param pred boolean vector of predicted values
1244 | #' @param truth boolean vector of true values
1245 | #' @param beta Weighting of yield as compared to precision. Increase beta so that the optimization favors yield, or decrease to favor purity.
1246 | #' @examples
1247 | #' data(Samusik_01_subset)
1248 | #' truth=c(rep(TRUE,40),rep(FALSE,60))
1249 | #' pred=rep(c(TRUE,FALSE),50)
1250 | #' table(pred,truth) ##40% purity, 50% yield
1251 | #' #' F_beta(pred=pred,truth=truth,beta=2) ##Closer to yield
1252 | #' F_beta(pred=pred,truth=truth,beta=1.5) ##Closer to yield
1253 | #' F_beta(pred=pred,truth=truth,beta=1) ##Harmonic mean
1254 | #' F_beta(pred=pred,truth=truth,beta=0.75) ##Closer to purity
1255 | #' F_beta(pred=pred,truth=truth,beta=0.5) ##Closer to purity
1256 | #' @export
1257 | 
1258 | F_beta=function(pred,truth,beta=1){
1259 |     TP=sum(truth&pred)
1260 |     if(TP==0){
1261 |         return(0)
1262 |     }
1263 |     FP=sum(!truth&pred)
1264 |     FN=sum(truth&!pred)
1265 |     yield=TP/(TP+FN) #aka recall
1266 |     purity=TP/(TP+FP) #aka precision
1267 |     if(is.nan(purity)|is.nan(yield)){
1268 |         return(0)
1269 |     }
1270 |     F=(1+beta^2)*(yield*purity)/(yield+beta^2*purity)
1271 |     F
1272 | }
1273 | 
1274 | #' @title gate_from_biplot
1275 | #' @description From a biplot let the user interactively draw polygons to create a "Gate" vector
1276 | #' @param matrix A matrix
1277 | #' @param x_axis character, colname of matrix used for x-axis in the biplot
1278 | #' @param y_axis character, colname of matrix used for y-axis in the biplot
1279 | #' @param sample Used to downsample the data in case there are too many events to plot quickly
1280 | #' @param bty passed to plot
1281 | #' @param cex passed to plot
1282 | #' @param pch passed to plot
1283 | #' @param ... passed to plot
1284 | #' @examples
1285 | #' if(interactive()){
1286 | #'     ##See the details section to see how this function works
1287 | #'     gate_from_biplot(matrix=Samusik_01_subset$tsne,x_axis="tSNE1",y_axis="tSNE2")
1288 | #' }
1289 | #' @export
1290 | #' @return A named vector of length nrow(matrix) and names rownames(matrix). Ungated events are set to NA
1291 | #' @details Data will be displayed as a bi-plot according to user-specified x_axis and y_axis arguments, then a call to locator() is made. The user can draw a polygon around parts of the plot that need gating. When done, 'right-click' or 'escape' (depending on the IDE) escapes locator() and closes the polygon. Then the user can press "n" to draw another polygon (that will define a new population), "c" to cancell and draw the last polygon again, or "s" to exit. When exiting, events that do not fall within any polygon are assigned NA, the others are assigned an integer value corresponding to the last polygon they lie into.
1292 | 
1293 | gate_from_biplot<-function(matrix,x_axis,y_axis,...,bty="l",pch=16,cex=0.5,sample=NULL)
1294 | {
1295 |     xp=matrix[,c(x_axis,y_axis)]
1296 |     if(!is.null(sample)){
1297 |         s=sort(sample(1:nrow(xp),sample))
1298 |     } else {
1299 |         s=1:nrow(xp)
1300 |     }
1301 | 
1302 |     gate_updated=rep(0,nrow(xp))
1303 |     color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,...)
1304 | 
1305 |     input.message=" n : new gate, c : redo last, s : stop gating. "
1306 |     cat("\nPlease use the mouse pointer to draw")
1307 |     polygons=list()
1308 |     i=0
1309 |     u="n"
1310 |     while(u!="s"){
1311 |         if(!u%in%c("n","s","c")){
1312 |             u=readline(paste("Incorrect input.",input.message,sep=""))
1313 |         }
1314 |         if(u=="n"){
1315 |             gate=gate_updated
1316 |             i=i+1
1317 |             col=setNames(c("black",rainbow(i)),0:i)
1318 | 
1319 |             new.pol=en.locator()
1320 | 
1321 |             new.pol=polygon.clean(new.pol)
1322 |             polygons=c(polygons,list(new.pol))
1323 |             gate_updated=update_gate(xp,polygons[[i]],gate,i)
1324 | 
1325 |             color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,colors=col,...)
1326 | 
1327 |         }
1328 |         if(u=="c"){
1329 |             gate_updated=gate
1330 |             color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,colors=col,...)
1331 |             new.pol=en.locator()
1332 |             new.pol=polygon.clean(new.pol)
1333 |             polygons[[i]]=new.pol
1334 |             gate_updated=update_gate(xp,polygons[[i]],gate_updated,i)
1335 |             color_biplot_by_discrete(xp[s,],gate_updated[s],bty=bty,pch=pch,cex=cex,colors=col,...)
1336 |         }
1337 |         u=readline(paste("Input ?",input.message,"\n",sep=""))
1338 |     }
1339 |     
1340 |     gate=gate_updated
1341 | 
1342 |     gate[apply(xp,1,function(x)any(is.na(x)))]=NA
1343 | 
1344 |     setNames(gate,rownames(matrix))
1345 | }
1346 | 
1347 | #' Colors a biplot according to a vector with discrete values
1348 | #' @param matrix a two columns matrix
1349 | #' @param discrete_vector a vector of size nrow(matrix)
1350 | #' @param colors Palette to used named after the unique elements of discrete_vector. Generated from rainbow() if missing.
1351 | #' @param pch passed to plot
1352 | #' @param cex passed to plot
1353 | #' @param bty passed to plot
1354 | #' @param ... passed to plot
1355 | #' @examples
1356 | #' data(Samusik_01_subset)
1357 | #' levels=unique(sort(Samusik_01_subset$labels))
1358 | #' colors=setNames(colorRampPalette(palette())(length(levels)),sort(levels))
1359 | #' with(Samusik_01_subset,color_biplot_by_discrete(matrix=tsne,discrete_vector=labels,colors=colors))
1360 | #' @export
1361 | 
1362 | color_biplot_by_discrete<-function(matrix,discrete_vector,...,bty="l",pch=16,cex=0.5,colors=NULL){
1363 |     levels=unique(discrete_vector)
1364 |     if(missing(colors)){
1365 |         colors=setNames(c("black",rainbow(length(levels)-1)),levels)
1366 |     }
1367 |     plot(matrix,bty=bty,pch=pch,cex=cex,col=colors[as.character(discrete_vector)],...)
1368 | }
1369 | 
1370 | #' Wrapper to locator that plots segments on the fly
1371 | en.locator<-function(){
1372 |     input=TRUE
1373 |     x=vector()
1374 |     y=vector()
1375 |     while(!is.null(input)){
1376 |         input=locator(1)
1377 |         x=c(x,input$x)
1378 |         y=c(y,input$y)
1379 | 
1380 |         if(length(x)>1){
1381 |             segments(x0=x[length(x)-1],x1=x[length(x)],y0=y[length(y)-1],y1=y[length(y)],lty=2)
1382 |         }
1383 |     }
1384 |     segments(x0=x[1],x1=x[length(x)],y0=y[1],y1=y[length(y)],lty=2)
1385 |     list(x=x,y=y)
1386 | }
1387 | 
1388 | #' Remove self intersection in polygons
1389 | #' @param poly a polygon (list with two components x and y which are equal-length numerical vectors)
1390 | #' @return A polygon without overlapping edges and new vertices corresponding to non-inner points of intersection
1391 | 
1392 | polygon.clean<-function(poly){
1393 |     if(requireNamespace("sf", quietly = TRUE)) {
1394 |         ## Create a data frame from the input coordinates
1395 |         coords_df = data.frame(x = poly$x, y = poly$y)
1396 | 
1397 |         ## Close polygon
1398 |         coords_df = rbind(coords_df, coords_df[1, ])
1399 |         
1400 |         ## Convert to an sf object
1401 |         coords_sf = sf::st_as_sf(coords_df, coords = c("x", "y"))
1402 |         
1403 |         ## Convert points to line
1404 |         line = sf::st_cast(sf::st_combine(coords_sf), "MULTILINESTRING")
1405 |         line = sf::st_node(line)
1406 |         
1407 |         ## Convert line to polygon
1408 |         polygon = sf::st_polygonize(line)
1409 | 
1410 |         ## Simplify and clean the polygon
1411 |         ## Note: st_union is used here for a cleaning effect similar to gUnaryUnion
1412 |         cleaned_polygon = sf::st_union(polygon, by_feature = TRUE)
1413 | 
1414 |         ## Extract coordinates
1415 |         coords = sf::st_coordinates(sf::st_cast(cleaned_polygon, "POLYGON"))
1416 |         x = coords[, 1]
1417 |         y = coords[, 2]
1418 | 
1419 |         ## Return the cleaned coordinates
1420 |         return(list(x = x, y = y))
1421 | 
1422 |     } else {
1423 |         return(poly)
1424 |     }
1425 | }
1426 | 
1427 | #' Updates a gate vector
1428 | #' @param xp A two colums matrix
1429 | #' @param polygon A list with two components x and y of equal lenghts and numeric values
1430 | #' @param gate_vector a vector of length nrow(xp) with integer values
1431 | #' @param value The number that will be assigned to gate_vector, corresponding to points that lie in the polygon
1432 | #' @return The updated gate_vector
1433 | 
1434 | update_gate=function(xp,polygon,gate_vector=rep(0,nrow(xp)),value=1){
1435 |     if(requireNamespace("sp",quietly=FALSE)){
1436 |         gate_vector[sp::point.in.polygon(xp[,1],xp[,2],polygon$x,polygon$y)!=0]=value
1437 |     }
1438 |     gate_vector
1439 | }
1440 | 
1441 | #' 2000 events randomly sampled from the 'Samusik_01' dataset
1442 | #' @name Samusik_01_subset
1443 | #' @docType data
1444 | #' @format list with four elements: fs_src (a flowSet), xp_src (its expression matrix), labels (manual gates of the events) and tsne (a tSNE projection of the dataset)
1445 | #' @references https://flowrepository.org/id/FR-FCM-ZZPH
1446 | "Samusik_01_subset"
1447 | 
1448 | #' @importFrom grDevices col2rgb
1449 | NULL
1450 | #' @importFrom grDevices dev.off
1451 | NULL
1452 | #' @importFrom grDevices png
1453 | NULL
1454 | #' @importFrom grDevices rainbow
1455 | NULL
1456 | #' @importFrom grDevices rgb
1457 | NULL
1458 | #' @importFrom graphics abline
1459 | NULL
1460 | #' @importFrom graphics locator
1461 | NULL
1462 | #' @importFrom graphics plot
1463 | NULL
1464 | #' @importFrom graphics segments
1465 | NULL
1466 | #' @importFrom graphics text
1467 | NULL
1468 | #' @importFrom graphics title
1469 | NULL
1470 | #' @importFrom stats setNames
1471 | NULL
1472 | #' @importFrom utils tail
1473 | NULL
1474 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | This Vignette will walk you through the usage of the Hypergate R package.
  7 | 
  8 | ## Package installation
  9 | Installing dependencies:
 10 | 
 11 | ```r
 12 | install.packages(c("sp", "polyclip", "rgeos"))
 13 | source("https://bioconductor.org/biocLite.R")
 14 | biocLite("flowCore")
 15 | ```
 16 | Installing the package from github:
 17 | 
 18 | ```r
 19 | install.packages("devtools")
 20 | library(devtools)
 21 | install_github(repo = "ebecht/hypergate")
 22 | ```
 23 | 
 24 | 
 25 | ```r
 26 | library(hypergate)
 27 | ```
 28 | 
 29 | ## Data loading
 30 | 
 31 | 
 32 | ```r
 33 | data(Samusik_01_subset, package = "hypergate")
 34 | ```
 35 | 
 36 | This loads 2000 datapoints randomly sampled from the *Samusik_01* dataset (available from FlowRepository accession number FR-FCM-ZZPH). This object is a list which includes as elements
 37 | 
 38 | 1. *fs_src* a flowSet with 1 flowFrame corresponding to the data subset
 39 | 
 40 | 2. *xp_src* a matrix corresponding to the expression of the data subset. Rownames correspond to event numbers in the unsampled dataset. Colnames correspond to protein targets (or other information e.g. events' manually-annotated labels)
 41 | 
 42 | 3. *labels* numeric vector encoding manually-annotated labels, with the value -1 for ungated events. The text labels for the gated populations are available from FlowRepostiry
 43 | 
 44 | 4. *regular_channels* A subset of colnames(Samusik_01_subset$xp_src) that corresponds to protein targets
 45 | 
 46 | 5. *tsne* A 2D-tSNE ran on the whole dataset and subsampled to 2000 events
 47 | 
 48 | ## Specifying the cell subset of interest
 49 | 
 50 | Hypergate requires in particular as its arguments
 51 | 
 52 | 1. an expression matrix (which we have as *Samusik_01_subset$xp_src*)
 53 | 
 54 | 2. a vector specifying which events to attempt to gate on. This section discusses ways to achieve this point
 55 | 
 56 | #### Selection from low-dimensional plot
 57 | We included in the package a function with a rudimentary (hopefully sufficient) interface that allows for the selection of a cell subset of interest from a 2D biplot by drawing a polygon around it using the mouse. Since this function is interactive we cannot execute it in this Vignette but an example call would be as such (feel free to try it):
 58 | 
 59 | 
 60 | ```r
 61 | g = gate_from_biplot(Samusik_01_subset$tsne, "tSNE1", "tSNE2")
 62 | ```
 63 | 
 64 | For this tutorial we define manually the polygon instead
 65 | 
 66 | 
 67 | ```r
 68 | x = c(12.54, 8.08, 7.12, 12.12, 17.32, 20.62, 21.04, 20.83, 18.07, 
 69 |     15.2)
 70 | y = c(-10.61, -14.76, -18.55, -20.33, -21.16, -19.74, -14.4, 
 71 |     -11.08, -10.02, -9.42)
 72 | pol = list(x = x, y = y)
 73 | library("sp")
 74 | gate_vector = sp::point.in.polygon(Samusik_01_subset$tsne[, 1], 
 75 |     Samusik_01_subset$tsne[, 2], pol$x, pol$y)
 76 | plot(Samusik_01_subset$tsne, pch = 16, cex = 0.5, col = ifelse(gate_vector == 
 77 |     1, "firebrick3", "lightsteelblue"))
 78 | polygon(pol, lty = 2)
 79 | ```
 80 | 
 81 | ![Manual selection of a cluster on a 2D t-SNE](vignettes/unnamed-chunk-7-1.png)
 82 | 
 83 | #### Clustering
 84 | 
 85 | Another option to define a cell cluster of interest is to use the output of a clustering algorithm. Popular options for cytometry include *FlowSOM* (available from Bioconductor) or *Phenograph* (available from the ```cytofkit``` package from Bioconductor). An example call for Rphenograph is below:
 86 | 
 87 | 
 88 | ```r
 89 | require(Rphenograph)
 90 | set.seed(5881215)
 91 | clustering = Rphenograph(Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels])
 92 | cluster_labels = membership(clustering[[2]])
 93 | ```
 94 | In this Vignette we use the simpler kmeans option instead:
 95 | 
 96 | 
 97 | ```r
 98 | set.seed(5881215)
 99 | cluster_labels = kmeans(Samusik_01_subset$tsne, 20, nstart = 100)$cluster
100 | ```
101 | 
102 | In this example we can see that the kmeans cluster *20* corresponds to the population we manually selected from the t-SNE biplot
103 | 
104 | ```r
105 | plot(Samusik_01_subset$tsne, col = ifelse(cluster_labels == 20, 
106 |     "firebrick3", "lightsteelblue"), pch = 16, cex = 0.5)
107 | ```
108 | 
109 | ![Selection of a cluster from a clustering algorithm output](vignettes/unnamed-chunk-10-1.png)
110 | 
111 | ## Running Hypergate
112 | 
113 | The function to optimize gating strategies is ```hypergate```. Its main arguments are ```xp``` (a numeric matrix encoding expression), ```gate_vector``` (a vector with few unique values), ```level``` (specificies what value of gate_vector to gate upon, i.e. events satisfying ```gate_vector==level``` will be gated in)
114 | 
115 | 
116 | ```r
117 | hg_output = hypergate(xp = Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels], 
118 |     gate_vector = gate_vector, level = 1, verbose = FALSE)
119 | ```
120 | 
121 | ## Interpreting and polishing the results
122 | 
123 | ### Gating datapoints
124 | 
125 | The following function allows to subset an expression matrix given a return from *Hypergate*. The new matrix needs to have the same column names as the original matrix.
126 | 
127 | 
128 | ```r
129 | gating_predicted = subset_matrix_hg(hg_output, Samusik_01_subset$xp_src[, 
130 |     Samusik_01_subset$regular_channels])
131 | ```
132 | 
133 | 
134 | ```r
135 | table(ifelse(gating_predicted, "Gated-in", "Gated-out"), ifelse(gate_vector == 
136 |     1, "Events of interest", "Others"))
137 | ```
138 | 
139 | 
140 | |          | Events of interest| Others|
141 | |:---------|------------------:|------:|
142 | |Gated-in  |                116|      0|
143 | |Gated-out |                 10|   1874|
144 | 
145 | Another option, which offers more low-level control, is to examine for each datapoint whether they pass the threshold for each parameter. The function to obtain such a boolean matrix is ```boolmat```. Here our gating strategy specifies *SiglecF+cKit-Ly6C-*. We would thus obtain a 3-columns x 2000 (the number of events) rows
146 | 
147 | 
148 | ```r
149 | bm = boolmat(gate = hg_output, xp = Samusik_01_subset$xp_src[, 
150 |     Samusik_01_subset$regular_channels])
151 | head(bm)
152 | ```
153 | 
154 | ```
155 | ##     SiglecF_min cKit_max Ly6C_max
156 | ## 20        FALSE     TRUE     TRUE
157 | ## 28        FALSE     TRUE     TRUE
158 | ## 70        FALSE    FALSE     TRUE
159 | ## 110        TRUE    FALSE    FALSE
160 | ## 120       FALSE     TRUE    FALSE
161 | ## 159       FALSE     TRUE    FALSE
162 | ```
163 | 
164 | 
165 | |                             | Events of interest| Others|
166 | |:----------------------------|------------------:|------:|
167 | |Gated-out because of SiglecF |                  9|   1829|
168 | |SiglecF above threshold      |                117|     45|
169 | 
170 | ### Examining the output
171 | 
172 | The following function will plot the output of Hypergate. Arguments are
173 | 
174 | 1. ```gate``` an object returned by Hypergate
175 | 
176 | 2. ```xp``` an expression matrix whose columns are named similarly as the ones used to create the ```gate``` object
177 | 
178 | 3. ```gate_vector``` and ```level``` to specify which events are "of interest"
179 | 
180 | 4. ```highlight``` a color that will be used to highlight the events of interest 
181 | 
182 | 
183 | ```r
184 | plot_gating_strategy(gate = hg_output, xp = Samusik_01_subset$xp_src[, 
185 |     Samusik_01_subset$regular_channels], gate_vector = gate_vector, 
186 |     level = 1, highlight = "firebrick3")
187 | ```
188 | 
189 | ![Gating strategy](vignettes/unnamed-chunk-17-1.png)![Gating strategy](vignettes/unnamed-chunk-17-2.png)
190 | 
191 | Another important point to consider is how the F$\beta$-score increases with each added channel. This gives an idea of how many channels are required to reach a close-to-optimal gating strategy.
192 | 
193 | This will identify at which steps the parameters were first activated and optimized:
194 | 
195 | ```r
196 | f_values_vs_number_of_parameters = c(F_beta(rep(TRUE, nrow(Samusik_01_subset$xp_src)), 
197 |     gate_vector == 1), hg_output$f[c(apply(hg_output$pars.history[, 
198 |     hg_output$active_channels], 2, function(x) min(which(x != 
199 |     x[1]))) - 1, nrow(hg_output$pars.history))][-1])
200 | barplot(rev(f_values_vs_number_of_parameters), names.arg = rev(c("Initialization", 
201 |     paste("+ ", sep = "", hg_output$active_channels))), las = 3, 
202 |     mar = c(10, 4, 1, 1), horiz = TRUE, xlab = "Cumulative F1-score")
203 | ```
204 | 
205 | ![F1-score obtained during optimization when adding parameters](vignettes/unnamed-chunk-18-1.png)
206 | 
207 | This graph tells us that the biggest increase is by far due to SiglecF+, while the lowest is due to Ly6C-.
208 | 
209 | ### Channels contributions
210 | 
211 | The previous graph only shows how the F-value evolved during optimization, but what we really want to know is how much each parameter contributes to the final output (sometimes a parameter will have a big impact at the early steps of the optimization but will become relatively unimportant towards the end, if multiple other parameters collectively account for most of its discriminatory power). We use the following function to assess this, which measures how much performances lower when a parameter is ignored. The more the performances lower, the more important the parameter is.
212 | 
213 | 
214 | ```r
215 | contributions = channels_contributions(gate = hg_output, xp = Samusik_01_subset$xp_src[, 
216 |     Samusik_01_subset$regular_channels], gate_vector = gate_vector, 
217 |     level = 1, beta = 1)
218 | barplot(contributions, las = 3, mar = c(10, 4, 1, 1), horiz = TRUE, 
219 |     xlab = "F1-score deterioration when the parameter is ignored")
220 | ```
221 | 
222 | ![Contribution of each parameter to the output](vignettes/unnamed-chunk-19-1.png)
223 | 
224 | ### Reoptimize strategy
225 | 
226 | Since Ly6C contributes very little, we may want to ignore it to obtain a shorter gating strategy. We could keep the current threshold values for the other parameters, but it is best to re-compute the other thresholds to account for the loss of some parameters.
227 | To do that we use the following function:
228 | 
229 | 
230 | ```r
231 | hg_output_polished = reoptimize_strategy(gate = hg_output, channels_subset = c("SiglecF_min", 
232 |     "cKit_max"), xp = Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels], 
233 |     gate_vector = gate_vector, level = 1)
234 | ```
235 | 
236 | Finally, we get to plot our final strategy:
237 | 
238 | 
239 | ```r
240 | plot_gating_strategy(gate = hg_output_polished, xp = Samusik_01_subset$xp_src[, 
241 |     Samusik_01_subset$regular_channels], gate_vector = gate_vector, 
242 |     level = 1, highlight = "firebrick3")
243 | ```
244 | 
245 | ![Final output](vignettes/unnamed-chunk-21-1.png)
246 | 
247 | ### Human-readable output
248 | 
249 | Thanks to a nice contribution for SamGG on github, there are three functions that make the outputs more readable:
250 | 
251 | 
252 | ```r
253 | hgate_pheno(hg_output)
254 | ```
255 | 
256 | ```
257 | ## [1] "SiglecF+, cKit-, Ly6C-"
258 | ```
259 | 
260 | ```r
261 | hgate_rule(hg_output)
262 | ```
263 | 
264 | ```
265 | ## [1] "SiglecF >= 2.21, cKit <= 1.77, Ly6C <= 2.98"
266 | ```
267 | 
268 | ```r
269 | hgate_info(hg_output)
270 | ```
271 | 
272 | ```
273 | ##             channels sign comp threshold
274 | ## SiglecF_min  SiglecF    +  >=   2.208221
275 | ## cKit_max        cKit    -  <=   1.770901
276 | ## Ly6C_max        Ly6C    -  <=   2.983523
277 | ```
278 | 
279 | ```r
280 | # Fscores can be retrieved when the same parameters given to
281 | # hypergate() are given to hgate_info():
282 | hg_out_info = hgate_info(hg_output, xp = Samusik_01_subset$xp_src[, 
283 |     Samusik_01_subset$regular_channels], gate_vector = gate_vector, 
284 |     level = 1)
285 | hg_out_info
286 | ```
287 | 
288 | ```
289 | ##             channels sign comp threshold     deltaF Fscore1D Fscore
290 | ## SiglecF_min  SiglecF    +  >=   2.208221 0.76351765   0.8125 0.8125
291 | ## cKit_max        cKit    -  <=   1.770901 0.07988981   0.1294 0.9360
292 | ## Ly6C_max        Ly6C    -  <=   2.983523 0.02267769   0.1809 0.9587
293 | ```
294 | 
295 | ```r
296 | # and formatted readily
297 | paste0(hg_out_info[, "Fscore"], collapse = ", ")
298 | ```
299 | 
300 | ```
301 | ## [1] "0.8125, 0.936, 0.9587"
302 | ```
303 | 
304 | ## Final notes
305 | 
306 | Some comments about potential questions on your own projects (raised by QBarbier):
307 | 
308 | ### Which channels to use as input?
309 | Anything that would be relevant for a gating strategy should be used as an input. So usually any phenotypic channel would be included. If you know that you would not use certain parameters on subsequent experiments (for instance if the staining is intracellular and you plan to sort a live population and thus cannot permeabilize your cells), you should exclude the corresponding channels. I usually do not use channels that were used in pre-gating steps (e.g. CD45 for immune cells). Finally, if you plan to use flow cytometry and use hypergate on a CyTOF dataset, you probably want to discard the Cell_length channel.
310 | 
311 | ### How big can the input matrix be?
312 | It depends on how much RAM your computer has. If that is an issue I suggest downsampling to (e.g.) 1000 positive cells and a corresponding number of negative cells. The `hgate_sample` function can help you achieve this:
313 | 
314 | 
315 | ```r
316 | set.seed(123)  ## Makes the subsampling reproducible
317 | gate_vector = Samusik_01_subset$labels
318 | subsample = hgate_sample(gate_vector = gate_vector, level = 5, 
319 |     size = 100)  ## Subsample 100 events from population #5 (Classical monocytes), and a corresponding number of negative events
320 | tab = table(ifelse(subsample, "In", "Out"), ifelse(Samusik_01_subset$labels == 
321 |     5, "Positive pop.", "Negative pop."))
322 | tab[1, ]/colSums(tab)  ## Fraction of subsampled events for positive and negative populations
323 | ```
324 | 
325 | ```
326 | ## Negative pop. Positive pop. 
327 | ##     0.3204976     0.3205128
328 | ```
329 | 
330 | ```r
331 | xp = Samusik_01_subset$xp_src[, Samusik_01_subset$regular_channels]
332 | hg = hypergate(xp = xp[subsample, ], gate_vector = gate_vector[subsample], 
333 |     level = 5)  ## Runs hypergate on a subsample of the input matrix
334 | gating_heldout = subset_matrix_hg(hg, xp[!subsample, ])  ## Applies the gate to the held-out data
335 | table(ifelse(gating_heldout, "Gated in", "Gated out"), ifelse(Samusik_01_subset$labels[!subsample] == 
336 |     5, "Positive pop.", "Negative pop."))
337 | ```
338 | 
339 | ```
340 | ##            
341 | ##             Negative pop. Positive pop.
342 | ##   Gated in             37           192
343 | ##   Gated out          1110            20
344 | ```
345 | 


--------------------------------------------------------------------------------
/data/Samusik_01_subset.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/data/Samusik_01_subset.RData


--------------------------------------------------------------------------------
/figure/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/figure/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/figure/unnamed-chunk-17-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-17-2.png


--------------------------------------------------------------------------------
/figure/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/figure/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/figure/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/figure/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/figure/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/man/FNTN_matrix.recycle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{FNTN_matrix.recycle}
 4 | \alias{FNTN_matrix.recycle}
 5 | \title{FNTN_matrix.recycle}
 6 | \usage{
 7 | FNTN_matrix.recycle(
 8 |   FNTN_matrix,
 9 |   B_FN_old,
10 |   B_TN_old,
11 |   B_FN_new,
12 |   B_TN_new,
13 |   xp_FN,
14 |   xp_TN,
15 |   par
16 | )
17 | }
18 | \arguments{
19 | \item{FNTN_matrix}{Expansion matrix to recycle}
20 | 
21 | \item{B_FN_old}{Boolean matrix of FN events before the last expansion}
22 | 
23 | \item{B_TN_old}{Boolean matrix of TN events before the last expansion}
24 | 
25 | \item{B_FN_new}{Boolean matrix of FN events after the last expansion}
26 | 
27 | \item{B_TN_new}{Boolean matrix of TN events after the last expansion}
28 | 
29 | \item{xp_FN}{Expression matrix of False Negative events}
30 | 
31 | \item{xp_TN}{Expression matrix of True Negative events}
32 | 
33 | \item{par}{Current hyper-rectangle parametrization}
34 | }
35 | \description{
36 | Recycle an expansion matrix
37 | }
38 | 


--------------------------------------------------------------------------------
/man/F_beta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{F_beta}
 4 | \alias{F_beta}
 5 | \title{F_beta}
 6 | \usage{
 7 | F_beta(pred, truth, beta = 1)
 8 | }
 9 | \arguments{
10 | \item{pred}{boolean vector of predicted values}
11 | 
12 | \item{truth}{boolean vector of true values}
13 | 
14 | \item{beta}{Weighting of yield as compared to precision. Increase beta so that the optimization favors yield, or decrease to favor purity.}
15 | }
16 | \description{
17 | Compute a F_beta score comparing two boolean vectors
18 | }
19 | \examples{
20 | data(Samusik_01_subset)
21 | truth=c(rep(TRUE,40),rep(FALSE,60))
22 | pred=rep(c(TRUE,FALSE),50)
23 | table(pred,truth) ##40\% purity, 50\% yield
24 | #' F_beta(pred=pred,truth=truth,beta=2) ##Closer to yield
25 | F_beta(pred=pred,truth=truth,beta=1.5) ##Closer to yield
26 | F_beta(pred=pred,truth=truth,beta=1) ##Harmonic mean
27 | F_beta(pred=pred,truth=truth,beta=0.75) ##Closer to purity
28 | F_beta(pred=pred,truth=truth,beta=0.5) ##Closer to purity
29 | }
30 | 


--------------------------------------------------------------------------------
/man/Samusik_01_subset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \docType{data}
 4 | \name{Samusik_01_subset}
 5 | \alias{Samusik_01_subset}
 6 | \title{2000 events randomly sampled from the 'Samusik_01' dataset}
 7 | \format{
 8 | list with four elements: fs_src (a flowSet), xp_src (its expression matrix), labels (manual gates of the events) and tsne (a tSNE projection of the dataset)
 9 | }
10 | \usage{
11 | Samusik_01_subset
12 | }
13 | \description{
14 | 2000 events randomly sampled from the 'Samusik_01' dataset
15 | }
16 | \references{
17 | https://flowrepository.org/id/FR-FCM-ZZPH
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/boolmat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{boolmat}
 4 | \alias{boolmat}
 5 | \title{boolmat}
 6 | \usage{
 7 | boolmat(gate, xp)
 8 | }
 9 | \arguments{
10 | \item{gate}{A return from hypergate}
11 | 
12 | \item{xp}{Expression matrix as in the hypergate callxp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]}
13 | }
14 | \description{
15 | Convert an expression matrix and a gating strategy to a boolean matrix (whether each event is gated out by each channel)
16 | }
17 | \examples{
18 | data(Samusik_01_subset)
19 | xp=Samusik_01_subset$xp_src
20 | gate_vector=Samusik_01_subset$labels
21 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
22 | head(boolmat(hg,xp))
23 | }
24 | 


--------------------------------------------------------------------------------
/man/channels_contributions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{channels_contributions}
 4 | \alias{channels_contributions}
 5 | \title{channels_contributions}
 6 | \usage{
 7 | channels_contributions(gate, xp, gate_vector, level, beta = 1)
 8 | }
 9 | \arguments{
10 | \item{gate}{A return from hypergate}
11 | 
12 | \item{xp}{Expression matrix as in the hypergate call}
13 | 
14 | \item{gate_vector}{Categorical vector of length nrow(xp)}
15 | 
16 | \item{level}{A level of gate_vector that identifies the population of interest}
17 | 
18 | \item{beta, }{should be the same as for the hypergate object}
19 | }
20 | \description{
21 | Gives scores for the contribution of individual channels to a gating strategy
22 | }
23 | \examples{
24 | data(Samusik_01_subset)
25 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
26 | gate_vector=Samusik_01_subset$labels
27 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0)
28 | contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1)
29 | contribs
30 | }
31 | 


--------------------------------------------------------------------------------
/man/color_biplot_by_discrete.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{color_biplot_by_discrete}
 4 | \alias{color_biplot_by_discrete}
 5 | \title{Colors a biplot according to a vector with discrete values}
 6 | \usage{
 7 | color_biplot_by_discrete(
 8 |   matrix,
 9 |   discrete_vector,
10 |   ...,
11 |   bty = "l",
12 |   pch = 16,
13 |   cex = 0.5,
14 |   colors = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{matrix}{a two columns matrix}
19 | 
20 | \item{discrete_vector}{a vector of size nrow(matrix)}
21 | 
22 | \item{...}{passed to plot}
23 | 
24 | \item{bty}{passed to plot}
25 | 
26 | \item{pch}{passed to plot}
27 | 
28 | \item{cex}{passed to plot}
29 | 
30 | \item{colors}{Palette to used named after the unique elements of discrete_vector. Generated from rainbow() if missing.}
31 | }
32 | \description{
33 | Colors a biplot according to a vector with discrete values
34 | }
35 | \examples{
36 | data(Samusik_01_subset)
37 | levels=unique(sort(Samusik_01_subset$labels))
38 | colors=setNames(colorRampPalette(palette())(length(levels)),sort(levels))
39 | with(Samusik_01_subset,color_biplot_by_discrete(matrix=tsne,discrete_vector=labels,colors=colors))
40 | }
41 | 


--------------------------------------------------------------------------------
/man/contract.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{contract}
 4 | \alias{contract}
 5 | \title{contract}
 6 | \usage{
 7 | contract(
 8 |   par = par,
 9 |   xp_pos = envir$xp_pos,
10 |   state_pos = envir$state_pos,
11 |   xp_neg = envir$xp_neg,
12 |   state_neg = envir$state_neg,
13 |   n = envir$n,
14 |   TP = envir$TP,
15 |   TN = envir$TN,
16 |   beta = envir$beta2,
17 |   envir = parent.frame()
18 | )
19 | }
20 | \arguments{
21 | \item{par}{Current parametrization of the hyperrectangle}
22 | 
23 | \item{xp_pos}{Expression matrix for positive events}
24 | 
25 | \item{state_pos}{State vector of the positive events}
26 | 
27 | \item{xp_neg}{Expression matrix for negative events}
28 | 
29 | \item{state_neg}{State vector of the negative events}
30 | 
31 | \item{n}{passed to f}
32 | 
33 | \item{TP}{integer: current number of TP}
34 | 
35 | \item{TN}{integer: current number of TN}
36 | 
37 | \item{beta}{Passed from the top-level function}
38 | 
39 | \item{envir}{Current environment of the optimization}
40 | }
41 | \description{
42 | Test (some) possible contractions of the hyperrectangle
43 | }
44 | 


--------------------------------------------------------------------------------
/man/contract.update.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{contract.update}
 4 | \alias{contract.update}
 5 | \title{contract.update}
 6 | \usage{
 7 | contract.update(
 8 |   contract_object,
 9 |   pars = envir$pars,
10 |   active_channels = envir$active_channels,
11 |   b_pos = envir$b_pos,
12 |   b_neg = envir$b_neg,
13 |   state_pos = envir$state_pos,
14 |   state_neg = envir$state_neg,
15 |   TN = envir$TN,
16 |   TP = envir$TP,
17 |   xp_pos = envir$xp_pos,
18 |   xp_neg = envir$xp_neg,
19 |   envir = parent.frame()
20 | )
21 | }
22 | \arguments{
23 | \item{contract_object}{output of the contract function}
24 | 
25 | \item{pars}{Current parametrization of the hyperrectangle}
26 | 
27 | \item{active_channels}{vector of currently-used parameters}
28 | 
29 | \item{b_pos}{boolean matrix of positive events}
30 | 
31 | \item{b_neg}{boolean matrix of negative events}
32 | 
33 | \item{state_pos}{State vector of the positive events}
34 | 
35 | \item{state_neg}{State vector of the negative events}
36 | 
37 | \item{TN}{integer: current number of TN}
38 | 
39 | \item{TP}{integer: current number of TP}
40 | 
41 | \item{xp_pos}{Expression matrix for positive events}
42 | 
43 | \item{xp_neg}{Expression matrix for negative events}
44 | 
45 | \item{envir}{Current environment of the optimization}
46 | }
47 | \description{
48 | Update the hyperrectangle to the best contraction move found
49 | }
50 | 


--------------------------------------------------------------------------------
/man/coreloop.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{coreloop}
 4 | \alias{coreloop}
 5 | \title{coreloop}
 6 | \usage{
 7 | coreloop(par, hg.env = hg.env$hg.env)
 8 | }
 9 | \arguments{
10 | \item{par}{Current parametrization of the hyperrectangle}
11 | 
12 | \item{hg.env}{Environment where the main execution of hypergate takes place}
13 | }
14 | \description{
15 | Core optimization loop of hypergate
16 | }
17 | 


--------------------------------------------------------------------------------
/man/en.locator.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{en.locator}
 4 | \alias{en.locator}
 5 | \title{Wrapper to locator that plots segments on the fly}
 6 | \usage{
 7 | en.locator()
 8 | }
 9 | \description{
10 | Wrapper to locator that plots segments on the fly
11 | }
12 | 


--------------------------------------------------------------------------------
/man/expand.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{expand}
 4 | \alias{expand}
 5 | \title{expand}
 6 | \usage{
 7 | expand(
 8 |   FN = envir$FN,
 9 |   FNTN_matrix = envir$FNTN_matrix,
10 |   TP = envir$TP,
11 |   TN = envir$TN,
12 |   n = envir$n,
13 |   beta = envir$beta2,
14 |   envir = parent.frame()
15 | )
16 | }
17 | \arguments{
18 | \item{FN}{integer: current number of FP}
19 | 
20 | \item{FNTN_matrix}{Boolean matrix of dim (FN, FN + TN), where Mij is TRUE if and only if expanding to include the ith FN in the gate would lead to the inclusion of the jth column event}
21 | 
22 | \item{TP}{integer: current number of TP}
23 | 
24 | \item{TN}{integer: current number of TN}
25 | 
26 | \item{n}{passed to f}
27 | 
28 | \item{beta}{Passed from the top-level function}
29 | 
30 | \item{envir}{Coreloop environment}
31 | }
32 | \description{
33 | Test (some) possible expansions of the hyperrectangle
34 | }
35 | 


--------------------------------------------------------------------------------
/man/expand.update.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{expand.update}
 4 | \alias{expand.update}
 5 | \title{expand.update}
 6 | \usage{
 7 | expand.update(
 8 |   expand.object,
 9 |   pars = envir$pars,
10 |   xp_pos = envir$xp_pos,
11 |   xp_neg = envir$xp_neg,
12 |   state_pos = envir$state_pos,
13 |   state_neg = envir$state_neg,
14 |   b_pos = envir$b_pos,
15 |   b_neg = envir$b_neg,
16 |   n = envir$n,
17 |   TP = envir$TP,
18 |   TN = envir$TN,
19 |   envir = parent.frame()
20 | )
21 | }
22 | \arguments{
23 | \item{expand.object}{output of the expand function}
24 | 
25 | \item{pars}{Current parametrization of the hyperrectangle}
26 | 
27 | \item{xp_pos}{Expression matrix for positive events}
28 | 
29 | \item{xp_neg}{Expression matrix for negative events}
30 | 
31 | \item{state_pos}{State vector of the positive events}
32 | 
33 | \item{state_neg}{State vector of the negative events}
34 | 
35 | \item{b_pos}{boolean matrix of positive events}
36 | 
37 | \item{b_neg}{boolean matrix of negative events}
38 | 
39 | \item{n}{passed to f}
40 | 
41 | \item{TP}{integer: current number of TP}
42 | 
43 | \item{TN}{integer: current number of TN}
44 | 
45 | \item{envir}{Current environment of the optimization}
46 | }
47 | \description{
48 | Update the hyperrectangle to the best expansion move found
49 | }
50 | 


--------------------------------------------------------------------------------
/man/f.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{f}
 4 | \alias{f}
 5 | \title{f}
 6 | \usage{
 7 | f(TP, TN, n, beta2 = 1)
 8 | }
 9 | \arguments{
10 | \item{TP}{Number of true positive events}
11 | 
12 | \item{TN}{Number of true negative events}
13 | 
14 | \item{n}{beta^2*(TP+FN)+TN+FP}
15 | 
16 | \item{beta2}{squared-beta to weight precision (low beta) or recall (high beta) more}
17 | }
18 | \description{
19 | Computes the F_beta score given an intenger number of True Positives (TP), True Negatives (TN). It is optimized for speed and n is thus not the total number of events
20 | }
21 | 


--------------------------------------------------------------------------------
/man/fill_FNTN_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{fill_FNTN_matrix}
 4 | \alias{fill_FNTN_matrix}
 5 | \title{fill_FNTN_matrix}
 6 | \usage{
 7 | fill_FNTN_matrix(xp_FN, xp_TN, B_FN, B_TN, par)
 8 | }
 9 | \arguments{
10 | \item{xp_FN}{Expression matrix of False Negative events}
11 | 
12 | \item{xp_TN}{Expression matrix of True Negative events}
13 | 
14 | \item{B_FN}{Boolean matrix of FN events}
15 | 
16 | \item{B_TN}{Boolean matrix of TN events}
17 | 
18 | \item{par}{Current hyper-rectangle parametrization}
19 | }
20 | \description{
21 | fill_FNTN_matrix Used for assessing whether an expansion move is possible
22 | }
23 | 


--------------------------------------------------------------------------------
/man/gate_from_biplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{gate_from_biplot}
 4 | \alias{gate_from_biplot}
 5 | \title{gate_from_biplot}
 6 | \usage{
 7 | gate_from_biplot(
 8 |   matrix,
 9 |   x_axis,
10 |   y_axis,
11 |   ...,
12 |   bty = "l",
13 |   pch = 16,
14 |   cex = 0.5,
15 |   sample = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{matrix}{A matrix}
20 | 
21 | \item{x_axis}{character, colname of matrix used for x-axis in the biplot}
22 | 
23 | \item{y_axis}{character, colname of matrix used for y-axis in the biplot}
24 | 
25 | \item{...}{passed to plot}
26 | 
27 | \item{bty}{passed to plot}
28 | 
29 | \item{pch}{passed to plot}
30 | 
31 | \item{cex}{passed to plot}
32 | 
33 | \item{sample}{Used to downsample the data in case there are too many events to plot quickly}
34 | }
35 | \value{
36 | A named vector of length nrow(matrix) and names rownames(matrix). Ungated events are set to NA
37 | }
38 | \description{
39 | From a biplot let the user interactively draw polygons to create a "Gate" vector
40 | }
41 | \details{
42 | Data will be displayed as a bi-plot according to user-specified x_axis and y_axis arguments, then a call to locator() is made. The user can draw a polygon around parts of the plot that need gating. When done, 'right-click' or 'escape' (depending on the IDE) escapes locator() and closes the polygon. Then the user can press "n" to draw another polygon (that will define a new population), "c" to cancell and draw the last polygon again, or "s" to exit. When exiting, events that do not fall within any polygon are assigned NA, the others are assigned an integer value corresponding to the last polygon they lie into.
43 | }
44 | \examples{
45 | if(interactive()){
46 |     ##See the details section to see how this function works
47 |     gate_from_biplot(matrix=Samusik_01_subset$tsne,x_axis="tSNE1",y_axis="tSNE2")
48 | }
49 | }
50 | 


--------------------------------------------------------------------------------
/man/hgate_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{hgate_info}
 4 | \alias{hgate_info}
 5 | \title{hgate_info}
 6 | \usage{
 7 | hgate_info(hgate, xp, gate_vector, level, beta = 1)
 8 | }
 9 | \arguments{
10 | \item{hgate}{A hypergate object (produced by hypergate())}
11 | 
12 | \item{xp}{The expression matrix from which the 'hgate' parameter originates,
13 | needed for Fscore computation}
14 | 
15 | \item{gate_vector}{Categorical data from which the 'hgate' parameter
16 | originates, needed for Fscore computation}
17 | 
18 | \item{level}{Level of gate_vector identifying the population of interest,
19 | needed for Fscore computation}
20 | 
21 | \item{beta}{Beta to weight purity (low beta) or yield (high beta) more,
22 | needed for Fscore computation}
23 | }
24 | \value{
25 | A data.frame with channel, sign, comp and threshold columns, and
26 |   optionnally deltaF (score deterioration when parameter is ignored),Fscore1d (F_value when using only this parameter) and Fscore (F score when all parameters up to this one are included). Fscores are computed if xp, gate_vector
27 |   and level are passed to the function.
28 | }
29 | \description{
30 | Extract information about a hypergate return: the channels of
31 |   the phenotype, the sign of the channels, the sign of the comparison, the
32 |   thresholds. The function could also compute the Fscores if the xp,
33 |   gate_vector and level parameters are given.
34 | }
35 | \examples{
36 | data(Samusik_01_subset)
37 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
38 | gate_vector=Samusik_01_subset$labels
39 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
40 | hgate_info(hgate=hg)
41 | hgate_pheno(hgate=hg)
42 | hgate_rule(hgate=hg)
43 | }
44 | \seealso{
45 | \code{hg_pheno}, \code{hg_rule}
46 | }
47 | 


--------------------------------------------------------------------------------
/man/hgate_pheno.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{hgate_pheno}
 4 | \alias{hgate_pheno}
 5 | \title{hgate_pheno}
 6 | \usage{
 7 | hgate_pheno(hgate, collapse = ", ")
 8 | }
 9 | \arguments{
10 | \item{hgate}{A hypergate object (produced by hypergate())}
11 | 
12 | \item{collapse}{A character string to separate the markers.}
13 | }
14 | \value{
15 | A string representing the phenotype.
16 | }
17 | \description{
18 | Build a human readable phenotype, i.e. a combination of channels
19 |   and sign (+ or -) from a hypergate return.
20 | }
21 | \examples{
22 | ## See hgate_info
23 | }
24 | \seealso{
25 | \code{hg_rule}, \code{hg_info}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/hgate_rule.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{hgate_rule}
 4 | \alias{hgate_rule}
 5 | \title{hgate_rule}
 6 | \usage{
 7 | hgate_rule(hgate, collapse = ", ", digits = 2)
 8 | }
 9 | \arguments{
10 | \item{hgate}{A hypergate object (produced by hypergate())}
11 | 
12 | \item{collapse}{A character string to separate the markers.}
13 | 
14 | \item{digits}{An integer that specifies the decimal part when rounding.}
15 | }
16 | \value{
17 | A data.frame with channel, sign, comp and threshold columns
18 | }
19 | \description{
20 | Build a human readable rule i.e. a combination of channels, sign
21 |   of comparison and threshold.
22 | }
23 | \examples{
24 | ## See hgate_info
25 | }
26 | \seealso{
27 | \code{hg_pheno}, \code{hg_rule}
28 | }
29 | 


--------------------------------------------------------------------------------
/man/hgate_sample.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{hgate_sample}
 4 | \alias{hgate_sample}
 5 | \title{hgate_sample}
 6 | \usage{
 7 | hgate_sample(gate_vector, level, size = 1000, method = "prop")
 8 | }
 9 | \arguments{
10 | \item{gate_vector}{A Categorical vector whose length equals the number of
11 | rows of the matrix to sample (nrow(xp))}
12 | 
13 | \item{level}{A level of gate_vector so that gate_vector == level will produce
14 | a boolean vector identifying events of interest}
15 | 
16 | \item{size}{An integer specifying the maximum number of events of interest to
17 | retain. If the count of events of interest is lower than \code{size}, than
18 | \code{size} will be set to that count.}
19 | 
20 | \item{method}{A string specifying the method to balance the count of events.
21 | \code{"prop"} means proportionnality: if events of interest are sampled in
22 | a 1/10 ratio, then all others events are sampled by the same ratio.
23 | \code{"10x"} means a balance of 10 between the count events of interest and
24 | the count all others events. \code{"ceil"} means a uniform sampling no more
25 | than the specified size for each level of the gate_vector. \code{level} is
26 | unused in that method.}
27 | }
28 | \value{
29 | A logical vector with TRUE correspond to the events being sampled, ie
30 |   kept to further analysis
31 | }
32 | \description{
33 | Downsample the data in order to fasten the computation and
34 |   reduce the memory usage.
35 | }
36 | \note{
37 | No replacement is applied. If there are less events in one group or the
38 |   alternate than the algorithm requires, then all available events are
39 |   returned. NA values in gate_vector are not sampled, ie ignored.
40 | }
41 | \examples{
42 | # Standard procedure with downsampling
43 | data(Samusik_01_subset)
44 | xp <- Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
45 | gate_vector <- Samusik_01_subset$labels
46 | sampled <- hgate_sample(gate_vector, level=8, 100)
47 | table(sampled)
48 | table(gate_vector[sampled])
49 | xp_sampled <- xp[sampled, ]
50 | gate_vector_sampled <- gate_vector[sampled]
51 | hg <- hypergate(xp_sampled, gate_vector_sampled, level=8, delta_add=0.01)
52 | # cluster 8 consists in 122 events
53 | table(gate_vector)
54 | # Downsampling
55 | table(gate_vector[hgate_sample(gate_vector, level=8, 100)])
56 | # Downsampling reduces the alternate events
57 | table(gate_vector[hgate_sample(gate_vector, level=8, 100, "10x")])
58 | # Downsampling is limited to the maximum number of events of interest
59 | table(gate_vector[hgate_sample(gate_vector, level=8, 150)])
60 | # Downsampling is limited to the maximum number of events of interest, and
61 | # the alternate events are downsampled to a total of 10 times
62 | table(gate_vector[hgate_sample(gate_vector, level=8, 150, "10x")])
63 | # More details about sampling
64 | # Convert -1 to NA, NA are not sampled
65 | gate_vector[gate_vector==-1] = NA
66 | gate_vector = factor(gate_vector)
67 | table(gate_vector, useNA = "alw")
68 | #
69 | # target size = 100 whereas initial freq is 122 for pop 8
70 | smp.prop = hgate_sample(gate_vector, level = 8, size = 100, method = "prop")
71 | smp.10x  = hgate_sample(gate_vector, level = 8, size = 100, method = "10x")
72 | smp.ceil = hgate_sample(gate_vector, size = 10, method = "ceil")
73 | table(smp.prop)
74 | table(smp.10x)
75 | table(smp.ceil)
76 | rbind(raw = table(gate_vector),
77 |       prop = table(gate_vector[smp.prop]),
78 |       `10x` = table(gate_vector[smp.10x]),
79 |       ceil = table(gate_vector[smp.ceil]))
80 | #
81 | # target size = 30 whereas initial freq is 25 for pop 14
82 | smp.prop = hgate_sample(gate_vector, level = 14, size = 30, method = "prop")
83 | smp.10x  = hgate_sample(gate_vector, level = 14, size = 30, method = "10x")
84 | table(smp.prop)
85 | table(smp.10x)
86 | rbind(raw = table(gate_vector),
87 |       prop = table(gate_vector[smp.prop]),
88 |       `10x` = table(gate_vector[smp.10x]))
89 | # prop returns original data, because target size ids larger than initial freq
90 | # 10x  returns sampled data according to initial freq, such as the total amount
91 | # of other events equals 10x initial freq of pop 14
92 | }
93 | 


--------------------------------------------------------------------------------
/man/hypergate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{hypergate}
 4 | \alias{hypergate}
 5 | \title{hypergate}
 6 | \usage{
 7 | hypergate(xp, gate_vector, level, delta_add = 0, beta = 1, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{xp}{an Expression matrix}
11 | 
12 | \item{gate_vector}{A Categorical vector of length nrow(xp)}
13 | 
14 | \item{level}{A level of gate_vector so that gate_vector == level will produce a boolean vector identifying events of interest}
15 | 
16 | \item{delta_add}{If the increase in F after an optimization loop is lower than delta_add, the optimization will stop (may save computation time)}
17 | 
18 | \item{beta}{Purity / Yield trade-off}
19 | 
20 | \item{verbose}{Boolean. Whether to print information about the optimization status.}
21 | }
22 | \description{
23 | Finds a hyperrectangle gating around a population of interest
24 | }
25 | \examples{
26 | data(Samusik_01_subset)
27 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
28 | gate_vector=Samusik_01_subset$labels
29 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
30 | }
31 | \seealso{
32 | \code{\link{channels_contributions}} for ranking parameters within the output, \code{\link{reoptimize_strategy}} for reoptimizing a output on a subset of the markers, \code{\link{plot_gating_strategy}} for plotting an output, \code{\link{subset_matrix_hg}} to apply the output to another input matrix, \code{\link{boolmat}} to obtain a boolean matrix stating which events are filtered out because of which markers
33 | }
34 | 


--------------------------------------------------------------------------------
/man/plot_gating_strategy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{plot_gating_strategy}
 4 | \alias{plot_gating_strategy}
 5 | \title{plot_gating_strategy}
 6 | \usage{
 7 | plot_gating_strategy(
 8 |   gate,
 9 |   xp,
10 |   gate_vector,
11 |   level,
12 |   cex = 0.5,
13 |   highlight = "black",
14 |   path = "./",
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{gate}{A hypergate object (produced by hypergate())}
20 | 
21 | \item{xp}{The expression matrix from which the 'gate' parameter originates}
22 | 
23 | \item{gate_vector}{Categorical data from which the 'gate' parameter originates}
24 | 
25 | \item{level}{Level of gate_vector identifying the population of interest}
26 | 
27 | \item{cex}{size of dots}
28 | 
29 | \item{highlight}{color of the positive population when plotting}
30 | 
31 | \item{path}{Where png files will be produced}
32 | 
33 | \item{...}{passed to png}
34 | }
35 | \description{
36 | Plot a hypergate return
37 | }
38 | \examples{
39 | data(Samusik_01_subset)
40 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
41 | gate_vector=Samusik_01_subset$labels
42 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
43 | par(mfrow=c(1,ceiling(length(hg$active_channels)/2)))
44 | plot_gating_strategy(gate=hg,xp=xp,gate_vector=gate_vector,level=23,highlight="red")
45 | }
46 | 


--------------------------------------------------------------------------------
/man/polygon.clean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{polygon.clean}
 4 | \alias{polygon.clean}
 5 | \title{Remove self intersection in polygons}
 6 | \usage{
 7 | polygon.clean(poly)
 8 | }
 9 | \arguments{
10 | \item{poly}{a polygon (list with two components x and y which are equal-length numerical vectors)}
11 | }
12 | \value{
13 | A polygon without overlapping edges and new vertices corresponding to non-inner points of intersection
14 | }
15 | \description{
16 | Remove self intersection in polygons
17 | }
18 | 


--------------------------------------------------------------------------------
/man/reoptimize_strategy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{reoptimize_strategy}
 4 | \alias{reoptimize_strategy}
 5 | \title{reoptimize_strategy}
 6 | \usage{
 7 | reoptimize_strategy(
 8 |   gate,
 9 |   channels_subset,
10 |   xp,
11 |   gate_vector,
12 |   level,
13 |   beta = 1,
14 |   verbose = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{gate}{A return from hypergate}
19 | 
20 | \item{channels_subset}{Character vector identifying the channels that will be retained (others are ignored). The form is e.g. c("CD4_min","CD8_max")}
21 | 
22 | \item{xp}{Expression matrix as in the hypergate call}
23 | 
24 | \item{gate_vector}{Categorical vector as in the hypergate call}
25 | 
26 | \item{level}{Level of gate_vector identifying the population of interest}
27 | 
28 | \item{beta}{Yield / purity trade-off}
29 | 
30 | \item{verbose}{Whether to print information about optimization status}
31 | }
32 | \description{
33 | Optimize a gating strategy given a manual selection of channels
34 | }
35 | \examples{
36 | data(Samusik_01_subset)
37 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
38 | gate_vector=Samusik_01_subset$labels
39 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0)
40 | contribs=channels_contributions(gate=hg,xp=xp,gate_vector=gate_vector,level=23,beta=1)
41 | significant_channels=names(contribs)[contribs>=0.01]
42 | hg_reoptimized=reoptimize_strategy(gate=hg,channels_subset=significant_channels,xp,gate_vector,23)
43 | }
44 | 


--------------------------------------------------------------------------------
/man/subset_matrix_hg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{subset_matrix_hg}
 4 | \alias{subset_matrix_hg}
 5 | \title{subset_matrix_hg}
 6 | \usage{
 7 | subset_matrix_hg(gate, xp)
 8 | }
 9 | \arguments{
10 | \item{gate}{a return from hypergate}
11 | 
12 | \item{xp}{Expression matrix used for gate}
13 | }
14 | \description{
15 | Returns a boolean vector whose TRUE elements correspond to events inside the hyperrectangle
16 | }
17 | \examples{
18 | data(Samusik_01_subset)
19 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
20 | gate_vector=Samusik_01_subset$labels
21 | hg=hypergate(xp=xp,gate_vector=gate_vector,level=23,delta_add=0.01)
22 | gating_state=subset_matrix_hg(hg,xp)
23 | gating_state=ifelse(gating_state,"Gated in","Gated out")
24 | target=ifelse(gate_vector==23,"Target events","Others")
25 | table(gating_state,target)
26 | }
27 | 


--------------------------------------------------------------------------------
/man/update_gate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hypergate.R
 3 | \name{update_gate}
 4 | \alias{update_gate}
 5 | \title{Updates a gate vector}
 6 | \usage{
 7 | update_gate(xp, polygon, gate_vector = rep(0, nrow(xp)), value = 1)
 8 | }
 9 | \arguments{
10 | \item{xp}{A two colums matrix}
11 | 
12 | \item{polygon}{A list with two components x and y of equal lenghts and numeric values}
13 | 
14 | \item{gate_vector}{a vector of length nrow(xp) with integer values}
15 | 
16 | \item{value}{The number that will be assigned to gate_vector, corresponding to points that lie in the polygon}
17 | }
18 | \value{
19 | The updated gate_vector
20 | }
21 | \description{
22 | Updates a gate vector
23 | }
24 | 


--------------------------------------------------------------------------------
/vignettes/Hypergate.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Hypergate"
  3 | author: "Etienne Becht"
  4 | date: "`r Sys.Date()`"
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{Hypergate}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 | 
 12 | ```{r global_options, include=FALSE}
 13 | knitr::opts_chunk$set(fig.pos = 'H',tidy.opts=list(width.cutoff=60),tidy=TRUE,fig.path="fig/")
 14 | ```
 15 | 
 16 | ```{r, include=FALSE}
 17 | options("warn"=-1)
 18 | ```
 19 | 
 20 | This Vignette will walk you through the usage of the Hypergate R package.
 21 | 
 22 | ## Package installation
 23 | Installing dependencies:
 24 | ```{r, eval=FALSE}
 25 | install.packages(c("sp","polyclip","rgeos"))
 26 | source("https://bioconductor.org/biocLite.R")
 27 | biocLite("flowCore")
 28 | ```
 29 | Installing the package from github:
 30 | ```{r, eval=FALSE}
 31 | install.packages("devtools")
 32 | library(devtools)
 33 | install_github(repo="ebecht/hypergate")
 34 | ```
 35 | 
 36 | ```{r, echo = TRUE, message = FALSE}
 37 | library(hypergate)
 38 | ```
 39 | 
 40 | ## Data loading
 41 | 
 42 | ```{r, echo= TRUE}
 43 | data(Samusik_01_subset,package="hypergate")
 44 | ```
 45 | 
 46 | This loads 2000 datapoints randomly sampled from the *Samusik_01* dataset (available from FlowRepository accession number FR-FCM-ZZPH). This object is a list which includes as elements
 47 | 
 48 | 1. *fs_src* a flowSet with 1 flowFrame corresponding to the data subset
 49 | 
 50 | 2. *xp_src* a matrix corresponding to the expression of the data subset. Rownames correspond to event numbers in the unsampled dataset. Colnames correspond to protein targets (or other information e.g. events' manually-annotated labels)
 51 | 
 52 | 3. *labels* numeric vector encoding manually-annontated labels, with the value -1 for ungated events. The text labels for the gated populations are availble from FlowRepostiry
 53 | 
 54 | 4. *regular_channels* A subset of colnames(Samusik_01_subset$xp_src) that corresponds to protein targets
 55 | 
 56 | 5. *tsne* A 2D-tSNE ran on the whole dataset and subsampled to 2000 events
 57 | 
 58 | ## Specifying the cell subset of interest
 59 | 
 60 | Hypergate requires in particular as its arguments
 61 | 
 62 | 1. an expression matrix (which we have as *Samusik_01_subset$xp_src*)
 63 | 
 64 | 2. a vector specifying which events to attempt to gate on. This section discusses ways to achieve this point
 65 | 
 66 | #### Selection from low-dimensional plot
 67 | We included in the package a function with a rudimentary (hopefully sufficient) interface that allows for the selection of a cell subset of interest from a 2D biplot by drawing a polygon around it using the mouse. Since this function is interactive we cannot execute it in this Vignette but an example call would be as such (feel free to try it):
 68 | 
 69 | ```{r, eval = FALSE}
 70 | g=gate_from_biplot(
 71 | 	Samusik_01_subset$tsne,
 72 | 	"tSNE1",
 73 | 	"tSNE2"
 74 | )		
 75 | ```
 76 | 
 77 | For this tutorial we define manually the polygon instead
 78 | 
 79 | ```{r, echo=TRUE, fig.cap="Manual selection of a cluster on a 2D t-SNE"}
 80 | x=c(12.54,8.08,7.12,12.12,17.32,20.62,21.04,20.83,18.07,15.20)
 81 | y=c(-10.61,-14.76,-18.55,-20.33,-21.16,-19.74,-14.40,-11.08,-10.02,-9.42)
 82 | pol=list(x=x,y=y)
 83 | library("sp")
 84 | gate_vector=sp::point.in.polygon(Samusik_01_subset$tsne[,1],Samusik_01_subset$tsne[,2],pol$x,pol$y)
 85 | plot(Samusik_01_subset$tsne,pch=16,cex=0.5,col=ifelse(gate_vector==1,"firebrick3","lightsteelblue"))
 86 | polygon(pol,lty=2)
 87 | ```
 88 | 
 89 | #### Clustering
 90 | 
 91 | Another option to define a cell cluster of interest is to use the output of a clustering algorithm. Popular options for cytometry include *FlowSOM* (available from Bioconductor) or *Phenograph* (available from the ```cytofkit``` package from Bioconductor). An example call for Rphenograph is below:
 92 | 
 93 | ```{r, eval=FALSE}
 94 | require(Rphenograph)
 95 | set.seed(5881215)
 96 | clustering=Rphenograph(Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels])
 97 | cluster_labels=membership(clustering[[2]])
 98 | ```
 99 | In this Vignette we use the simpler kmeans option instead:
100 | 
101 | ```{r}
102 | set.seed(5881215)
103 | cluster_labels=kmeans(Samusik_01_subset$tsne,20,nstart=100)$cluster
104 | ```
105 | 
106 | In this example we can see that the kmeans cluster *20* corresponds to the population we manually selected from the t-SNE biplot
107 | ```{r, fig.cap="Selection of a cluster from a clustering algorithm output"}
108 | plot(Samusik_01_subset$tsne,col=ifelse(cluster_labels==20,"firebrick3","lightsteelblue"),pch=16,cex=0.5)
109 | ```
110 | 
111 | ## Running Hypergate
112 | 
113 | The function to optimize gating strategies is ```hypergate```. Its main arguments are ```xp``` (a numeric matrix encoding expression), ```gate_vector``` (a vector with few unique values), ```level``` (specificies what value of gate_vector to gate upon, i.e. events satisfying ```gate_vector==level``` will be gated in)
114 | 
115 | ```{r}
116 | hg_output=hypergate(
117 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels],
118 | 	gate_vector=gate_vector,
119 | 	level=1,
120 | 	verbose=FALSE
121 | )
122 | ```
123 | 
124 | ## Interpreting and polishing the results
125 | 
126 | ### Gating datapoints
127 | 
128 | The following function allows to subset an expression matrix given a return from *Hypergate*. The new matrix needs to have the same column names as the original matrix.
129 | 
130 | ```{r}
131 | gating_predicted=subset_matrix_hg(hg_output,Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels])
132 | ```
133 | 
134 | ```{r, eval=FALSE}
135 | table(ifelse(gating_predicted,"Gated-in","Gated-out"),ifelse(gate_vector==1,"Events of interest","Others"))
136 | ```
137 | 
138 | ```{r, echo=FALSE}
139 | knitr::kable(table(ifelse(gating_predicted,"Gated-in","Gated-out"),ifelse(gate_vector==1,"Events of interest","Others")))
140 | ```
141 | 
142 | Another option, which offers more low-level control, is to examine for each datapoint whether they pass the threshold for each parameter. The function to obtain such a boolean matrix is ```boolmat```. Here our gating strategy specifies *SiglecF+cKit-Ly6C-*. We would thus obtain a 3-columns x 2000 (the number of events) rows
143 | 
144 | ```{r}
145 | bm=boolmat(
146 | 	gate=hg_output,
147 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
148 | )
149 | head(bm)
150 | ```
151 | 
152 | ```{r, echo=FALSE}
153 | knitr::kable(table(ifelse(bm[,"SiglecF_min"],"SiglecF above threshold","Gated-out because of SiglecF"),ifelse(gate_vector==1,"Events of interest","Others")))
154 | ```
155 | 
156 | ### Examining the output
157 | 
158 | The following function will plot the output of Hypergate. Arguments are
159 | 
160 | 1. ```gate``` an object returned by Hypergate
161 | 
162 | 2. ```xp``` an expression matrix whose columns are named similarly as the ones used to create the ```gate``` object
163 | 
164 | 3. ```gate_vector``` and ```level``` to specify which events are "of interest"
165 | 
166 | 4. ```highlight``` a color that will be used to highlight the events of interest 
167 | 
168 | ```{r,echo=TRUE,fig.width=3.5,fig.height=3.5,fig.cap="Gating strategy"}
169 | plot_gating_strategy(
170 | 	gate=hg_output,
171 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels],
172 | 	gate_vector=gate_vector,
173 | 	level=1,
174 | 	highlight="firebrick3"
175 | )
176 | ```
177 | 
178 | Another important point to consider is how the F$\beta$-score increases with each added channel. This gives an idea of how many channels are required to reach a close-to-optimal gating strategy.
179 | 
180 | This will identify at which steps the parameters were first activated and optimized:
181 | ```{r,fig.cap="F1-score obtained during optimization when adding parameters"}
182 | f_values_vs_number_of_parameters=c(
183 | 	F_beta(rep(TRUE,nrow(Samusik_01_subset$xp_src)),gate_vector==1),
184 | 	hg_output$f[c(apply(hg_output$pars.history[,hg_output$active_channels],2,function(x)min(which(x!=x[1])))-1,nrow(hg_output$pars.history))][-1]
185 | )
186 | barplot(rev(f_values_vs_number_of_parameters),names.arg=rev(c("Initialization",paste("+ ",sep="",hg_output$active_channels))),las=3,mar=c(10,4,1,1),horiz=TRUE,xlab="Cumulative F1-score")
187 | ```
188 | 
189 | This graph tells us that the biggest increase is by far due to SiglecF+, while the lowest is due to Ly6C-.
190 | 
191 | ### Channels contributions
192 | 
193 | The previous graph only shows how the F-value evolved during optimization, but what we really want to know is how much each parameter contributes to the final output (sometimes a parameter will have a big impact at the early steps of the optimization but will become relatively unimportant towards the end, if multiple other parameters collectively account for most of its discriminatory power). We use the following function to assess this, which measures how much performances lower when a parameter is ignored. The more the performances lower, the more important the parameter is.
194 | 
195 | ```{r, fig.cap="Contribution of each parameter to the output"}
196 | contributions=channels_contributions(
197 | 	gate=hg_output,
198 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels],
199 | 	gate_vector=gate_vector,
200 | 	level=1,
201 | 	beta=1
202 | )		
203 | barplot(contributions,las=3,mar=c(10,4,1,1),horiz=TRUE,xlab="F1-score deterioration when the parameter is ignored")
204 | ```
205 | 
206 | ### Reoptimize strategy
207 | 
208 | Since Ly6C contributes very little, we may want to ignore it to obtain a shorter gating strategy. We could keep the current threshold values for the other parameters, but it is best to re-compute the other thresholds to account for the loss of some parameters.
209 | To do that we use the following function:
210 | 
211 | ```{r}
212 | hg_output_polished=reoptimize_strategy(
213 | 	gate=hg_output,
214 | 	channels_subset=c("SiglecF_min","cKit_max"),
215 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels],
216 | 	gate_vector=gate_vector,
217 | 	level=1
218 | )
219 | ```
220 | 
221 | Finally, we get to plot our final strategy:
222 | 
223 | ```{r,echo=TRUE,fig.width=3.5,fig.height=3.5, fig.cap="Final output"}
224 | plot_gating_strategy(
225 | 	gate=hg_output_polished,
226 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels],
227 | 	gate_vector=gate_vector,
228 | 	level=1,
229 | 	highlight="firebrick3"
230 | )
231 | ```
232 | 
233 | ### Human-readable output
234 | 
235 | Thanks to a nice contribution for SamGG on github, there are three functions that make the outputs more readable:
236 | 
237 | ```{r}
238 | hgate_pheno(hg_output)
239 | hgate_rule(hg_output)
240 | hgate_info(hg_output)
241 | # Fscores can be retrieved when the same parameters given to hypergate() are given to hgate_info():
242 | hg_out_info = hgate_info(hg_output,
243 | 	xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels],
244 | 	gate_vector=gate_vector,
245 | 	level=1)
246 | hg_out_info
247 | # and formatted readily
248 | paste0(hg_out_info[,"Fscore"], collapse = ", ")
249 | ```
250 | 
251 | ## Final notes
252 | 
253 | Some comments about potential questions on your own projects (raised by QBarbier):
254 | 
255 | ### Which channels to use as input?
256 | Anything that would be relevant for a gating strategy should be used as an input. So usually any phenotypic channel would be included. If you know that you would not use certain parameters on subsequent experiments (for instance if the staining is intracellular and you plan to sort a live population and thus cannot permeabilize your cells), you should exclude the corresponding channels. I usually do not use channels that were used in pre-gating steps (e.g. CD45 for immune cells). Finally, if you plan to use flow cytometry and use hypergate on a CyTOF dataset, you probably want to discard the Cell_length channel.
257 | 
258 | ### How big can the input matrix be?
259 | It depends on how much RAM your computer has. If that is an issue I suggest downsampling to (e.g.) 1000 positive cells and a corresponding number of negative cells. The `hgate_sample` function can help you achieve this:
260 | 
261 | ```{r}
262 | set.seed(123) ## Makes the subsampling reproducible
263 | gate_vector=Samusik_01_subset$labels
264 | subsample=hgate_sample(gate_vector=gate_vector,level=5,size=100) ## Subsample 100 events from population #5 (Classical monocytes), and a corresponding number of negative events
265 | tab=table(ifelse(subsample,"In","Out"),ifelse(Samusik_01_subset$labels==5,"Positive pop.","Negative pop."))
266 | tab[1,]/colSums(tab) ## Fraction of subsampled events for positive and negative populations
267 | xp=Samusik_01_subset$xp_src[,Samusik_01_subset$regular_channels]
268 | hg=hypergate(xp=xp[subsample,],gate_vector=gate_vector[subsample],level=5) ## Runs hypergate on a subsample of the input matrix
269 | gating_heldout=subset_matrix_hg(hg,xp[!subsample,]) ## Applies the gate to the held-out data
270 | table(ifelse(gating_heldout,"Gated in","Gated out"),ifelse(Samusik_01_subset$labels[!subsample]==5,"Positive pop.","Negative pop."))
271 | ```
272 | 


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-17-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-17-2.png


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/vignettes/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebecht/hypergate/ab1283ab1369cad063899ffebc9e67c17654c4ed/vignettes/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------