├── README.md
├── RaceID2_StemID_class.R
├── RaceID2_StemID_sample.R
├── Reference_manual_RaceID2_StemID.pdf
└── transcript_counts_intestine_5days_YFP.xls


/README.md:
--------------------------------------------------------------------------------
 1 | StemID and RaceID2 algorithms
 2 | 
 3 | RaceID2 is an advanced version of RaceID, an algorithm for the identification of rare and abundant cell types from single cell transcriptome data. The method is based on transcript counts obtained with unique molecular identifies.
 4 | 
 5 | StemID is an algorithm for the derivation of cell lineage trees based on RaceID2 results and predicts multipotent cell identites.
 6 | 
 7 | RaceID2 and StemID are written in the R computing language.
 8 | 
 9 | The following files are provided:
10 | 
11 | StemID/RaceID2 class definition: RaceID2_StemID_class.R 
12 | StemID/RaceID2 sample code: RaceID2_StemID_sample.R
13 | StemID/RaceID2 reference manual: Reference_manual_RaceID2_StemID.pdf
14 | StemID/RaceID2 sample data: transcript_counts_intestine_5days_YFP.xls
15 | 
16 | 


--------------------------------------------------------------------------------
/RaceID2_StemID_class.R:
--------------------------------------------------------------------------------
   1 | ## load required packages.
   2 | require(tsne)
   3 | require(pheatmap)
   4 | require(MASS)
   5 | require(cluster)
   6 | require(mclust)
   7 | require(flexmix)
   8 | require(lattice)
   9 | require(fpc)
  10 | require(amap)
  11 | require(RColorBrewer)
  12 | require(locfit)
  13 | require(vegan)
  14 | 
  15 | ## class definition
  16 | SCseq <- setClass("SCseq", slots = c(expdata = "data.frame", ndata = "data.frame", fdata = "data.frame", distances = "matrix", tsne = "data.frame", cluster = "list", background = "list", out = "list", cpart = "vector", fcol = "vector", filterpar = "list", clusterpar = "list", outlierpar ="list" ))
  17 | 
  18 | setValidity("SCseq",
  19 |             function(object) {
  20 |               msg <- NULL
  21 |               if ( ! is.data.frame(object@expdata) ){
  22 |                 msg <- c(msg, "input data must be data.frame")
  23 |               }else if ( nrow(object@expdata) < 2 ){
  24 |                 msg <- c(msg, "input data must have more than one row")
  25 |               }else if ( ncol(object@expdata) < 2 ){
  26 |                 msg <- c(msg, "input data must have more than one column")
  27 |               }else if (sum( apply( is.na(object@expdata),1,sum ) ) > 0 ){
  28 |                 msg <- c(msg, "NAs are not allowed in input data")
  29 |               }else if (sum( apply( object@expdata,1,min ) ) < 0 ){
  30 |                 msg <- c(msg, "negative values are not allowed in input data")
  31 |               }
  32 |               if (is.null(msg)) TRUE
  33 |               else msg
  34 |             }
  35 |             )
  36 | 
  37 | setMethod("initialize",
  38 |           signature = "SCseq",
  39 |           definition = function(.Object, expdata ){
  40 |             .Object@expdata <- expdata
  41 |             .Object@ndata <- expdata
  42 |             .Object@fdata <- expdata
  43 |             validObject(.Object)
  44 |             return(.Object)
  45 |           }
  46 |           )
  47 | 
  48 | setGeneric("filterdata", function(object, mintotal=3000, minexpr=5, minnumber=1, maxexpr=Inf, downsample=TRUE, dsn=1, rseed=17000) standardGeneric("filterdata"))
  49 | 
  50 | setMethod("filterdata",
  51 |           signature = "SCseq",
  52 |           definition = function(object,mintotal,minexpr,minnumber,maxexpr,downsample,dsn,rseed) {
  53 |             if ( ! is.numeric(mintotal) ) stop( "mintotal has to be a positive number" ) else if ( mintotal <= 0 ) stop( "mintotal has to be a positive number" )
  54 |             if ( ! is.numeric(minexpr) ) stop( "minexpr has to be a non-negative number" ) else if ( minexpr < 0 ) stop( "minexpr has to be a non-negative number" )
  55 |             if ( ! is.numeric(minnumber) ) stop( "minnumber has to be a non-negative integer number" ) else if ( round(minnumber) != minnumber | minnumber < 0 ) stop( "minnumber has to be a non-negative integer number" )
  56 |             if ( ! ( is.numeric(downsample) | is.logical(downsample) ) ) stop( "downsample has to be logical (TRUE/FALSE)" )
  57 |             if ( ! is.numeric(dsn) ) stop( "dsn has to be a positive integer number" ) else if ( round(dsn) != dsn | dsn <= 0 ) stop( "dsn has to be a positive integer number" )
  58 |             object@filterpar <- list(mintotal=mintotal, minexpr=minexpr, minnumber=minnumber, maxexpr=maxexpr, downsample=downsample, dsn=dsn)
  59 |             object@ndata <- object@expdata[,apply(object@expdata,2,sum,na.rm=TRUE) >= mintotal]
  60 |             if ( downsample ){
  61 |               set.seed(rseed)
  62 |               object@ndata <- downsample(object@expdata,n=mintotal,dsn=dsn)
  63 |             }else{
  64 |               x <- object@ndata
  65 |               object@ndata <- as.data.frame( t(t(x)/apply(x,2,sum))*median(apply(x,2,sum,na.rm=TRUE)) + .1 )
  66 |             }
  67 |             x <- object@ndata
  68 |             object@fdata <- x[apply(x>=minexpr,1,sum,na.rm=TRUE) >= minnumber,]
  69 |             x <- object@fdata
  70 |             object@fdata <- x[apply(x,1,max,na.rm=TRUE) < maxexpr,]
  71 |             return(object)
  72 |           }
  73 |           )
  74 | 
  75 | downsample <- function(x,n,dsn){
  76 |   x <- round( x[,apply(x,2,sum,na.rm=TRUE) >= n], 0)
  77 |   nn <- min( apply(x,2,sum) )
  78 |   for ( j in 1:dsn ){
  79 |     z  <- data.frame(GENEID=rownames(x))
  80 |     rownames(z) <- rownames(x)
  81 |     initv <- rep(0,nrow(z))
  82 |     for ( i in 1:dim(x)[2] ){
  83 |       y <- aggregate(rep(1,nn),list(sample(rep(rownames(x),x[,i]),nn)),sum)
  84 |       na <- names(x)[i]
  85 |       names(y) <- c("GENEID",na)
  86 |       rownames(y) <- y$GENEID
  87 |       z[,na] <- initv
  88 |       k <- intersect(rownames(z),y$GENEID)
  89 |       z[k,na] <- y[k,na]
  90 |       z[is.na(z[,na]),na] <- 0
  91 |     }
  92 |     rownames(z) <- as.vector(z$GENEID)
  93 |     ds <- if ( j == 1 ) z[,-1] else ds + z[,-1]
  94 |   }
  95 |   ds <- ds/dsn + .1
  96 |   return(ds)
  97 | }
  98 | 
  99 | dist.gen <- function(x,method="euclidean", ...) if ( method %in% c("spearman","pearson","kendall") ) as.dist( 1 - cor(t(x),method=method,...) ) else dist(x,method=method,...)
 100 | 
 101 | dist.gen.pairs <- function(x,y,...) dist.gen(t(cbind(x,y)),...)
 102 | 
 103 | binompval <- function(p,N,n){
 104 |   pval   <- pbinom(n,round(N,0),p,lower.tail=TRUE)
 105 |   pval[!is.na(pval) & pval > 0.5] <- 1-pval[!is.na(pval) & pval > 0.5]
 106 |   return(pval)
 107 | }
 108 | 
 109 | setGeneric("plotgap", function(object) standardGeneric("plotgap"))
 110 | 
 111 | setMethod("plotgap",
 112 |           signature = "SCseq",
 113 |           definition = function(object){
 114 |             if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before plotgap")
 115 |             if ( sum(is.na(object@cluster$gap$Tab[,3])) > 0 ) stop("run clustexp with do.gap = TRUE first")
 116 |             plot(object@cluster$gap,ylim=c( min(object@cluster$gap$Tab[,3] - object@cluster$gap$Tab[,4]),  max(object@cluster$gap$Tab[,3] + object@cluster$gap$Tab[,4])))
 117 |           }
 118 |           )
 119 | 
 120 | setGeneric("plotjaccard", function(object) standardGeneric("plotjaccard"))
 121 | 
 122 | setMethod("plotjaccard",
 123 |           signature = "SCseq",
 124 |           definition = function(object){
 125 |             if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before plotjaccard")
 126 |             if ( length(unique(object@cluster$kpart)) < 2 ) stop("only a single cluster: no Jaccard's similarity plot")
 127 |             barplot(object@cluster$jaccard,names.arg=1:length(object@cluster$jaccard),ylab="Jaccard's similarity")
 128 |           }
 129 |           )
 130 | 
 131 | setGeneric("plotoutlierprobs", function(object) standardGeneric("plotoutlierprobs"))
 132 | 
 133 | setMethod("plotoutlierprobs",
 134 |           signature = "SCseq",
 135 |           definition = function(object){
 136 |             if ( length(object@cpart) == 0 ) stop("run findoutliers before plotoutlierprobs")
 137 |             p <- object@cluster$kpart[ order(object@cluster$kpart,decreasing=FALSE)]
 138 |             x <- object@out$cprobs[names(p)]
 139 |             fcol <- object@fcol
 140 |             for ( i in 1:max(p) ){
 141 |               y <- -log10(x + 2.2e-16)
 142 |               y[p != i] <- 0
 143 |               if ( i == 1 ) b <- barplot(y,ylim=c(0,max(-log10(x + 2.2e-16))*1.1),col=fcol[i],border=fcol[i],names.arg=FALSE,ylab="-log10prob") else barplot(y,add=TRUE,col=fcol[i],border=fcol[i],names.arg=FALSE,axes=FALSE)
 144 |   }
 145 |             abline(-log10(object@outlierpar$probthr),0,col="black",lty=2)
 146 |             d <- b[2,1] - b[1,1]
 147 |             y <- 0
 148 |             for ( i in 1:max(p) ) y <- append(y,b[sum(p <=i),1] + d/2)
 149 |             axis(1,at=(y[1:(length(y)-1)] + y[-1])/2,lab=1:max(p))
 150 |             box()
 151 |           }
 152 |           )
 153 | 
 154 | setGeneric("plotbackground", function(object) standardGeneric("plotbackground"))
 155 | 
 156 | setMethod("plotbackground",
 157 |           signature = "SCseq",
 158 |           definition = function(object){
 159 |             if ( length(object@cpart) == 0 ) stop("run findoutliers before plotbackground")
 160 |             m <- apply(object@fdata,1,mean)
 161 |             v <- apply(object@fdata,1,var)
 162 |             fit <- locfit(v~lp(m,nn=.7),family="gamma",maxk=500)
 163 |             plot(log2(m),log2(v),pch=20,xlab="log2mean",ylab="log2var",col="grey")
 164 |             lines(log2(m[order(m)]),log2(object@background$lvar(m[order(m)],object)),col="red",lwd=2)
 165 |             lines(log2(m[order(m)]),log2(fitted(fit)[order(m)]),col="orange",lwd=2,lty=2)
 166 |             legend("topleft",legend=substitute(paste("y = ",a,"*x^2 + ",b,"*x + ",d,sep=""),list(a=round(coef(object@background$vfit)[3],2),b=round(coef(object@background$vfit)[2],2),d=round(coef(object@background$vfit)[1],2))),bty="n")
 167 |           }
 168 |           )
 169 | 
 170 | setGeneric("plotsensitivity", function(object) standardGeneric("plotsensitivity"))
 171 | 
 172 | setMethod("plotsensitivity",
 173 |           signature = "SCseq",
 174 |           definition = function(object){
 175 |             if ( length(object@cpart) == 0 ) stop("run findoutliers before plotsensitivity")
 176 |             plot(log10(object@out$thr), object@out$stest, type="l",xlab="log10 Probability cutoff", ylab="Number of outliers")
 177 |             abline(v=log10(object@outlierpar$probthr),col="red",lty=2)
 178 |           }
 179 |           )
 180 | 
 181 | setGeneric("diffgenes", function(object,cl1,cl2,mincount=5) standardGeneric("diffgenes"))
 182 | 
 183 | setMethod("diffgenes",
 184 |           signature = "SCseq",
 185 |           definition = function(object,cl1,cl2,mincount){
 186 |             part <- object@cpart
 187 |             cl1 <- c(cl1)
 188 |             cl2 <- c(cl2)
 189 |             if ( length(part) == 0 ) stop("run findoutliers before diffgenes")
 190 |             if ( ! is.numeric(mincount) ) stop("mincount has to be a non-negative number") else if (  mincount < 0 ) stop("mincount has to be a non-negative number")
 191 |             if ( length(intersect(cl1, part)) < length(unique(cl1)) ) stop( paste("cl1 has to be a subset of ",paste(sort(unique(part)),collapse=","),"\n",sep="") )
 192 |             if ( length(intersect(cl2, part)) < length(unique(cl2)) ) stop( paste("cl2 has to be a subset of ",paste(sort(unique(part)),collapse=","),"\n",sep="") )
 193 |             f <- apply(object@ndata[,part %in% c(cl1,cl2)],1,max) > mincount
 194 |             x <- object@ndata[f,part %in% cl1]
 195 |             y <- object@ndata[f,part %in% cl2]
 196 |             if ( sum(part %in% cl1) == 1 ) m1 <- x else m1 <- apply(x,1,mean)
 197 |             if ( sum(part %in% cl2) == 1 ) m2 <- y else m2 <- apply(y,1,mean)
 198 |             if ( sum(part %in% cl1) == 1 ) s1 <- sqrt(x) else s1 <- sqrt(apply(x,1,var))
 199 |             if ( sum(part %in% cl2) == 1 ) s2 <- sqrt(y) else s2 <- sqrt(apply(y,1,var))
 200 |             
 201 |             d <- ( m1 - m2 )/ apply( cbind( s1, s2 ),1,mean )
 202 |             names(d) <- rownames(object@ndata)[f]
 203 |             if ( sum(part %in% cl1) == 1 ){
 204 |               names(x) <- names(d)
 205 |               x <- x[order(d,decreasing=TRUE)]
 206 |             }else{
 207 |               x <- x[order(d,decreasing=TRUE),]
 208 |             }
 209 |             if ( sum(part %in% cl2) == 1 ){
 210 |               names(y) <- names(d)
 211 |               y <- y[order(d,decreasing=TRUE)]
 212 |             }else{
 213 |               y <- y[order(d,decreasing=TRUE),]
 214 |             }
 215 |             return(list(z=d[order(d,decreasing=TRUE)],cl1=x,cl2=y,cl1n=cl1,cl2n=cl2))
 216 |           }
 217 |           )
 218 | 
 219 | plotdiffgenes <- function(z,gene=g){
 220 |   if ( ! is.list(z) ) stop("first arguments needs to be output of function diffgenes")
 221 |   if ( length(z) < 5 ) stop("first arguments needs to be output of function diffgenes")
 222 |   if ( sum(names(z) == c("z","cl1","cl2","cl1n","cl2n")) < 5 ) stop("first arguments needs to be output of function diffgenes")
 223 |   if ( length(gene) > 1 ) stop("only single value allowed for argument gene")
 224 |   if ( !is.numeric(gene) & !(gene %in% names(z$z)) ) stop("argument gene needs to be within rownames of first argument or a positive integer number")
 225 |   if ( is.numeric(gene) ){ if ( gene < 0 | round(gene) != gene ) stop("argument gene needs to be within rownames of first argument or a positive integer number") }
 226 |   x <- if ( is.null(dim(z$cl1)) ) z$cl1[gene] else t(z$cl1[gene,])
 227 |   y <- if ( is.null(dim(z$cl2)) ) z$cl2[gene] else t(z$cl2[gene,])
 228 |   plot(1:length(c(x,y)),c(x,y),ylim=c(0,max(c(x,y))),xlab="",ylab="Expression",main=gene,cex=0,axes=FALSE)
 229 |   axis(2)
 230 |   box()
 231 |   u <- 1:length(x)
 232 |   rect(u - .5,0,u + .5,x,col="red")
 233 |   v <- c(min(u) - .5,max(u) + .5)
 234 |   axis(1,at=mean(v),lab=paste(z$cl1n,collapse=","))
 235 |   lines(v,rep(mean(x),length(v)))
 236 |   lines(v,rep(mean(x)-sqrt(var(x)),length(v)),lty=2)
 237 |   lines(v,rep(mean(x)+sqrt(var(x)),length(v)),lty=2)
 238 |   
 239 |   u <- ( length(x) + 1 ):length(c(x,y))
 240 |   v <- c(min(u) - .5,max(u) + .5)
 241 |   rect(u - .5,0,u + .5,y,col="blue")
 242 |   axis(1,at=mean(v),lab=paste(z$cl2n,collapse=","))
 243 |   lines(v,rep(mean(y),length(v)))
 244 |   lines(v,rep(mean(y)-sqrt(var(y)),length(v)),lty=2)
 245 |   lines(v,rep(mean(y)+sqrt(var(y)),length(v)),lty=2)
 246 |   abline(v=length(x) + .5)
 247 | }
 248 | 
 249 | setGeneric("plottsne", function(object,final=TRUE) standardGeneric("plottsne"))
 250 | 
 251 | setMethod("plottsne",
 252 |           signature = "SCseq",
 253 |           definition = function(object,final){
 254 |             if ( length(object@tsne) == 0 ) stop("run comptsne before plottsne")
 255 |             if ( final & length(object@cpart) == 0 ) stop("run findoutliers before plottsne")
 256 |             if ( !final & length(object@cluster$kpart) == 0 ) stop("run clustexp before plottsne")
 257 |             part <- if ( final ) object@cpart else object@cluster$kpart
 258 |             plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,cex=1.5,col="lightgrey")
 259 |             for ( i in 1:max(part) ){
 260 |               if ( sum(part == i) > 0 ) text(object@tsne[part == i,1],object@tsne[part == i,2],i,col=object@fcol[i],cex=.75,font=4)
 261 |             }
 262 |           }
 263 |           )
 264 | 
 265 | setGeneric("plotlabelstsne", function(object,labels=NULL) standardGeneric("plotlabelstsne"))
 266 | 
 267 | setMethod("plotlabelstsne",
 268 |           signature = "SCseq",
 269 |           definition = function(object,labels){
 270 |             if ( is.null(labels ) ) labels <- names(object@ndata)
 271 |             if ( length(object@tsne) == 0 ) stop("run comptsne before plotlabelstsne")
 272 |             plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,cex=1.5,col="lightgrey")
 273 |             text(object@tsne[,1],object@tsne[,2],labels,cex=.5)
 274 |           }
 275 |           )
 276 | 
 277 | setGeneric("plotsymbolstsne", function(object,types=NULL) standardGeneric("plotsymbolstsne"))
 278 | 
 279 | setMethod("plotsymbolstsne",
 280 |           signature = "SCseq",
 281 |           definition = function(object,types){
 282 |             if ( is.null(types) ) types <- names(object@fdata)
 283 |             if ( length(object@tsne) == 0 ) stop("run comptsne before plotsymbolstsne")
 284 |             if ( length(types) != ncol(object@fdata) ) stop("types argument has wrong length. Length has to equal to the column number of object@ndata")
 285 |             coloc <- rainbow(length(unique(types)))
 286 |             syms <- c()
 287 |             plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,col="grey")
 288 |             for ( i in 1:length(unique(types)) ){
 289 |               f <- types == sort(unique(types))[i]
 290 |               syms <- append( syms, ( (i-1) %% 25 ) + 1 )
 291 |               points(object@tsne[f,1],object@tsne[f,2],col=coloc[i],pch=( (i-1) %% 25 ) + 1,cex=1)
 292 |             }
 293 |             legend("topleft", legend=sort(unique(types)), col=coloc, pch=syms)
 294 |           }
 295 |           )
 296 | 
 297 | setGeneric("plotexptsne", function(object,g,n="",logsc=FALSE) standardGeneric("plotexptsne"))
 298 | 
 299 | setMethod("plotexptsne",
 300 |           signature = "SCseq",
 301 |           definition = function(object,g,n="",logsc=FALSE){
 302 |             if ( length(object@tsne) == 0 ) stop("run comptsne before plottsne")
 303 |             if ( length(intersect(g,rownames(object@ndata))) < length(unique(g)) ) stop("second argument does not correspond to set of rownames slot ndata of SCseq object")
 304 |             if ( !is.numeric(logsc) & !is.logical(logsc) ) stop("argument logsc has to be logical (TRUE/FALSE)")
 305 |             if ( n == "" ) n <- g[1]
 306 |             l <- apply(object@ndata[g,] - .1,2,sum) + .1
 307 |             if (logsc) {
 308 |               f <- l == 0
 309 |               l <- log2(l)
 310 |               l[f] <- NA
 311 |             }
 312 |             mi <- min(l,na.rm=TRUE)
 313 |             ma <- max(l,na.rm=TRUE)
 314 |             ColorRamp <- colorRampPalette(rev(brewer.pal(n = 7,name = "RdYlBu")))(100)
 315 |             ColorLevels <- seq(mi, ma, length=length(ColorRamp))
 316 |             v <- round((l - mi)/(ma - mi)*99 + 1,0)
 317 |             layout(matrix(data=c(1,3,2,4), nrow=2, ncol=2), widths=c(5,1,5,1), heights=c(5,1,1,1))
 318 |             par(mar = c(3,5,2.5,2))
 319 |             plot(object@tsne,xlab="Dim 1",ylab="Dim 2",main=n,pch=20,cex=0,col="grey")
 320 |             for ( k in 1:length(v) ){
 321 |               points(object@tsne[k,1],object@tsne[k,2],col=ColorRamp[v[k]],pch=20,cex=1.5)
 322 |             }
 323 |             par(mar = c(3,2.5,2.5,2))
 324 |             image(1, ColorLevels,
 325 |                   matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
 326 |                   col=ColorRamp,
 327 |                   xlab="",ylab="",
 328 |                   xaxt="n")
 329 |             layout(1)
 330 |           }
 331 |           )
 332 | 
 333 | 
 334 | plot.err.bars.y <- function(x, y, y.err, col="black", lwd=1, lty=1, h=0.1){
 335 |   arrows(x,y-y.err,x,y+y.err,code=0, col=col, lwd=lwd, lty=lty)
 336 |   arrows(x-h,y-y.err,x+h,y-y.err,code=0, col=col, lwd=lwd, lty=lty)
 337 |   arrows(x-h,y+y.err,x+h,y+y.err,code=0, col=col, lwd=lwd, lty=lty)
 338 | }
 339 | 
 340 | clusGapExt <-function (x, FUNcluster, K.max, B = 100, verbose = interactive(), method="euclidean",random=TRUE,
 341 |     ...) 
 342 | {
 343 |      stopifnot(is.function(FUNcluster), length(dim(x)) == 2, K.max >= 
 344 |         2, (n <- nrow(x)) >= 1, (p <- ncol(x)) >= 1)
 345 |     if (B != (B. <- as.integer(B)) || (B <- B.) <= 0) 
 346 |         stop("'B' has to be a positive integer")
 347 |     if (is.data.frame(x)) 
 348 |         x <- as.matrix(x)
 349 |     ii <- seq_len(n)
 350 |     W.k <- function(X, kk) {
 351 |         clus <- if (kk > 1) 
 352 |             FUNcluster(X, kk, ...)$cluster
 353 |         else rep.int(1L, nrow(X))
 354 |         0.5 * sum(vapply(split(ii, clus), function(I) {
 355 |             xs <- X[I, , drop = FALSE]
 356 |             sum(dist.gen(xs,method=method)/nrow(xs))
 357 |         }, 0))
 358 |     }
 359 |     logW <- E.logW <- SE.sim <- numeric(K.max)
 360 |     if (verbose) 
 361 |         cat("Clustering k = 1,2,..., K.max (= ", K.max, "): .. ", 
 362 |             sep = "")
 363 |     for (k in 1:K.max) logW[k] <- log(W.k(x, k))
 364 |     if (verbose) 
 365 |         cat("done\n")
 366 |     xs <- scale(x, center = TRUE, scale = FALSE)
 367 |     m.x <- rep(attr(xs, "scaled:center"), each = n)
 368 |     V.sx <- svd(xs)$v
 369 |     rng.x1 <- apply(xs %*% V.sx, 2, range)
 370 |     logWks <- matrix(0, B, K.max)
 371 |      if (random){
 372 |        if (verbose) 
 373 |          cat("Bootstrapping, b = 1,2,..., B (= ", B, ")  [one \".\" per sample]:\n", 
 374 |              sep = "")
 375 |        for (b in 1:B) {
 376 |          z1 <- apply(rng.x1, 2, function(M, nn) runif(nn, min = M[1], 
 377 |              max = M[2]), nn = n)
 378 |          z <- tcrossprod(z1, V.sx) + m.x
 379 |          ##z <- apply(x,2,function(m) runif(length(m),min=min(m),max=max(m)))
 380 |          ##z <- apply(x,2,function(m) sample(m))
 381 |          for (k in 1:K.max) {
 382 |            logWks[b, k] <- log(W.k(z, k))
 383 |          }
 384 |          if (verbose) 
 385 |            cat(".", if (b%%50 == 0) 
 386 |                paste(b, "\n"))
 387 |        }
 388 |        if (verbose && (B%%50 != 0)) 
 389 |          cat("", B, "\n")
 390 |        E.logW <- colMeans(logWks)
 391 |        SE.sim <- sqrt((1 + 1/B) * apply(logWks, 2, var))
 392 |      }else{
 393 |        E.logW <- rep(NA,K.max)
 394 |        SE.sim <- rep(NA,K.max)
 395 |      }
 396 |     structure(class = "clusGap", list(Tab = cbind(logW, E.logW, 
 397 |         gap = E.logW - logW, SE.sim), n = n, B = B, FUNcluster = FUNcluster))
 398 | }
 399 | 
 400 | clustfun <- function(x,clustnr=20,bootnr=50,metric="pearson",do.gap=FALSE,sat=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000,FUNcluster="kmedoids",distances=NULL,link="single")
 401 | {
 402 |   if ( clustnr < 2) stop("Choose clustnr > 1")
 403 |   di <- if ( FUNcluster == "kmedoids" ) t(x) else dist.gen(t(x),method=metric)
 404 |   if ( nrow(di) - 1 < clustnr ) clustnr <-  nrow(di) - 1
 405 |   if ( do.gap | sat | cln > 0 ){
 406 |     gpr <- NULL
 407 |     f <- if ( cln == 0 ) TRUE else FALSE
 408 |     if ( do.gap ){
 409 |       set.seed(rseed)
 410 |       if ( FUNcluster == "kmeans" )   gpr <- clusGapExt(as.matrix(di), FUN = kmeans, K.max = clustnr, B = B.gap, iter.max=100)
 411 |       if ( FUNcluster == "kmedoids" ) gpr <- clusGapExt(as.matrix(di), FUN = function(x,k) pam(dist.gen(x,method=metric),k), K.max = clustnr, B = B.gap, method=metric)
 412 |       if ( FUNcluster == "hclust" )   gpr <- clusGapExt(as.matrix(di), FUN = function(x,k){ y <- hclusterCBI(x,k,link=link,scaling=FALSE); y$cluster <- y$partition; y }, K.max = clustnr, B = B.gap) 
 413 |       if ( f ) cln <- maxSE(gpr$Tab[,3],gpr$Tab[,4],method=SE.method,SE.factor)
 414 |     }
 415 |     if ( sat ){
 416 |       if ( ! do.gap ){
 417 |         if ( FUNcluster == "kmeans" )   gpr <- clusGapExt(as.matrix(di), FUN = kmeans, K.max = clustnr, B = B.gap, iter.max=100, random=FALSE)
 418 |         if ( FUNcluster == "kmedoids" ) gpr <- clusGapExt(as.matrix(di), FUN = function(x,k) pam(dist.gen(x,method=metric),k), K.max = clustnr, B = B.gap, random=FALSE, method=metric)
 419 |         if ( FUNcluster == "hclust" )   gpr <- clusGapExt(as.matrix(di), FUN = function(x,k){ y <- hclusterCBI(x,k,link=link,scaling=FALSE); y$cluster <- y$partition; y }, K.max = clustnr, B = B.gap, random=FALSE)
 420 |       }
 421 |       g <- gpr$Tab[,1]
 422 |       y <- g[-length(g)] - g[-1]
 423 |       mm <- numeric(length(y))
 424 |       nn <- numeric(length(y))
 425 |       for ( i in 1:length(y)){
 426 |         mm[i] <- mean(y[i:length(y)]) 
 427 |         nn[i] <- sqrt(var(y[i:length(y)]))
 428 |       }
 429 |       if ( f ) cln <- max(min(which( y - (mm + nn) < 0 )),1)
 430 |     }
 431 |     if ( cln <= 1 ) {
 432 |       clb <- list(result=list(partition=rep(1,dim(x)[2])),bootmean=1)
 433 |       names(clb$result$partition) <- names(x)
 434 |       return(list(x=x,clb=clb,gpr=gpr,di=if ( FUNcluster == "kmedoids" ) dist.gen(di,method=metric) else di))
 435 |     }
 436 |     if ( FUNcluster == "kmeans" ) clb <- clusterboot(di,B=bootnr,distances=FALSE,bootmethod="boot",clustermethod=kmeansCBI,krange=cln,scaling=FALSE,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
 437 |     if ( FUNcluster == "kmedoids" ) clb <- clusterboot(dist.gen(di,method=metric),B=bootnr,bootmethod="boot",clustermethod=pamkCBI,k=cln,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
 438 |     if ( FUNcluster == "hclust" ) clb <- clusterboot(di,B=bootnr,distances=FALSE,bootmethod="boot",clustermethod=hclusterCBI,k=cln,link=link,scaling=FALSE,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
 439 |     return(list(x=x,clb=clb,gpr=gpr,di=if ( FUNcluster == "kmedoids" ) dist.gen(di,method=metric) else di))
 440 |   }
 441 | }
 442 | 
 443 | setGeneric("clustexp", function(object,clustnr=20,bootnr=50,metric="pearson",do.gap=FALSE,sat=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000,FUNcluster="kmedoids") standardGeneric("clustexp"))
 444 | 
 445 | setMethod("clustexp",
 446 |           signature = "SCseq",
 447 |           definition = function(object,clustnr,bootnr,metric,do.gap,sat,SE.method,SE.factor,B.gap,cln,rseed,FUNcluster) {
 448 |             if ( ! is.numeric(clustnr) ) stop("clustnr has to be a positive integer") else if ( round(clustnr) != clustnr | clustnr <= 0 ) stop("clustnr has to be a positive integer")
 449 |             if ( ! is.numeric(bootnr) ) stop("bootnr has to be a positive integer") else if ( round(bootnr) != bootnr | bootnr <= 0 ) stop("bootnr has to be a positive integer")
 450 |             if ( ! ( metric %in% c( "spearman","pearson","kendall","euclidean","maximum","manhattan","canberra","binary","minkowski") ) ) stop("metric has to be one of the following: spearman, pearson, kendall, euclidean, maximum, manhattan, canberra, binary, minkowski")
 451 |             if ( ! ( SE.method %in% c( "firstSEmax","Tibs2001SEmax","globalSEmax","firstmax","globalmax") ) ) stop("SE.method has to be one of the following: firstSEmax, Tibs2001SEmax, globalSEmax, firstmax, globalmax")
 452 |             if ( ! is.numeric(SE.factor) ) stop("SE.factor has to be a non-negative integer") else if  ( SE.factor < 0 )  stop("SE.factor has to be a non-negative integer")
 453 |             if ( ! ( is.numeric(do.gap) | is.logical(do.gap) ) ) stop( "do.gap has to be logical (TRUE/FALSE)" )
 454 |             if ( ! ( is.numeric(sat) | is.logical(sat) ) ) stop( "sat has to be logical (TRUE/FALSE)" )
 455 |             if ( ! is.numeric(B.gap) ) stop("B.gap has to be a positive integer") else if ( round(B.gap) != B.gap | B.gap <= 0 ) stop("B.gap has to be a positive integer")
 456 |             if ( ! is.numeric(cln) ) stop("cln has to be a non-negative integer") else if ( round(cln) != cln | cln < 0 ) stop("cln has to be a non-negative integer")          
 457 |             if ( ! is.numeric(rseed) ) stop("rseed has to be numeric")
 458 |             if ( !do.gap & !sat & cln == 0 ) stop("cln has to be a positive integer or either do.gap or sat has to be TRUE")
 459 |             if ( ! ( FUNcluster %in% c("kmeans","hclust","kmedoids") ) ) stop("FUNcluster has to be one of the following: kmeans, hclust,kmedoids") 
 460 |             object@clusterpar <- list(clustnr=clustnr,bootnr=bootnr,metric=metric,do.gap=do.gap,sat=sat,SE.method=SE.method,SE.factor=SE.factor,B.gap=B.gap,cln=cln,rseed=rseed,FUNcluster=FUNcluster)
 461 |             y <- clustfun(object@fdata,clustnr,bootnr,metric,do.gap,sat,SE.method,SE.factor,B.gap,cln,rseed,FUNcluster)
 462 |             object@cluster   <- list(kpart=y$clb$result$partition, jaccard=y$clb$bootmean, gap=y$gpr, clb=y$clb)
 463 |             object@distances <- as.matrix( y$di )
 464 |             set.seed(111111)
 465 |             object@fcol <- sample(rainbow(max(y$clb$result$partition)))
 466 |             return(object)
 467 |           }
 468 |           )
 469 | 
 470 | setGeneric("findoutliers", function(object,outminc=5,outlg=2,probthr=1e-3,thr=2**-(1:40),outdistquant=.95) standardGeneric("findoutliers"))
 471 | 
 472 | setMethod("findoutliers",
 473 |           signature = "SCseq",
 474 |           definition = function(object,outminc,outlg,probthr,thr,outdistquant) {
 475 |             if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before findoutliers")
 476 |             if ( ! is.numeric(outminc) ) stop("outminc has to be a non-negative integer") else if ( round(outminc) != outminc | outminc < 0 ) stop("outminc has to be a non-negative integer")
 477 |             if ( ! is.numeric(outlg) ) stop("outlg has to be a non-negative integer") else if ( round(outlg) != outlg | outlg < 0 ) stop("outlg has to be a non-negative integer")
 478 |             if ( ! is.numeric(probthr) ) stop("probthr has to be a number between 0 and 1") else if (  probthr < 0 | probthr > 1 ) stop("probthr has to be a number between 0 and 1")
 479 |             if ( ! is.numeric(thr) ) stop("thr hast to be a vector of numbers between 0 and 1") else if ( min(thr) < 0 | max(thr) > 1 ) stop("thr hast to be a vector of numbers between 0 and 1")
 480 |             if ( ! is.numeric(outdistquant) ) stop("outdistquant has to be a number between 0 and 1") else if (  outdistquant < 0 | outdistquant > 1 ) stop("outdistquant has to be a number between 0 and 1")
 481 |             
 482 |             object@outlierpar <- list( outminc=outminc,outlg=outlg,probthr=probthr,thr=thr,outdistquant=outdistquant )
 483 |             ### calibrate background model
 484 |             m <- log2(apply(object@fdata,1,mean))
 485 |             v <- log2(apply(object@fdata,1,var))
 486 |             f <- m > -Inf & v > -Inf
 487 |             m <- m[f]
 488 |             v <- v[f]
 489 |             mm <- -8
 490 |             repeat{
 491 |               fit <- lm(v ~ m + I(m^2)) 
 492 |               if( coef(fit)[3] >= 0 | mm >= -1){
 493 |                 break
 494 |               }
 495 |               mm <- mm + .5
 496 |               f <- m > mm
 497 |               m <- m[f]
 498 |               v <- v[f]
 499 |             }
 500 |             object@background <- list()
 501 |             object@background$vfit <- fit
 502 |             object@background$lvar <- function(x,object) 2**(coef(object@background$vfit)[1] + log2(x)*coef(object@background$vfit)[2] + coef(object@background$vfit)[3] * log2(x)**2)
 503 |             object@background$lsize <- function(x,object) x**2/(max(x + 1e-6,object@background$lvar(x,object)) - x)
 504 | 
 505 |             ### identify outliers
 506 |             out   <- c()
 507 |             stest <- rep(0,length(thr))
 508 |             cprobs <- c()
 509 |             for ( n in 1:max(object@cluster$kpart) ){     
 510 |               if ( sum(object@cluster$kpart == n) == 1 ){ cprobs <- append(cprobs,.5); names(cprobs)[length(cprobs)] <- names(object@cluster$kpart)[object@cluster$kpart == n]; next }
 511 |               x <- object@fdata[,object@cluster$kpart == n]
 512 |               x <- x[apply(x,1,max) > outminc,]
 513 |               z <- t( apply(x,1,function(x){ apply( cbind( pnbinom(round(x,0),mu=mean(x),size=object@background$lsize(mean(x),object)) , 1 - pnbinom(round(x,0),mu=mean(x),size=object@background$lsize(mean(x),object)) ),1, min) } ) )
 514 |               cp <- apply(z,2,function(x){ y <- p.adjust(x,method="BH"); y <- y[order(y,decreasing=FALSE)]; return(y[outlg]);})
 515 |               f <- cp < probthr
 516 |               cprobs <- append(cprobs,cp)
 517 |               if ( sum(f) > 0 ) out <- append(out,names(x)[f])
 518 |               for ( j in 1:length(thr) )  stest[j] <-  stest[j] + sum( cp < thr[j] )  
 519 |             }
 520 |             object@out <-list(out=out,stest=stest,thr=thr,cprobs=cprobs)
 521 | 
 522 |             ### cluster outliers
 523 |             clp2p.cl <- c()
 524 |             cols <- names(object@fdata)
 525 |             cpart <- object@cluster$kpart
 526 |             di   <- as.data.frame(object@distances)
 527 |             for ( i in 1:max(cpart) ) {
 528 |               tcol <- cols[cpart == i]
 529 |               if ( sum(!(tcol %in% out)) > 1 ) clp2p.cl <- append(clp2p.cl,as.vector(t(di[tcol[!(tcol %in% out)],tcol[!(tcol %in% out)]])))
 530 |             }
 531 |             clp2p.cl <- clp2p.cl[clp2p.cl>0]
 532 |               
 533 |             cadd  <- list()
 534 |             if ( length(out) > 0 ){
 535 |               if (length(out) == 1){
 536 |                 cadd <- list(out)
 537 |               }else{
 538 |                 n <- out
 539 |                 m <- as.data.frame(di[out,out])
 540 |                 
 541 |                 for ( i in 1:length(out) ){
 542 |                   if ( length(n) > 1 ){
 543 |                     o   <- order(apply(cbind(m,1:dim(m)[1]),1,function(x)  min(x[1:(length(x)-1)][-x[length(x)]])),decreasing=FALSE)
 544 |                     m <- m[o,o]
 545 |                     n <- n[o]          
 546 |                     f <- m[,1] < quantile(clp2p.cl,outdistquant) | m[,1] == min(clp2p.cl)
 547 |                     ind <- 1  
 548 |                     if ( sum(f) > 1 ) for ( j in 2:sum(f) ) if ( apply(m[f,f][j,c(ind,j)] > quantile(clp2p.cl,outdistquant) ,1,sum) == 0 ) ind <- append(ind,j)
 549 |                     cadd[[i]] <- n[f][ind]
 550 |                     g <- ! n %in% n[f][ind]
 551 |                     n <- n[g]
 552 |                     m <- m[g,g]
 553 |                     if ( sum(g) == 0 ) break
 554 |           
 555 |                   }else if (length(n) == 1){
 556 |                     cadd[[i]] <- n
 557 |                     break
 558 |                   }
 559 |                 }
 560 |               }
 561 |     
 562 |               for ( i in 1:length(cadd) ){
 563 |                 cpart[cols %in% cadd[[i]]] <- max(cpart) + 1
 564 |               }
 565 |             }
 566 | 
 567 |             ### determine final clusters
 568 |             for ( i in 1:max(cpart) ){
 569 |               if ( sum(cpart == i) == 0 ) next
 570 |               f <- cols[cpart == i]
 571 |               d <- object@fdata
 572 |               if ( length(f) == 1 ){
 573 |                 cent <- d[,f]
 574 |               }else{
 575 |                 if ( object@clusterpar$FUNcluster == "kmedoids" ){
 576 |                   x <- apply(as.matrix(dist.gen(t(d[,f]),method=object@clusterpar$metric)),2,mean)
 577 |                   cent <- d[,f[which(x == min(x))[1]]]
 578 |                 }else{
 579 |                   cent <- apply(d[,f],1,mean)
 580 |                 }
 581 |               }
 582 |               if ( i == 1 ) dcent <- data.frame(cent) else dcent <- cbind(dcent,cent)
 583 |               if ( i == 1 ) tmp <- data.frame(apply(d,2,dist.gen.pairs,y=cent,method=object@clusterpar$metric)) else tmp <- cbind(tmp,apply(d,2,dist.gen.pairs,y=cent,method=object@clusterpar$metric))
 584 |             }
 585 |             cpart <- apply(tmp,1,function(x) order(x,decreasing=FALSE)[1])
 586 |             
 587 |             for  ( i in max(cpart):1){if (sum(cpart==i)==0) cpart[cpart>i] <- cpart[cpart>i] - 1 }
 588 | 
 589 |             object@cpart <- cpart
 590 | 
 591 |             set.seed(111111)
 592 |             object@fcol <- sample(rainbow(max(cpart)))
 593 |             return(object)
 594 |           }
 595 |         )
 596 | 
 597 | 
 598 | setGeneric("comptsne", function(object,rseed=15555,sammonmap=FALSE,initial_cmd=TRUE,...) standardGeneric("comptsne"))
 599 | 
 600 | setMethod("comptsne",
 601 |           signature = "SCseq",
 602 |           definition = function(object,rseed,sammonmap,...){
 603 |             if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before comptsne")
 604 |             set.seed(rseed)
 605 |             di <- if ( object@clusterpar$FUNcluster == "kmedoids") as.dist(object@distances) else dist.gen(as.matrix(object@distances))
 606 |             if ( sammonmap ){
 607 |               object@tsne <- as.data.frame(sammon(di,k=2)$points)
 608 |             }else{
 609 |               ts <- if ( initial_cmd ) tsne(di,initial_config=cmdscale(di,k=2),...) else tsne(di,k=2,...)
 610 |               object@tsne <- as.data.frame(ts)
 611 |             }
 612 |             return(object)
 613 |           }
 614 |           )
 615 | 
 616 | setGeneric("clustdiffgenes", function(object,pvalue=.01) standardGeneric("clustdiffgenes"))
 617 | 
 618 | setMethod("clustdiffgenes",
 619 |           signature = "SCseq",
 620 |           definition = function(object,pvalue){
 621 |             if ( length(object@cpart) == 0 ) stop("run findoutliers before clustdiffgenes")
 622 |             if ( ! is.numeric(pvalue) ) stop("pvalue has to be a number between 0 and 1") else if (  pvalue < 0 | pvalue > 1 ) stop("pvalue has to be a number between 0 and 1")
 623 |             cdiff <- list()
 624 |             x     <- object@ndata
 625 |             y     <- object@expdata[,names(object@ndata)]
 626 |             part  <- object@cpart
 627 |             for ( i in 1:max(part) ){
 628 |               if ( sum(part == i) == 0 ) next
 629 |               m <-  if ( sum(part != i) > 1 ) apply(x[,part != i],1,mean) else x[,part != i]
 630 |               n <-  if ( sum(part == i) > 1 ) apply(x[,part == i],1,mean) else x[,part == i]
 631 |               no <- if ( sum(part == i) > 1 ) median(apply(y[,part == i],2,sum))/median(apply(x[,part == i],2,sum)) else sum(y[,part == i])/sum(x[,part == i])
 632 |               m <- m*no
 633 |               n <- n*no
 634 |               pv <- binompval(m/sum(m),sum(n),n)
 635 |               d <- data.frame(mean.ncl=m,mean.cl=n,fc=n/m,pv=pv)[order(pv,decreasing=FALSE),]
 636 |               cdiff[[paste("cl",i,sep=".")]] <- d[d$pv < pvalue,]
 637 |             }
 638 |             return(cdiff)
 639 |           }
 640 |           )
 641 | 
 642 | setGeneric("plotsaturation", function(object,disp=FALSE) standardGeneric("plotsaturation"))
 643 | 
 644 | setMethod("plotsaturation",
 645 |           signature = "SCseq",
 646 |           definition = function(object,disp){
 647 |             if ( length(object@cluster$gap) == 0 ) stop("run clustexp before plotsaturation")
 648 |             g <- object@cluster$gap$Tab[,1]
 649 |             y <- g[-length(g)] - g[-1]
 650 |             mm <- numeric(length(y))
 651 |             nn <- numeric(length(y))
 652 |             for ( i in 1:length(y)){
 653 |               mm[i] <- mean(y[i:length(y)]) 
 654 |               nn[i] <- sqrt(var(y[i:length(y)]))
 655 |             }
 656 |             cln <- max(min(which( y - (mm + nn) < 0 )),1)
 657 |             x <- 1:length(y)
 658 |             if (disp){
 659 |               x <- 1:length(g)
 660 |               plot(x,g,pch=20,col="grey",xlab="k",ylab="log within cluster dispersion")
 661 |               f <- x == cln
 662 |               points(x[f],g[f],col="blue")
 663 |             }else{
 664 |               plot(x,y,pch=20,col="grey",xlab="k",ylab="Change in log within cluster dispersion")
 665 |               points(x,mm,col="red",pch=20)
 666 |               plot.err.bars.y(x,mm,nn,col="red")
 667 |               points(x,y,col="grey",pch=20)
 668 |               f <- x == cln
 669 |               points(x[f],y[f],col="blue")
 670 |             }
 671 |           }
 672 |           )
 673 | 
 674 | setGeneric("plotsilhouette", function(object) standardGeneric("plotsilhouette"))
 675 | 
 676 | setMethod("plotsilhouette",
 677 |           signature = "SCseq",
 678 |           definition = function(object){
 679 |             if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before plotsilhouette")
 680 |             if ( length(unique(object@cluster$kpart)) < 2 ) stop("only a single cluster: no silhouette plot")
 681 |             kpart <- object@cluster$kpart
 682 |             distances  <- if ( object@clusterpar$FUNcluster == "kmedoids" ) as.dist(object@distances) else dist.gen(object@distances)
 683 |             si <- silhouette(kpart,distances)
 684 |             plot(si)
 685 |           }
 686 |           )
 687 | 
 688 | compmedoids <- function(x,part,metric="pearson"){
 689 |   m <- c()
 690 |   for ( i in sort(unique(part)) ){
 691 |     f <- names(x)[part == i]
 692 |     if ( length(f) == 1 ){
 693 |       m <- append(m,f)
 694 |     }else{
 695 |       y <- apply(as.matrix(dist.gen(t(x[,f]),method=metric)),2,mean)
 696 |       m <- append(m,f[which(y == min(y))[1]])
 697 |     }
 698 |   }
 699 |   m
 700 | }
 701 | 
 702 | setGeneric("clustheatmap", function(object,final=FALSE,hmethod="single") standardGeneric("clustheatmap"))
 703 | 
 704 | setMethod("clustheatmap",
 705 |           signature = "SCseq",
 706 |           definition = function(object,final,hmethod){
 707 |             if ( final & length(object@cpart) == 0 ) stop("run findoutliers before clustheatmap")
 708 |             if ( !final & length(object@cluster$kpart) == 0 ) stop("run clustexp before clustheatmap")
 709 |             x <- object@fdata  
 710 |             part <- if ( final ) object@cpart else object@cluster$kpart
 711 |             na <- c()
 712 |             j <- 0
 713 |             for ( i in 1:max(part) ){
 714 |               if ( sum(part == i) == 0 ) next
 715 |               j <- j + 1
 716 |               na <- append(na,i)
 717 |               d <- x[,part == i]
 718 |               if ( sum(part == i) == 1 ) cent <- d else cent <- apply(d,1,mean)
 719 |               if ( j == 1 ) tmp <- data.frame(cent) else tmp <- cbind(tmp,cent)
 720 |             }
 721 |             names(tmp) <- paste("cl",na,sep=".")
 722 |             ld <- if ( object@clusterpar$FUNcluster == "kmedoids" ) dist.gen(t(tmp),method=object@clusterpar$metric) else dist.gen(as.matrix(dist.gen(t(tmp),method=object@clusterpar$metric)))
 723 |             if ( max(part) > 1 )  cclmo <- hclust(ld,method=hmethod)$order else cclmo <- 1
 724 |             q <- part
 725 |             for ( i in 1:max(part) ){
 726 |               q[part == na[cclmo[i]]] <- i
 727 |             }
 728 |             part <- q
 729 |             di <-  if ( object@clusterpar$FUNcluster == "kmedoids" ) object@distances else as.data.frame( as.matrix( dist.gen(t(object@distances)) ) )
 730 |             pto <- part[order(part,decreasing=FALSE)]
 731 |             ptn <- c()
 732 |             for ( i in 1:max(pto) ){ pt <- names(pto)[pto == i]; z <- if ( length(pt) == 1 ) pt else pt[hclust(as.dist(t(di[pt,pt])),method=hmethod)$order]; ptn <- append(ptn,z) }
 733 |             col <- object@fcol
 734 |             mi  <- min(di,na.rm=TRUE)
 735 |             ma  <- max(di,na.rm=TRUE)
 736 |             layout(matrix(data=c(1,3,2,4), nrow=2, ncol=2), widths=c(5,1,5,1), heights=c(5,1,1,1))
 737 |             ColorRamp   <- colorRampPalette(brewer.pal(n = 7,name = "RdYlBu"))(100)
 738 |             ColorLevels <- seq(mi, ma, length=length(ColorRamp))
 739 |             if ( mi == ma ){
 740 |               ColorLevels <- seq(0.99*mi, 1.01*ma, length=length(ColorRamp))
 741 |             }
 742 |             par(mar = c(3,5,2.5,2))
 743 |             image(as.matrix(di[ptn,ptn]),col=ColorRamp,axes=FALSE)
 744 |             abline(0,1)
 745 |             box()
 746 |             
 747 |             tmp <- c()
 748 |             for ( u in 1:max(part) ){
 749 |               ol <- (0:(length(part) - 1)/(length(part) - 1))[ptn %in% names(x)[part == u]]
 750 |               points(rep(0,length(ol)),ol,col=col[cclmo[u]],pch=15,cex=.75)
 751 |               points(ol,rep(0,length(ol)),col=col[cclmo[u]],pch=15,cex=.75)
 752 |               tmp <- append(tmp,mean(ol))
 753 |             }
 754 |             axis(1,at=tmp,lab=cclmo)
 755 |             axis(2,at=tmp,lab=cclmo)
 756 |             par(mar = c(3,2.5,2.5,2))
 757 |             image(1, ColorLevels,
 758 |                   matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
 759 |                   col=ColorRamp,
 760 |                   xlab="",ylab="",
 761 |                   xaxt="n")
 762 |             layout(1)
 763 |             return(cclmo)
 764 |           }
 765 |           )
 766 | 
 767 | 
 768 | 
 769 | 
 770 | 
 771 | 
 772 | ## class definition
 773 | Ltree <- setClass("Ltree", slots = c(sc = "SCseq", ldata = "list", entropy = "vector", trproj = "list", par = "list", prback = "data.frame", prbacka = "data.frame", ltcoord = "matrix", prtree = "list", sigcell = "vector", cdata = "list"  ))
 774 | 
 775 | setValidity("Ltree",
 776 |             function(object) {
 777 |               msg <- NULL
 778 |               if ( class(object@sc)[1] != "SCseq" ){
 779 |                 msg <- c(msg, "input data must be of class SCseq")
 780 |               }
 781 |               if (is.null(msg)) TRUE
 782 |               else msg
 783 |             }
 784 |             )
 785 | 
 786 | setMethod("initialize",
 787 |           signature = "Ltree",
 788 |           definition = function(.Object, sc ){
 789 |             .Object@sc <- sc
 790 |             validObject(.Object)
 791 |             return(.Object)
 792 |           }
 793 |           )
 794 | 
 795 | setGeneric("compentropy", function(object) standardGeneric("compentropy"))
 796 | 
 797 | setMethod("compentropy",
 798 |           signature = "Ltree",
 799 |           definition = function(object){
 800 |             probs   <- t(t(object@sc@ndata)/apply(object@sc@ndata,2,sum))
 801 |             object@entropy <- -apply(probs*log(probs)/log(nrow(object@sc@ndata)),2,sum)
 802 |             return(object)
 803 |           }            
 804 |           )
 805 | 
 806 | 
 807 | compproj <- function(pdiloc,lploc,cnloc,mloc,d=NULL){
 808 |   pd    <- data.frame(pdiloc)
 809 |   k     <- paste("X",sort(rep(1:nrow(pdiloc),length(mloc))),sep="")
 810 |   pd$k  <- paste("X",1:nrow(pdiloc),sep="")
 811 |   pd    <- merge(data.frame(k=k),pd,by="k")
 812 |  
 813 |   if ( is.null(d) ){
 814 |     cnv   <- t(matrix(rep(t(cnloc),nrow(pdiloc)),nrow=ncol(pdiloc)))
 815 |     pdcl  <- paste("X",lploc[as.numeric(sub("X","",pd$k))],sep="")
 816 |     rownames(cnloc) <- paste("X",mloc,sep="")
 817 |     pdcn  <- cnloc[pdcl,]
 818 |     v     <- cnv - pdcn
 819 |   }else{
 820 |     v    <- d$v
 821 |     pdcn <- d$pdcn
 822 |   }
 823 |   w <- pd[,names(pd) != "k"] - pdcn
 824 |   
 825 |   h <- apply(cbind(v,w),1,function(x){
 826 |     x1 <- x[1:(length(x)/2)];
 827 |     x2 <- x[(length(x)/2 + 1):length(x)];
 828 |     x1s <- sqrt(sum(x1**2)); x2s <- sqrt(sum(x2**2)); y <- sum(x1*x2)/x1s/x2s; return( if (x1s == 0 | x2s == 0 ) NA else y ) }) 
 829 |   
 830 |   rma <- as.data.frame(matrix(h,ncol=nrow(pdiloc)))
 831 |   names(rma) <- unique(pd$k)
 832 |   pdclu  <- lploc[as.numeric(sub("X","",names(rma)))]
 833 |   pdclp  <- apply(t(rma),1,function(x) if (sum(!is.na(x)) == 0 ) NA else mloc[which(abs(x) == max(abs(x),na.rm=TRUE))[1]])
 834 |   pdclh  <- apply(t(rma),1,function(x) if (sum(!is.na(x)) == 0 ) NA else x[which(abs(x) == max(abs(x),na.rm=TRUE))[1]])
 835 |   pdcln  <-  names(lploc)[as.numeric(sub("X","",names(rma)))]
 836 |   names(rma) <- pdcln
 837 |   rownames(rma) <- paste("X",mloc,sep="")
 838 |   res    <- data.frame(o=pdclu,l=pdclp,h=pdclh)
 839 |   rownames(res) <- pdcln
 840 |   return(list(res=res[names(lploc),],rma=as.data.frame(t(rma[,names(lploc)])),d=list(v=v,pdcn=pdcn)))
 841 | }
 842 | 
 843 | pdishuffle <- function(pdi,lp,cn,m,all=FALSE){
 844 |   if ( all ){
 845 |     d <- as.data.frame(pdi)
 846 |     y <- t(apply(pdi,1,function(x) runif(length(x),min=min(x),max=max(x))))
 847 |     names(y)    <- names(d)
 848 |     rownames(y) <- rownames(d)
 849 |     return(y)
 850 |   }else{
 851 |     fl <- TRUE
 852 |     for ( i in unique(lp)){
 853 |       if ( sum(lp == i) > 1 ){
 854 |         y <-data.frame( t(apply(as.data.frame(pdi[,lp == i]),1,sample)) )
 855 |       }else{
 856 |         y <- as.data.frame(pdi[,lp == i])
 857 |       }
 858 |       names(y) <- names(lp)[lp == i]
 859 |       rownames(y) <- names(lp)
 860 |       z <- if (fl) y else cbind(z,y)
 861 |       fl <- FALSE
 862 |     }
 863 |     z <- t(z[,names(lp)])
 864 |     return(z)
 865 |   }
 866 | }
 867 | 
 868 | setGeneric("projcells", function(object,cthr=0,nmode=FALSE) standardGeneric("projcells"))
 869 | 
 870 | setMethod("projcells",
 871 |           signature = "Ltree",
 872 |           definition = function(object,cthr,nmode){
 873 |             if ( ! is.numeric(cthr) ) stop( "cthr has to be a non-negative number" ) else if ( cthr < 0 ) stop( "cthr has to be a non-negative number" )
 874 |             if ( ! length(object@sc@cpart == 0) ) stop( "please run findoutliers on the SCseq input object before initializing Ltree" )
 875 |             if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
 876 |          
 877 |             object@par$cthr  <- cthr
 878 |             object@par$nmode <- nmode
 879 |             
 880 |             lp <- object@sc@cpart
 881 |             ld <- object@sc@distances
 882 |             n  <- aggregate(rep(1,length(lp)),list(lp),sum)
 883 |             n  <- as.vector(n[order(n[,1],decreasing=FALSE),-1])
 884 |             m  <- (1:length(n))[n>cthr]
 885 |             f  <- lp %in% m
 886 |             lp <- lp[f]
 887 |             ld <- ld[f,f]
 888 | 
 889 |             pdil <- object@sc@tsne[f,]
 890 |             cnl  <- aggregate(pdil,by=list(lp),median)
 891 |             cnl  <- cnl[order(cnl[,1],decreasing=FALSE),-1]
 892 | 
 893 |             pdi <- suppressWarnings( cmdscale(as.matrix(ld),k=ncol(ld)-1) )
 894 |             cn <- as.data.frame(pdi[compmedoids(object@sc@fdata[,names(lp)],lp),])
 895 |             rownames(cn) <- 1:nrow(cn)
 896 | 
 897 |             x <- compproj(pdi,lp,cn,m)
 898 |             res <- x$res
 899 |            
 900 |             if ( nmode ){
 901 |               rma <- x$rma
 902 |               z <- paste("X",t(as.vector(apply(cbind(lp,ld),1,function(x){ f <- lp != x[1]; lp[f][which(x[-1][f] == min(x[-1][f]))[1]] }))),sep="")
 903 |               k <- apply(cbind(z,rma),1,function(x) (x[-1])[names(rma) == x[1]])
 904 |               rn <- res
 905 |               rn$l <- as.numeric(sub("X","",z))
 906 |               rn$h <- as.numeric(k)
 907 |               res <- rn
 908 |               x$res <- res
 909 |             }
 910 | 
 911 |             object@ldata  <- list(lp=lp,ld=ld,m=m,pdi=pdi,pdil=pdil,cn=cn,cnl=cnl)
 912 |             object@trproj <- x
 913 |             return(object)
 914 |           }
 915 |           )
 916 | 
 917 | 
 918 | 
 919 | 
 920 | 
 921 | setGeneric("projback", function(object,pdishuf=2000,nmode=FALSE,rseed=17000) standardGeneric("projback"))
 922 | 
 923 | setMethod("projback",
 924 |           signature = "Ltree",
 925 |           definition = function(object,pdishuf,nmode,rseed){
 926 |             if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
 927 |             if ( ! is.numeric(pdishuf) ) stop( "pdishuf has to be a non-negative integer number" ) else if ( round(pdishuf) != pdishuf | pdishuf < 0 ) stop( "pdishuf has to be a non-negative integer number" )
 928 |             if ( length(object@trproj) == 0 ) stop("run projcells before projback")
 929 |             object@par$pdishuf  <- pdishuf
 930 |             object@par$rseed    <- rseed
 931 |             
 932 |             if ( ! nmode ){
 933 |               set.seed(rseed)
 934 |               for ( i in 1:pdishuf ){
 935 |                 cat("pdishuffle:",i,"\n")
 936 |                 x <- compproj(pdishuffle(object@ldata$pdi,object@ldata$lp,object@ldata$cn,object@ldata$m,all=TRUE),object@ldata$lp,object@ldata$cn,object@ldata$m,d=object@trproj$d)$res
 937 |                 y <- if ( i == 1 ) t(x) else cbind(y,t(x))
 938 |               }    
 939 |               ##important
 940 |               object@prback <- as.data.frame(t(y))
 941 |               
 942 |               x <- object@prback
 943 |               x$n <- as.vector(t(matrix(rep(1:(nrow(x)/nrow(object@ldata$pdi)),nrow(object@ldata$pdi)),ncol=nrow(object@ldata$pdi))))
 944 |               object@prbacka <- aggregate(data.frame(count=rep(1,nrow(x))),by=list(n=x$n,o=x$o,l=x$l),sum)
 945 |             }
 946 |             return( object )
 947 |           }
 948 |           )
 949 | 
 950 | 
 951 | 
 952 | 
 953 | 
 954 | setGeneric("lineagetree", function(object,pthr=0.01,nmode=FALSE) standardGeneric("lineagetree"))
 955 | 
 956 | setMethod("lineagetree",
 957 |           signature = "Ltree",
 958 |           definition = function(object,pthr,nmode){
 959 |             if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
 960 |             if ( length(object@trproj) == 0 ) stop("run projcells before lineagetree")
 961 |             if ( max(dim(object@prback)) == 0 & ! nmode ) stop("run projback before lineagetree")
 962 |             if ( ! is.numeric(pthr) ) stop( "pthr has to be a non-negative number" ) else if ( pthr < 0 ) stop( "pthr has to be a non-negative number" )
 963 |             object@par$pthr <- pthr
 964 |             cnl    <- object@ldata$cnl
 965 |             pdil   <- object@ldata$pdil
 966 |             cn    <- object@ldata$cn
 967 |             pdi   <- object@ldata$pdi
 968 |             m      <- object@ldata$m
 969 |             lp     <- object@ldata$lp
 970 |             res    <- object@trproj$res
 971 |             rma    <- object@trproj$rma
 972 |             prback <- object@prback
 973 |             
 974 |             cm <- as.matrix(dist(cnl))*0
 975 |             linl <- list()
 976 |             linn <- list()
 977 |             for ( i in 1:length(m) ){
 978 |               for ( j in i:length(m) ){
 979 |                 linl[[paste(m[i],m[j],sep=".")]] <- c()
 980 |                 linn[[paste(m[i],m[j],sep=".")]] <- c()
 981 |               }
 982 |             }
 983 |             sigcell <- c()
 984 |             for ( i in 1:nrow(res) ){
 985 |               a <- which( m == res$o[i])
 986 |               if ( sum( lp == m[a] ) == 1 ){
 987 |                 k <- t(cnl)[,a]
 988 |                 k <- NA
 989 |                 sigcell <- append(sigcell, FALSE)
 990 |               }else{
 991 |                 b <- which(m == res$l[i] )
 992 |                 h <- res$h[i]
 993 |                 if ( nmode ){
 994 |                   sigcell <- append(sigcell, FALSE)
 995 |                 }else{
 996 |                   f <- prback$o == m[a] & prback$l == m[b]
 997 |                   if ( sum(f) > 0 ){
 998 |                     ql <- quantile(prback[f,"h"],pthr,na.rm=TRUE)
 999 |                     qh <- quantile(prback[f,"h"],1 - pthr,na.rm=TRUE)
1000 |                   }else{
1001 |                     ql <- 0
1002 |                     qh <- 0
1003 |                   }
1004 |                   sigcell <- if (is.na(h) ) append(sigcell, NA) else if ( h > qh |  h < min(0,ql) ) append(sigcell, TRUE) else append(sigcell, FALSE)
1005 |                 }
1006 |                 if ( !is.na(res$h[i]) ){
1007 |                   w <- t(pdil)[,i] - t(cnl)[,a]
1008 |                   v <- t(cnl)[,b] - t(cnl)[,a]
1009 |                   
1010 |                   wo <- t(pdi)[,i] - t(cn)[,a]
1011 |                   vo <-  t(cn)[,b] - t(cn)[,a]
1012 |                   df <-( h*sqrt(sum(wo*wo)) )/sqrt(sum(vo*vo))*v
1013 |                   k <- df + t(cnl)[,a]
1014 |                   cm[a,b] <- cm[a,b] + 1
1015 |                   so <- m[sort(c(a,b))]
1016 |                   dfl <-  sign(h)*sqrt( sum( df*df ) )/sqrt(sum(v*v))
1017 |                   if ( a > b ) dfl <-  1 - dfl
1018 |                   linn[[paste(so[1],so[2],sep=".")]] <- append( linn[[paste(so[1],so[2],sep=".")]], rownames(pdi)[i] )
1019 |                   linl[[paste(so[1],so[2],sep=".")]] <- append( linl[[paste(so[1],so[2],sep=".")]], dfl ) 
1020 |                 }else{
1021 |                   k <- t(cnl)[,a]
1022 |                   for ( j in unique(lp[lp != m[a]]) ){
1023 |                     b <- which(j == m)
1024 |                     so <- m[sort(c(a,b))]
1025 |                     dfl <- 0
1026 |                     if ( a > b ) dfl <-  1 - dfl
1027 |                     linn[[paste(so[1],so[2],sep=".")]] <- append( linn[[paste(so[1],so[2],sep=".")]], rownames(pdi)[i] )
1028 |                     linl[[paste(so[1],so[2],sep=".")]] <- append( linl[[paste(so[1],so[2],sep=".")]], dfl ) 
1029 |                   }
1030 |                 }
1031 |               }
1032 |               lt <- if ( i == 1 ) data.frame(k) else cbind(lt,k)
1033 |             }
1034 |             lt <- t(lt)
1035 |             cm <- as.data.frame(cm)
1036 |             names(cm) <- paste("cl",m,sep=".")
1037 |             rownames(cm) <- paste("cl",m,sep=".")
1038 |             lt <- as.data.frame(lt)
1039 |             rownames(lt) <- rownames(res)
1040 |             object@ltcoord <- as.matrix(lt)
1041 |             object@prtree  <- list(n=linn,l=linl)
1042 |             object@cdata$counts <- cm
1043 |             names(sigcell) <- rownames(res)
1044 |             object@sigcell <- sigcell
1045 | 
1046 |             return( object )
1047 |           }
1048 |           )
1049 | 
1050 | 
1051 | 
1052 | 
1053 | 
1054 | setGeneric("comppvalue", function(object,pethr=0.01,nmode=FALSE) standardGeneric("comppvalue"))
1055 | 
1056 | setMethod("comppvalue",
1057 |           signature = "Ltree",
1058 |           definition = function(object,pethr,nmode){
1059 |             if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
1060 |             if ( length(object@prtree) == 0 ) stop("run lineagetree before comppvalue")
1061 |             if ( ! is.numeric(pethr) ) stop( "pethr has to be a non-negative number" ) else if ( pethr < 0 ) stop( "pethr has to be a non-negative number" )
1062 |             object@par$pethr <- pethr
1063 |             cm <- object@cdata$counts
1064 |             cmpv   <- cm*NA
1065 |             cmpvd  <- cm*NA
1066 |             cmbr   <- cm*NA
1067 |             cmpvn  <- cm*NA
1068 |             cmpvnd <- cm*NA
1069 |             cmfr   <- cm/apply(cm,1,sum)
1070 |             if ( nmode ){
1071 |               N <- apply(cm,1,sum) + 1
1072 |               N0 <- sum(N) - N
1073 |               n0 <- t(matrix(rep(N,length(N)),ncol=length(N)))
1074 |               p <- n0/N0
1075 |               n <- cm
1076 |               k <- cbind(N,p,n)
1077 |               cmpv   <- apply(k,1,function(x){N <- x[1]; p <- x[2:( ncol(cm) + 1 )];  n <- x[( ncol(cm) + 2 ):( 2*ncol(cm) + 1)]; apply(cbind(n,p),1,function(x,N) binom.test(x[1],N,min(1,x[2]),alternative="g")$p.value,N=N)})
1078 |               cmpvd   <- apply(k,1,function(x){N <- x[1]; p <- x[2:( ncol(cm) + 1 )];  n <- x[( ncol(cm) + 2 ):( 2*ncol(cm) + 1)]; apply(cbind(n,p),1,function(x,N) binom.test(x[1],N,min(1,x[2]),alternative="l")$p.value,N=N)})
1079 |               cmpvn  <- cmpv
1080 |               cmpvnd <- cmpvd
1081 |               cmbr   <- as.data.frame(n0/N0*N)
1082 |               names(cmbr)    <- names(cm)
1083 |               rownames(cmbr) <- rownames(cm)
1084 |             }else{
1085 |               for ( i in 1:nrow(cm) ){
1086 |                 for ( j in 1:ncol(cm) ){
1087 |                   f <- object@prbacka$o == object@ldata$m[i] & object@prbacka$l == object@ldata$m[j]
1088 |                   x <- object@prbacka$count[f]
1089 |                   if ( sum(f) < object@par$pdishuf ) x <- append(x,rep(0, object@par$pdishuf - sum(f)))
1090 |                   cmbr[i,j]   <- if ( sum(f) > 0 ) mean(x) else 0
1091 |                   cmpv[i,j]   <- if ( quantile(x,1 - pethr) < cm[i,j] ) 0 else 0.5
1092 |                   cmpvd[i,j]  <- if ( quantile(x,pethr) > cm[i,j] ) 0 else 0.5
1093 |                   cmpvn[i,j]  <- sum( x >= cm[i,j])/length(x)
1094 |                   cmpvnd[i,j] <- sum( x <= cm[i,j])/length(x)
1095 |                 }
1096 |               }
1097 |             }
1098 | 
1099 |             diag(cmpv)   <- .5
1100 |             diag(cmpvd)  <- .5
1101 |             diag(cmpvn)  <- NA
1102 |             diag(cmpvnd) <- NA
1103 | 
1104 |             object@cdata$counts.br <- cmbr
1105 |             object@cdata$pv.e <- cmpv
1106 |             object@cdata$pv.d <- cmpvd
1107 |             object@cdata$pvn.e <- cmpvn
1108 |             object@cdata$pvn.d <- cmpvnd
1109 | 
1110 |             m    <- object@ldata$m
1111 |             linl <- object@prtree$l
1112 |             ls   <- as.data.frame(matrix(rep(NA,length(m)**2),ncol=length(m)))
1113 |             names(ls) <- rownames(ls) <- paste("cl",m,sep=".")
1114 |             for ( i in 1:( length(m) - 1 )){
1115 |               for ( j in (i + 1):length(m) ){
1116 |                 na <- paste(m[i],m[j],sep=".")
1117 |                 if ( na %in% names(linl) &  min(cmpv[i,j],cmpv[j,i],na.rm=TRUE) < pethr ){
1118 |                   y <- sort(linl[[na]])
1119 |                   nn <- ( 1 - max(y[-1] - y[-length(y)]) )
1120 |                 }else{
1121 |                   nn <- 0
1122 |                 }
1123 |                 ls[i,j] <- nn
1124 |               }
1125 |             }
1126 |             object@cdata$linkscore <- ls
1127 | 
1128 |             return(object)
1129 |           }
1130 |           )
1131 | 
1132 | setGeneric("plotlinkpv", function(object) standardGeneric("plotlinkpv"))
1133 | 
1134 | setMethod("plotlinkpv",
1135 |           signature = "Ltree",
1136 |           definition = function(object){
1137 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotlinkpv")
1138 |             pheatmap(-log2(object@cdata$pvn.e + 1/object@par$pdishuf/2))
1139 |           }
1140 |           )
1141 | 
1142 | setGeneric("plotlinkscore", function(object) standardGeneric("plotlinkscore"))
1143 | 
1144 | setMethod("plotlinkscore",
1145 |           signature = "Ltree",
1146 |           definition = function(object){
1147 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotlinkscore")
1148 |             pheatmap(object@cdata$linkscore,cluster_rows=FALSE,cluster_cols=FALSE)
1149 |           }
1150 |           )
1151 | 
1152 | setGeneric("plotmapprojections", function(object) standardGeneric("plotmapprojections"))
1153 | 
1154 | setMethod("plotmapprojections",
1155 |           signature = "Ltree",
1156 |           definition = function(object){
1157 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmapprojections")
1158 |          
1159 |             cent <- object@sc@fdata[,compmedoids(object@sc@fdata,object@sc@cpart)]
1160 |             dc <- as.data.frame(1 - cor(cent))
1161 |             names(dc) <- sort(unique(object@sc@cpart))
1162 |             rownames(dc) <- sort(unique(object@sc@cpart))
1163 |             trl <- spantree(dc[object@ldata$m,object@ldata$m])
1164 | 
1165 |             u <- object@ltcoord[,1]
1166 |             v <- object@ltcoord[,2]
1167 |             cnl <- object@ldata$cnl
1168 |             plot(u,v,cex=1.5,col="grey",pch=20,xlab="Dim 1",ylab="Dim 2")
1169 |             for ( i in unique(object@ldata$lp) ){ f <- object@ldata$lp == i; text(u[f],v[f],i,cex=.75,font=4,col=object@sc@fcol[i]) }
1170 |             points(cnl[,1],cnl[,2])
1171 |             text(cnl[,1],cnl[,2],object@ldata$m,cex=2)
1172 |             for ( i in 1:length(trl$kid) ){
1173 |               lines(c(cnl[i+1,1],cnl[trl$kid[i],1]),c(cnl[i+1,2],cnl[trl$kid[i],2]),col="black")
1174 |             }
1175 |           }
1176 |           )
1177 | 
1178 | 
1179 | 
1180 | setGeneric("plotmap", function(object) standardGeneric("plotmap"))
1181 | 
1182 | setMethod("plotmap",
1183 |           signature = "Ltree",
1184 |           definition = function(object){
1185 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmap")
1186 |          
1187 |             cent <- object@sc@fdata[,compmedoids(object@sc@fdata,object@sc@cpart)]
1188 |             dc <- as.data.frame(1 - cor(cent))
1189 |             names(dc) <- sort(unique(object@sc@cpart))
1190 |             rownames(dc) <- sort(unique(object@sc@cpart))
1191 |             trl <- spantree(dc[object@ldata$m,object@ldata$m])
1192 | 
1193 |   
1194 |             u <- object@ldata$pdil[,1]
1195 |             v <- object@ldata$pdil[,2]
1196 |             cnl <- object@ldata$cnl
1197 |             plot(u,v,cex=1.5,col="grey",pch=20,xlab="Dim 1",ylab="Dim 2")
1198 |             for ( i in unique(object@ldata$lp) ){ f <- object@ldata$lp == i; text(u[f],v[f],i,cex=.75,font=4,col=object@sc@fcol[i]) }
1199 |             points(cnl[,1],cnl[,2])
1200 |             text(cnl[,1],cnl[,2],object@ldata$m,cex=2)
1201 |             for ( i in 1:length(trl$kid) ){
1202 |               lines(c(cnl[i+1,1],cnl[trl$kid[i],1]),c(cnl[i+1,2],cnl[trl$kid[i],2]),col="black")
1203 |             }
1204 |           }
1205 |           )
1206 | 
1207 | 
1208 | setGeneric("plottree", function(object,showCells=TRUE,nmode=FALSE,scthr=0) standardGeneric("plottree"))
1209 | 
1210 | setMethod("plottree",
1211 |           signature = "Ltree",
1212 |           definition = function(object,showCells,nmode,scthr){
1213 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmap")
1214 |             if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
1215 |             if ( !is.numeric(showCells) & !is.logical(showCells) ) stop("argument showCells has to be logical (TRUE/FALSE)")
1216 |             if ( ! is.numeric(scthr) ) stop( "scthr has to be a non-negative number" ) else if ( scthr < 0 | scthr > 1 ) stop( "scthr has to be a number between 0 and 1" )
1217 |            
1218 |          
1219 |             ramp <- colorRamp(c("darkgreen", "yellow", "brown"))
1220 |             mcol <- rgb( ramp(seq(0, 1, length = 101)), max = 255)
1221 |             co <- object@cdata
1222 |             fc <- (co$counts/( co$counts.br + .5))*(co$pv.e < object@par$pethr)
1223 |             fc <- fc*(fc > t(fc)) + t(fc)*(t(fc) >= fc)
1224 |             fc <- log2(fc + (fc == 0))
1225 | 
1226 |             k <- -log10(sort(unique(as.vector(t(co$pvn.e))[as.vector(t(co$pv.e))<object@par$pethr])) + 1/object@par$pdishuf)
1227 |             if (length(k) == 1) k <- c(k - k/100,k)
1228 |             mlpv <- -log10(co$pvn.e + 1/object@par$pdishuf)
1229 |             diag(mlpv) <- min(mlpv,na.rm=TRUE)
1230 |             dcc <- t(apply(round(100*(mlpv - min(k))/(max(k) - min(k)),0) + 1,1,function(x){y <- c(); for ( n in x ) y <- append(y,if ( n < 1 ) NA else mcol[n]); y }))
1231 | 
1232 | 
1233 |             cx <- c()
1234 |             cy <- c()
1235 |             va <- c()
1236 |             m <- object@ldata$m
1237 |             for ( i in 1:( length(m) - 1 ) ){
1238 |               for ( j in ( i + 1 ):length(m) ){
1239 |                 if ( min(co$pv.e[i,j],co$pv.e[j,i],na.rm=TRUE) < object@par$pethr ){
1240 |                   if ( mlpv[i,j] > mlpv[j,i] ){
1241 |                     va <- append(va,dcc[i,j])
1242 |                   }else{
1243 |                     va <- append(va,dcc[j,i])
1244 |                   }
1245 |                   cx <- append(cx,i)
1246 |                   cy <- append(cy,j)
1247 |                 }
1248 |               }
1249 |             }
1250 | 
1251 | 
1252 | 
1253 |             cnl <- object@ldata$cnl
1254 |             u <- object@ltcoord[,1]
1255 |             v <- object@ltcoord[,2]
1256 |             layout( cbind(c(1, 1), c(2, 3)),widths=c(5,1,1),height=c(5,5,1))
1257 |             par(mar = c(12,5,1,1))
1258 | 
1259 |             if ( showCells ){
1260 |               plot(u,v,cex=1.5,col="grey",pch=20,xlab="Dim 1",ylab="Dim 2")
1261 |               if ( !nmode ) points(u[object@sigcell],v[object@sigcell],col="black")
1262 |             }else{
1263 |               plot(u,v,cex=0,xlab="Dim 1",ylab="Dim 2")
1264 |             }
1265 |     
1266 |             if ( length(va) > 0 ){
1267 |               f <- order(va,decreasing=TRUE)
1268 |               for ( i in 1:length(va) ){
1269 |                 if ( object@cdata$linkscore[cx[i],cy[i]] > scthr ){
1270 |                   if ( showCells ){
1271 |                     lines(cnl[c(cx[i],cy[i]),1],cnl[c(cx[i],cy[i]),2],col=va[i],lwd=2)
1272 |                   }else{
1273 |                     ##nn <- min(10,fc[cx[i],cy[i]])
1274 |                     lines(cnl[c(cx[i],cy[i]),1],cnl[c(cx[i],cy[i]),2],col=va[i],lwd=5*object@cdata$linkscore[cx[i],cy[i]])
1275 |                   }
1276 |                 }
1277 |               }
1278 |             }
1279 | 
1280 | 
1281 | 
1282 |             en <- aggregate(object@entropy,list(object@sc@cpart),median)
1283 |             en <- en[en$Group.1 %in% m,]
1284 |     
1285 |             mi <- min(en[,2],na.rm=TRUE)
1286 |             ma <- max(en[,2],na.rm=TRUE)
1287 |             w <- round((en[,2] - mi)/(ma - mi)*99 + 1,0)
1288 |             ramp <- colorRamp(c("red","orange", "pink","purple", "blue"))
1289 |             ColorRamp <- rgb( ramp(seq(0, 1, length = 101)), max = 255)
1290 |             ColorLevels <- seq(mi, ma, length=length(ColorRamp))
1291 |             if ( mi == ma ){
1292 |               ColorLevels <- seq(0.99*mi, 1.01*ma, length=length(ColorRamp))
1293 |             }
1294 |             for ( i in m ){
1295 |               f <- en[,1] == m
1296 |               points(cnl[f,1],cnl[f,2],cex=5,col=ColorRamp[w[f]],pch=20)
1297 |             }
1298 |             text(cnl[,1],cnl[,2],m,cex=1.25,font=4,col="white")
1299 |             par(mar = c(5, 4, 1, 2))
1300 |             image(1, ColorLevels,
1301 |                   matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
1302 |                   col=ColorRamp,
1303 |                   xlab="",ylab="",
1304 |                   xaxt="n")
1305 |             coll <- seq(min(k), max(k), length=length(mcol))
1306 |             image(1, coll,
1307 |                   matrix(data=coll, ncol=length(mcol),nrow=1),
1308 |                   col=mcol,
1309 |                   xlab="",ylab="",
1310 |                   xaxt="n")
1311 |             layout(1)
1312 |           }
1313 |           )
1314 | 
1315 | 
1316 | setGeneric("plotdistanceratio", function(object) standardGeneric("plotdistanceratio"))
1317 | 
1318 | setMethod("plotdistanceratio",
1319 |           signature = "Ltree",
1320 |           definition = function(object){
1321 |             if ( length(object@ldata) <= 0 ) stop("run projcells before plotdistanceratio")
1322 |             l <- as.matrix(dist(object@ldata$pdi))
1323 |             z <- (l/object@ldata$ld)
1324 |             hist(log2(z),breaks=100,xlab=" log2 emb. distance/distance",main="")
1325 |           }
1326 |           )
1327 | 
1328 | 
1329 | setGeneric("getproj", function(object,i) standardGeneric("getproj"))
1330 | 
1331 | setMethod("getproj",
1332 |           signature = "Ltree",
1333 |           definition = function(object,i){
1334 |             if ( length(object@ldata) <= 0 ) stop("run projcells before plotdistanceratio")
1335 |             if ( ! i %in% object@ldata$m )  stop(paste("argument i has to be one of",paste(object@ldata$m,collapse=",")))
1336 |             x <- object@trproj$rma[names(object@ldata$lp)[object@ldata$lp == i],]
1337 |             x <- x[,names(x) != paste("X",i,sep="")]
1338 |             f <- !is.na(x[,1])
1339 |             x <- x[f,]
1340 |             if ( nrow(x) > 1 ){
1341 |               y <- x
1342 |               y <- as.data.frame(t(apply(y,1,function(x) (x - mean(x))/sqrt(var(x)))))
1343 |             }
1344 |             names(x) = sub("X","cl.",names(x))
1345 |             names(y) = sub("X","cl.",names(y))
1346 |             return(list(pr=x,prz=y))
1347 |           }
1348 |           )
1349 | 
1350 | 
1351 | setGeneric("projenrichment", function(object) standardGeneric("projenrichment"))
1352 | 
1353 | setMethod("projenrichment",
1354 |           signature = "Ltree",
1355 |           definition = function(object){
1356 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmap")
1357 |             
1358 |             ze <- ( object@cdata$pv.e < object@par$pethr | object@cdata$pv.d < object@par$pethr) * (object@cdata$counts + .1)/( object@cdata$counts.br + .1 )
1359 |             pheatmap(log2(ze + ( ze == 0 ) ),cluster_rows=FALSE,cluster_cols=FALSE)
1360 |           }
1361 |           )
1362 | 
1363 | 
1364 | 
1365 | 
1366 | 
1367 | setGeneric("compscore", function(object,nn=1) standardGeneric("compscore"))
1368 | 
1369 | setMethod("compscore",
1370 |           signature = "Ltree",
1371 |           definition = function(object,nn){
1372 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before compscore")
1373 |             if ( ! is.numeric(nn) ) stop( "nn has to be a non-negative integer number" ) else if ( round(nn) != nn | nn < 0 ) stop( "nn has to be a non-negative integer number" )
1374 |             x <- object@cdata$counts*(object@cdata$pv.e < object@par$pethr)>0
1375 |             y <- x | t(x)
1376 |             
1377 |             if ( max(y) > 0 ){
1378 |               z <- apply(y,1,sum)
1379 |               nl <- list()
1380 |               n <- list()
1381 |               for ( i in 1:nn ){
1382 |                 if ( i == 1 ){
1383 |                   n[[i]] <- as.list(apply(y,1,function(x) grep(TRUE,x)))
1384 |                   nl <- data.frame( apply(y,1,sum) )
1385 |                 }
1386 |                 if ( i > 1 ){
1387 |                   v <- rep(0,nrow(nl))
1388 |                   n[[i]] <- list()
1389 |                   for ( j in 1:length(n[[i-1]]) ){
1390 |                     cl <- n[[i-1]][[j]]
1391 |                     if ( length(cl) == 0 ){
1392 |                       n[[i]][[paste("d",j,sep="")]] <- NA
1393 |                       v[j] <- 0
1394 |                     }else{
1395 |                       k  <- if ( length(cl) > 1 ) apply(y[cl,],2,sum) > 0 else if ( length(cl) == 1 ) y[cl,]
1396 |                       n[[i]][[paste("d",j,sep="")]] <- sort(unique(c(cl,grep(TRUE,k))))
1397 |                       v[j] <- length(n[[i]][[paste("d",j,sep="")]])
1398 |                     }
1399 |                   }
1400 |                   names(n[[i]]) <- names(z)
1401 |                   nl <- cbind(nl,v)
1402 |           
1403 |                 }
1404 |               }
1405 |               x <- nl[,i]
1406 |               names(x) <- rownames(nl)
1407 |             }else{
1408 |               x <- rep(0,length(object@ldata$m))
1409 |               names(x) <- paste("cl",object@ldata$m,sep=".")
1410 |             }
1411 |             
1412 |             v <- aggregate(object@entropy,list(object@sc@cpart),median)
1413 |             v <- v[v$Group.1 %in% object@ldata$m,]
1414 |             w <- as.vector(v[,-1])
1415 |             names(w) <- paste("cl.",v$Group.1,sep="")
1416 |             w <- w - min(w)
1417 |             
1418 |             return(list(links=x,entropy=w,StemIDscore=x*w))
1419 |           }
1420 |           )
1421 | 
1422 | 
1423 | 
1424 | 
1425 | setGeneric("plotscore", function(object,nn=1) standardGeneric("plotscore"))
1426 | 
1427 | setMethod("plotscore",
1428 |           signature = "Ltree",
1429 |           definition = function(object,nn){
1430 |             if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotscore")
1431 |             x <- compscore(object,nn)
1432 |             layout(1:3)
1433 |             barplot(x$links,names.arg=sub("cl\\.","",object@ldata$m),xlab="Cluster",ylab="Number of links",cex.names=1)
1434 |             barplot(x$entropy,names.arg=sub("cl\\.","",object@ldata$m),xlab="Cluster",ylab="Delta-Entropy",cex.names=1)
1435 |             barplot(x$StemIDscore,names.arg=sub("cl\\.","",object@ldata$m),xlab="Cluster",ylab="Number of links * Delta-Entropy",cex.names=1)
1436 |             layout(1)
1437 |           }
1438 |           )
1439 | 
1440 | 
1441 | 
1442 | 
1443 | setGeneric("branchcells", function(object,br) standardGeneric("branchcells"))
1444 | 
1445 | setMethod("branchcells",
1446 |           signature = "Ltree",
1447 |           definition = function(object,br){
1448 |             if ( length(object@ldata) <= 0 ) stop("run projcells before branchcells")
1449 |             msg <- paste("br needs to be list of length two containing two branches, where each has to be one of", paste(names(object@prtree$n),collapse=","))
1450 |             if ( !is.list(br) ) stop(msg) else if ( length(br) != 2 ) stop(msg) else if ( ! br[[1]] %in% names(object@prtree$n) | ! br[[2]] %in% names(object@prtree$n) ) stop(msg)
1451 | 
1452 |              
1453 |             n <- list()
1454 |             scl <- object@sc
1455 |             k <- c()
1456 |             cl <- intersect( as.numeric(strsplit(br[[1]],"\\.")[[1]]), as.numeric(strsplit(br[[2]],"\\.")[[1]]))
1457 |             if ( length(cl) == 0 ) stop("the two branches in br need to have one cluster in common.")
1458 |                       
1459 |             for ( i in 1:length(br) ){
1460 |               f <- object@sc@cpart[ object@prtree$n[[br[[i]]]] ] %in% cl
1461 |               if ( sum(f) > 0 ){
1462 |                 n[[i]] <- names(object@sc@cpart[ object@prtree$n[[br[[i]]]] ])[f]
1463 |                 k <- append(k, max( scl@cpart ) + 1)
1464 |                 scl@cpart[n[[i]]] <- max( scl@cpart ) + 1
1465 |               }else{
1466 |                 stop(paste("no cells on branch",br[[i]],"fall into clusters",cl))
1467 |               }
1468 |             }
1469 |             set.seed(111111)
1470 |             scl@fcol <- sample(rainbow(max(scl@cpart)))
1471 |             z <- diffgenes(scl,k[1],k[2])
1472 |             return( list(n=n,scl=scl,k=k,diffgenes=z) )
1473 |           }
1474 |           )
1475 | 
1476 | 
1477 | 
1478 | 


--------------------------------------------------------------------------------
/RaceID2_StemID_sample.R:
--------------------------------------------------------------------------------
  1 | ## install required packages (only at first time)
  2 | install.packages(c("tsne","pheatmap","MASS","cluster","mclust","flexmix","lattice","fpc","RColorBrewer","permute","amap","locfit","vegan"))
  3 | 
  4 | ## load class definition and functions
  5 | source("RaceID2_StemID_class.R")
  6 | 
  7 | ## input data
  8 | x <- read.csv("transcript_counts_intestine_5days_YFP.xls",sep="\t",header=TRUE)
  9 | rownames(x) <- x$GENEID
 10 | # prdata: data.frame with transcript counts for all genes (rows) in all cells (columns); with rownames == gene ids; remove ERCC spike-ins 
 11 | prdata <- x[grep("ERCC",rownames(x),invert=TRUE),-1]
 12 | 
 13 | ## RaceID2
 14 | # initialize SCseq object with transcript counts
 15 | sc <- SCseq(prdata)
 16 | # filtering of expression data
 17 | sc <- filterdata(sc, mintotal=3000, minexpr=5, minnumber=1, maxexpr=500, downsample=TRUE, dsn=1, rseed=17000)
 18 | # k-medoids clustering
 19 | sc <- clustexp(sc,clustnr=30,bootnr=50,metric="pearson",do.gap=FALSE,sat=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000,FUNcluster="kmedoids")
 20 | # compute t-SNE map
 21 | sc <- comptsne(sc,rseed=15555)
 22 | # detect outliers and redefine clusters
 23 | sc <- findoutliers(sc, outminc=5,outlg=2,probthr=1e-3,thr=2**-(1:40),outdistquant=.95)
 24 | 
 25 | ## diagnostic plots
 26 | # gap statistics: only if do.gap == TRUE
 27 | ##plotgap(sc)
 28 | # plot within-cluster dispersion as a function of the cluster number: only if sat == TRUE
 29 | plotsaturation(sc,disp=TRUE)
 30 | # plot change of the within-cluster dispersion as a function of the cluster number: only if sat == TRUE
 31 | plotsaturation(sc)
 32 | # silhouette of k-medoids clusters
 33 | plotsilhouette(sc)
 34 | # Jaccard's similarity of k-medoids clusters
 35 | plotjaccard(sc)
 36 | # barchart of outlier probabilities
 37 | plotoutlierprobs(sc)
 38 | # regression of background model
 39 | plotbackground(sc)
 40 | # dependence of outlier number on probability threshold (probthr)
 41 | plotsensitivity(sc)
 42 | # heatmap of k-medoids cluster
 43 | clustheatmap(sc,final=FALSE,hmethod="single")
 44 | # heatmap of final cluster
 45 | clustheatmap(sc,final=TRUE,hmethod="single")
 46 | # highlight k-medoids clusters in t-SNE map
 47 | plottsne(sc,final=FALSE)
 48 | # highlight final clusters in t-SNE map
 49 | plottsne(sc,final=TRUE)
 50 | # highlight cell labels in t-SNE map
 51 | plotlabelstsne(sc,labels=sub("(\\_\\d+)","",names(sc@ndata)))
 52 | # highlight groups of cells by symbols in t-SNE map
 53 | plotsymbolstsne(sc,types=sub("(\\_\\d+)$","", names(sc@ndata)))
 54 | # highlight transcirpt counts of a set of genes in t-SNE map, e. g. all Apoa genes
 55 | g <- c("Apoa1__chr9", "Apoa1bp__chr3", "Apoa2__chr1", "Apoa4__chr9", "Apoa5__chr9")
 56 | plotexptsne(sc,g,n="Apoa genes",logsc=TRUE)
 57 | 
 58 | ## identification of marker genes
 59 | # differentially regulated genes in each cluster compared to the full ensemble
 60 | cdiff <- clustdiffgenes(sc,pvalue=.01)
 61 | 
 62 | ## write results to text files
 63 | # final clusters 
 64 | x <- data.frame(CELLID=names(sc@cpart),cluster=sc@cpart)
 65 | write.table(x[order(x$cluster,decreasing=FALSE),],"cell_clust.xls",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)
 66 |   
 67 | # differentially expressed genes in cluster
 68 | for ( n in names(cdiff) ) write.table(data.frame(GENEID=rownames(cdiff[[n]]),cdiff[[n]]),paste(paste("cell_clust_diff_genes",sub("\\.","\\_",n),sep="_"),".xls",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)
 69 | 
 70 | # differentially expressed genes between two sets of clusters, e. g. cluster 1 and clusters 2,3
 71 | d <- diffgenes(sc,cl1=1,cl2=c(2,3),mincount=5)
 72 | plotdiffgenes(d,gene=names(d$z)[1])
 73 | 
 74 | 
 75 | ## StemID
 76 | 
 77 | # initialization
 78 | ltr <- Ltree(sc)
 79 | # computation of the entropy
 80 | ltr <- compentropy(ltr)
 81 | # computation of the projections for all cells
 82 | ltr <- projcells(ltr,cthr=2,nmode=FALSE)
 83 | # computation of the projections for all cells after randomization
 84 | ltr <- projback(ltr,pdishuf=2000,nmode=FALSE,rseed=17000)
 85 | # assembly of the lineage tree
 86 | ltr <- lineagetree(ltr,pthr=0.01,nmode=FALSE)
 87 | # determination of significant differentiation trajectories
 88 | ltr <- comppvalue(ltr,pethr=0.01,nmode=FALSE)
 89 | 
 90 | ## diagnostic plots
 91 | # histogram of ratio between cell-to-cell distances in the embedded and the input space
 92 | plotdistanceratio(ltr)
 93 | # t-SNE map of the clusters with more than cthr cells including a minimum spanning tree for the cluster medoids
 94 | plotmap(ltr)
 95 | # visualization of the projections in t-SNE space overlayed with a minimum spanning tree connecting the cluster medoids
 96 | plotmapprojections(ltr)
 97 | # lineage tree showing the projections of all cells in t-SNE space
 98 | plottree(ltr,showCells=TRUE,nmode=FALSE,scthr=0)
 99 | # lineage tree without showing the projections of all cells
100 | plottree(ltr,showCells=FALSE,nmode=FALSE,scthr=0)
101 | # heatmap of the enrichment p-values for all inter-cluster links
102 | plotlinkpv(ltr)
103 | # heatmap of the link score for all inter-cluster links
104 | plotlinkscore(ltr)
105 | # heatmap showing the fold enrichment (or depletion) for significantly enriched or depleted links
106 | projenrichment(ltr)
107 | 
108 | ## extract projections onto all links for all cells in a given cluster i
109 | x <- getproj(ltr,i=1)
110 | # heatmap of all projections for cluster i
111 | pheatmap(x$pr)
112 | # heatmap of z-score for all projections for cluster i
113 | pheatmap(x$prz)
114 | 
115 | ## extracting all cells on two branches sharing the same cluster and computing differentially expressed genes between these two branches
116 | x <- branchcells(ltr,list("1.3","1.2"))
117 | # z-scores for differentially expressed genes
118 | head(x$diffgenes$z)
119 | # plotting the cells on the two branches as additional clusters in the t-SNE map
120 | plottsne(x$scl)
121 | 
122 | 
123 | ## computing the StemID score
124 | x <- compscore(ltr,nn=1)
125 | #plotting the StemID score
126 | plotscore(ltr,1)
127 | 
128 |  
129 | 


--------------------------------------------------------------------------------
/Reference_manual_RaceID2_StemID.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgrun/StemID/edee40e9a253ea1e57fbdce1df9480d40007f6c7/Reference_manual_RaceID2_StemID.pdf


--------------------------------------------------------------------------------