├── data
    ├── Data1.rda
    ├── Data2.rda
    ├── dataL.rda
    └── label.rda
├── man
    ├── label.Rd
    ├── internal.Rd
    ├── Data1.Rd
    ├── Data2.Rd
    ├── dataL.Rd
    ├── standardNormalization.Rd
    ├── chiDist2.Rd
    ├── dist2.Rd
    ├── calNMI.Rd
    ├── concordanceNetworkNMI.Rd
    ├── affinityMatrix.Rd
    ├── plotAlluvial.Rd
    ├── groupPredict.Rd
    ├── spectralClustering.Rd
    ├── rankFeaturesByNMI.Rd
    ├── displayClusters.Rd
    ├── SNF.Rd
    ├── estimateNumberOfClustersGivenGraph.Rd
    ├── getColorsForGroups.Rd
    ├── heatmapPlus.Rd
    └── displayClustersWithHeatmap.Rd
├── R
    ├── standardNormalization.R
    ├── calNMI.R
    ├── chiDist2.R
    ├── displayClusters.R
    ├── concordanceNetworkNMI.R
    ├── dist2.R
    ├── affinityMatrix.R
    ├── getColorsForGroups.R
    ├── spectralClustering.r
    ├── displayClustersWithHeatmap.R
    ├── groupPredict.r
    ├── plotAlluvial.R
    ├── rankFeaturesByNMI.R
    ├── estimateNumberOfClustersGivenGraph.R
    ├── SNF.R
    ├── internal.R
    └── heatmapPlus.R
├── NAMESPACE
├── DESCRIPTION
├── MD5
└── README


/data/Data1.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/SNFtool/HEAD/data/Data1.rda


--------------------------------------------------------------------------------
/data/Data2.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/SNFtool/HEAD/data/Data2.rda


--------------------------------------------------------------------------------
/data/dataL.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/SNFtool/HEAD/data/dataL.rda


--------------------------------------------------------------------------------
/data/label.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/SNFtool/HEAD/data/label.rda


--------------------------------------------------------------------------------
/man/label.Rd:
--------------------------------------------------------------------------------
 1 | \name{label}
 2 | \alias{label}
 3 | \docType{data}
 4 | \title{
 5 | Labels for dataL dataset
 6 | }
 7 | \description{
 8 | The ground truth for dataL dataset
 9 | }
10 | \usage{
11 | data(label)
12 | }
13 | \format{
14 |   The format is:
15 |  int [1:600] 1 1 1 1 1 1 1 1 1 1 ...
16 | }
17 | \examples{
18 | data(label)
19 | }
20 | \keyword{datasets}
21 | 


--------------------------------------------------------------------------------
/man/internal.Rd:
--------------------------------------------------------------------------------
 1 | \name{SNFtool-internal} 
 2 | \alias{.csPrediction} 
 3 | \alias{.discretisation} 
 4 | \alias{.discretisationEigenVectorData} 
 5 | \alias{.dominateset} 
 6 | \alias{.mutualInformation} 
 7 | \alias{.entropy} 
 8 | \title{Internal SNFtool Functions} 
 9 | \description{ 
10 |           Internal SNFtool functions 
11 | } 
12 | \details{ 
13 |           These are not to be called by the user. 
14 | } 
15 | \keyword{ internal } 
16 | 


--------------------------------------------------------------------------------
/R/standardNormalization.R:
--------------------------------------------------------------------------------
 1 | standardNormalization <- function(x) {
 2 |     # Centers and scales every column of x to mean 0 and standard deviation 1.
 3 |     # A column whose standard deviation is exactly 0 is only centered: its
 4 |     # divisor is replaced by 1 so no division by zero takes place.
 5 |     #
 6 |     # Args:
 7 |     #   x: Matrix (or any object that as.matrix() can convert)
 8 |     #
 9 |     # Returns:
10 |     #   Matrix with the same dimensions as x holding the normalized columns
11 | 
12 |     x <- as.matrix(x)
13 |     colCentres <- apply(x, 2, mean)
14 |     colScales <- apply(x, 2, sd)
15 |     colScales[colScales == 0] <- 1
16 | 
17 |     # Work on t(x) so the per-column vectors recycle down the right axis
18 |     centred <- t(x) - colCentres
19 |     scaled <- centred / colScales
20 | 
21 |     return(t(scaled))
22 | }
18 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | importFrom("grDevices", "grey")
 2 | importFrom("graphics", "image")
 3 | importFrom("stats", "dnorm", "heatmap", "median")
 4 | importFrom("ExPosition", "chi2Dist")
 5 | importFrom("alluvial","alluvial")
 6 | importFrom("graphics", "axis", "frame", "layout", "mtext", "par", "title")
 7 | importFrom("stats", "as.dendrogram", "dist", "hclust", "order.dendrogram", "reorder", "sd")
 8 | importFrom("graphics", "plot")
 9 | exportPattern("^[^\\.]") 
10 | 
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/man/Data1.Rd:
--------------------------------------------------------------------------------
 1 | \name{Data1}
 2 | \alias{Data1}
 3 | \docType{data}
 4 | \title{
 5 | Data1
 6 | }
 7 | \description{
 8 | Data1 dataset used to demonstrate the use of SNFtool. 
 9 | }
10 | \usage{
11 | data(Data1)
12 | }
13 | \format{
14 | A data frame with 200 observations on the following 2 variables.
15 |   \describe{
16 |     \item{\code{V1}}{a numeric vector}
17 |     \item{\code{V2}}{a numeric vector}
18 |   }
19 | }
20 | \examples{
21 | data(Data1)
22 | }
23 | \keyword{datasets}
24 | 


--------------------------------------------------------------------------------
/man/Data2.Rd:
--------------------------------------------------------------------------------
 1 | \name{Data2}
 2 | \alias{Data2}
 3 | \docType{data}
 4 | \title{
 5 | Data2
 6 | }
 7 | \description{
 8 | Data2 dataset used to demonstrate the use of SNFtool. 
 9 | }
10 | \usage{
11 | data(Data2)
12 | }
13 | \format{
14 | A data frame with 200 observations on the following 2 variables.
15 |   \describe{
16 |     \item{\code{V3}}{a numeric vector}
17 |     \item{\code{V4}}{a numeric vector}
18 |   }
19 | }
20 | \examples{
21 | data(Data2)
22 | }
23 | \keyword{datasets}
24 | 


--------------------------------------------------------------------------------
/R/calNMI.R:
--------------------------------------------------------------------------------
 1 | calNMI <- function(x, y) {
 2 |     # Normalized mutual information (NMI) of two vectors: their mutual
 3 |     # information divided by the geometric mean of their entropies.
 4 |     #
 5 |     # Args:
 6 |     #   x: a vector
 7 |     #   y: a vector
 8 |     #
 9 |     # Returns:
10 |     #   The NMI between x and y, floored at 0. A ratio that is NA/NaN is
11 |     #   discarded, so 0 is returned in that case.
12 | 
13 |     x <- as.vector(x)
14 |     y <- as.vector(y)
15 | 
16 |     sharedInfo <- .mutualInformation(x, y)
17 |     normaliser <- sqrt(.entropy(x) * .entropy(y))
18 |     nmi <- sharedInfo / normaliser
19 | 
20 |     # na.rm=TRUE removes an undefined ratio, leaving only the 0 floor
21 |     return(max(0, nmi, na.rm=TRUE))
22 | }
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/R/chiDist2.R:
--------------------------------------------------------------------------------
 1 | chiDist2 <- function(A){
 2 |     # Pairwise chi-square distances between the rows of a matrix.
 3 |     # Thin wrapper around chi2Dist from the 'ExPosition' package that keeps
 4 |     # only the D component of its result.
 5 |     #
 6 |     # Args:
 7 |     #   A: Matrix with rows representing samples
 8 |     #
 9 |     # Returns:
10 |     #   The D element returned by chi2Dist(A): an NxN matrix where N is the
11 |     #       number of rows in A and element i,j is the chi-square distance
12 |     #       between A[i,] and A[j,].
13 | 
14 |     chiResult <- chi2Dist(A)
15 |     return(chiResult$D)
16 | }
15 | 


--------------------------------------------------------------------------------
/R/displayClusters.R:
--------------------------------------------------------------------------------
 1 | displayClusters <- function(W, group) {
 2 |     # Draws an affinity matrix as a grey-scale image with the samples
 3 |     # reordered so that members of the same group are adjacent.
 4 |     #
 5 |     # Args:
 6 |     #   W: Affinity matrix
 7 |     #   group: labels for each row/column in W
 8 |     #
 9 |     # Returns:
10 |     #   The value of the image() call; the function is used for its plot
11 | 
12 |     # Permutation that sorts the samples by their group label
13 |     sampleOrder <- sort(as.vector(group), index.return=TRUE)$ix
14 | 
15 |     # Remove self-affinity, make each row sum to 1, then symmetrize
16 |     diag(W) <- 0
17 |     W <- W / rowSums(W)
18 |     W <- W + t(W)
19 | 
20 |     image(1:ncol(W), 1:nrow(W), W[sampleOrder, sampleOrder],
21 |         col = grey(100:0 / 100), xlab = 'Patients', ylab = 'Patients')
22 | }
23 | 


--------------------------------------------------------------------------------
/man/dataL.Rd:
--------------------------------------------------------------------------------
 1 | \name{dataL}
 2 | \alias{dataL}
 3 | \docType{data}
 4 | \title{
 5 | dataL
 6 | }
 7 | \description{
 8 | Dataset used to provide an example of predicting the new labels with label propagation.
 9 | }
10 | \usage{
11 | data(dataL)
12 | }
13 | \format{
14 |   The format is:
15 | List of 2
16 |  $ : num [1:600, 1:76] 0.0659 0.0491 0.0342 0.0623 0.062 ...
17 |   ..- attr(*, "dimnames")=List of 2
18 |   .. ..$ : chr [1:600] "V1" "V2" "V3" "V4" ...
19 |   .. ..$ : NULL
20 |  $ : int [1:600, 1:240] 0 0 0 0 0 0 0 0 0 0 ...
21 |   ..- attr(*, "dimnames")=List of 2
22 |   .. ..$ : chr [1:600] "V1" "V2" "V3" "V4" ...
23 |   .. ..$ : NULL
24 | }
25 | \examples{
26 | data(dataL)
27 | }
28 | \keyword{datasets}
29 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: SNFtool
 2 | Type: Package
 3 | Title: Similarity Network Fusion
 4 | Version: 2.3.1
 5 | Date: 2021-06-10
 6 | Author: Bo Wang, Aziz Mezlini, Feyyaz Demir, Marc Fiume, Zhuowen Tu, Michael Brudno, Benjamin Haibe-Kains, Anna Goldenberg
 7 | Maintainer: Benjamin Brew <goldenberglab@gmail.com>
 8 | Imports: ExPosition, alluvial
 9 | Description: Similarity Network Fusion takes multiple views of a network and fuses them together to construct an overall status matrix. The input to our algorithm can be feature vectors, pairwise distances, or pairwise similarities. The learned status matrix can then be used for retrieval, clustering, and classification.
10 | License: GPL
11 | NeedsCompilation: no
12 | Repository: CRAN
13 | Date/Publication: 2021-06-11 08:40:15 UTC
14 | Packaged: 2021-06-10 21:44:03 UTC; benbrew88
15 | 


--------------------------------------------------------------------------------
/R/concordanceNetworkNMI.R:
--------------------------------------------------------------------------------
 1 | concordanceNetworkNMI <- function(Wall,C) {
 2 |     # Calculates all pairwise NMIs between the spectral clusterings of the
 3 |     #   matrices in Wall.
 4 |     #
 5 |     # Args:
 6 |     #   Wall: List of affinity matrices
 7 |     #   C: Number of clusters
 8 |     #
 9 |     # Returns:
10 |     #   A nxn matrix (n = length(Wall)) whose element [i,j] is the NMI between
11 |     #       the cluster assignments made by spectral clustering on Wall[[i]]
12 |     #       and on Wall[[j]]. An empty Wall gives a 0x0 matrix.
13 | 
14 |     # Get the cluster labels for each of the networks
15 |     labels <- lapply(Wall, function(x) spectralClustering(x, C))
16 | 
17 |     # Calculate the NMI between each pair of clusterings.
18 |     # seq_len() instead of 1:LW so that an empty list skips the loops rather
19 |     # than indexing labels[[1]] and labels[[0]].
20 |     LW <- length(Wall)
21 |     NMIs <- matrix(NA, LW, LW)
22 |     for (i in seq_len(LW)) {
23 |         for (j in seq_len(LW)) {
24 |             NMIs[i, j] <- calNMI(labels[[i]], labels[[j]])
25 |         }
26 |     }
27 | 
28 |     return(NMIs)
29 | }
27 | 


--------------------------------------------------------------------------------
/R/dist2.R:
--------------------------------------------------------------------------------
 1 | dist2 <- function(X,C) {
 2 |     # Squared euclidean distance between every row of X and every row of C,
 3 |     #   computed from the expansion |x - c|^2 = |x|^2 + |c|^2 - 2 * (x . c).
 4 |     #
 5 |     # Args:
 6 |     #   X: Matrix with each row representing a single data point (or patient)
 7 |     #   C: Matrix with each row representing a single data point (or patient)
 8 |     #
 9 |     # Returns:
10 |     #   A NxM matrix where nrow(X) == N and nrow(C) == M. Element [n,m]
11 |     #       is the squared euclidean distance between rows X[n,] and C[m,].
12 | 
13 |     nPoints <- nrow(X)
14 |     nCentres <- nrow(C)
15 | 
16 |     # Squared row norms of X laid out down the columns, those of C along the rows
17 |     normX <- matrix(rep(rowSums(X^2), times=nCentres), nPoints, nCentres)
18 |     normC <- t(matrix(rep(rowSums(C^2), times=nPoints), nCentres, nPoints))
19 |     crossTerm <- 2 * (X %*% t(C))
20 | 
21 |     sqDist <- normX + normC - crossTerm
22 | 
23 |     # Negative entries (e.g. from floating-point rounding) are set to zero
24 |     sqDist[sqDist < 0] <- 0
25 | 
26 |     return(sqDist)
27 | }
26 | 


--------------------------------------------------------------------------------
/man/standardNormalization.Rd:
--------------------------------------------------------------------------------
 1 | \name{standardNormalization}
 2 | \alias{standardNormalization}
 3 | \title{
 4 | Standard Normalization
 5 | }
 6 | \description{
 7 | Normalize each column of the input data to have mean 0 and standard deviation 1.
 8 | }
 9 | \usage{
10 | standardNormalization(x)
11 | }
12 | %- maybe also 'usage' for other objects documented here.
13 | \arguments{
14 |   \item{x}{
15 | The unnormalized data.
16 | }
17 | }
18 | \value{
19 | The data normalized. 
20 | }
21 | \author{
22 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
23 | }
24 | %% ~Make other sections like Warning with \section{Warning }{....} ~
25 | 
26 | \examples{
27 | 
28 | ## Data1 is of size n x d_1, 
29 | ## where n is the number of patients, d_1 is the number of genes, 
30 | ## Data2 is of size n x d_2, 
31 | ## where n is the number of patients, d_2 is the number of methylation
32 | data(Data1)
33 | data(Data2)
34 | 
35 | Data1 = standardNormalization(Data1);
36 | Data2 = standardNormalization(Data2);
37 | }
38 | 


--------------------------------------------------------------------------------
/R/affinityMatrix.R:
--------------------------------------------------------------------------------
 1 | affinityMatrix <- function(diff,K=20,sigma=0.5) {
 2 |     # Computes the affinity matrix for a given distance matrix
 3 |     #
 4 |     # Args:
 5 |     #   diff: Square distance matrix
 6 |     #   K: Number of nearest neighbours whose mean distance sets the local
 7 |     #       scale of each sample. Columns 2..(K+1) of the sorted distances
 8 |     #       are used, so K + 1 must not exceed the number of samples.
 9 |     #   sigma: Multiplier applied to the local scale before it is passed to
10 |     #       dnorm() as the standard deviation
11 |     #
12 |     # Returns:
13 |     #   Symmetric affinity matrix: normal density of each distance, with a
14 |     #       pairwise standard deviation built from the K-nearest-neighbour
15 |     #       mean distances of both samples and the distance itself
16 |     #
17 | 
18 |     # Force symmetry and a zero self-distance
19 |     diff <- (diff + t(diff)) / 2
20 |     diag(diff) <- 0
21 | 
22 |     # Row i holds the distances from sample i in increasing order
23 |     sortedColumns <- as.matrix(t(apply(diff,2,sort)))
24 | 
25 |     finiteMean <- function(x){
26 |         return(mean(x[is.finite(x)]))
27 |     }
28 | 
29 |     # For non-negative distances the first sorted entry is the zero
30 |     # self-distance, so the neighbours are taken from columns 2..(K+1).
31 |     # drop=FALSE keeps a one-column matrix when K == 1; without it apply()
32 |     # would be handed a plain vector and fail.
33 |     neighbourCols <- (1:K) + 1
34 |     means <- apply(sortedColumns[, neighbourCols, drop=FALSE], 1, finiteMean) +
35 |         .Machine$double.eps
36 | 
37 |     avg <- function(x,y){
38 |         return((x+y)/2)
39 |     }
40 | 
41 |     # Pairwise scale: 2/3 from the averaged neighbourhood means, 1/3 from
42 |     # the distance itself, bounded below by machine epsilon
43 |     Sig <- outer(means,means,avg)/3*2 + diff/3 + .Machine$double.eps
44 |     Sig[Sig <= .Machine$double.eps] <- .Machine$double.eps
45 |     densities <- dnorm(diff, 0, sigma*Sig, log = FALSE)
46 | 
47 |     W <- (densities + t(densities)) / 2
48 |     return(W)
49 | }
35 | 


--------------------------------------------------------------------------------
/R/getColorsForGroups.R:
--------------------------------------------------------------------------------
 1 | getColorsForGroups <- function(group,
 2 |     colors=c("red","blue","green","purple","grey","cyan","brown","pink")){
 3 |     # Constructs a vector of colours given a numeric vector of cluster labels.
 4 |     #   Label i is replaced by colors[i]. If the largest label exceeds the
 5 |     #   number of available colours a warning is raised and NULL is returned.
 6 |     #
 7 |     # Args:
 8 |     #   group: A numeric vector of group labels (expected to be 1, 2, ...)
 9 |     #   colors: Provided to override default colour options, must be provided if
10 |     #       number of groups exceeds eight. Length of provided colours must be
11 |     #        >= the largest group label.
12 |     #
13 |     # Returns:
14 |     #   A vector with the colour of each element of group, character(0) for
15 |     #   an empty group, or NULL (with a warning) when there are too few colours.
16 |     #   Labels outside 1..max(group) are left as they are.
17 |     #
18 | 
19 |     # Nothing to colour; also avoids max() on an empty vector
20 |     if (length(group) == 0) {
21 |         return(character(0))
22 |     }
23 | 
24 |     largestLabel <- max(group)
25 | 
26 |     if (largestLabel > length(colors)) {
27 |         # Default separator of paste() keeps the message fragments apart
28 |         warning(paste("ERROR: Not enough colors using the default color argument",
29 |             "for the different groups, PLEASE inform the colors argument"))
30 |         return(NULL)
31 |     }
32 | 
33 |     cluster.colors <- group
34 |     # seq_len() gives an empty loop when the largest label is below 1
35 |     for (i in seq_len(max(0, largestLabel))) {
36 |         cluster.colors[which(group == i)] <- colors[i]
37 |     }
38 | 
39 |     return(cluster.colors)
40 | }
32 | 


--------------------------------------------------------------------------------
/man/chiDist2.Rd:
--------------------------------------------------------------------------------
 1 | \name{chiDist2}
 2 | \alias{chiDist2}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Pairwise Chi-squared distances
 6 | }
 7 | \description{
 8 | Wrapper function chi2Dist imported from 'ExPosition' package.
 9 | Computes the Chi-squared distances between all pairs of data points in the given matrix.
10 | }
11 | \usage{
12 | chiDist2(A)
13 | }
14 | %- maybe also 'usage' for other objects documented here.
15 | \arguments{
16 |   \item{A}{
17 | A data matrix where each row is a different data point
18 | }
19 | }
20 | \value{
21 | Returns an N x N matrix where N is the number of rows in A. Element (i,j) is the squared Chi-squared distance between the ith and the jth data point in A.
22 | }
23 | \author{
24 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
25 | 
26 | }
27 | \examples{
28 | 
29 | ## Data1 is of size n x d_1, 
30 | ## where n is the number of patients, d_1 is the number of genes, 
31 | ## Data2 is of size n x d_2, 
32 | ## where n is the number of patients, d_2 is the number of methylation
33 | data(Data1)
34 | data(Data2)
35 | 
36 | ## Calculate distance matrices (here we calculate the Chi-squared distance, 
37 | ## you can use other distances, e.g. Euclidean or correlation)
38 | Dist1 = chiDist2(as.matrix(Data1))
39 | Dist2 = chiDist2(as.matrix(Data2))
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/R/spectralClustering.r:
--------------------------------------------------------------------------------
 1 | spectralClustering <- function(affinity, K, type=3) {
 2 |     # Spectral clustering of an affinity matrix into K clusters.
 3 |     #
 4 |     # Args:
 5 |     #   affinity: Affinity matrix (size NxN) to perform clustering on
 6 |     #   K: Number of clusters
 7 |     #   type (default 3): Which Laplacian is decomposed:
 8 |     #       1 - unnormalized, D - W
 9 |     #       2 - D^-1 (D - W)
10 |     #       3 - D^-1/2 (D - W) D^-1/2, with the rows of the eigenvector
11 |     #           matrix rescaled to unit length
12 |     #
13 |     # Returns:
14 |     #   labels: A vector of length N assigning a label 1:K to each sample
15 | 
16 |     # Node degrees; zero degrees are replaced so they can be inverted
17 |     degree <- rowSums(affinity)
18 |     degree[degree == 0] <- .Machine$double.eps
19 |     laplacian <- diag(degree) - affinity
20 | 
21 |     if (type == 1) {
22 |         NL <- laplacian
23 |     } else if (type == 2) {
24 |         NL <- diag(1 / degree) %*% laplacian
25 |     } else if (type == 3) {
26 |         invSqrtDegree <- diag(1 / sqrt(degree))
27 |         NL <- invSqrtDegree %*% laplacian %*% invSqrtDegree
28 |     }
29 | 
30 |     # Eigenvectors belonging to the K eigenvalues of smallest magnitude
31 |     decomposition <- eigen(NL)
32 |     byMagnitude <- sort(abs(decomposition$values), index.return = TRUE)$ix
33 |     U <- decomposition$vectors[, byMagnitude[1:K]]
34 | 
35 |     if (type == 3) {
36 |         U <- t(apply(U, 1, function(v) v / sqrt(sum(v^2))))
37 |     }
38 | 
39 |     # The label of a sample is the column holding the largest entry of its
40 |     # row in the discretised eigenvector matrix
41 |     indicator <- .discretisation(U)$discrete
42 |     labels <- apply(indicator, 1, which.max)
43 | 
44 |     return(labels)
45 | }
46 | 


--------------------------------------------------------------------------------
/R/displayClustersWithHeatmap.R:
--------------------------------------------------------------------------------
 1 | displayClustersWithHeatmap <- function (W, group, ColSideColors=NULL,...) {
 2 |     # Draws a heatmap of a similarity matrix with the samples ordered by
 3 |     #   group, optionally annotated with colour side bars.
 4 |     #
 5 |     # Args:
 6 |     #   W: Affinity matrix
 7 |     #   group: labels of cluster groups
 8 |     #   ColSideColors: Character vector of length(group) containing color names
 9 |     #       for horizontal side bar to annotate columns of W, OR a character
10 |     #       matrix with number of rows matching number of rows in W.
11 |     #   ...: Further arguments forwarded to heatmap() / heatmapPlus()
12 |     #
13 |     # Returns:
14 |     #   NULL
15 | 
16 |     # Permutation that sorts the samples by their group label
17 |     sampleOrder <- sort(as.vector(group), index.return = TRUE)$ix
18 | 
19 |     # Replace the diagonal by the median entry, make each row sum to 1,
20 |     # then symmetrize
21 |     diag(W) <- median(as.vector(W))
22 |     W <- W / rowSums(W)
23 |     W <- W + t(W)
24 | 
25 |     if (is.null(ColSideColors)) {
26 |         # No annotation requested
27 |         heatmap(W[sampleOrder, sampleOrder], scale="none", Rowv=NA, Colv=NA, ...)
28 |     } else if (is.vector(ColSideColors)) {
29 |         # One colour per sample
30 |         heatmap(W[sampleOrder, sampleOrder], scale="none", Rowv=NA, Colv=NA,
31 |             ColSideColors=ColSideColors[sampleOrder], ...)
32 |     } else {
33 |         # Several annotation columns: one row of colours per sample
34 |         heatmapPlus(W[sampleOrder, sampleOrder], scale="none", Rowv=NA, Colv=NA,
35 |             ColSideColors=ColSideColors[sampleOrder,], ...)
36 |     }
37 | 
38 |     return(NULL)
39 | }
39 | 


--------------------------------------------------------------------------------
/man/dist2.Rd:
--------------------------------------------------------------------------------
 1 | \name{dist2}
 2 | \alias{dist2}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Pairwise squared Euclidean distances
 6 | }
 7 | \description{
 8 | Computes the squared Euclidean distances between every data point in X and every data point in C.
 9 | }
10 | \usage{
11 | dist2(X, C)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{X}{
16 | A data matrix where each row is a different data point
17 | }
18 |   \item{C}{
19 | A data matrix where each row is a different data point. If this matrix is the same as X, pairwise distances for all data points are computed.
20 | }
21 | }
22 | \value{
23 | Returns an N x M matrix where N is the number of rows in X and M is the number of rows in C. Element (n,m) is the squared Euclidean distance between the nth data point in X and the mth data point in C.
24 | }
25 | \author{
26 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
27 | }
28 | %% ~Make other sections like Warning with \section{Warning }{....} ~
29 | 
30 | \examples{
31 | 
32 | ## Data1 is of size n x d_1, 
33 | ## where n is the number of patients, d_1 is the number of genes, 
34 | ## Data2 is of size n x d_2, 
35 | ## where n is the number of patients, d_2 is the number of methylation
36 | data(Data1)
37 | data(Data2)
38 | 
39 | ## Calculate distance matrices(here we calculate Euclidean Distance, 
40 | ## you can use other distance, e.g. correlation)
41 | Dist1 = dist2(as.matrix(Data1), as.matrix(Data1))
42 | Dist2 = dist2(as.matrix(Data2), as.matrix(Data2))
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/R/groupPredict.r:
--------------------------------------------------------------------------------
 1 | groupPredict <- function(train, test, groups, K=20, alpha=0.5, t=20, method=1){
 2 |     # Predicts the group of new samples from a labelled training set using
 3 |     #   label propagation or local and global consistency.
 4 |     #
 5 |     # Args:
 6 |     #   train: List of data matrices, one per view, for samples with known
 7 |     #       labels. Each is row-bound with the matching test matrix,
 8 |     #       normalized per column and turned into an affinity matrix.
 9 |     #   test: List of data matrices for samples with unknown labels.
10 |     #       test[[i]] must have the same columns as train[[i]].
11 |     #   groups: Labels (positive integers) of the samples in train
12 |     #   K: Number of neighbours, passed to affinityMatrix() and SNF()
13 |     #   alpha: Passed to affinityMatrix() as its sigma argument
14 |     #   t: Passed to SNF() as its third argument (number of iterations)
15 |     #   method: 0/1 specifies method used (1) Label propagation or
16 |     #       (0) Local & global consistency.
17 |     #
18 |     # Returns:
19 |     #   Vector with one label per row of the stacked data, i.e. for the
20 |     #   training samples followed by the test samples (assumes .csPrediction
21 |     #   keeps the row order of W - TODO confirm)
22 | 
23 |     Wi <- vector("list", length=length(train))
24 | 
25 |     for (i in seq_along(train)){
26 |         view <- standardNormalization(rbind(train[[i]],test[[i]]))
27 |         Dist1 <- dist2(view, view)
28 |         Wi[[i]] <- affinityMatrix(Dist1, K, alpha)
29 |     }
30 | 
31 |     W <- SNF(Wi,K,t)
32 | 
33 |     # One-hot encoding of the known labels; rows of unlabelled samples stay 0
34 |     Y0 <- matrix(0,nrow(view), max(groups))
35 |     for (i in seq_along(groups)){
36 |         Y0[i,groups[i]] <- 1
37 |     }
38 | 
39 |     Y <- .csPrediction(W,Y0,method)
40 | 
41 |     # which.max() yields a single index even when several groups share the
42 |     # top score (the first one wins), so the assignment never receives a
43 |     # longer replacement
44 |     newgroups <- rep(0,nrow(view))
45 |     for (i in seq_len(nrow(Y))){
46 |         newgroups[i] <- which.max(Y[i,])
47 |     }
48 | 
49 |     return (newgroups)
50 | }
40 | 


--------------------------------------------------------------------------------
/R/plotAlluvial.R:
--------------------------------------------------------------------------------
 1 | plotAlluvial <- function(W, clust.range, color.vect="gray") {
 2 |     # Plots alluvial of patient clusterings for clustering into clust.range clusters.
 3 |     #
 4 |     # Args:
 5 |     #   W: A similarity matrix to be used in spectral clustering
 6 |     #   clust.range: An integer vector specifying the numbers of clusters to be
 7 |     #       chosen for spectral clustering; every value must be >= 1.
 8 |     #       Duplicated values are used once.
 9 |     #   color.vect: A colour vector of length dim(W)[[1]] specifying the
10 |     #       colorings for patients in the alluvial (default all gray)
11 |     #
12 |     # Returns:
13 |     #    The value of the alluvial() call; the function is used for its plot
14 | 
15 |     # Wrapper of spectralClustering to account for when clust.num=1
16 |     get_spectral_labels <- function(aff, clust.num){
17 |         if(clust.num == 1){
18 |             return(rep(1,dim(aff)[[1]]))
19 |         }
20 |         else{
21 |             return(spectralClustering(aff, clust.num))
22 |         }
23 |     }
24 | 
25 |     # Reject any requested cluster count below 1. The comparison has to be
26 |     # inside any(): any(clust.range) < 1 would compare a single logical with 1.
27 |     if(any(clust.range < 1)){
28 |         stop('All numbers in clust.range must be greater than or equal to 1.')
29 |     }
30 | 
31 |     # Drop duplicates before sizing the matrix so that the number of columns
32 |     # matches the number of clusterings actually computed
33 |     clust.range <- unique(clust.range)
34 | 
35 |     # Initialize clust.map matrix specifying sample-groups for clust.range clusterings
36 |     n.samples <- dim(W)[[1]]
37 |     n.clusterings <- length(clust.range)
38 |     clust.map <- matrix(NA, n.samples, n.clusterings)
39 | 
40 |     # Generates (patient X num.clusters) mapping for clustering
41 |     for(i in seq_along(clust.range)){
42 |         clust.map[,i] <- get_spectral_labels(W, clust.range[[i]])
43 |     }
44 |     colnames(clust.map) <- as.character(clust.range)
45 |     alluvial(clust.map, freq=rep(1,n.samples), col=color.vect)
46 | }
44 | 


--------------------------------------------------------------------------------
/man/calNMI.Rd:
--------------------------------------------------------------------------------
 1 | \name{calNMI}
 2 | \alias{calNMI}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Normalized Mutual Information calculation
 6 | }
 7 | \description{
 8 | Calculate the normalized mutual information (NMI) between vectors x and y.
 9 | }
10 | \usage{
11 | calNMI(x, y)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{x}{
16 | a vector
17 | }
18 |   \item{y}{
19 | a vector
20 | }
21 | }
22 | \value{
23 | Returns the normalized mutual information between vectors x and y.
24 | }
25 | \references{
26 | B Wang, A Mezlini, F Demir, M Fiume, Z Tu, M Brudno, B Haibe-Kains, A Goldenberg (2014) Similarity Network Fusion: a fast and effective method to aggregate multiple data types on a genome wide scale. Nature Methods. Online. Jan 26, 2014  
27 | }
28 | \author{
29 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
30 | }
31 | \examples{
32 | 
33 | # How to use SNF with multiple views
34 | 
35 | # Load views into list "dataL"
36 | data(dataL)
37 | data(label)
38 | 
39 | # Set the other parameters
40 | K = 20 # number of neighbours
41 | alpha = 0.5 # hyperparameter in affinityMatrix
42 | T = 20 # number of iterations of SNF
43 | 
44 | # Normalize the features in each of the views if necessary
45 | # dataL = lapply(dataL, standardNormalization)
46 | 
47 | # Calculate the distances for each view
48 | distL = lapply(dataL, function(x) (dist2(x, x))^(1/2))
49 | 
50 | # Construct the similarity graphs
51 | affinityL = lapply(distL, function(x) affinityMatrix(x, K, alpha))
52 | 
53 | # Example of how to use SNF to perform subtyping
54 | # Construct the fused network
55 | W = SNF(affinityL, K, T)
56 | # Perform clustering on the fused network.
57 | clustering = spectralClustering(W,3);
58 | # Use NMI to measure the goodness of the obtained labels.
59 | NMI = calNMI(clustering,label);
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/man/concordanceNetworkNMI.Rd:
--------------------------------------------------------------------------------
 1 | \name{concordanceNetworkNMI}
 2 | \alias{concordanceNetworkNMI}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Concordance Network NMI calculation
 6 | }
 7 | \description{
 8 | Given a list of affinity matrices, Wall, the number of clusters, return a matrix containing the NMIs between cluster assignments made with spectral clustering on all matrices provided.
 9 | }
10 | \usage{
11 | concordanceNetworkNMI(Wall, C)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{Wall}{
16 | List of matrices. Each element of the list is a square, symmetric matrix that shows affinities of the data points from a certain view.
17 | }
18 |   \item{C}{
19 | Number of clusters
20 | }
21 | }
22 | \value{
23 | Returns a square matrix with one row and one column per matrix in Wall. Element (i,j) is the NMI between the spectral clusterings of the ith and the jth matrix.
24 | }
25 | \author{
26 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
27 | }
28 | %% ~Make other sections like Warning with \section{Warning }{....} ~
29 | 
30 | \examples{
31 | 
32 | # How to use SNF with multiple views
33 | 
34 | # Load views into list "dataL"
35 | data(dataL)
36 | data(label)
37 | 
38 | # Set the other parameters
39 | K = 20 # number of neighbours
40 | alpha = 0.5 # hyperparameter in affinityMatrix
41 | T = 20 # number of iterations of SNF
42 | # Normalize the features in each of the views.
43 | #dataL = lapply(dataL, standardNormalization)
44 | 
45 | # Calculate the distances for each view
46 | distL = lapply(dataL, function(x) (dist2(x, x)^(1/2)))
47 | 
48 | # Construct the similarity graphs
49 | affinityL = lapply(distL, function(x) affinityMatrix(x, K, alpha))
50 | 
51 | # an example of how to use concordanceNetworkNMI
52 | Concordance_matrix = concordanceNetworkNMI(affinityL, 3);
53 | 
54 | ## The output, Concordance_matrix,
55 | ## shows the pairwise concordance (NMI) between the clusterings of the individual networks. 
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/R/rankFeaturesByNMI.R:
--------------------------------------------------------------------------------
 1 | rankFeaturesByNMI <- function(data, W){
 2 |     # Calculates the normalized mutual information (NMI) score between each
 3 |     # feature's clustering and the clustering of the fused matrix W. Each feature
 4 |     # is ranked based on how similar it is to the clustering of the fused matrix.
 5 |     #
 6 |     # Args:
 7 |     #   data: A list of matrices (samples in rows, features in columns)
 8 |     #   W: Fused affinity matrix from all data types in data
 9 |     #
10 |     # Returns:
11 |     #   A list of two lists, each with one element per data type: the NMI
12 |     #   score of every feature, and the rank of every feature (rank 1 is the
13 |     #   highest NMI; ties are broken by feature order).
14 |     #
15 | 
16 |     # inherits() stays a single TRUE/FALSE for objects with several classes
17 |     stopifnot(inherits(data, "list"))
18 | 
19 |     NUM.OF.DATA.TYPES <- length(data)
20 |     NMI.scores <- vector(mode="list", length=NUM.OF.DATA.TYPES)
21 |     NMI.ranks <- vector(mode="list", length=NUM.OF.DATA.TYPES)
22 | 
23 |     # Reference clustering: the fused matrix, using the first element
24 |     # returned by estimateNumberOfClustersGivenGraph() as the cluster count
25 |     num.of.clusters.fused <- estimateNumberOfClustersGivenGraph(W)[[1]]
26 |     clustering.fused <- spectralClustering(W, num.of.clusters.fused)
27 | 
28 |     # seq_len() keeps both loops empty for an empty list or a view
29 |     # without columns, where 1:0 would index non-existent elements
30 |     for (data.type.ind in seq_len(NUM.OF.DATA.TYPES)){
31 |         view <- data[[data.type.ind]]
32 |         NUM.OF.FEATURES <- dim(view)[2]
33 |         NMI.scores[[data.type.ind]] <- vector(mode="numeric",
34 |             length=NUM.OF.FEATURES)
35 | 
36 |         for (feature.ind in seq_len(NUM.OF.FEATURES)){
37 |             # Cluster the samples on this single feature only
38 |             feature <- as.matrix(view[, feature.ind])
39 |             affinity.matrix <- affinityMatrix(dist2(feature, feature))
40 | 
41 |             clustering.single.feature <- spectralClustering(affinity.matrix,
42 |                 num.of.clusters.fused)
43 | 
44 |             NMI.scores[[data.type.ind]][feature.ind] <- calNMI(clustering.fused,
45 |                 clustering.single.feature)
46 |         }
47 | 
48 |         # Negate the scores so the largest NMI receives rank 1
49 |         NMI.ranks[[data.type.ind]] <- rank(-NMI.scores[[data.type.ind]],
50 |              ties.method="first")
51 |     }
52 | 
53 |     return(list(NMI.scores, NMI.ranks))
54 | }
46 | 


--------------------------------------------------------------------------------
/man/affinityMatrix.Rd:
--------------------------------------------------------------------------------
 1 | \name{affinityMatrix}
 2 | \alias{affinityMatrix}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Affinity matrix calculation
 6 | }
 7 | \description{
 8 | Computes affinity matrix from a generic distance matrix
 9 | }
10 | \usage{
11 | affinityMatrix(diff, K = 20, sigma = 0.5)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{diff}{
16 | Distance matrix
17 | }
18 |   \item{K}{
19 | Number of nearest neighbors
20 | }
21 |   \item{sigma}{
22 | Variance for local model
23 | }
24 | }
25 | \value{
26 | Returns an affinity matrix that represents the neighborhood graph of the data points.
27 | }
28 | \references{
29 | B Wang, A Mezlini, F Demir, M Fiume, Z Tu, M Brudno, B Haibe-Kains, A Goldenberg (2014) Similarity Network Fusion: a fast and effective method to aggregate multiple data types on a genome wide scale. Nature Methods. Online. Jan 26, 2014  
30 | }
31 | \author{
32 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
33 | }
34 | %% ~Make other sections like Warning with \section{Warning }{....} ~
35 | \examples{
36 | 
37 | ## First, set all the parameters:
38 | K = 20; ##number of neighbors, must be greater than 1. usually (10~30)
39 | alpha = 0.5; ##hyperparameter, usually (0.3~0.8)
40 | T = 20; ###Number of Iterations, usually (10~50)
41 | 
42 | ## Data1 is of size n x d_1, 
43 | ## where n is the number of patients, d_1 is the number of genes, 
44 | ## Data2 is of size n x d_2, 
45 | ## where n is the number of patients, d_2 is the number of methylation
46 | data(Data1)
47 | data(Data2)
48 | 
49 | ## Calculate distance matrices(here we calculate Euclidean Distance, 
50 | ## you can use other distance, e.g. correlation)
51 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
52 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
53 | 
54 | ## Next, construct similarity graphs
55 | W1 = affinityMatrix(Dist1, K, alpha)
56 | W2 = affinityMatrix(Dist2, K, alpha)
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/man/plotAlluvial.Rd:
--------------------------------------------------------------------------------
 1 | \name{plotAlluvial}
 2 | \alias{plotAlluvial}
 3 | \title{
 4 | Plot Alluvial 
 5 | }
 6 | \description{
 7 | This function plots an alluvial plot (parallel coordinate plot) of sample clusterings over a specified range of cluster numbers. Samples can be coloured by providing a vector of colours, allowing for the visualization of sample properties over a range of clustering number choices.
 8 | 
 9 | *This is a wrapper function calling the \href{https://CRAN.R-project.org/package=alluvial}{Alluvial Package} (Bojanowski M. & Edwards R)
10 | 
11 | }
12 | \usage{
13 | plotAlluvial(W, clust.range, color.vect)
14 | }
15 | %- maybe also 'usage' for other objects documented here.
16 | \arguments{
17 |   \item{W}{
18 |   Affinity matrix of dimension n.samples by n.samples
19 | }
20 |   \item{clust.range}{
21 |   Integer vector specifying the number of clusters for each clustering
22 | }
23 |   \item{color.vect}{
24 |   A vector of colours of length n.samples used to colour the samples
25 | }
26 | }
27 | \value{
28 | Plots an alluvial plot for range of clustering choices. 
29 | }
30 | \author{
31 | Daniel Cole
32 | }
33 | \seealso{More information on \href{https://CRAN.R-project.org/package=alluvial}{Alluvial Package}} 
34 | %% ~Make other sections like Warning with \section{Warning }{....} ~
35 | \examples{
36 | K <- 20
37 | alpha <- 0.5
38 | iter <- 20
39 | 
40 | data(Data1)
41 | data(Data2)
42 | 
43 | dist1 <- (dist2(as.matrix(Data1), as.matrix(Data1)))^(1/2)
44 | dist2 <- (dist2(as.matrix(Data2), as.matrix(Data2)))^(1/2)
45 | 
46 | W1 <- affinityMatrix(dist1, K, alpha)
47 | W2 <- affinityMatrix(dist2, K, alpha)
48 | 
49 | W <- SNF(list(W1, W2), K, iter)
50 | 
51 | #Plots the alluvial with no colouring
52 | plotAlluvial(W, 2:5)
53 | 
54 | #Change the colour of all samples to a single colour
55 | plotAlluvial(W, 2:5, col="red")
56 | 
57 | colour.breaks <- 30
58 | #This will assign each sample to one of colour.breaks colour bins between green and red.
59 | colFunc <- colorRampPalette(c("green", "red"))
60 | colours <- colFunc(colour.breaks)[as.numeric(cut(Data1[,1],breaks=colour.breaks))]
61 | plotAlluvial(W, 2:5, col=colours)
62 | }
63 | 


--------------------------------------------------------------------------------
/man/groupPredict.Rd:
--------------------------------------------------------------------------------
 1 | \name{groupPredict}
 2 | \alias{groupPredict}
 3 | \title{
 4 | Group Predict
 5 | }
 6 | \description{
 7 | This function is used to predict the subtype of new patients.
 8 | }
 9 | \usage{
10 | groupPredict(train, test, groups, K=20, alpha=0.5, t=20, method=1)
11 | }
12 | %- maybe also 'usage' for other objects documented here.
13 | \arguments{
14 |   \item{train}{
15 | Training data. Has the same number of views and columns as the test data. 
16 | }
17 |   \item{test}{
18 | Test data. Has the same number of views and columns as the training data. 
19 | }
20 |   \item{groups}{
21 | The label for the training data.
22 | }
23 |   \item{K}{
24 | Number of neighbors.
25 | }
26 |   \item{alpha}{
27 | Hyperparameter used in constructing similarity network.
28 | }
29 |   \item{t}{
30 | Number of iterations.
31 | }
32 |   \item{method}{
33 | An indicator of which method to use to predict the label. method = 0 means to use local and global consistency; method = 1 means to use label propagation.
34 | }
35 | }
36 | \value{
37 | Returns the prediction of which group the test data belongs to. 
38 | }
39 | \author{
40 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
41 | }
42 | %% ~Make other sections like Warning with \section{Warning }{....} ~
43 | \examples{
44 | 
45 | # Provide an example of predicting the new labels with label propagation
46 | 
47 | # Load views into list "dataL" and the cluster assignment into vector "label"
48 | data(dataL)
49 | data(label)
50 | 
51 | # Create the training and test data
52 | n = floor(0.8*length(label)) # number of training cases
53 | trainSample = sample.int(length(label), n)
54 | train = lapply(dataL, function(x) x[trainSample, ]) # Use the randomly sampled rows for training
55 | test = lapply(dataL, function(x) x[-trainSample, ]) # Test the rest of the data set
56 | groups = label[trainSample]
57 | 
58 | # Set the other parameters
59 | K = 20
60 | alpha = 0.5
61 | t = 20
62 | method = TRUE
63 | 
64 | # Apply the prediction function to the data
65 | newLabel = groupPredict(train,test,groups,K,alpha,t,method)
66 | 
67 | # Compare the prediction accuracy
68 | accuracy = sum(label[-trainSample] == newLabel[-c(1:n)])/(length(label) - n)
69 | 
70 | }
71 | 


--------------------------------------------------------------------------------
/man/spectralClustering.Rd:
--------------------------------------------------------------------------------
 1 | \name{spectralClustering}
 2 | \alias{spectralClustering}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Spectral Clustering
 6 | }
 7 | \description{
 8 | Perform the famous spectral clustering algorithms. There are three variants. The default one is the third type. 
 9 | }
10 | \usage{
11 | spectralClustering(affinity, K, type = 3)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{affinity}{
16 | Similarity matrix
17 | }
18 |   \item{K}{
19 | Number of clusters
20 | }
21 |   \item{type}{
22 | The variants of spectral clustering to use.
23 | }
24 | }
25 | \value{
26 | A vector consisting of cluster labels of each sample.
27 | }
28 | \author{
29 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
30 | }
31 | %% ~Make other sections like Warning with \section{Warning }{....} ~
32 | \examples{
33 | 
34 | ## First, set all the parameters:
35 | K = 20;##number of neighbors, usually (10~30)
36 | alpha = 0.5; ##hyperparameter, usually (0.3~0.8)
37 | T = 20; ###Number of Iterations, usually (10~50)
38 | 
39 | ## Data1 is of size n x d_1, 
40 | ## where n is the number of patients, d_1 is the number of genes, 
41 | ## Data2 is of size n x d_2, 
42 | ## where n is the number of patients, d_2 is the number of methylation
43 | data(Data1)
44 | data(Data2)
45 | 
46 | ## Calculate distance matrices (here we calculate Euclidean Distance, 
47 | ## you can use other distance, e.g. correlation)
48 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
49 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
50 | 
51 | ## Next, construct similarity graphs
52 | W1 = affinityMatrix(Dist1, K, alpha)
53 | W2 = affinityMatrix(Dist2, K, alpha)
54 | 
55 | # Next, we fuse all the graphs
56 | # then the overall matrix can be computed by
57 | W = SNF(list(W1,W2), K, T)
58 | 
59 | ## With this unified graph W of size n x n, 
60 | ## you can do either spectral clustering or Kernel NMF. 
61 | ## If you need help with further clustering, please let us know. 
62 | 
63 | ## You can display clusters in the data by the following function
64 | ## where C is the number of clusters.
65 | C = 2
66 | 
67 | ## You can get cluster labels for each data point by spectral clustering
68 | labels = spectralClustering(W, C)
69 | }
70 | 


--------------------------------------------------------------------------------
/MD5:
--------------------------------------------------------------------------------
 1 | 080dd5c9604c49af428f751b32ed1215 *DESCRIPTION
 2 | 2f18f0d376ef7c64e870bcddf8fd51da *NAMESPACE
 3 | 870e31d3a4bac353a7f264fa70a085b3 *R/SNF.R
 4 | 8da594216dd48283426561587e2e3ca3 *R/affinityMatrix.R
 5 | 1acbc5fc1358661bc5dad8d988df309e *R/calNMI.R
 6 | 0babf79c103f1decf0e46fdd65a89004 *R/chiDist2.R
 7 | 272cc0c2a3d6f34061c7232fcff5404b *R/concordanceNetworkNMI.R
 8 | 26b71060857c923e4de0d5433768b96d *R/displayClusters.R
 9 | d1663efe0f2777e076b3ac47dab3934d *R/displayClustersWithHeatmap.R
10 | 198c6241f17d9e535efc1a49cf2b52bd *R/dist2.R
11 | 5fca4b13f5393b182bdcca1e9511107c *R/estimateNumberOfClustersGivenGraph.R
12 | 6da9855a0bc030287f9f4ba7de643d8d *R/getColorsForGroups.R
13 | 7fd2c99de4a1bd63d6198414c5f9a5a0 *R/groupPredict.r
14 | 05f3526c8172e0db86435da3039a46d7 *R/heatmapPlus.R
15 | 0d279ac05c961e781026fc3d3b813c51 *R/internal.R
16 | dd44c617c65ee25640865d657be5b3b0 *R/plotAlluvial.R
17 | 0aadfa30aee3d079be6aaced0ba406d8 *R/rankFeaturesByNMI.R
18 | 9a06f16e80db36150776405fd24f11e4 *R/spectralClustering.r
19 | 768caaecb5cd499a8787ee4ed7fc823f *R/standardNormalization.R
20 | e88ad915b528b87fe9004ef3548bcedb *README
21 | 663e69ec428bfa12373878d5aac91dfb *data/Data1.rda
22 | 828e5456cb6c077a5d969739b105585f *data/Data2.rda
23 | 9107bbea063e947fba7a7f6123891211 *data/dataL.rda
24 | f32c4adbe155cae119c6299e3fc91d3b *data/label.rda
25 | 1611cc650c453a38cd8bce7f8ee7c725 *man/Data1.Rd
26 | 4a4849beb0130fc0a0524cbcbb004896 *man/Data2.Rd
27 | 707e25c8e00e3e3e567dbc3896c897ce *man/SNF.Rd
28 | b1695a6e7ba91b0164e8a9edd17da672 *man/affinityMatrix.Rd
29 | 86e36e385cf80c72143caddde1e5301b *man/calNMI.Rd
30 | 6cfa67279501ca2a9cfe2d05d3c9da3c *man/chiDist2.Rd
31 | 8e3dbed44f3eee857a7638891a0103ab *man/concordanceNetworkNMI.Rd
32 | d0e3eac3c047860daf2aaea259afb73e *man/dataL.Rd
33 | 09bf37bf72183a42581b1b5c1a8ce576 *man/displayClusters.Rd
34 | 1759ccc930097d8867fb1877a84eed4e *man/displayClustersWithHeatmap.Rd
35 | 607e658a5f8fe1ad84ff7dbefd62aead *man/dist2.Rd
36 | 08e2d670b205f980d4c7d8aa2bdd1de0 *man/estimateNumberOfClustersGivenGraph.Rd
37 | ad5e6a359736d83195838004d156181d *man/getColorsForGroups.Rd
38 | 299f2209cf51901f3af023792198e2c1 *man/groupPredict.Rd
39 | f67c0e9f46b9ef7b82e7159fe7d27dd3 *man/heatmapPlus.Rd
40 | 8f70b32428c5cca15926a128ed579ea1 *man/internal.Rd
41 | 7975be6e72cc919a4860ef7b52a532b8 *man/label.Rd
42 | 41478230376145724b77eaa39f21a3d6 *man/plotAlluvial.Rd
43 | d4803c2b70669a175bf2317fe0991823 *man/rankFeaturesByNMI.Rd
44 | c014a03fb5dd4e226b98606686414c4f *man/spectralClustering.Rd
45 | 167bf83b88be9e8bf404dcc41ab848d3 *man/standardNormalization.Rd
46 | 


--------------------------------------------------------------------------------
/man/rankFeaturesByNMI.Rd:
--------------------------------------------------------------------------------
 1 | \name{rankFeaturesByNMI}
 2 | \alias{rankFeaturesByNMI}
 3 | \title{
 4 | Rank Features by NMI
 5 | }
 6 | \description{
 7 | Ranks each feature by NMI based on its clustering assignment
 8 | }
 9 | \usage{
10 | rankFeaturesByNMI(data, W) 
11 | }
12 | \arguments{
13 |   \item{data}{
14 | List containing all the data types.
15 | }
16 |   \item{W}{
17 | Target matrix against which the NMI is calculated.
18 | }
19 | }
20 | \value{
21 | List containing the NMI and rank based on NMI for each feature. 
22 | }
23 | \author{
24 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
25 | }
26 | %% ~Make other sections like Warning with \section{Warning }{....} ~
27 | 
28 | \examples{
29 | 
30 | ## First, set all the parameters:
31 | K = 20;		# number of neighbors, usually (10~30)
32 | alpha = 0.5;  	# hyperparameter, usually (0.3~0.8)
33 | T = 20; 	# Number of Iterations, usually (10~20)
34 | 
35 | ## Data1 is of size n x d_1, 
36 | ## where n is the number of patients, d_1 is the number of genes, 
37 | ## Data2 is of size n x d_2, 
38 | ## where n is the number of patients, d_2 is the number of methylation
39 | data(Data1)
40 | data(Data2)
41 | 
42 | ## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
43 | ## And two data types have the same number of points. 
44 | ## The first half data belongs to the first cluster; the rest belongs to the second cluster.
45 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
46 | 
47 | ## Calculate distance matrices
48 | ## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
49 | 
50 | ## If the data are all continuous values, we recommend the users to perform 
51 | ## standard normalization before using SNF, 
52 | ## though it is optional depending on the data the users want to use.  
53 | # Data1 = standardNormalization(Data1);
54 | # Data2 = standardNormalization(Data2);
55 | 
56 | 
57 | 
58 | ## Calculate the pair-wise distance; 
59 | ## If the data is continuous, we recommend to use the function "dist2" as follows 
60 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
61 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
62 | 
63 | ## next, construct similarity graphs
64 | W1 = affinityMatrix(Dist1, K, alpha)
65 | W2 = affinityMatrix(Dist2, K, alpha)
66 | 
67 | ## next, we fuse all the graphs
68 | ## then the overall matrix can be computed by similarity network fusion(SNF):
69 | W = SNF(list(W1,W2), K, T)
70 | 
71 | NMI_scores <- rankFeaturesByNMI(list(Data1, Data2), W)
72 | 
73 | }
74 | 


--------------------------------------------------------------------------------
/man/displayClusters.Rd:
--------------------------------------------------------------------------------
 1 | \name{displayClusters}
 2 | \alias{displayClusters}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Plot given similarity matrix by clusters
 6 | }
 7 | \description{
 8 | Visualize the clusters in given similarity matrix
 9 | }
10 | \usage{
11 | displayClusters(W, group)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{W}{
16 | Similarity matrix
17 | }
18 |   \item{group}{
19 | A vector containing the labels for each sample in W. 
20 | }
21 | }
22 | \value{
23 | Plots given similarity matrix with patients ordered to form clusters.
24 | }
25 | \author{
26 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
27 | }
28 | %% ~Make other sections like Warning with \section{Warning }{....} ~
29 | 
30 | \examples{
31 | 
32 | ## First, set all the parameters:
33 | K = 20;			# number of neighbors, usually (10~30)
34 | alpha = 0.5;  	# hyperparameter, usually (0.3~0.8)
35 | T = 10; 		# Number of Iterations, usually (10~20)
36 | 
37 | ## Data1 is of size n x d_1, 
38 | ## where n is the number of patients, d_1 is the number of genes, 
39 | ## Data2 is of size n x d_2, 
40 | ## where n is the number of patients, d_2 is the number of methylation
41 | data(Data1)
42 | data(Data2)
43 | 
44 | ## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
45 | ## And two data types have the same number of points. 
46 | ## The first half data belongs to the first cluster; the rest belongs to the second cluster.
47 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
48 | 
49 | ## Calculate distance matrices
50 | ## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
51 | 
52 | ## If the data are all continuous values, we recommend the users to perform 
53 | ## standard normalization before using SNF, 
54 | ## though it is optional depending on the data the users want to use.  
55 | # Data1 = standardNormalization(Data1);
56 | # Data2 = standardNormalization(Data2);
57 | 
58 | 
59 | ## Calculate the pair-wise distance; 
60 | ## If the data is continuous, we recommend to use the function "dist2" as follows 
61 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
62 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
63 | 
64 | ## next, construct similarity graphs
65 | W1 = affinityMatrix(Dist1, K, alpha)
66 | W2 = affinityMatrix(Dist2, K, alpha)
67 | 
68 | ## These similarity graphs have complementary information about clusters.
69 | displayClusters(W1, truelabel);
70 | displayClusters(W2, truelabel);
71 | 
72 | }
73 | 


--------------------------------------------------------------------------------
/R/estimateNumberOfClustersGivenGraph.R:
--------------------------------------------------------------------------------
 1 | estimateNumberOfClustersGivenGraph <- function(W, NUMC=2:5) {
 2 |     # Estimates the best number of clusters from a vector of choices, using 
 3 |     #   the eigen-gap & rotation cost heuristics.
 4 |     #
 5 |     # Args:
 6 |     #   W: Affinity matrix (usually result from SNF)
 7 |     #   NUMC: A vector of integers specifying which cluster numbers to check
 8 |     #
 9 |     # Returns:
10 |     #   A vector of the top two suggested number of clusters using
11 |     #       the eigen-gap and rotation cost heuristics. 
12 |     #
13 | 
14 |     #Put this check after the length(NUMC) check?
15 |     if (min(NUMC) == 1) {
16 |         warning('Note that we always assume there are more than one cluster.')
17 |         NUMC <- NUMC[NUMC > 1]
18 |     }
19 | 
20 |     #Why is this performed here?
21 |     W <- (W + t(W))/2
22 |     diag(W) <- 0
23 | 
24 |     #NUMC validity check
25 |     if (length(NUMC) <= 0) {
26 |         warning(paste("Invalid NUMC provided, must be an integer vector",
27 |              "with atleast one other number than 1.",
28 |               "Using default NUMC=c(2,3,4,5)",sep=""))
29 |         NUMC <- 2:5 
30 |     }
31 | 
32 |     # compute unnormalized Laplacian
33 |     degs <- rowSums(W)
34 |     degs[degs == 0] <- .Machine$double.eps    
35 |     D <- diag(degs)    
36 |     L <- D - W
37 |     Di <- diag(1 / sqrt(degs))
38 |     L <- Di %*% L %*% Di
39 |     #print(dim(L))
40 | 
41 |     # compute the eigenvectors corresponding to the k smallest
42 |     eigs <- eigen(L)
43 |     eigs_order <- sort(eigs$values, index.return=T)$ix
44 |     eigs$values <- eigs$values[eigs_order]
45 |     eigs$vectors <- eigs$vectors[, eigs_order]
46 |     eigengap <- abs(diff(eigs$values))
47 | #    eigengap <- eigengap * (1 - eigs$values[1:length(eigs$values) - 1]
48 | #        ) / (1 - eigs$values[2:length(eigs$values)])
49 | 
50 |     quality <- list()
51 |     for (c_index in 1:length(NUMC)) {
52 |         ck <- NUMC[c_index]
53 |         UU <- eigs$vectors[, 1:ck]
54 |         EigenvectorsDiscrete <- .discretisation(UU)[[1]]
55 |         EigenVectors <- EigenvectorsDiscrete^2
56 |       
57 |         #MATLAB: sort(EigenVectors,2, 'descend');
58 |         temp1 <- EigenVectors[do.call(order, lapply(1:ncol(EigenVectors),
59 |              function(i) EigenVectors[, i])), ]
60 |         temp1 <- t(apply(temp1, 1, sort, TRUE))  
61 |   
62 |         quality[[c_index]] <- (1 - eigs$values[ck + 1]) / 
63 |             (1 - eigs$values[ck]) * 
64 |             sum( sum( diag(1 / (temp1[, 1] + .Machine$double.eps) ) %*%
65 |             temp1[, 1:max(2, ck-1)] ))
66 |     }
67 |     #Eigen-gap best two clusters
68 |     t1 <- sort(eigengap[NUMC], decreasing=TRUE, index.return=T)$ix
69 |     K1 <- NUMC[t1[1]]
70 |     K12 <- NUMC[t1[2]]
71 | 
72 |     #Rotation cost best two clusters
73 |     t2 <- sort(unlist(quality), index.return=TRUE)$ix
74 |     K2 <- NUMC[t2[1]]
75 |     K22 <- NUMC[t2[2]]    
76 |   
77 |     output <- list("Eigen-gap best"=K1, "Eigen-gap 2nd best"=K12,
78 |         "Rotation cost best"=K2, "Rotation cost 2nd best"=K22)
79 |     return (output)
80 | }
81 | 


--------------------------------------------------------------------------------
/R/SNF.R:
--------------------------------------------------------------------------------
 1 | SNF <- function(Wall, K=20, t=20) {
 2 |     # Similarity Network Fusion takes multiple views of a network (Wall) and
 3 |     # fuses them together to create a overall affinity matrix.
 4 |     #
 5 |     # Args:
 6 |     #   Wall: List of matrices, each element is a square symmetric affinity 
 7 |     #       matrix.
 8 |     #   K: Number of neighbors used in the K-nearest neighbours step,??? more details???
 9 |     #   t: Number of iterations for the diffusion process
10 |     #
11 |     # Returns:  
12 |     #   W: Unified similarity graph of all data types in Wall. 
13 | 
14 |     check_wall_names <- function(Wall){
15 |         # Checks if dimnames are consistant across all matrices in Wall
16 |         #   #Move to internal functions?
17 |         # Args:
18 |         #   Wall: List of matrices
19 |         # Returns:
20 |         #   logical: True/False indicator of dimnames equivalence
21 |         name_match <- function(names_A, names_B){
22 |             return(identical(dimnames(names_A), dimnames(names_B)))
23 |         }
24 | 
25 |         return(all(unlist(lapply(Wall, FUN=name_match, Wall[[1]]))))
26 |     }
27 | 
28 |     #Check if Wall names are consistant across all matrices in Wall
29 |     wall.name.check <- check_wall_names(Wall)
30 |     wall.names <- dimnames(Wall[[1]])
31 |     if(!wall.name.check){
32 |         warning("Dim names not consistent across all matrices in Wall.
33 |             Returned matrix will have no dim names.")
34 |     }
35 |  
36 |     LW <- length(Wall)
37 | 
38 |     #Normalization method for affinity matrices
39 |     normalize <- function(X){
40 |         row.sum.mdiag <- rowSums(X) - diag(X) 
41 |         #If rowSumx(X) == diag(X), set row.sum.mdiag to 1 to avoid div by zero
42 |         row.sum.mdiag[row.sum.mdiag == 0] <- 1   
43 |         X <- X/(2*(row.sum.mdiag))
44 |         diag(X) <- 0.5
45 |         return(X)
46 |     }
47 |     
48 |     #Normalize different networks to avoid scale problems.
49 |     newW <- vector("list", LW)
50 |     nextW <- vector("list", LW)
51 |     for(i in 1:LW){
52 |       Wall[[i]] <- normalize(Wall[[i]])
53 |       Wall[[i]] <- (Wall[[i]] + t(Wall[[i]]))/2
54 |     }
55 |     
56 |     ### Calculate the local transition matrix. (KNN step?)
57 |     for(i in 1:LW){
58 |       newW[[i]] <- (.dominateset(Wall[[i]], K))
59 |     }
60 |     
61 |     #Perform the diffusion for t iterations
62 |     for (i in 1:t) {
63 |         for(j in 1:LW){
64 |             sumWJ <- matrix(0,dim(Wall[[j]])[1], dim(Wall[[j]])[2])
65 |             for(k in 1:LW){
66 |                 if(k != j) {
67 |                     sumWJ <- sumWJ + Wall[[k]]
68 |                 }
69 |             }
70 |             nextW[[j]] <- newW[[j]] %*% (sumWJ/(LW-1)) %*% t(newW[[j]])
71 |         }
72 | 
73 |         #Normalize each new obtained networks.
74 |         for(j in 1 : LW){
75 |           Wall[[j]] <- normalize(nextW[[j]])
76 |           Wall[[j]] <- (Wall[[j]] + t(Wall[[j]]))/2;
77 |         }
78 |     }
79 |     
80 |     # Construct the combined affinity matrix by summing diffused matrices
81 |     W <- matrix(0, nrow(Wall[[1]]), ncol(Wall[[1]]))
82 |     for(i in 1:LW){
83 |         W <- W + Wall[[i]]
84 |     }
85 | 
86 |     W <- W/LW
87 |     W <- normalize(W)
88 |     W <- (W + t(W)) / 2
89 | 
90 |     if(wall.name.check){
91 |         dimnames(W) <- wall.names
92 |     } 
93 | 
94 |     return(W)  
95 | }
96 | 


--------------------------------------------------------------------------------
/R/internal.R:
--------------------------------------------------------------------------------
  1 | .csPrediction <- function(W,Y0,method){
  2 |   ###This function implements the label propagation to predict the label(subtype) for new patients.	
  3 |   ### note method is an indicator of which semi-supervised method to use
  4 |   # method == 0 indicates to use the local and global consistency method
  5 |   # method >0 indicates to use label propagation method.
  6 |   
  7 |   alpha=0.9;
  8 |   P= W/rowSums(W)
  9 |   if(method==0){
 10 |     Y= (1-alpha)* solve( diag(dim(P)[1])- alpha*P)%*%Y0;
 11 |   } else {
 12 |     NLabel=which(rowSums(Y0)==0)[1]-1;
 13 |     Y=Y0;
 14 |     for (i in 1:1000){
 15 |       Y=P%*%Y;
 16 |       Y[1:NLabel,]=Y0[1:NLabel,];
 17 |     }
 18 |   }
 19 |   return(Y);
 20 | }
 21 | 
 22 | .discretisation <- function(eigenVectors) {
 23 |   
 24 |   normalize <- function(x) x / sqrt(sum(x^2))
 25 |   eigenVectors = t(apply(eigenVectors,1,normalize))
 26 |   
 27 |   n = nrow(eigenVectors)
 28 |   k = ncol(eigenVectors)
 29 |   
 30 |   R = matrix(0,k,k)
 31 |   R[,1] = t(eigenVectors[round(n/2),])
 32 |   
 33 |   mini <- function(x) {
 34 |     i = which(x == min(x))
 35 |     return(i[1])
 36 |   }
 37 |   
 38 |   c = matrix(0,n,1)
 39 |   for (j in 2:k) {
 40 |     c = c + abs(eigenVectors %*% matrix(R[,j-1],k,1))
 41 |     i = mini(c)
 42 |     R[,j] = t(eigenVectors[i,])
 43 |   }
 44 |   
 45 |   lastObjectiveValue = 0
 46 |   for (i in 1:20) {
 47 |     eigenDiscrete = .discretisationEigenVectorData(eigenVectors %*% R)
 48 |     
 49 |     svde = svd(t(eigenDiscrete) %*% eigenVectors)
 50 |     U = svde[['u']]
 51 |     V = svde[['v']]
 52 |     S = svde[['d']]
 53 |     
 54 |     NcutValue = 2 * (n-sum(S))
 55 |     if(abs(NcutValue - lastObjectiveValue) < .Machine$double.eps) 
 56 |       break
 57 |     
 58 |     lastObjectiveValue = NcutValue
 59 |     R = V %*% t(U)
 60 |     
 61 |   }
 62 |   
 63 |   return(list(discrete=eigenDiscrete,continuous =eigenVectors))
 64 | }
 65 | 
 66 | .discretisationEigenVectorData <- function(eigenVector) {
 67 |   
 68 |   Y = matrix(0,nrow(eigenVector),ncol(eigenVector))
 69 |   maxi <- function(x) {
 70 |     i = which(x == max(x))
 71 |     return(i[1])
 72 |   }
 73 |   j = apply(eigenVector,1,maxi)
 74 |   Y[cbind(1:nrow(eigenVector),j)] = 1
 75 |   
 76 |   return(Y)
 77 |   
 78 | }
 79 | 
 80 | .dominateset <- function(xx,KK=20) {
 81 |   ###This function outputs the top KK neighbors.	
 82 |   
 83 |   zero <- function(x) {
 84 |     s = sort(x, index.return=TRUE)
 85 |     x[s$ix[1:(length(x)-KK)]] = 0
 86 |     return(x)
 87 |   }
 88 |   normalize <- function(X) X / rowSums(X)
 89 |   A = matrix(0,nrow(xx),ncol(xx));
 90 |   for(i in 1:nrow(xx)){
 91 |     A[i,] = zero(xx[i,]);
 92 |     
 93 |   }
 94 |   
 95 |   
 96 |   return(normalize(A))
 97 | }
 98 | 
 99 | # Calculate the mutual information between vectors x and y.
100 | .mutualInformation <- function(x, y) {
101 |   classx <- unique(x)
102 |   classy <- unique(y)
103 |   nx <- length(x)
104 |   ncx <- length(classx)
105 |   ncy <- length(classy)
106 |   
107 |   probxy <- matrix(NA, ncx, ncy)
108 |   for (i in 1:ncx) {
109 |     for (j in 1:ncy) {
110 |       probxy[i, j] <- sum((x == classx[i]) & (y == classy[j])) / nx
111 |     }
112 |   }
113 |   
114 |   probx <- matrix(rowSums(probxy), ncx, ncy)
115 |   proby <- matrix(colSums(probxy), ncx, ncy, byrow=TRUE)
116 |   result <- sum(probxy * log(probxy / (probx * proby), 2), na.rm=TRUE)
117 |   return(result)
118 | }
119 | 
# Calculate the entropy (in bits) of vector x, using the relative frequencies
# of its distinct values. Empty input gives 0.
.entropy <- function(x) {
  class <- unique(x)
  nx <- length(x)

  # Relative frequency of every distinct value. Iterating over seq_along()
  # yields an empty vector for empty input, where 1:0 would compute 0/0.
  prob <- vapply(seq_along(class),
                 function(i) sum(x == class[i]) / nx,
                 numeric(1))

  result <- -sum(prob * log(prob, 2))
  return(result)
}
134 | 
.repmat = function(X,m,n){
  ## R equivalent of MATLAB's repmat: tiles X m times vertically and n times
  ## horizontally. A plain vector is treated as a single column.
  shape <- dim(X)
  if (is.null(shape)) {
    shape <- c(length(X), 1)
  }
  n.row <- shape[1]
  n.col <- shape[2]

  # Recycling X over n times as many columns tiles it horizontally ...
  wide <- matrix(X, n.row, n.col * n)
  # ... and repeating the complete row block m times tiles it vertically.
  wide[rep(seq_len(n.row), times = m), , drop = FALSE]
}


--------------------------------------------------------------------------------
/man/SNF.Rd:
--------------------------------------------------------------------------------
  1 | \name{SNF}
  2 | \alias{SNF}
  3 | \title{
  4 | Similarity Network Fusion
  5 | }
  6 | \description{
  7 | Similarity Network Fusion takes multiple views of a network and fuses them together to construct an overall status matrix. The input to our algorithm can be feature vectors, pairwise distances, or pairwise similarities. The learned status matrix can then be used for retrieval, clustering, and classification.
  8 | }
  9 | \usage{
 10 | SNF(Wall, K, t)
 11 | }
 12 | %- maybe also 'usage' for other objects documented here.
 13 | \arguments{
 14 |   \item{Wall}{
 15 | List of matrices. Each element of the list is a square, symmetric matrix that shows affinities of the data points from a certain view.
 16 | }
 17 |   \item{K}{
 18 | Number of neighbors in K-nearest neighbors part of the algorithm.
 19 | }
 20 |   \item{t}{
 21 | Number of iterations for the diffusion process.
 22 | }
 23 | }
 24 | \value{
 25 | W is the overall status matrix derived
 26 | }
 27 | \references{
 28 | B Wang, A Mezlini, F Demir, M Fiume, Z Tu, M Brudno, B Haibe-Kains, A Goldenberg (2014) Similarity Network Fusion: a fast and effective method to aggregate multiple data types on a genome wide scale. Nature Methods. Online. Jan 26, 2014  
 29 | 
 30 | Concise description can be found here: 
 31 | http://compbio.cs.toronto.edu/SNF/SNF/Software.html
 32 | }
 33 | \author{
 34 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
 35 | }
 36 | %% ~Make other sections like Warning with \section{Warning }{....} ~
 37 | 
 38 | \examples{
 39 | 
 40 | ## First, set all the parameters:
 41 | K = 20;		# number of neighbors, usually (10~30)
 42 | alpha = 0.5;  	# hyperparameter, usually (0.3~0.8)
 43 | T = 20; 	# Number of Iterations, usually (10~20)
 44 | 
 45 | ## Data1 is of size n x d_1, 
 46 | ## where n is the number of patients, d_1 is the number of genes, 
 47 | ## Data2 is of size n x d_2, 
 48 | ## where n is the number of patients, d_2 is the number of methylation
 49 | data(Data1)
 50 | data(Data2)
 51 | 
 52 | ## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
 53 | ## And two data types have the same number of points. 
 54 | ## The first half data belongs to the first cluster; the rest belongs to the second cluster.
 55 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
 56 | 
 57 | ## Calculate distance matrices
 58 | ## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
 59 | 
 60 | ## If the data are all continuous values, we recommend the users to perform 
 61 | ## standard normalization before using SNF, 
 62 | ## though it is optional depending on the data the users want to use.  
 63 | # Data1 = standardNormalization(Data1);
 64 | # Data2 = standardNormalization(Data2);
 65 | 
 66 | 
 67 | 
 68 | ## Calculate the pair-wise distance; 
 69 | ## If the data is continuous, we recommend to use the function "dist2" as follows 
 70 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
 71 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
 72 | 
 73 | ## next, construct similarity graphs
 74 | W1 = affinityMatrix(Dist1, K, alpha)
 75 | W2 = affinityMatrix(Dist2, K, alpha)
 76 | 
 77 | ## These similarity graphs have complementary information about clusters.
 78 | displayClusters(W1,truelabel);
 79 | displayClusters(W2,truelabel);
 80 | 
 81 | ## next, we fuse all the graphs
 82 | ## then the overall matrix can be computed by similarity network fusion(SNF):
 83 | W = SNF(list(W1,W2), K, T)
 84 | 
 85 | ## With this unified graph W of size n x n, 
 86 | ## you can do either spectral clustering or Kernel NMF. 
 87 | ## If you need help with further clustering, please let us know. 
 88 | 
 89 | ## You can display clusters in the data by the following function
 90 | ## where C is the number of clusters.
 91 | C = 2 								# number of clusters
 92 | group = spectralClustering(W,C); 	# the final subtypes information
 93 | displayClusters(W, group)
 94 | 
 95 | ## You can get cluster labels for each data point by spectral clustering
 96 | labels = spectralClustering(W, C)
 97 | 
 98 | plot(Data1, col=labels, main='Data type 1')
 99 | plot(Data2, col=labels, main='Data type 2')
100 | }
101 | 


--------------------------------------------------------------------------------
/man/estimateNumberOfClustersGivenGraph.Rd:
--------------------------------------------------------------------------------
  1 | \name{estimateNumberOfClustersGivenGraph}
  2 | \alias{estimateNumberOfClustersGivenGraph}
  3 | \title{
  4 | Estimate Number Of Clusters Given Graph
  5 | }
  6 | \description{
  7 | This function estimates the number of clusters using the two heuristics described in the supplementary materials of our Nature Methods paper. W is the similarity graph, and NUMC is a vector which contains the possible choices of number of clusters.
  8 | }
  9 | \usage{
 10 | estimateNumberOfClustersGivenGraph(W, NUMC=2:5) 
 11 | }
 12 | %- maybe also 'usage' for other objects documented here.
 13 | \arguments{
 14 |   \item{W}{
 15 | Similarity graph: a single affinity matrix, such as the fused matrix returned by SNF (see the example below).
 16 | }
 17 |   \item{NUMC}{
 18 | A vector which contains the possible choices of number of clusters.
 19 | }
 20 | }
 21 | \value{
 22 | K1 is the estimated best number of clusters according to eigen-gaps.
 23 | K12 is the estimated SECOND best number of clusters according to eigen-gaps.
 24 | K2 is the estimated best number of clusters according to rotation cost.
 25 | K22 is the estimated SECOND best number of clusters according to rotation cost.
 26 | }
 27 | \references{
 28 | B Wang, A Mezlini, F Demir, M Fiume, T Zu, M Brudno, B Haibe-Kains, A Goldenberg (2014) Similarity Network Fusion: a fast and effective method to aggregate multiple data types on a genome wide scale. Nature Methods. Online. Jan 26, 2014  
 29 | 
 30 | Concise description can be found here: 
 31 | http://compbio.cs.toronto.edu/SNF/SNF/Software.html
 32 | }
 33 | \author{
 34 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
 35 | }
 36 | %% ~Make other sections like Warning with \section{Warning }{....} ~
 37 | 
 38 | \examples{
 39 | 
 40 | ## First, set all the parameters:
 41 | K = 20;  	# number of neighbors, usually (10~30)
 42 | alpha = 0.5;  	# hyperparameter, usually (0.3~0.8)
 43 | T = 20; 	# Number of Iterations, usually (10~20)
 44 | 
 45 | ## Data1 is of size n x d_1, 
 46 | ## where n is the number of patients, d_1 is the number of genes, 
 47 | ## Data2 is of size n x d_2, 
 48 | ## where n is the number of patients, d_2 is the number of methylation
 49 | data(Data1)
 50 | data(Data2)
 51 | 
 52 | ## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
 53 | ## And two data types have the same number of points. 
 54 | ## The first half data belongs to the first cluster; the rest belongs to the second cluster.
 55 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
 56 | 
 57 | ## Calculate distance matrices
 58 | ## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
 59 | 
 60 | ## If the data are all continuous values, we recommend the users to perform 
 61 | ## standard normalization before using SNF, 
 62 | ## though it is optional depending on the data the users want to use.  
 63 | # Data1 = standardNormalization(Data1);
 64 | # Data2 = standardNormalization(Data2);
 65 | 
 66 | 
 67 | 
 68 | ## Calculate the pair-wise distance; 
 69 | ## If the data is continuous, we recommend to use the function "dist2" as follows 
 70 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
 71 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
 72 | 
 73 | ## next, construct similarity graphs
 74 | W1 = affinityMatrix(Dist1, K, alpha)
 75 | W2 = affinityMatrix(Dist2, K, alpha)
 76 | 
 77 | ## These similarity graphs have complementary information about clusters.
 78 | displayClusters(W1,truelabel);
 79 | displayClusters(W2,truelabel);
 80 | 
 81 | ## next, we fuse all the graphs
 82 | ## then the overall matrix can be computed by similarity network fusion(SNF):
 83 | W = SNF(list(W1,W2), K, T)
 84 | 
 85 | ## With this unified graph W of size n x n, 
 86 | ## you can do either spectral clustering or Kernel NMF. 
 87 | ## If you need help with further clustering, please let us know. 
 88 | 
 89 | ## You can display clusters in the data by the following function
 90 | ## where C is the number of clusters.
 91 | C = 2 								# number of clusters
 92 | group = spectralClustering(W,C); 	# the final subtypes information
 93 | displayClusters(W, group)
 94 | 
 95 | ## You can get cluster labels for each data point by spectral clustering
 96 | labels = spectralClustering(W, C)
 97 | 
 98 | plot(Data1, col=labels, main='Data type 1')
 99 | plot(Data2, col=labels, main='Data type 2')
100 | 
101 | ## Here we provide two ways to estimate the number of clusters. Note that
102 | ## these two methods cannot guarantee the accuracy of the estimated number of
103 | ## clusters; they only offer two insights about the datasets.
104 | 
105 | estimationResult = estimateNumberOfClustersGivenGraph(W, 2:5);
106 | }
107 | 


--------------------------------------------------------------------------------
/man/getColorsForGroups.Rd:
--------------------------------------------------------------------------------
  1 | \name{getColorsForGroups}
  2 | \alias{getColorsForGroups}
  3 | \title{
  4 | Obtain a vector of colors from a numeric vector of groups
  5 | }
  6 | \description{
  7 | Convert a numeric vector containing group information to a vector of colors
  8 | }
  9 | \usage{
 10 | getColorsForGroups(group, colors)
 11 | }
 12 | %- maybe also 'usage' for other objects documented here.
 13 | \arguments{
 14 |   \item{group}{
 15 | A numeric vector containing the groups information such as the result of the spectralClustering function.
 16 | }
 17 |   \item{colors}{
 18 | A vector of colors to be used for the different groups. If the number of groups is greater than 8, the user must supply the colors argument with a vector of colors whose length is at least equal to the number of groups.
 19 | }
 20 | }
 21 | \details{
 22 | Mainly used to construct a vector or a matrix of colors to pass as the ColSideColors argument of the displayClustersWithHeatmap function. See the example of displayClustersWithHeatmap().
 23 | }
 24 | \value{
 25 | A character vector of colors, corresponding to the given vector of group, keeping the same order.
 26 | }
 27 | \author{
 28 | Florence Cavalli
 29 | }
 30 | \examples{
 31 | ## Example 1
 32 | gp=c(rep(1,10),rep(2,4),rep(1,3),rep(3,6))
 33 | ## Using the default colors
 34 | gp_colors=getColorsForGroups(gp)
 35 | gp_colors
 36 | ## Specifying the colors
 37 | gp_colors=getColorsForGroups(gp,colors=c("cyan","purple","orange"))
 38 | gp_colors
 39 | 
 40 | ## Example 2: Part of SNF
 41 | ## First, set all the parameters:
 42 | K = 20;    # number of neighbors, usually (10~30)
 43 | alpha = 0.5;    # hyperparameter, usually (0.3~0.8)
 44 | T = 20;   # Number of Iterations, usually (10~20)
 45 | 
 46 | ## Data1 is of size n x d_1, 
 47 | ## where n is the number of patients, d_1 is the number of genes, 
 48 | ## Data2 is of size n x d_2, 
 49 | ## where n is the number of patients, d_2 is the number of methylation
 50 | data(Data1)
 51 | data(Data2)
 52 | 
 53 | ## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
 54 | ## And two data types have the same number of points. 
 55 | ## The first half data belongs to the first cluster; the rest belongs to the second cluster.
 56 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
 57 | 
 58 | ## Calculate distance matrices
 59 | ## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
 60 | 
 61 | ## If the data are all continuous values, we recommend the users to perform 
 62 | ## standard normalization before using SNF, 
 63 | ## though it is optional depending on the data the users want to use.  
 64 | # Data1 = standardNormalization(Data1);
 65 | # Data2 = standardNormalization(Data2);
 66 | 
 67 | ## Calculate the pair-wise distance; 
 68 | ## If the data is continuous, we recommend to use the function "dist2" as follows 
 69 | Dist1 = dist2(as.matrix(Data1),as.matrix(Data1));
 70 | Dist2 = dist2(as.matrix(Data2),as.matrix(Data2));
 71 | 
 72 | ## next, construct similarity graphs
 73 | W1 = affinityMatrix(Dist1, K, alpha)
 74 | W2 = affinityMatrix(Dist2, K, alpha)
 75 | 
 76 | ## next, we fuse all the graphs
 77 | ## then the overall matrix can be computed by similarity network fusion(SNF):
 78 | W = SNF(list(W1,W2), K, T)
 79 | 
 80 | ## With this unified graph W of size n x n, 
 81 | ## you can do either spectral clustering or Kernel NMF. 
 82 | ## If you need help with further clustering, please let us know. 
 83 | 
 84 | ## You can display clusters in the data by the following function
 85 | ## where C is the number of clusters.
 86 | C = 2     						# number of clusters
 87 | group = spectralClustering(W,C); 	# the final subtypes information
 88 | 
 89 | ## Get a matrix containing the group information 
 90 | ## for the samples such as the SpectralClustering result and the True label
 91 | M_label=cbind(group,truelabel)
 92 | colnames(M_label)=c("spectralClustering","TrueLabel")
 93 | 
 94 | ## ****
 95 | ## Comments
 96 | ## rownames(M_label)=names(spectralClustering) To add if the spectralClustering function
 97 | ## passes the sample IDs as names.
 98 | ## or rownames(M_label)=rownames(W) Having W with rownames and colnames
 99 | ## set to the sample IDs would help as well.
100 | ## ***
101 | 
102 | ## Use the getColorsForGroups function to assign a color to each group
103 | ## NB: if there are more than 8 groups, you will have to input a vector
104 | ## of colors into the getColorsForGroups function
105 | M_label_colors=t(apply(M_label,1,getColorsForGroups))
106 | ## or choose your own colors for each label, for example:
107 | M_label_colors=cbind("spectralClustering"=getColorsForGroups(M_label[,"spectralClustering"],
108 | colors=c("blue","green")),"TrueLabel"=getColorsForGroups(M_label[,"TrueLabel"],
109 | colors=c("orange","cyan")))
110 | 
111 | ## Visualize the clusters present in the given similarity matrix 
112 | ## as well as some sample information
113 | ## In this presentation no clustering method is run; the samples
114 | ## are ordered according to their group label given in the group argument
115 | displayClustersWithHeatmap(W, group, M_label_colors[,"spectralClustering"]) 
116 | displayClustersWithHeatmap(W, group, M_label_colors)
117 | }


--------------------------------------------------------------------------------
/man/heatmapPlus.Rd:
--------------------------------------------------------------------------------
  1 | \name{heatmapPlus}
  2 | \alias{heatmapPlus}
  3 | %- Also NEED an '\alias' for EACH other topic documented here.
  4 | \title{
  5 | Display heatmap for clusters 
  6 | }
  7 | \description{
  8 | Visualize clusters with heatmap
  9 | }
 10 | \usage{
 11 | heatmapPlus(x, 
 12 | Rowv = NULL, 
 13 | Colv = if (symm) "Rowv" else NULL, 
 14 | distfun = dist, 
 15 | hclustfun = hclust, 
 16 | reorderfun = function(d, w) reorder(d, w), 
 17 | add.expr, 
 18 | symm = FALSE, 
 19 | revC = identical(Colv, "Rowv"), 
 20 | scale = c("row", "column", "none"), 
 21 | na.rm = TRUE, 
 22 | margins = c(5, 5), 
 23 | ColSideColors, 
 24 | RowSideColors, 
 25 | cexRow = 0.2 + 1/log10(nr), 
 26 | cexCol = 0.2 + 1/log10(nc), 
 27 | labRow = NULL, 
 28 | labCol = NULL, 
 29 | main = NULL, 
 30 | xlab = NULL, 
 31 | ylab = NULL, 
 32 | keep.dendro = FALSE, 
 33 | verbose = getOption("verbose"), ...)
 34 | 
 35 | 
 36 | }
 37 | %- maybe also 'usage' for other objects documented here.
 38 | \arguments{
 39 |   \item{x}{
 40 | numeric matrix of the values to be plotted.
 41 | }
 42 |  \item{Rowv}{
 43 | determines if and how the row dendrogram should be computed and reordered. Either a dendrogram or a vector of values used to reorder the row dendrogram or NA to suppress any row dendrogram (and reordering) or by default, NULL.
 44 | }
 45 |  \item{Colv}{
 46 | determines if and how the column dendrogram should be reordered. Has the same options as the Rowv argument above and additionally when x is a square matrix, Colv = "Rowv" means that columns should be treated identically to the rows.
 47 | }
 48 |  \item{distfun}{
 49 | function used to compute the distance (dissimilarity) between both rows and columns. Defaults to dist.
 50 | }
 51 |  \item{hclustfun}{
 52 | function used to compute the hierarchical clustering when Rowv or Colv are not dendrograms. Defaults to hclust.
 53 | }
 54 |  \item{reorderfun}{
 55 | function(d,w) of dendrogram and weights for reordering the row and column dendrograms. The default uses reorder.dendrogram.
 56 | }
 57 |  \item{add.expr}{
 58 | expression that will be evaluated after the call to image. Can be used to add components to the plot.
 59 | }
 60 |  \item{symm}{
 61 | logical indicating if x should be treated symmetrically; can only be true when x is a square matrix.
 62 | }
 63 |  \item{revC}{
 64 | logical indicating if the column order should be reversed for plotting, such that e.g., for the symmetric case, the symmetry axis is as usual.
 65 | }
 66 |  \item{scale}{
 67 | character indicating if the values should be centered and scaled in either the row direction or the column direction, or none. The default is "row" if symm false, and "none" otherwise. 
 68 | }
 69 |  \item{na.rm}{
 70 | logical indicating whether NA's should be removed.
 71 | 
 72 | }
 73 |  \item{margins}{
 74 | numeric vector of length 2 containing the margins (see par(mar= *)) for column and row names, respectively.
 75 | }
 76 |  \item{ColSideColors}{
 77 | (optional) character matrix with number of rows matching number of columns in x. Each column is plotted as a row similar to heatmap()'s ColSideColors. colnames() will be used for labels if present.
 78 | }
 79 |  \item{RowSideColors}{
 80 | (optional) character matrix with number of rows matching number of rows in x. Each column is plotted as a row similar to heatmap()'s RowSideColors. colnames() will be used for labels if present.
 81 | }
 82 |  \item{cexRow}{
 83 | positive numbers, used as cex.axis in for the row axis labeling. The defaults currently only use number of rows.
 84 | }
 85 |  \item{cexCol}{
 86 | positive numbers, used as cex.axis in for the column axis labeling. The defaults currently only use number of columns.
 87 | 
 88 | }
 89 |  \item{labRow}{
 90 | character vectors with row labels to use; these default to rownames(x).
 91 | }
 92 |  \item{labCol}{
 93 | character vectors with column labels to use; these default to colnames(x). 
 94 | }
 95 |  \item{main}{
 96 | main title; defaults to none.
 97 | }
 98 |  \item{xlab}{
 99 | x-axis titles; defaults to none.
100 | }
101 |  \item{ylab}{
102 | y-axis title; defaults to none.
103 | }
104 |  \item{keep.dendro}{
105 | logical indicating if the dendrogram(s) should be kept as part of the result (when Rowv and/or Colv are not NA).
106 | }
107 |  \item{verbose}{
108 | logical indicating if information should be printed.
109 | }
110 |  \item{...}{
111 | additional arguments passed on to image, e.g., col specifying the colors
112 | }
113 | 
114 | 
115 | }
116 | \value{
117 | Invisibly, a list with components
118 | 
119 | rowInd
120 | row index permutation vector as returned by order.dendrogram.
121 | 
122 | colInd
123 | column index permutation vector.
124 | 
125 | Rowv
126 | the row dendrogram; only if input Rowv was not NA and keep.dendro is true.
127 | 
128 | Colv
129 | the column dendrogram; only if input Colv was not NA and keep.dendro is true.
130 | }
131 | \author{
132 | Dr. Anna Goldenberg, Bo Wang, Aziz Mezlini, Feyyaz Demir
133 | }
134 | %% ~Make other sections like Warning with \section{Warning }{....} ~
135 | 
136 | \examples{
137 |   z = matrix(rnorm(30),nrow=5,ncol=6);
138 |   rlab = matrix(as.character(c(1:5,2:6,3:7,4:8)),nrow=5,ncol=4);
139 |   clab = matrix(as.character(c(1:6,6:1)),nrow=6,ncol=2);
140 |   colnames(rlab) = LETTERS[1:dim(rlab)[2]];
141 |   colnames(clab) = 1:dim(clab)[2];
142 |   heatmapPlus(z,ColSideColors=clab,RowSideColors=rlab);
143 | }
144 | 


--------------------------------------------------------------------------------
/man/displayClustersWithHeatmap.Rd:
--------------------------------------------------------------------------------
  1 | \name{displayClustersWithHeatmap}
  2 | \alias{displayClustersWithHeatmap}
  3 | \title{
  4 | Display the similarity matrix by clusters with some sample information
  5 | }
  6 | \description{
  7 | Visualize the clusters present in the given similarity matrix as well as some sample information.
  8 | }
  9 | \usage{
 10 | displayClustersWithHeatmap(W, group, ColSideColors=NULL, ...)
 11 | }
 12 | \arguments{
 13 |   \item{W}{
 14 | Similarity matrix
 15 | }
 16 |   \item{group}{
 17 | A numeric vector containing the groups information for each sample in W such as the result of the spectralClustering function. The order should correspond to the sample order in W.
 18 | }
 19 |   \item{ColSideColors}{
 20 | (optional) character vector of length ncol(W) containing the color names for a horizontal side bar that may be used to annotate the columns of W, used by the heatmap function, OR a character matrix with number of rows matching number of rows in W.  Each column is plotted as a row similar to heatmap()'s ColSideColors by the heatmap.plus function.
 21 | }
 22 |   \item{...}{
 23 | other parameters that can be passed on to the heatmap function (if ColSideColors is NULL or a vector) or to the heatmap.plus function (if ColSideColors is a matrix)
 24 | }
 25 | }
 26 | \details{
 27 | Uses the heatmap or heatmap.plus function to display the similarity matrix.
 28 | For representation purposes, the similarity matrix diagonal is set to the median value of W, the matrix is normalised and W = W + t(W) is applied.
 29 | In this presentation no clustering method is run; the samples are ordered according to their group label given in the group argument.
 30 | }
 31 | \value{
 32 | Plots the similarity matrix using the heatmap function. Samples are ordered by the clusters provided by the group argument, with sample information displayed with a color bar if the ColSideColors argument is provided.
 33 | }
 34 | \author{
 35 | Florence Cavalli
 36 | }
 37 | \examples{
 38 | ## First, set all the parameters:
 39 | K = 20;    # number of neighbors, usually (10~30)
 40 | alpha = 0.5;    # hyperparameter, usually (0.3~0.8)
 41 | T = 20;   # Number of Iterations, usually (10~20)
 42 | 
 43 | ## Data1 is of size n x d_1, 
 44 | ## where n is the number of patients, d_1 is the number of genes, 
 45 | ## Data2 is of size n x d_2, 
 46 | ## where n is the number of patients, d_2 is the number of methylation
 47 | data(Data1)
 48 | data(Data2)
 49 | 
 50 | ## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
 51 | ## And two data types have the same number of points. 
 52 | ## The first half data belongs to the first cluster; the rest belongs to the second cluster.
 53 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
 54 | 
 55 | ## Calculate distance matrices
 56 | ## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
 57 | 
 58 | ## If the data are all continuous values, we recommend the users to perform 
 59 | ## standard normalization before using SNF, 
 60 | ## though it is optional depending on the data the users want to use.  
 61 | # Data1 = standardNormalization(Data1);
 62 | # Data2 = standardNormalization(Data2);
 63 | 
 64 | ## Calculate the pair-wise distance; 
 65 | ## If the data is continuous, we recommend to use the function "dist2" as follows 
 66 | Dist1 = (dist2(as.matrix(Data1),as.matrix(Data1)))^(1/2)
 67 | Dist2 = (dist2(as.matrix(Data2),as.matrix(Data2)))^(1/2)
 68 | 
 69 | ## next, construct similarity graphs
 70 | W1 = affinityMatrix(Dist1, K, alpha)
 71 | W2 = affinityMatrix(Dist2, K, alpha)
 72 | 
 73 | ## next, we fuse all the graphs
 74 | ## then the overall matrix can be computed by similarity network fusion(SNF):
 75 | W = SNF(list(W1,W2), K, T)
 76 | 
 77 | ## With this unified graph W of size n x n, 
 78 | ## you can do either spectral clustering or Kernel NMF. 
 79 | ## If you need help with further clustering, please let us know. 
 80 | 
 81 | ## You can display clusters in the data by the following function
 82 | ## where C is the number of clusters.
 83 | C = 2   							# number of clusters
 84 | group = spectralClustering(W,C); 	# the final subtypes information
 85 | 
 86 | ## Get a matrix containing the group information 
 87 | ## for the samples such as the SpectralClustering result and the True label
 88 | M_label=cbind(group,truelabel)
 89 | colnames(M_label)=c("spectralClustering","TrueLabel")
 90 | 
 91 | ## ****
 92 | ## Comments
 93 | ## rownames(M_label)=names(spectralClustering) To add if the spectralClustering function
 94 | ## passes the sample IDs as names.
 95 | ## or rownames(M_label)=rownames(W) Having W with rownames and colnames
 96 | ## set to the sample IDs would help as well.
 97 | ## ***
 98 | 
 99 | ## Use the getColorsForGroups function to assign a color to each group
100 | ## NB: if there are more than 8 groups, you will have to input a vector
101 | ## of colors into the getColorsForGroups function
102 | M_label_colors=t(apply(M_label,1,getColorsForGroups))
103 | ## or choose your own colors for each label, for example:
104 | M_label_colors=cbind("spectralClustering"=getColorsForGroups(M_label[,"spectralClustering"],
105 | colors=c("blue","green")),"TrueLabel"=getColorsForGroups(M_label[,"TrueLabel"],
106 | colors=c("orange","cyan")))
107 | 
108 | ## Visualize the clusters present in the given similarity matrix 
109 | ## as well as some sample information
110 | ## In this presentation no clustering method is run; the samples
111 | ## are ordered according to their group label given in the group argument
112 | displayClustersWithHeatmap(W, group, M_label_colors[,"spectralClustering"]) 
113 | displayClustersWithHeatmap(W, group, M_label_colors)
114 | }
115 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | Install package by navigating to the parent folder of this one and running
  2 | 
  3 | >R CMD INSTALL SNFtool
  4 | 
  5 | After the installation is complete you can use the functions. Here is an example session.
  6 | 
  7 | ## First, set all the parameters:
  8 | K = 20;		# number of neighbors, usually (10~30)
  9 | alpha = 0.5;  	# hyperparameter, usually (0.3~0.8)
 10 | T = 10; 	# Number of Iterations, usually (10~20)
 11 | 
 12 | ## Data1 is of size n x d_1, where n is the number of patients, d_1 is the number of genes, e.g.
 13 | ## Data2 is of size n x d_2, where n is the number of patients, d_2 is the number of methylation, e.g.
 14 | data(Data1)
 15 | data(Data2)
 16 | 
 17 | ## Here, the simulation data (Data1, Data2) has two data types. They are complementary to each other. And two data types have the same number of points. The first half data belongs to the first cluster; the rest belongs to the second cluster.
 18 | 
 19 | truelabel = c(matrix(1,100,1),matrix(2,100,1)); ##the ground truth of the simulated data;
 20 | 
 21 | 
 22 | ## Calculate distance matrices(here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
 23 | 
 24 | ## If the data are all continuous values, we recommend the users to perform standard normalization before using SNF, though it is optional depending on the data the users want to use.  
 25 | 
 26 | # Data1 = standardNormalization(Data1);
 27 | # Data2 = standardNormalization(Data2);
 28 | 
 29 | 
 30 | ## Calculate the pair-wise distance; If the data is continuous, we recommend to use the function "dist2" as follows; if the data is discrete, we recommend the users to use ""
 31 | Dist1 = dist2(as.matrix(Data1),as.matrix(Data1));
 32 | Dist2 = dist2(as.matrix(Data2),as.matrix(Data2));
 33 | 
 34 | ## next, construct similarity graphs
 35 | W1 = affinityMatrix(Dist1, K, alpha)
 36 | W2 = affinityMatrix(Dist2, K, alpha)
 37 | 
 38 | ## These similarity graphs have complementary information about clusters.
 39 | displayClusters(W1,truelabel);
 40 | displayClusters(W2,truelabel);
 41 | 
 42 | ## next, we fuse all the graphs
 43 | ## then the overall matrix can be computed by similarity network fusion(SNF):
 44 | W = SNF(list(W1,W2), K, T)
 45 | 
 46 | ## With this unified graph W of size n x n, you can do either spectral clustering or Kernel NMF. If you need help with further clustering, please let us know. 
 47 | ## for example, spectral clustering
 48 | 
 49 | C = 2 					# number of clusters
 50 | group = spectralClustering(W, C); 	# the final subtypes information
 51 | 
 52 | ## you can evaluate the goodness of the obtained clustering results by calculating the Normalized Mutual Information (NMI): if NMI is close to 1, it indicates that the obtained clustering is very close to the "true" cluster information; if NMI is close to 0, it indicates the obtained clustering is not similar to the "true" cluster information.
 53 | 
 54 | displayClusters(W, group);
 55 | SNFNMI = calNMI(group, truelabel)
 56 | 
 57 | ## you can also find the concordance between each individual network and the fused network
 58 | 
 59 | ConcordanceMatrix = concordanceNetworkNMI(list(W, W1,W2));
 60 | 
 61 | ################################################################################
 62 | # We also provide an example using label propagation to predict the labels of new data points below.
 63 | # How to use SNF with multiple views
 64 | 
 65 | # Load views into list "dataL"
 66 | # load("Digits.RData")
 67 | data(Digits)
 68 | # Set the other parameters
 69 | K = 20 # number of neighbours
 70 | alpha = 0.5 # hyperparameter in affinityMatrix
 71 | T = 20 # number of iterations of SNF
 72 | # Normalize the features in each of the views (optional)
 73 | # dataL = lapply(dataL, standardNormalization)
 74 | 
 75 | # Calculate the distances for each view
 76 | distL = lapply(dataL, function(x) dist2(x, x))
 77 | 
 78 | # Construct the similarity graphs
 79 | affinityL = lapply(distL, function(x) affinityMatrix(x, K, alpha))
 80 | ################################################################################
 81 | # An example of how to use concordanceNetworkNMI
 82 | 
 83 | Concordance_matrix = concordanceNetworkNMI(affinityL, 3);
 84 | 
 85 | ## The output, Concordance_matrix,  shows the concordance between the fused network and each individual network. 
 86 | 
 87 | ################################################################################
 88 | # Example of how to use SNF to perform subtyping
 89 | # Construct the fused network
 90 | W = SNF(affinityL, K, T)
 91 | # perform clustering on the fused network.
 92 | clustering = spectralClustering(W,3);
 93 | # use NMI to measure the goodness of the obtained labels.
 94 | NMI = calNMI(clustering, label);
 95 | 
 96 | ################################################################################
 97 | # Provide an example of predicting the new labels with label propagation
 98 | 
 99 | # Load views into list "dataL" and the cluster assignment into vector "label"
100 | data(Digits)
101 | 
102 | # Create the training and test data
103 | n = floor(0.8*length(label)) # number of training cases
104 | trainSample = sample.int(length(label), n)
105 | train = lapply(dataL, function(x) x[trainSample, ]) # Use the randomly sampled 80% of cases for training
106 | test = lapply(dataL, function(x) x[-trainSample, ]) # Test the rest of the data set
107 | groups = label[trainSample]
108 | 
109 | # Set the other parameters
110 | K = 20
111 | alpha = 0.5
112 | t = 20
113 | method = TRUE
114 | 
115 | # Apply the prediction function to the data
116 | newLabel = groupPredict(train,test,groups,K,alpha,t,method)
117 | 
118 | # Compare the prediction accuracy
119 | accuracy = sum(label[-trainSample] == newLabel[-c(1:n)])/(length(label) - n)
120 | 
121 | 
122 | ################################################################################
123 | # References: 
124 | # B Wang, A Mezlini, F Demir, M Fiume, T Zu, M Brudno, B Haibe-Kains, A Goldenberg (2014) Similarity Network Fusion: a fast and effective method to aggregate multiple data types on a genome wide scale. Nature Methods. Online. Jan 26, 2014  
125 | # Website: http://compbio.cs.toronto.edu/SNF/SNF/Software.html
126 | 
127 | 


--------------------------------------------------------------------------------
/R/heatmapPlus.R:
--------------------------------------------------------------------------------
  1 | globalVariables(c("sd", "par", "layout"))
  2 | heatmapPlus <-
  3 | function (x, Rowv = NULL, Colv = if (symm) "Rowv" else NULL, 
  4 |     distfun = dist, hclustfun = hclust, reorderfun = function(d, 
  5 |         w) reorder(d, w), add.expr, symm = FALSE, revC = identical(Colv, 
  6 |         "Rowv"), scale = c("row", "column", "none"), na.rm = TRUE, 
  7 |     margins = c(5, 5), ColSideColors, RowSideColors, cexRow = 0.2 + 
  8 |         1/log10(nr), cexCol = 0.2 + 1/log10(nc), labRow = NULL, 
  9 |     labCol = NULL, main = NULL, xlab = NULL, ylab = NULL, keep.dendro = FALSE, 
 10 |     verbose = getOption("verbose"), ...) 
 11 | {
 12 |     scale <- if (symm && missing(scale)) 
 13 |         "none"
 14 |     else match.arg(scale)
 15 |     if (length(di <- dim(x)) != 2 || !is.numeric(x)) 
 16 |         stop("'x' must be a numeric matrix")
 17 |     nr <- di[1]
 18 |     nc <- di[2]
 19 |     if (nr <= 1 || nc <= 1) 
 20 |         stop("'x' must have at least 2 rows and 2 columns")
 21 |     if (!is.numeric(margins) || length(margins) != 2) 
 22 |         stop("'margins' must be a numeric vector of length 2")
 23 |     doRdend <- !identical(Rowv, NA)
 24 |     doCdend <- !identical(Colv, NA)
 25 |     if (is.null(Rowv)) 
 26 |         Rowv <- rowMeans(x, na.rm = na.rm)
 27 |     if (is.null(Colv)) 
 28 |         Colv <- colMeans(x, na.rm = na.rm)
 29 |     if (doRdend) {
 30 |         if (inherits(Rowv, "dendrogram")) 
 31 |             ddr <- Rowv
 32 |         else {
 33 |             hcr <- hclustfun(distfun(x))
 34 |             ddr <- as.dendrogram(hcr)
 35 |             if (!is.logical(Rowv) || Rowv) 
 36 |                 ddr <- reorderfun(ddr, Rowv)
 37 |         }
 38 |         if (nr != length(rowInd <- order.dendrogram(ddr))) 
 39 |             stop("row dendrogram ordering gave index of wrong length")
 40 |     }
 41 |     else rowInd <- 1:nr
 42 |     if (doCdend) {
 43 |         if (inherits(Colv, "dendrogram")) 
 44 |             ddc <- Colv
 45 |         else if (identical(Colv, "Rowv")) {
 46 |             if (nr != nc) 
 47 |                 stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
 48 |             ddc <- ddr
 49 |         }
 50 |         else {
 51 |             hcc <- hclustfun(distfun(if (symm) 
 52 |                 x
 53 |             else t(x)))
 54 |             ddc <- as.dendrogram(hcc)
 55 |             if (!is.logical(Colv) || Colv) 
 56 |                 ddc <- reorderfun(ddc, Colv)
 57 |         }
 58 |         if (nc != length(colInd <- order.dendrogram(ddc))) 
 59 |             stop("column dendrogram ordering gave index of wrong length")
 60 |     }
 61 |     else colInd <- 1:nc
 62 |     x <- x[rowInd, colInd]
 63 |     labRow <- if (is.null(labRow)) 
 64 |         if (is.null(rownames(x))) 
 65 |             (1:nr)[rowInd]
 66 |         else rownames(x)
 67 |     else labRow[rowInd]
 68 |     labCol <- if (is.null(labCol)) 
 69 |         if (is.null(colnames(x))) 
 70 |             (1:nc)[colInd]
 71 |         else colnames(x)
 72 |     else labCol[colInd]
 73 |     if (scale == "row") {
 74 |         x <- sweep(x, 1, rowMeans(x, na.rm = na.rm))
 75 |         sx <- apply(x, 1, sd, na.rm = na.rm)
 76 |         x <- sweep(x, 1, sx, "/")
 77 |     }
 78 |     else if (scale == "column") {
 79 |         x <- sweep(x, 2, colMeans(x, na.rm = na.rm))
 80 |         sx <- apply(x, 2, sd, na.rm = na.rm)
 81 |         x <- sweep(x, 2, sx, "/")
 82 |     }
 83 |     lmat <- rbind(c(NA, 3), 2:1)
 84 |     lwid <- c(if (doRdend) 1 else 0.05, 4)
 85 |     lhei <- c((if (doCdend) 1 else 0.05) + if (!is.null(main)) 0.2 else 0, 
 86 |         4)
 87 | 
 88 |     if (!missing(ColSideColors)) {
 89 |         if (!is.matrix(ColSideColors))
 90 |             stop("'ColSideColors' must be a matrix")
 91 |         if (!is.character(ColSideColors) || dim(ColSideColors)[1] != nc) 
 92 |             stop("'ColSideColors' dim()[2] must be of length ncol(x)")
 93 |         lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
 94 |         lhei <- c(lhei[1], 0.2, lhei[2])
 95 |     }
 96 |     if (!missing(RowSideColors)) {
 97 |         if (!is.matrix(RowSideColors))
 98 |             stop("'RowSideColors' must be a matrix")
 99 |         if (!is.character(RowSideColors) || dim(RowSideColors)[1] != nr) 
100 |             stop("'RowSideColors' must be a character vector of length nrow(x)")
101 |         lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 
102 |             1), lmat[, 2] + 1)
103 |         lwid <- c(lwid[1], 0.2, lwid[2])
104 |     }
105 |     lmat[is.na(lmat)] <- 0
106 |     if (verbose) {
107 |         cat("layout: widths = ", lwid, ", heights = ", lhei, 
108 |             "; lmat=\n")
109 |         print(lmat)
110 |     }
111 |     op <- par(no.readonly = TRUE)
112 |     on.exit(par(op))
113 |     layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
114 |     if (!missing(RowSideColors)) {
115 |         par(mar = c(margins[1], 0, 0, 0.5))
116 |         rsc=RowSideColors[rowInd,];
117 |         rsc.colors=matrix();
118 |         rsc.names=names(table(rsc));
119 |         rsc.i=1;
120 |         for(rsc.name in rsc.names){
121 |           rsc.colors[rsc.i]=rsc.name;
122 |           rsc[rsc==rsc.name]=rsc.i;
123 |           rsc.i=rsc.i+1;
124 |         }
125 |         rsc=matrix(as.numeric(rsc), nrow=dim(rsc)[1]);
126 |         image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)
127 | 
128 |         if (length(colnames(RowSideColors))>0) {
129 |           axis(1, 0:(dim(rsc)[2]-1) / (dim(rsc)[2]-1), colnames(RowSideColors), las=2, tick=FALSE);
130 |         }
131 |     }
132 |     if (!missing(ColSideColors)) {
133 |         par(mar = c(0.5, 0, 0, margins[2]))
134 |         csc=ColSideColors[colInd,];
135 |         csc.colors=matrix();
136 |         csc.names=names(table(csc));
137 |         csc.i=1;
138 |         for(csc.name in csc.names){
139 |           csc.colors[csc.i]=csc.name;
140 |           csc[csc==csc.name]=csc.i;
141 |           csc.i=csc.i+1;
142 |         }
143 |         csc=matrix(as.numeric(csc), nrow=dim(csc)[1]);
144 |         image(csc, col = as.vector(csc.colors), axes = FALSE)
145 | 
146 |         if (length(colnames(ColSideColors))>0) {
147 |           axis(2, 0:(dim(csc)[2]-1) / (dim(csc)[2]-1), colnames(ColSideColors), las=2, tick=FALSE);
148 |         }
149 |     }
150 |     par(mar = c(margins[1], 0, 0, margins[2]))
151 |     if (!symm || scale != "none") {
152 |         x <- t(x)
153 |     }
154 |     if (revC) {
155 |         iy <- nr:1
156 |         ddr <- rev(ddr)
157 |         x <- x[, iy]
158 |     }
159 |     else iy <- 1:nr
160 |     image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + 
161 |         c(0, nr), axes = FALSE, xlab = "", ylab = "", ...)
162 |     axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0, 
163 |         cex.axis = cexCol)
164 |     if (!is.null(xlab)) 
165 |         mtext(xlab, side = 1, line = margins[1] - 1.25)
166 |     axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0, 
167 |         cex.axis = cexRow)
168 |     if (!is.null(ylab)) 
169 |         mtext(ylab, side = 4, line = margins[2] - 1.25)
170 |     if (!missing(add.expr)) 
171 |         eval(substitute(add.expr))
172 |     par(mar = c(margins[1], 0, 0, 0))
173 |     if (doRdend) 
174 |         plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
175 |     else frame()
176 |     par(mar = c(0, 0, if (!is.null(main)) 1 else 0, margins[2]))
177 |     if (doCdend) 
178 |         plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
179 |     else if (!is.null(main)) 
180 |         frame()
181 |     if (!is.null(main)) 
182 |         title(main, cex.main = 1.5 * op[["cex.main"]])
183 |     invisible(list(rowInd = rowInd, colInd = colInd, Rowv = if (keep.dendro && 
184 |         doRdend) ddr, Colv = if (keep.dendro && doCdend) ddc))
185 | }
186 | 


--------------------------------------------------------------------------------