├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── RcppExports.R
    ├── datasetLiverBrainLung.R
    ├── dsa.R
    ├── estimateNoise.R
    ├── geneCollapse.R
    ├── generateData.R
    ├── hinge.R
    ├── hysime.R
    ├── linseedObject.R
    ├── plotProportions.R
    ├── preprocess.R
    ├── projectiveProjection.R
    ├── proportionsLiverBrainLung.R
    ├── sisal.R
    ├── softNeg.R
    ├── utils.R
    └── vca.R
├── Readme.md
├── Readme_files
    └── figure-markdown_github
    │   ├── deconvolution-1.png
    │   ├── networks-1.png
    │   ├── networks-2.png
    │   ├── proportions-1.png
    │   ├── rtsne-1.png
    │   └── visi-1.png
├── data
    ├── datasetLiverBrainLung.RData
    └── proportionsLiverBrainLung.RData
├── man
    ├── LinseedObject.Rd
    ├── collapseGenes.Rd
    ├── datasetLiverBrainLung.Rd
    ├── dotPlotPropotions.Rd
    ├── estimateAdditiveNoise.Rd
    ├── estimateNoise.Rd
    ├── fastDSA.Rd
    ├── generateMixedData.Rd
    ├── getProjectiveProjection.Rd
    ├── guessOrder.Rd
    ├── hinge.Rd
    ├── hysime.Rd
    ├── is_logscale.Rd
    ├── linearizeDataset.Rd
    ├── logDataset.Rd
    ├── plotProportions.Rd
    ├── preprocessDataset.Rd
    ├── preprocessGSE.Rd
    ├── projectiveProjection.Rd
    ├── proportionsLiverBrainLung.Rd
    ├── pureDsa.Rd
    ├── runDSA.Rd
    ├── sampleFromSimplexUniformly.Rd
    ├── sisal.Rd
    ├── softNeg.Rd
    └── vca.Rd
├── src
    ├── Makevars
    ├── Makevars.win
    ├── RcppExports.cpp
    ├── fcnnls.cpp
    ├── fcnnls.h
    └── pairwiseR2.cpp
└── vignettes
    └── linseedTutorial.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^Readme.md
4 | ^Readme_files
5 | ^\.travis\.yml$
6 | ^\.idea


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | */*.o
2 | */*.so
3 | .Rhistory
4 | *.Rproj
5 | .Rproj.user
6 | .idea/
7 | *.dll


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | r:
 3 |     #- bioc-devel
 4 |   - bioc-release
 5 | # warnings_are_errors: true
 6 | sudo: false
 7 | fortran: true
 8 | cache: packages
 9 | 
10 | addons:
11 |   apt:
12 |     packages:
13 |       - libxml2-dev
14 |       - libssl-dev
15 |       - openssl
16 | 
17 | r_packages:
18 |   - Rcpp
19 |   - RcppArmadillo
20 |   - BH
21 |   - methods
22 |   - corpcor
23 |   - combinat
24 |   - NMF
25 |   - R6
26 |   - dplyr
27 |   - ggplot2
28 |   - reshape2
29 |   - progress
30 |   - Matrix
31 |   - Rtsne
32 |   - knitr
33 |   - rmarkdown
34 |   - testthat
35 |   - BiocManager
36 | 
37 | os:
38 |   - linux
39 |   - osx
40 | 
41 | bioc_required: true
42 | bioc_packages:
43 |   - GEOquery
44 |   - Biobase
45 |   - preprocessCore
46 |   - BiocStyle
47 | 
48 | before_install:
49 |   - if [ ${TRAVIS_OS_NAME} = 'osx' ]; then brew install libxml2; brew install openssl; brew install cairo; fi
50 | 
51 | script:
52 |   - R CMD build .
53 |   - FILE=$(ls -1t *.tar.gz | head -n 1)
54 |   - R CMD check "$FILE" --no-vignettes --no-build-vignettes
55 | 
56 | after_success:
57 |   - Rscript -e 'covr::codecov()'
58 | 
59 | notifications:
60 |   email:
61 |     recipients:
62 |       - zayats1812@mail.ru
63 |       - denklewer@gmail.com
64 |     on_success: always
65 |     on_failure: always
66 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: linseed
 2 | Type: Package
 3 | Title: Linear Subpsace identification to solve complete gene expression deconvolution problem
 4 | Version: 0.99.3
 5 | Authors@R: person("Konstantin", "Zaitsev",
 6 |     email = "zayats1812@mail.ru", role = c("aut", "cre"))
 7 | Maintainer: Konstantin Zaitsev <zayats1812@mail.ru>
 8 | Description: The package implements an algorithm to perform deconvolution
 9 |     of gene expression dataset without prior
10 |     knowledge of cell-specific markers.
11 | biocViews: Software, GeneExpression, Clustering
12 | License: MIT + file LICENSE
13 | LazyData: TRUE
14 | VignetteBuilder: knitr
15 | LinkingTo: Rcpp, RcppArmadillo, BH
16 | SystemRequirements: 
17 |   libxml2-dev, 
18 |   libssl-dev, 
19 |   openssl
20 | Depends:
21 |     R (>= 3.6)
22 | Imports:
23 |     Rcpp,
24 |     RcppArmadillo,
25 |     BH,
26 |     methods,
27 |     GEOquery,
28 |     Biobase,
29 |     corpcor,
30 |     combinat,
31 |     NMF,
32 |     R6,
33 |     dplyr,
34 |     ggplot2,
35 |     reshape2,
36 |     preprocessCore,
37 |     progress,
38 |     Matrix,
39 |     Rtsne
40 | Suggests:
41 |     BiocStyle,
42 |     knitr,
43 |     rmarkdown,
44 |     testthat
45 | RoxygenNote: 7.3.1
46 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Konstantin Zaitsev and other contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(LinseedObject)
 4 | export(dotPlotPropotions)
 5 | export(estimateAdditiveNoise)
 6 | export(estimateNoise)
 7 | export(generateMixedData)
 8 | export(plotProportions)
 9 | export(preprocessDataset)
10 | export(preprocessGSE)
11 | import(BH)
12 | import(GEOquery)
13 | import(Matrix)
14 | import(NMF)
15 | import(Rcpp)
16 | import(RcppArmadillo)
17 | import(Rtsne)
18 | import(corpcor)
19 | import(dplyr)
20 | import(ggplot2)
21 | import(methods)
22 | import(preprocessCore)
23 | import(progress)
24 | import(reshape2)
25 | importFrom(Biobase,exprs)
26 | importFrom(R6,R6Class)
27 | importFrom(combinat,permn)
28 | useDynLib(linseed)
29 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | fcnnls_c <- function(C, A) {
 5 |     .Call('_linseed_fcnnls_c', PACKAGE = 'linseed', C, A)
 6 | }
 7 | 
 8 | fcnnls_sum_to_one <- function(C, A, delta) {
 9 |     .Call('_linseed_fcnnls_sum_to_one', PACKAGE = 'linseed', C, A, delta)
10 | }
11 | 
12 | pairwiseR2 <- function(X) {
13 |     .Call('_linseed_pairwiseR2', PACKAGE = 'linseed', X)
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/R/datasetLiverBrainLung.R:
--------------------------------------------------------------------------------
 1 | #' GSE19830 dataset
 2 | #'
 3 | #' Gene Expression data from GSE19830 experiment:
 4 | #' 3 tissues (liver, brain and lung) were mixed in different proportions:
 5 | #'
 6 | #' GSM495209-GSM495211 pure lung samples
 7 | #' GSM495212-GSM495214 pure brain samples
 8 | #' GSM495215-GSM495217 pure liver samples
 9 | #' GSM495218-GSM495220 5 % Liver / 25 % Brain / 70 % Lung
10 | #' GSM495221-GSM495223 70 % Liver / 5 % Brain / 25 % Lung
11 | #' GSM495224-GSM495226 25 % Liver / 70 % Brain / 5 % Lung
12 | #' GSM495227-GSM495229 70 % Liver / 25 % Brain / 5 % Lung
13 | #' GSM495230-GSM495232 45 % Liver / 45 % Brain / 10 % Lung
14 | #' GSM495233-GSM495235 55 % Liver / 20 % Brain / 25 % Lung
15 | #' GSM495236-GSM495238 50 % Liver / 30 % Brain / 20 % Lung
16 | #' GSM495239-GSM495241 55 % Liver / 30 % Brain / 15 % Lung
17 | #' GSM495242-GSM495244 50 % Liver / 40 % Brain / 10 % Lung
18 | #' GSM495245-GSM495247 60 % Liver / 35 % Brain / 5 % Lung
19 | #' GSM495248-GSM495250 65 % Liver / 34 % Brain / 1 % Lung
20 | #'
21 | #' @docType data
22 | #' 
23 | #' @name datasetLiverBrainLung
24 | #'
25 | #' @usage data(datasetLiverBrainLung)
26 | #'
27 | #' @format An object of class \code{'data.frame'}
28 | #'
29 | #' @keywords datasets
30 | #'
31 | #' @references Shen-Orr SS, Tibshirani R, Khatri P, et al. cell type-specific gene expression differences in complex tissues. Nature methods. 2010;7(4):287-289. doi:10.1038/nmeth.1439.
32 | #' (\href{http://www.ncbi.nlm.nih.gov/pubmed/20208531}{PubMed})
33 | #'
34 | #' @source \href{http://qtlarchive.org/db/q?pg=projdetails&proj=moore_2013b}{QTL Archive}
35 | #' @examples
36 | #' data("datasetLiverBrainLung")
37 | #' mixedSamples <- datasetLiverBrainLung[, 10:42]
38 | #' clustered <- preprocessDataset(mixedSamples)
39 | NULL
40 | 


--------------------------------------------------------------------------------
/R/dsa.R:
--------------------------------------------------------------------------------
 1 | #' Fast DSA algorithm implementation
 2 | #'
 3 | #' Runs DSA implementation using .fccnls for solving least-squares with multiple right-hand-sides
 4 | #'
 5 | #' @param dataset gene expression matrix
 6 | #' @param genes list with putative signatures for DSA algorithm
 7 | #' @import NMF
 8 | #' @import corpcor
 9 | #' @import BH
10 | #' 
11 | #' @return deconvolution results, list with H and W matrices
12 | fastDSA <- function(dataset, genes) {
13 |     eigengenes <- do.call(rbind, lapply(genes, function(geneSet) colMeans(dataset[geneSet,
14 |         ])))
15 |     eigenMultiplier <- fcnnls(t(eigengenes), matrix(1, ncol(eigengenes), 1))
16 |     H <- diag(as.numeric(eigenMultiplier$x),
17 |               length(as.numeric(eigenMultiplier$x))) %*% eigengenes
18 |     res <- .fcnnls(t(H), t(dataset), pseudo = TRUE)
19 |     return(list(H = H, W = t(res$coef)))
20 | }
21 | 
22 | #' DSA algorithm implementation for pure points
23 | #'
24 | #' Runs DSA implementation using fcnnls_c for solving least-squares with multiple right-hand-sides
25 | #'
26 | #' @param dataset gene expression matrix
27 | #' @param pure matrix contains expression of signature genes
28 | #' @import BH
29 | #'
30 | #' @return deconvolution results, list with H and W matrices
31 | pureDsa <- function(dataset, pure) {
32 |   eigenMultiplier <- fcnnls(t(pure), matrix(1, ncol(pure), 1))
33 |   H <- diag(as.numeric(eigenMultiplier$x),
34 |             length(as.numeric(eigenMultiplier$x))) %*% pure
35 |   res <- .fcnnls(t(H), t(dataset), pseudo = TRUE)
36 |   return(list(H = H, W = t(res$coef)))
37 | }
38 | #' Run DSA by clusters
39 | #'
40 | #' Runs DSA with provided clusters as putative signatures
41 | #'
42 | #' @param dataset gene expression matrix
43 | #' @param clustering numeric vector, clustering of the rows
44 | #' @param clusters numeric vector, which clusters use as putative signatures
45 | #'
46 | #' @return deconvolution results, list with H and W matrices
47 | runDSA <- function(dataset, clustering, clusters) {
48 |     genes <- lapply(clusters, function(i) rownames(dataset[clustering == i, ]))
49 |     fastDSA(dataset, genes)
50 | }
51 | 


--------------------------------------------------------------------------------
/R/estimateNoise.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Noise estimation
 3 | #'
 4 | #' Estimates noise using multiple regression approach. Implements method described in
 5 | #' J. M. Bioucas-Dias and J. M. P. Nascimento, "Hyperspectral Subspace Identification," in IEEE Transactions on Geoscience and Remote Sensing, vol. 46, no. 8, pp. 2435-2445, Aug. 2008.
 6 | #'
 7 | #' Based on MATLAB original code from http://www.lx.it.pt/~bioucas/code.htm
 8 | #'
 9 | #' @param Y normalized gene expression data matrix, columns are genes and rows are samples
10 | #' @param noiseType character, describing noise type. Two possible values are "additive" and "possion"
11 | #' @param verbose logical, default value is FALSE
12 | #'
13 | #' @return list with two elements, w -- estimated noise and Rw estimated noise correlation matrix
14 | #'
15 | #' @examples
16 | #' n_genes <- 50
17 | #' n_samples <-  40
18 | #' n_values <-  n_genes * n_samples
19 | #' expression_matrix <- matrix(abs(rexp(n_genes*4, rate=2)), ncol=4) %*%
20 | #'                      matrix(abs(runif(n_samples * 4, 0, 1)), nrow =4)
21 | #' noise_matrix <- matrix(rnorm(n_genes*n_samples, 0, 0.02), nrow = n_genes)
22 | #' noisy_matrix <-  expression_matrix + noise_matrix
23 | #' noise_estimation_result <-  estimateNoise(noisy_matrix)
24 | #' @export
25 | estimateNoise <- function(Y, noiseType="additive", verbose=FALSE) {
26 |     L <- nrow(Y)
27 |     N <- ncol(Y)
28 | 
29 |     if (L < 2) stop("Too few samples in dataset")
30 |     if (!noiseType %in% c("additive", "poisson"))
31 |         stop("Unknown noise model, accepted values are \"additive\" (default) and \"poisson\"")
32 |     if (verbose) message("Noise estimation started:")
33 | 
34 |     if (noiseType == "poisson") {
35 |         sqY <- sqrt(Y * (Y > 0))
36 |         noiseEst <- estimateAdditiveNoise(sqY, verbose)
37 |         x <- (sqY - noiseEst$w)^2
38 |         w <- sqrt(x) * noiseEst$w * 2
39 |         Rw <- w %*% t(w) / N
40 |         return(list(w=w, Rw=Rw))
41 |     } else {
42 |         return(estimateAdditiveNoise(Y, verbose))
43 |     }
44 | 
45 | 
46 | }
47 | 
48 | #' Estimate additive noise
49 | #'
50 | #' Additive noise estimation subroutine
51 | #'
52 | #' @param Y normalized gene expression matrix
53 | #' @param verbose verbosity
54 | #'
55 | #' @return list with two elements, w -- estimated noise and Rw estimated noise correlation matrix
56 | #'
57 | #' @examples
58 | #' n_genes <- 50
59 | #' n_samples <-  40
60 | #' n_values <-  n_genes * n_samples
61 | #' expression_matrix <- matrix(abs(rexp(n_genes*4, rate=2)), ncol=4) %*%
62 | #'                      matrix(abs(runif(n_samples * 4, 0, 1)), nrow =4)
63 | #' noise_matrix <- matrix(rnorm(n_genes*n_samples, 0, 0.02), nrow = n_genes)
64 | #' noisy_matrix <-  expression_matrix + noise_matrix
65 | #' noise_estimation_result <-  estimateAdditiveNoise(noisy_matrix, verbose = FALSE)
66 | #' @export
67 | estimateAdditiveNoise <- function(Y, verbose) {
68 |     small <- 1e-6;
69 |     L <- nrow(Y)
70 |     N <- ncol(Y)
71 | 
72 |     w <- matrix(0, nrow=L, ncol=N)
73 |     if (verbose) message("Computing the sample correlation matrix and its inverse")
74 |     RR <- Y %*% t(Y)
75 |     RRi <- solve(RR + small * diag(nrow=L))
76 |     for (i in 1:L) {
77 |         XX <- RRi - RRi[, i, drop=F] %*% RRi[i, , drop=F] / RRi[i, i]
78 |         RRa <- RR[, i, drop=F]
79 |         RRa[i, ] <- 0
80 |         beta <- XX %*% RRa
81 |         beta[i, ] <- 0
82 |         w[i, ] = Y[i, ] - t(beta) %*% Y
83 |     }
84 | 
85 |     if (verbose) message("Computing correlation matrix")
86 |     Rw = diag(diag(w %*% t(w) / N))
87 |     return(list(w=w, Rw=Rw))
88 | }
89 | 


--------------------------------------------------------------------------------
/R/geneCollapse.R:
--------------------------------------------------------------------------------
 1 | # collapsing genes
 2 | 
 3 | #' Collapse Genes
 4 | #'
 5 | #' Collapses given dataset so every gene is presented by the highest expressed probe
 6 | #'
 7 | #' @param ge matrix, rows are probes, columns are samples
 8 | #' @param probes df, matrix or named vector
 9 | #'
10 | #' @return collapsed gene expression matrix with probes replaced with corresponding genes
11 | setGeneric("collapseGenes", function(ge, probes) {
12 |     standardGeneric("collapseGenes")
13 | })
14 | 
15 | setMethod("collapseGenes", c(ge = "ANY", probes = c("data.frame")), function(ge,
16 |     probes) collapseGenes(ge, as.matrix(probes)))
17 | setMethod("collapseGenes", c("ANY", "matrix"), function(ge, probes) {
18 |     probes <- setNames(as.character(probes[, 1]), rownames(probes))
19 |     collapseGenes(ge, probes)
20 | })
21 | 
22 | setMethod("collapseGenes", c("ANY", "character"), function(ge, probes) {
23 |     step0 <- length(probes)
24 |     # removing NA, empty string and non-unique mappings
25 |     probes <- probes[!is.na(probes)]
26 |     probes <- probes[probes != ""]
27 |     probes <- probes[!grepl("//", probes)]
28 | 
29 |     # also removing LOC and orf
30 |     probes <- probes[!grepl("^LOC\\d+", probes, ignore.case = TRUE)]
31 |     probes <- probes[!grepl("^C\\w+orf\\d+", probes, ignore.case = TRUE)]
32 | 
33 |     step1 <- length(probes)
34 | 
35 |     message(paste0(step0 - step1, " probes were removed while mapping probes to genes as non-mapped probes or non-uniqely mapped probes"))
36 | 
37 |     geneToProbes <- lapply(split(probes, probes), names)
38 | 
39 |     logGE <- logDataset(ge)
40 |     # TODO: needs speed up
41 |     bestProbes <- sapply(geneToProbes, function(probes) {
42 |         subset <- logGE[probes, , drop = FALSE]
43 |         probes[which.max(rowMeans(subset))]
44 |     })
45 | 
46 |     bestGenes <- ge[bestProbes, ]
47 |     rownames(bestGenes) <- names(geneToProbes)
48 | 
49 |     step2 <- nrow(bestGenes)
50 |     message(paste0(step1 - step2, " genes were collapsed while mapping genes to most expressed probe"))
51 |     message(paste0(step2, " genes left after collapsing"))
52 |     bestGenes
53 | })
54 | 
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/R/generateData.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' Generate mixed data with or without noise
  4 | #'
  5 | #' Generates mixed data with or without noise under assumption of linear model
  6 | #'
  7 | #' @param samples number of samples
  8 | #' @param genes number of genes
  9 | #' @param cellTypes number of cell types
 10 | #' @param bias if not null bias should be a number in between 0 and 1, one of cell types presented in mix will be more abundunt and one cell type will be less abundunt
 11 | #' @param pureGenes number of genes which are not noisy
 12 | #' @param noiseDeviation standart deviation of normally distributed noise value
 13 | #' @param removeAngles if TRUE there will be no signature genes in the mix (simplex corners will be removed)
 14 | #' @param removeBorders if TRUE where will be no genes close to simplex border
 15 | #' @param borderShift number in between 0 and 1, every value in basis is guaranteed to be at least borderShift
 16 | #' @param spearmanThreshold Spearman treshold that has to be between generated samples
 17 | #' @param sampleLogMean average log expression of pure samples
 18 | #' @param sampleLogSd standard deviation of log expression of pure samples
 19 | #' @param cutCoef if removeAngles == T how much to cut angles
 20 | #'
 21 | #' @return list(data, proportions, basis,correlations_within_basis)
 22 | #' @export
 23 | generateMixedData <- function(samples=40, genes=12000, cellTypes=3, bias = NULL, spearmanThreshold = 0.5,
 24 |                               pureGenes = 0, noiseDeviation = 0, sampleLogMean = 4, sampleLogSd = 3,
 25 |                               removeAngles = F, cutCoef = 0.85,
 26 |                               removeBorders = F, borderShift = 0.25) {
 27 |     proportions <- sampleFromSimplexUniformly(samples, cellTypes, 100000)
 28 |     colnames(proportions) <- paste0("Sample ", 1:samples)
 29 |     rownames(proportions) <- paste0("Cell type ", 1:cellTypes)
 30 | 
 31 |     if (!is.null(bias)) {
 32 |         bias <- proportions[1, ] * bias
 33 |         proportions[1, ] <- proportions[1, ] - bias
 34 |         proportions[cellTypes, ] <- proportions[cellTypes, ] + bias
 35 |     }
 36 |     
 37 |     generateSample <- function(n, mean=sampleLogMean, sd=sampleLogSd) {
 38 |       rnorm(n, mean=mean, sd=sd)
 39 |     }
 40 |     
 41 |     shuffle <- function(x, threshold=spearmanThreshold) {
 42 |       n <- length(x)
 43 |       y <- x
 44 |       while (cor(x, y, method="spearman") > threshold) {
 45 |         z <- sample(1:n, 2)
 46 |         y[z] <- y[rev(z)]
 47 |       }
 48 |       return(y)
 49 |     }
 50 | 
 51 |     basis <- matrix(nrow=genes, ncol=cellTypes)
 52 |     for (j in 1:cellTypes) basis[, j] <- generateSample(genes)
 53 |     basis <- 2^basis
 54 |     
 55 |     if (removeAngles) {
 56 |         basisTmp <- basis / rowSums(basis)
 57 |         while (!all(sqrt(rowSums(basisTmp ^ 2)) < cutCoef)) {
 58 |             badIds <- which(sqrt(rowSums(basisTmp ^ 2)) >= cutCoef)
 59 |             basis[badIds, ] <- generateSample(cellTypes * length(badIds))
 60 |             basisTmp <- basis / rowSums(basis)
 61 |         }
 62 |     }
 63 | 
 64 |     if (removeBorders) {
 65 |         shift <- (1 / borderShift - 1) / cellTypes
 66 |         basis <- basis + shift
 67 |         basis <- apply(basis, 2, function(x) x / sum(x))
 68 |     }
 69 | 
 70 | 
 71 |     colnames(basis) <- paste0("Cell type ", 1:cellTypes)
 72 |     rownames(basis) <- paste0("Gene ", 1:genes)
 73 | 
 74 |     data <- basis %*% proportions
 75 | 
 76 |     if (noiseDeviation > 0) {
 77 |         noise <- matrix(rnorm(length(data), sd=noiseDeviation),
 78 |                         nrow = genes, ncol = samples)
 79 |         noised <- data + 2^noise
 80 |         noised[noised < 0] <- 0
 81 | 
 82 |         if (pureGenes > 0) {
 83 |             pure <- sample(1:genes, pureGenes)
 84 |             noised[pure, ] <- data[pure, ]
 85 |         }
 86 | 
 87 |         data <- noised
 88 |     }
 89 | 
 90 |     return(list(
 91 |         data = data, proportions = proportions, basis = basis, cors = cor(basis, method = "spearman")
 92 |     ))
 93 | 
 94 | }
 95 | 
 96 | #' Generation of points uniformly distributed on k-dimensional standard simplex
 97 | #'
 98 | #' @param n number of poitns
 99 | #' @param k dimensionality
100 | #' @param M grid size
101 | #'
102 | #'
103 | #' @return matrix where columns are points
104 | sampleFromSimplexUniformly <- function(n, k=3, M=100000) {
105 |     X <- matrix(0, nrow = k + 1, ncol=n)
106 |     X[k + 1, ] <- M
107 | 
108 |     X[2:k, ] <- replicate(n, sample(1:(M-1), k - 1))
109 |     X <- apply(X, 2, sort)
110 |     Y <- (X - X[c(k + 1, 1:k), ])[2:(k + 1), ]
111 |     return(Y / M)
112 | }
113 | 


--------------------------------------------------------------------------------
/R/hinge.R:
--------------------------------------------------------------------------------
1 | #' Hinge function
2 | #'
3 | #' @param Y matrix describing genes (possibly in reduced space)
4 | #'
5 | #' @return matrix -Y with negative values replaced with zeroes
6 | hinge <- function(Y) {
7 |     return(pmax(-Y, 0))
8 | }
9 | 


--------------------------------------------------------------------------------
/R/hysime.R:
--------------------------------------------------------------------------------
 1 | #' HySime
 2 | #'
 3 | #' Evalutes number of cell types presented in mixture using (HySime) hyperspectral signal identification by minimum error
 4 | #'
 5 | #' Original paper is J. M. Bioucas-Dias and J. M. P. Nascimento, "Hyperspectral Subspace Identification," in IEEE Transactions on Geoscience and Remote Sensing, vol. 46, no. 8, pp. 2435-2445, Aug. 2008.
 6 | #'
 7 | #' Based on MATLAB original code from http://www.lx.it.pt/~bioucas/code.htm
 8 | #'
 9 | #' @param Y gene expression matrix, columns are genes, rows are samples
10 | #' @param W estimated noise matrix
11 | #' @param Rn estimated noise correlation matrix
12 | #' @param verbose verbosity, default valie is FALSE
13 | #'
14 | #'
15 | #' @import ggplot2
16 | #'
17 | #' @return list
18 | hysime <- function(Y, W, Rn, verbose=FALSE) {
19 |     L <- nrow(Y)
20 |     N <- ncol(Y)
21 | 
22 |     Lw <- nrow(W)
23 |     Nw <- ncol(W)
24 | 
25 |     d1 <- nrow(Rn)
26 |     d2 <- ncol(Rn)
27 | 
28 |     if (Lw != L || Nw != N) {
29 |         stop("Noise matrix size are not in agreement")
30 |     } else if (d1 != d2 || d1 != L) {
31 |         stop("Bad correlation matrix")
32 |     }
33 | 
34 |     if (verbose) message("Computing the correlation matrices")
35 |     X <- Y - W
36 |     Ry <- Y %*% t(Y) / N
37 |     Rx <- X %*% t(X) / N
38 | 
39 |     if (verbose) message("Computing the eigen vectors of the signal correlation matrix")
40 | 
41 |     svdRes <- svd(Rx)
42 |     dx <- svdRes$d
43 |     E <- svdRes$u
44 | 
45 |     if (verbose) message("Estimating number of endmembers")
46 | 
47 |     Rn <- Rn + (sum(diag(Rx)) / L / 10^10) * diag(nrow=L)
48 | 
49 |     Py <- diag(t(E) %*% Ry %*% E)
50 |     Pn <- diag(t(E) %*% Rn %*% E)
51 | 
52 |     costF <- -Py + 2 * Pn
53 | 
54 |     kf <- sum(costF < 0)
55 |     ascOrder <- order(costF)
56 |     Ek <- E[, ascOrder[1:kf]]
57 | 
58 |     if (verbose) message(sprintf("Estimated signal subspace dimension in k = %d", kf))
59 | 
60 |     PySort <- sum(diag(Ry)) - cumsum(Py[ascOrder])
61 |     PnSort <- 2 * cumsum(Pn[ascOrder])
62 |     costFSort <- PySort + PnSort
63 | 
64 |     if (verbose) {
65 |         ind <- 1:(L - 1)
66 |         toPlot <- data.frame(indice = rep(ind, 3),
67 |                              error = c(costFSort[ind], PySort[ind], PnSort[ind]),
68 |                              error_type = c(rep("MSE", L-1), rep("Proj Error", L-1), rep("Noise Power", L-1)))
69 |         plot <- ggplot(data=toPlot, aes(x=indice, y=error, group=error_type, color=error_type)) +
70 |             geom_point() + geom_line() +
71 |             scale_y_log10() + theme_bw()
72 |         return(list(k=kf, E=Ek, plot=plot))
73 |     }
74 |     return(list(k=kf, E=Ek, plot=NULL))
75 | }
76 | 


--------------------------------------------------------------------------------
/R/linseedObject.R:
--------------------------------------------------------------------------------
  1 | #' Linseed Object
  2 | #' 
  3 | #' The Linseed Object Class. 
  4 | #' 
  5 | #' Provides an interface to perform 
  6 | #' collinear network construction,
  7 | #' linear subspace identification, 
  8 | #' simplex corner identification and gene expression deconvolution
  9 | #' 
 10 | #' @docType class
 11 | #' @importFrom R6 R6Class
 12 | #' @useDynLib linseed
 13 | #' @export
 14 | #' 
 15 | #' @return Object of \code{\link{R6Class}} -- an interface to work with gene expression data.
 16 | #' @format \code{\link{R6Class}} object.
 17 | #' @examples
 18 | #' LinseedObject$new("GSE19830", samples=10:42, topGenes=10000)
 19 | #' 
 20 | #' @field exp List of two elements raw and normalized gene expression dataset
 21 | #' @field name Character, optional, dataset name
 22 | #' @field cellTypeNumber Identified cell type number, required for projection, 
 23 | #' corner detection and deconvolution
 24 | #' @field projection Projection of genes into space lower-dimensionality (presumably simplex)
 25 | #' @field endpoints Simplex corners (in normalized, non-reduced space)
 26 | #' @field endpointsProjection Simplex corners (in reduced space)
 27 | #' @field distances Stores distances for every gene to each corner in reduced space
 28 | #' @field markers List that stores signatures genes for deconvolution, can be set manually or can be obtained by \code{selectGenes(k)}
 29 | #' @field signatures Deconvolution signature matrix
 30 | #' @field proportions Deconvolution proportion matrix
 31 | #' @field pairwise Calculated pairwise collinearity measure
 32 | #' @field spearman Calculated spearman correlations
 33 | #' @field genes genes significance test result list.
 34 | #' @field projectiveProjection calculated projection operator
 35 | #' @field Q Q matrix obtained from sysal alogorithm
 36 | #'
 37 | #' @name LinseedObject
 38 | #' @import Rcpp
 39 | #' @import RcppArmadillo
 40 | #' @import dplyr
 41 | #' @import ggplot2
 42 | #' @import Matrix
 43 | #' @import progress
 44 | #' @import Rtsne
 45 | LinseedObject <- R6Class("LinseedObject",
 46 |                           public = list(
 47 |                            exp = list(full=list(raw=NULL, norm=NULL),
 48 |                                   filtered=list(raw=NULL, norm=NULL)),
 49 |                            name = NULL,
 50 |                            
 51 |                            cellTypeNumber = NULL,
 52 |                            
 53 |                            projection = NULL,
 54 |                            endpoints = NULL,
 55 |                            endpointsProjection = NULL,
 56 |                            distances = NULL,
 57 |                            
 58 |                            
 59 |                            
 60 |                            markers = NULL,
 61 |                            signatures = NULL,
 62 |                            proportions = NULL,
 63 |                            pairwise = NULL,
 64 |                            spearman = NULL,
 65 |                            
 66 |                            genes = list(
 67 |                              pvals = NULL,
 68 |                              powers = NULL,
 69 |                              degrees = NULL
 70 |                            ),
 71 |                            
 72 |                            
 73 |                            projectiveProjection = NULL,
 74 |                            Q = NULL,
 75 | 
 76 |                            #' @description
 77 |                            #' Constructor
 78 |                            #' @param ... dataset matrix/GSE identifier as well as preprocessing parameters
 79 |                            initialize = function(...) {
 80 |                              args <- list(...)
 81 |                              dataset <- args[[1]]
 82 |                              if (inherits(dataset, "character") && grepl("^GSE", dataset)) {
 83 |                                self$name <- dataset
 84 |                                dataset <- preprocessGSE(...)
 85 |                              } else {
 86 |                                dataset <- preprocessDataset(...)
 87 |                              }
 88 |                              self$exp$full$raw <- dataset
 89 |                              self$exp$full$norm <- dataset / rowSums(dataset)
 90 |                            },
 91 |                            #' @description
 92 |                            #' Plot singular values elbow plot
 93 |                            #' @param dataset matrix identifier, can be "norm"or "raw"
 94 |                            #' @param components number of components to plot
 95 |                            svdPlot = function(dataset="norm", components=50) {
 96 |                              dataFull <- get(dataset, self$exp$full)
 97 |                              dataFiltered <- get(dataset, self$exp$filtered)
 98 |                              components <- min(components, ncol(dataFull))
 99 |                              
100 |                              if (is.null(dataFull)) {
101 |                                stop("Full dataset appears to be NULL, something is wrong")
102 |                              }
103 |                              
104 |                              vars <- svd(dataFull)$d^2
105 |                              vars <- cumsum(vars / sum(vars))
106 |                              df <- data.frame(dimension=1:length(vars), variance=vars, filtered=FALSE)
107 |                              colors <- 1
108 |                              
109 |                              if (!is.null(dataFiltered)) {
110 |                                vars <- svd(dataFiltered)$d^2
111 |                                vars <- cumsum(vars / sum(vars))
112 |                                dfFiltered <- data.frame(dimension=1:length(vars), variance=vars, filtered=TRUE)
113 |                                df <- rbind(df, dfFiltered)
114 |                                colors <- 2
115 |                              }
116 |                              
117 |                              
118 |                              ggplot(data=df, aes(x=dimension, y=variance)) +
119 |                                geom_point(aes(y=variance, color=filtered), size=0.5, alpha=1) +
120 |                                geom_line(aes(y=variance, color=filtered, group=filtered), 
121 |                                                      size=0.5, alpha=1) +
122 |                                scale_color_manual(values=c("#999999", "#E41A1C")[1:colors]) +
123 |                                theme_minimal(base_size = 8) + 
124 |                                theme(axis.line.x = element_line(colour = 'black', size=0.5, linetype='solid'),
125 |                                               axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'),
126 |                                               legend.position = c(1, 0), legend.justification = c(1, 0), 
127 |                                               legend.background = element_rect(colour="black", size=0.2),
128 |                                               legend.key.size = unit(0.1, "in")) +
129 |                                scale_x_continuous(minor_breaks = 1:components,
130 |                                                            limits=c(0, components))
131 |                           
132 |                            },
133 | 
134 |                            #' @description
135 |                            #' Evalutes number of cell types presented in mixture using (HySime) hyperspectral signal identification by minimum error
136 |                            #' @param dataset dataset identifier, can be "filtered"or "full
137 |                            #' @param error matrix identifier within the dataset
138 |                            #' @param set wether to set identified cell type number to the object
139 |                            hysime = function(dataset="filtered", error="norm", set=FALSE) {
140 |                              
141 |                              data <- get(dataset, self$exp)
142 |                              selected <- get(error, data)
143 |                              
144 |                              Y <- t(selected)
145 |                              noise <- estimateNoise(Y, verbose = T)
146 |                              hysimeRes <- hysime(Y, noise$w, noise$Rw, verbose=T)
147 |                              if (set) {
148 |                                self$cellTypeNumber <- hysimeRes$k
149 |                              }
150 |                              return(hysimeRes)
151 |                            },
152 | 
153 |                            #' @description
154 |                            #' Setter for sell type numbers
155 |                            #' @param k number to set
156 |                            setCellTypeNumber = function(k) {
157 |                              self$cellTypeNumber <- k
158 |                            },
159 | 
160 |                            #' @description
161 |                            #' Perform projection
162 |                            #' @param dataset dataset identifier can be "filtered" or "full"
163 |                            project = function(dataset) {
164 |                              data <- get(dataset, self$exp)
165 |                              Y <- t(data$norm)
166 |                              if (is.null(self$cellTypeNumber)) stop("Set cell type number first")
167 |                              self$projection <- projectiveProjection(Y, self$cellTypeNumber)
168 |                              self$projectiveProjection <- getProjectiveProjection(Y, self$cellTypeNumber)
169 |                            },
170 | 
171 |                            #' @description
172 |                            #' Plot projected points
173 |                            #' @param dims dimensions to plot
174 |                            #' @param color which points to color (corner, type, cluster, filtered)
175 |                            projectionPlot = function(dims=1:2, color=NULL) {
176 |                              if (is.null(self$projection)) {
177 |                                stop("You have to call 'project' method first")
178 |                              }
179 |                              
180 |                              toPlot <- t(self$projection[dims, ])
181 |                              toPlot <- as.data.frame(toPlot)
182 |                              colnames(toPlot) <- c("x", "y")
183 |                              toPlot$type <- "gene"
184 |                              
185 |                              if (!is.null(self$endpointsProjection)) {
186 |                                toPlotE <- t(self$endpointsProjection[dims, ])
187 |                                toPlotE <- as.data.frame(toPlotE)
188 |                                colnames(toPlotE) <- c("x", "y")
189 |                                
190 |                                
191 |                                toPlotE$corner <- 1:self$cellTypeNumber
192 |                                toPlotE$type <- "identified corners"
193 |                                toPlot$corner <- NA
194 |                                
195 |                                toPlot <- rbind(toPlot, toPlotE)
196 |                                toPlot$corner <- as.factor(toPlot$corner)
197 |                              }
198 |                              
199 |                              
200 |                              if (!is.null(self$markers)) {
201 |                                toPlot$cluster <- NA
202 |                                for (i in 1:length(self$markers)) {
203 |                                  toPlot[self$markers[[i]], "cluster"] <- i
204 |                                }
205 |                                toPlot$cluster <- as.factor(toPlot$cluster)
206 |                                toPlot <- toPlot[order(toPlot$cluster, decreasing = T), ]
207 |                                toPlot <- toPlot[nrow(toPlot):1, ]
208 |                              }
209 |                              
210 |                              if (!is.null(self$exp$filtered$norm)) {
211 |                                geneSubset <- rownames(self$exp$filtered$norm)
212 |                                toPlot$filtered <- F
213 |                                toPlot[geneSubset, "filtered"] <- T
214 |                              }
215 |                              
216 |                              
217 |                              axisNames <- paste0("Projection ", dims)
218 |                              pl <- ggplot(data=toPlot, aes(x=x, y=y)) +
219 |                                theme_bw() +
220 |                                labs(x=axisNames[1], y=axisNames[2])
221 |                              
222 |                              
223 |                              if (!is.null(color)) {
224 |                                pl <- pl + geom_point(aes_string(shape="type", size="type", color=color))
225 |                              } else {
226 |                                pl <- pl + geom_point(aes(shape=type, size=type))
227 |                              }
228 |                              
229 |                              if (!is.null(self$endpointsProjection)) {
230 |                                pl <- pl + 
231 |                                  scale_shape_manual(values=c(20, 18), labels=c("gene", "identified corners")) +
232 |                                  scale_size_manual(values=c(1, 3), labels=c("gene", "identified corners")) +
233 |                                  geom_polygon(data=dplyr::filter(toPlot, type=="identified corners"),
234 |                                               mapping=aes(x, y), fill=NA, color="black", lty=2)
235 |                              }
236 |                              pl
237 |                            },
238 | 
239 |                            #' @description
240 |                            #' Identify corners in projected space
241 |                            #' @param ... extra arguments for sisal algorithm
242 |                            sisalCorners = function(...){
243 |                              
244 |                              if (is.null(self$exp$filtered$norm)) {
245 |                                message("Could not find filtered dataset, using whole dataset as filtered")
246 |                                self$filterDataset(rownames(self$exp$full$norm))
247 |                              }
248 |                              
249 |                              Y <- t(self$exp$filtered$norm)
250 |                              p <- self$cellTypeNumber
251 |                              sisalRes <- sisal(Y, p, ...)
252 |                              self$endpoints = sisalRes$endpoints
253 |                              self$endpointsProjection = sisalRes$endpointsProjection
254 |                              self$distances = sisalRes$distances
255 |                              self$Q = sisalRes$Q
256 |                              self$projectiveProjection = sisalRes$projection
257 |                            },
258 | 
259 |                            #' @description
260 |                            #' Take top genes using distances calculated
261 |                            #' @param n how many genes to select
262 |                            selectGenes = function(n) {
263 |                              pureGeneSets <- apply(self$distances, 2, function(xx) {
264 |                                pure <- rownames(self$distances)[order(xx)[1:n]]
265 |                                return(pure)
266 |                              })
267 |                              pureGeneSets <-  split(pureGeneSets, rep(1:ncol(pureGeneSets), each = nrow(pureGeneSets)))
268 |                              self$markers <- pureGeneSets
269 |                            },
270 | 
271 |                            #' @description
272 |                            #' Solve the deconvolution of the dataset matrix
273 |                            #' @param dataset dataset identifier (filtered, full)
274 |                            #' @param error matrix identifier within dataset
275 |                            #' @param method which method to use ("dsa" by default, willl try CelLmix if something else)
276 |                            #' @param ... CellMix parameters
277 |                            deconvolve = function(dataset="filtered", error="norm", method="dsa", ...) {
278 |                              if (!method %in% c("dsa", "ssFrobenius")) {
279 |                                stop("Supported methods are `dsa` and `ssFrobenius`")
280 |                              }
281 |                              data <- get(dataset, self$exp)
282 |                              selected <- get(error, data)
283 |                              
284 |                              if (method == "dsa") {
285 |                                dsaRes <- fastDSA(selected, self$markers)
286 |                                self$signatures = dsaRes$W
287 |                                self$proportions = dsaRes$H
288 |                                
289 |                              } else {
290 |                                if (requireNamespace("CellMix")) {
291 |                                  res <- CellMix::ged(selected, self$cellTypeNumber, 
292 |                                                      data=self$markers, seed=1, method=method, ...)
293 |                                  dsaRes <- pureDsa(selected, CellMix::coef(res))
294 |                                  self$signatures = dsaRes$W
295 |                                  self$proportions = dsaRes$H
296 |                                } else {
297 |                                  stop("Different method than DSA provided, but no CellMix found")
298 |                                }
299 |                                
300 |                              }
301 |                              
302 |                              ctNames <- paste0("Cell type ", 1:length(self$markers))
303 |                              colnames(self$signatures) <- ctNames
304 |                              rownames(self$proportions) <- ctNames
305 |                              
306 |                            },
307 |                            
308 |                            #' @description
309 |                            #' Calculate deconvolution error
310 |                            #' @param dataset dataset identifier (filtered, full)
311 |                            #' @param error matrix identifier within dataset
312 |                            deconvolutionError = function(dataset="filtered", error="norm") {
313 |                              data <- get(dataset, self$exp)
314 |                              selected <- get(error, data)
315 |                              reconstruct <- self$signatures %*% self$proportions
316 |                              diff <- selected - reconstruct
317 |                              return(norm(diff, "F"))
318 |                            },
319 | 
320 |                            #' @description
321 |                            #' Calculated and set the deconvolution solution based on found endpoints in projected space
322 |                            #' @param dataset dataset identifier (filtered, full)
323 |                            #' @param error matrix identifier within dataset
324 |                            deconvolveByEndpoints = function(dataset="filtered", error="norm") {
325 |                              data <- get(dataset, self$exp)
326 |                              selected <- get(error, data)
327 |                              dsaRes <- pureDsa(selected, t(self$endpoints))
328 |                              
329 |                              self$signatures = dsaRes$W
330 |                              self$proportions = dsaRes$H
331 |                              
332 |                              ctNames <- paste0("Cell type ", 1:self$cellTypeNumber)
333 |                              
334 |                              rownames(self$signatures) <- rownames(selected)
335 |                              colnames(self$proportions) <- colnames(selected)
336 |                              
337 |                              colnames(self$signatures) <- ctNames
338 |                              rownames(self$proportions) <- ctNames
339 |                            },
340 | 
341 |                            #' @description
342 |                            #' Calculate correlation between rows of the matrix
343 |                            calculateSpearmanCorrelation = function() {
344 |                              self$spearman <- cor(t(self$exp$full$norm), method="spearman")
345 |                            },
346 |                            #' @description
347 |                            #' Significance test which will calculate p value for each each by shuffling network weights randomly
348 |                            #' @param iters number of shuffles
349 |                            #' @param spearmanThreshold threshold of correlation to consider (consider higher than this value)
350 |                            #' @param retVal Wether to return pvalues
351 |                            calculateSignificanceLevel = function(iters=1000,
352 |                                                                  spearmanThreshold=0,
353 |                                                                  retVal=F) {
354 |                              
355 |                              if (is.null(self$pairwise)) stop("call calculatePairwiseLinearity first")
356 |                              if (is.null(self$spearman)) stop("call calculateSpearmanCorrelation first")
357 |                              
358 |                              degrees <- rowSums(self$pairwise > 0 & 
359 |                                                 self$spearman > spearmanThreshold) - 1
360 |                              
361 |                              ij <- which(upper.tri(self$pairwise) & 
362 |                                            self$pairwise > 0 & 
363 |                                            self$spearman > spearmanThreshold, arr.ind = T)
364 |                              values <- self$pairwise[upper.tri(self$pairwise) & 
365 |                                                        self$pairwise > 0 & 
366 |                                                        self$spearman > spearmanThreshold] * 
367 |                                self$spearman[upper.tri(self$pairwise) & 
368 |                                                self$pairwise > 0 & 
369 |                                                self$spearman > spearmanThreshold]
370 |                              
371 |                              genes <- nrow(self$pairwise)
372 |                              edges <- length(values)
373 |                              
374 |                              sparsePP <- sparseMatrix(i = ij[, 1], j = ij[, 2], x = values, dims=c(genes, genes))
375 |                              sparsePPs <- sparsePP + t(sparsePP)
376 |                              vertexDegrees <- rowSums(sparsePPs)
377 |                              orderedDegrees <- order(vertexDegrees)
378 |                              
379 |                              results <- numeric(genes)
380 |                              gc(verbose = F)
381 |                              
382 |                              pb <- progress_bar$new(
383 |                                format = "Sampling weights [:bar] :percent eta: :eta",
384 |                                total = iters, clear = FALSE, width= 60)
385 |                              pb$tick(0)
386 |                              
387 |                              for (i in 1:iters) {
388 |                                valuesRandom <- sample(values, replace = T)
389 |                                sparseRandomPP <- sparseMatrix(i = ij[, 1], j = ij[, 2], x = valuesRandom, dims=c(genes, genes))
390 |                                sparseRandomPPs <- sparseRandomPP + t(sparseRandomPP)
391 |                                stats <- rowSums(sparseRandomPPs)
392 |                                results <- results + as.numeric(stats >= vertexDegrees)
393 |                                pb$tick(1)
394 |                              }
395 |                              
396 |                              pvals <- (results + 1) / (iters + 1)
397 |                              names(pvals) <- rownames(self$exp$norm)
398 |                              
399 |                              self$genes$pvals <- pvals
400 |                              self$genes$degrees <- degrees
401 |                              self$genes$powers <- vertexDegrees
402 |                              
403 |                              if (retVal) return(pvals)
404 |                            },
405 | 
406 |                            #' @description
407 |                            #' Calculate all pairwise collinearity coefficients
408 |                            #' @param negToZero wether to remove negative correlations
409 |                            calculatePairwiseLinearity = function(negToZero=T) {
410 |                              self$pairwise <- pairwiseR2(t(self$exp$full$norm))
411 |                              colnames(self$pairwise) <- rownames(self$pairwise) <- rownames(self$exp$full$norm)
412 |                              if (negToZero) {
413 |                                self$pairwise[self$pairwise < 0] <- 0
414 |                              }
415 |                            },
416 | 
417 |                            #' @description
418 |                            #' The main interface to search corners in projected space with sisal
419 |                            #' @param taus taus parameter for sisal
420 |                            #' @param sisalIter number of iterations
421 |                            #' @param dataset dataset identifier (filtered, full)
422 |                            #' @param error matrix identifier within the dataset
423 |                            smartSearchCorners = function(taus = 2^seq(0, -20, -1),
424 |                                                          sisalIter=100,
425 |                                                          dataset="filtered",
426 |                                                          error="norm") {
427 |                              
428 |                              if (is.null(self$exp$filtered$norm)) {
429 |                                message("Could not find filtered dataset, using whole dataset as filtered")
430 |                                self$filterDataset(rownames(self$exp$full$norm))
431 |                              }
432 |                              
433 |                              Y <- t(self$exp$filtered$norm)
434 |                              samples <- nrow(Y)
435 |                              taus_n <- length(taus)
436 |                              k <- self$cellTypeNumber
437 |                              
438 |                              keks <- lapply(taus, function(i) sisal(Y, k, sisalIter, i, returnPlot = F, nonNeg = T))
439 |                              endpoints_only <- lapply(keks, function(kek) kek$endpoints)
440 |                              
441 |                              Ymean <- rowMeans(Y)
442 |                              ref <- endpoints_only[[1]] - Ymean
443 |                              
444 |                              endpoints_only <- lapply(endpoints_only, function(endpoints) {
445 |                                shifted <- endpoints - Ymean
446 |                                cors <- cor(ref, shifted)
447 |                                return(endpoints[, apply(cors, 1, which.max)])
448 |                              })
449 |                              
450 |                              endpoints_array <- array(0, c(samples, k, taus_n))
451 |                              for (i in 1:taus_n) {
452 |                                endpoints_array[, , i] <- endpoints_only[[i]]
453 |                              }
454 |                              
455 |                              starting_point <- rep(1, k)
456 |                              to_change <- 1
457 |                              unchanged <- 0
458 |                              
459 |                              while (unchanged <= k) {
460 |                                starting_mat <- do.call(cbind, lapply(1:k, function(i) {
461 |                                  endpoints_array[, i, starting_point[i]]
462 |                                }))
463 |                                
464 |                                toCheck <- lapply(1:taus_n, function(i) {
465 |                                  starting_mat_copy <- starting_mat
466 |                                  starting_mat_copy[, to_change] <- endpoints_array[, to_change, i]
467 |                                  starting_mat_copy
468 |                                })
469 |                                
470 |                                
471 |                                errors <- sapply(toCheck, function(props) {
472 |                                  self$endpoints <- props
473 |                                  self$endpointsProjection <- t(self$projectiveProjection) %*% props
474 |                                  self$distances <- apply(self$endpointsProjection, 2, function(x) {
475 |                                    shifted <- self$projection - x
476 |                                    dds <- sqrt(colSums(shifted^2))
477 |                                    return(dds)
478 |                                  })
479 |                                  
480 |                                  self$deconvolveByEndpoints(dataset=dataset, error=error)
481 |                                  self$deconvolutionError(dataset=dataset, error=error)
482 |                                })
483 |                                
484 |                                new_position <- which.min(errors)
485 |                                if (starting_point[to_change] == new_position) {
486 |                                  unchanged <- unchanged + 1
487 |                                } else {
488 |                                  starting_point[to_change] = new_position
489 |                                }
490 |                                
491 |                                to_change <- to_change %% k + 1
492 |                              }
493 |                              
494 |                              message("Final vector is ")
495 |                              message(cat(starting_point))
496 |                              
497 |                              starting_mat <- do.call(cbind, lapply(1:k, function(i) {
498 |                                endpoints_array[, i, starting_point[i]]
499 |                              }))
500 |                              
501 |                              self$endpoints <- starting_mat
502 |                              rownames(self$endpoints) <- colnames(self$exp$filtered$norm)
503 |                              colnames(self$endpoints) <- paste0("Pure gene ", 1:k)
504 |                              self$endpointsProjection <- t(self$projectiveProjection) %*% self$endpoints
505 |                              
506 |                              self$distances <- apply(self$endpointsProjection, 2, function(x) {
507 |                                shifted <- self$projection - x
508 |                                dds <- sqrt(colSums(shifted^2))
509 |                                return(dds)
510 |                              })
511 |                              
512 |                            }, 
513 |                            
514 |                            #' @description
515 |                            #' Remove features which have pvalue of correlation higher than threshold
516 |                            #' @param pval pvalue threshold
517 |                            filterDatasetByPval = function(pval=0.001) {
518 |                              message(sprintf("Total number of genes is %d", nrow(self$exp$full$norm)))
519 |                              geneSubset <- rownames(self$exp$full$norm[self$genes$pvals < pval, ])
520 |                              self$filterDataset(geneSubset)
521 |                              message(sprintf("The number of genes after filtering is %d", nrow(self$exp$filtered$norm)))
522 |                            },
523 | 
524 |                            #' @description
525 |                            #' Remove subset of genes
526 |                            #' @param geneSubset vector of genes
527 |                            filterDataset = function(geneSubset) {
528 |                              self$exp$filtered$raw <- self$exp$full$raw[geneSubset, ]
529 |                              self$exp$filtered$norm <- self$exp$full$norm[geneSubset, ]
530 |                            },
531 | 
532 |                            #' @description
533 |                            #' Build t-SNE dimensionality reduction plot
534 |                            #' @param dataset dataset identifier (filtered, full)
535 |                            #' @param error matrix identifier within the dataset
536 |                            tsnePlot = function(dataset="filtered", error="norm") {
537 |                              data <- get(dataset, self$exp)
538 |                              selected <- get(error, data)
539 |                              
540 |                              tsne <- Rtsne(selected, perplexity = 100, max_iter = 2000)
541 |                              toPlot <- data.frame(
542 |                                tSNE1=tsne$Y[, 1],
543 |                                tSNE2=tsne$Y[, 2],
544 |                                marker=NA,
545 |                                row.names = rownames(selected)
546 |                              )
547 |                              
548 |                              if (!is.null(self$markers)) {
549 |                                for (i in 1:length(self$markers)) {
550 |                                    toPlot[self$markers[[i]], "marker"] <- i
551 |                                }
552 |                              }
553 |                              
554 |                              
555 |                              toPlot$marker <- as.factor(toPlot$marker)
556 |                              ggplot(data=toPlot, aes(x=tSNE1, y=tSNE2, color=marker)) +
557 |                                geom_point(size=0.5) + theme_bw(base_size=8) +
558 |                                theme(legend.key.size = unit(0.1, "in"), aspect.ratio = 1) +
559 |                                guides(color=guide_legend(title="Simplex\ncorner"))
560 |                              
561 |                            },
562 | 
563 |                            #' @description
564 |                            #' Visualize significance plot for genes, based on significance test. Color by pValue
565 |                            #' @param threshold pValue threshold
566 |                             significancePlot = function(threshold=0.001) {
567 |                              toPlot <- as.data.frame(self$genes)
568 |                              toPlot$name <- rownames(self$exp$full$norm)
569 |                              toPlot$significant <- toPlot$pvals < threshold
570 |                              
571 |                              ggplot(data=toPlot, aes(x=degrees, y=powers, color=significant)) +
572 |                                geom_point() + theme_bw(base_size=8) + 
573 |                                scale_color_manual(values=c("grey", "red"))
574 |                            }
575 |                            
576 |                            
577 |                          )
578 | )
579 | 


--------------------------------------------------------------------------------
/R/plotProportions.R:
--------------------------------------------------------------------------------
  1 | #' Draw a plot of estimated proportions
  2 | #'
  3 | #' Draws a plot of estimated proprotions
  4 | #' If ggplot2 and reshape2 are installed will use them and return ggplot object
  5 | #' Otherwise will use standart R functions
  6 | #'
  7 | #' @param ... matricies, data frames, NMF objects of estimated proportions or paths to file
  8 | #' @param point_size point size for plot
  9 | #' @param line_size line size for plot
 10 | #' @param pnames experiment titles
 11 | #'
 12 | #' @return ggplot object
 13 | #'
 14 | #'
 15 | #' @import ggplot2
 16 | #' @import reshape2
 17 | #' @export
 18 | plotProportions <- function(..., pnames = NULL, point_size=2, line_size=1) {
 19 |     proportions <- list(...)
 20 |     proportions <- lapply(proportions, toMatrix)
 21 | 
 22 |     newRowNames <- do.call(function(...) {
 23 |         mapply(function(...) {
 24 |             dots <- list(...)
 25 |             rn <- unlist(dots)
 26 |             paste0(rn, collapse = "\n")
 27 |         }, ...)
 28 |     }, lapply(proportions, rownames))
 29 | 
 30 |     proportions <- lapply(proportions, function(p) {
 31 |         rownames(p) <- newRowNames
 32 |         p
 33 |     })
 34 | 
 35 |     names(proportions) <- pnames
 36 | 
 37 | 
 38 |     cellTypes <- nrow(proportions[[1]])
 39 |     results.m <- melt(proportions)
 40 |     results.m[, 4] <- as.factor(results.m[, 4])
 41 |     
 42 |     results.m <- results.m[sample(nrow(results.m)), ]
 43 |     
 44 |     gplot <- ggplot(results.m,
 45 |                              aes(x = as.numeric(Var2),
 46 |                                           y = value,
 47 |                                           fill = Var1,
 48 |                                           color = L1)) +
 49 |         geom_line(size=line_size) +
 50 |         geom_point(size=point_size) +
 51 |         scale_x_discrete(labels = colnames(proportions[[1]])) +
 52 |         facet_grid(Var1 ~ .) +
 53 |         ylab("proportions") +
 54 |         ylim(0, 1.1) +
 55 |         theme_bw() +
 56 |         theme(axis.title.x = element_blank(),
 57 |                        axis.text.x = element_text(angle = 45,
 58 |                                                            hjust = 1)) +
 59 |         guides(fill = "none")
 60 |     if (length(proportions) > 1) {
 61 |         gplot <- gplot + theme(legend.title = element_blank(),
 62 |             legend.position = "top")
 63 | 
 64 |     } else {
 65 |         gplot <- gplot + theme(legend.position = "none")
 66 |     }
 67 |     gplot
 68 | }
 69 | 
 70 | #' Proportions dot plot
 71 | #'
 72 | #' @param predicted matrix of predicted proportions
 73 | #' @param actual matrix of actual proportions
 74 | #' @param main plot title
 75 | #' @param guess if True will function will try to guess how to reorder rows of predicted proportions to match rows of actual proportions
 76 | #' @param showR2 calculate and show R squared statistics
 77 | #'
 78 | #' @import ggplot2
 79 | #' @import reshape2
 80 | #'
 81 | #' @return ggplot dot plot. X axis is true proportion, Y axix is predicted proportion
 82 | #' @export
 83 | dotPlotPropotions <- function(predicted, actual, guess=FALSE, main=NULL, showR2=FALSE) {
 84 |   predicted <- as.matrix(predicted)
 85 |   actual <- as.matrix(actual)
 86 |   
 87 |   if (guess) {
 88 |     predicted <- predicted[guessOrder(predicted, actual), ]
 89 |   }
 90 |   
 91 |   colnames(predicted) <- colnames(actual)
 92 |   rownames(predicted) <- rownames(actual)
 93 |   
 94 |   xmelt <- melt(predicted)
 95 |   ymelt <- melt(actual)
 96 |   
 97 |   colnames(ymelt) <- c("Cell Type", "Sample", "Actual")
 98 |   colnames(xmelt) <- c("Cell Type", "Sample", "Predicted")
 99 |   
100 |   total <- cbind(ymelt, xmelt[, 3, drop=F])
101 |   
102 |   pred <- as.numeric(predicted)
103 |   act <- as.numeric(actual)
104 |   
105 |   r2 <- summary(lm(pred ~ act))$adj.r.squared
106 |   
107 |   pl <- ggplot(data=total, aes(x=Actual, y=Predicted, color=`Cell Type`)) +
108 |     geom_point() + theme_bw(base_size=8) +
109 |     theme(aspect.ratio = 1) + geom_abline(slope=1, intercept = 0, lty=2) +
110 |     xlim(c(0, 1)) + ylim(c(0, 1))
111 |   if (!is.null(main)) {
112 |     pl <- pl + labs(title=main)
113 |   }
114 |   if (showR2) {
115 |     subs <- substitute(italic(R)^2~"="~r2, list(r2=r2))
116 |     pl <- pl + annotate("text", label=as.character(as.expression(subs)), parse=T, x = 0.2, y=0.9)
117 |   }
118 |   pl
119 | }
120 | 
121 | 
122 | #' guess the order
123 | #'
124 | #' Function tries to guess ordering for rows of predicted proportions to match rows of actual proportions
125 | #'
126 | #'
127 | #' @importFrom combinat permn
128 | #' @param predicted predicted propotions
129 | #' @param actual actual proportions
130 | #'
131 | #' @return numeric, correct order of predicted proportions
132 | guessOrder <- function(predicted, actual) {
133 |   ctn <- nrow(predicted)
134 |   allPerms <- permn(ctn)
135 |   
136 |   vals <- sapply(allPerms, function(perm) {
137 |     sum(diag(cor(t(predicted[perm, ]), t(actual))))
138 |   })
139 |   perm <- allPerms[[which.max(vals)]]
140 |   return(perm)
141 | }
142 | 
143 | toMatrix <- function(x) {
144 |     if (is.data.frame(x)) {
145 |         # Convert data frame (or tibble) to a plain matrix
146 |         return(as.matrix(x))
147 |     }
148 |     if (inherits(x, "NMF")) {
149 |         # Handle NMF objects
150 |         return(toMatrix(coef(x)))
151 |     }
152 |     if (is.matrix(x)) {
153 |         # Return if already a matrix
154 |         return(x)
155 |     }
156 |     stop("Invalid type for plotting: ", paste(class(x), collapse = ", "))
157 | }
158 | 
159 | 
160 | 


--------------------------------------------------------------------------------
/R/preprocess.R:
--------------------------------------------------------------------------------
  1 | #' Preprocess Dataset
  2 | #'
  3 | #' Preprocesses given dataset. Preprocessing consists of 3 major steps:
  4 | #' 1) If needed, probes corresponding to the same genes are collapsed, only most expressed probe is taken for further analysis.
  5 | #'    It's common technique in microarray data analysis.
  6 | #' 2) If needed, only highly expressed genes are taken for further analysis. (Say hello to noize reduction)
  7 | #' 3) All genes are clustered with Kmeans using cosine simillarity as distance.
  8 | #'
  9 | #' @param dataset matrix, data.frame, path to file or GSE accession with expression data
 10 | #' @param annotation dataframe, matrix, named vector with annotation to probes
 11 | #' @param geneSymbol column from annotation to collapse the genes, deafult value is 'Gene Symbol'
 12 | #' @param samples character vector of samples. If column were not in samples, it would be excluded from analysis.
 13 | #' Default value is NULL, which takes every sample from dataset
 14 | #' @param topGenes integer How many genes include in analysis. We suppose to include only expressed genes. Default value is 10000
 15 | #'
 16 | #' @return clustered dataset, matrix, first column identifies cluster of the row
 17 | #' @import methods
 18 | #' @export
 19 | preprocessDataset <- function(dataset, annotation = NULL, geneSymbol = "Gene symbol",
 20 |     samples = NULL, topGenes = 10000) {
 21 |     if (inherits(dataset, "character")) {
 22 |         if (file.exists(dataset)) {
 23 |             message("File ", dataset, " exists")
 24 |             message("Reading dataset from file ", dataset)
 25 | 	    message("Make sure file is tab-separated and has row and column names")
 26 |             dataset <- read.table(dataset, header=1, row.names=1, sep="\t")
 27 | 	    message("File successfully read")
 28 |         } else {
 29 |             stop("File does not exist: ", dataset)
 30 |         }
 31 |     }
 32 |     if (inherits(dataset, "data.frame")) {
 33 |         dataset <- as.matrix(dataset)
 34 |     }
 35 | 
 36 |     if (!inherits(dataset, "matrix")) {
 37 |         stop("Unsupported type of dataset: please ensure first argument is matrix, data.frame, path to file or GSE accesssion")
 38 |     }
 39 | 
 40 |     # sample selection
 41 |     if (!is.null(samples)) {
 42 |         dataset <- dataset[, samples]
 43 |     }
 44 | 
 45 |     # annotating if necessary
 46 |     if (!is.null(annotation)) {
 47 |         fdata <- annotation[, geneSymbol, drop = FALSE]
 48 |         dataset <- collapseGenes(dataset, fdata)
 49 |     }
 50 | 
 51 |     # removing zeroes
 52 |     dataset <- dataset[!(rowSums(dataset) == 0), ]
 53 |     topGenes <- min(topGenes, nrow(dataset))
 54 |     dataset <- logDataset(dataset)
 55 |     topRows <- order(rowSums(dataset), decreasing = TRUE)[1:topGenes]
 56 |     topDataset <- dataset[topRows, ]
 57 |     
 58 |     # clustering in linear space
 59 |     topDataset <- linearizeDataset(topDataset)
 60 |     topDataset <- topDataset[!duplicated(topDataset), ]
 61 |     # clustered <- clusterCosine(topDataset, k)
 62 |     return(topDataset)
 63 | 
 64 | }
 65 | 
 66 | #' Preprocess GSE Dataset
 67 | #'
 68 | #' Downloads GSE dataset by GEO accession and performs preprocessing
 69 | #'
 70 | #' @param geoAccesion e.g 'GSE19830'
 71 | #' @param annotate annotate with feature data from provided geo platform
 72 | #' @param normalize quantile normalize GEO dataset
 73 | #' @param ... arguments further passed to preprocessDataset
 74 | #'
 75 | #' @return clustered dataset, matrix, first column identifies cluster of the row
 76 | #' @import GEOquery
 77 | #' @importFrom Biobase exprs
 78 | #' @import preprocessCore
 79 | #' @export
 80 | preprocessGSE <- function(geoAccesion, annotate = TRUE, normalize=TRUE, ...) {
 81 |     gse <- getGEO(geoAccesion, AnnotGPL = T)
 82 |     if (length(gse) > 1) {
 83 |         stop("This GSE has multiple expression sets. It's probably multiseries. Provide single series experiment")
 84 |     }
 85 |     
 86 |     gse <- gse[[1]]
 87 |     expressionData <- Biobase::exprs(gse)
 88 | 
 89 |     if (normalize) {
 90 |         expressionData <- logDataset(expressionData)
 91 |         expressionDataCopy <- normalize.quantiles(expressionData)
 92 |         colnames(expressionDataCopy) <- colnames(expressionData)
 93 |         rownames(expressionDataCopy) <- rownames(expressionData)
 94 |         expressionData <- expressionDataCopy
 95 |     }
 96 | 
 97 |     if (annotate) {
 98 |         preprocessDataset(expressionData, annotation = fData(gse), ...)
 99 |     } else {
100 |         preprocessDataset(expressionData, ...)
101 |     }
102 | 
103 | 
104 | }
105 | 
106 | 


--------------------------------------------------------------------------------
/R/projectiveProjection.R:
--------------------------------------------------------------------------------
 1 | #' Projective projection
 2 | #'
 3 | #' @param Y High dimensional data to project
 4 | #' @param p Dimensionality to project to
 5 | #' @param spherize Spherize dataset or not
 6 | #' 
 7 | #' @import corpcor
 8 | #'
 9 | #' @return matrix with coordinates of projected points.
10 | projectiveProjection <- function(Y, p, spherize=F) {
11 |   L <- nrow(Y)
12 |   N <- ncol(Y)
13 |   
14 |   Ymean <- apply(Y, 1, mean)
15 |   ym <- matrix(Ymean, ncol=1)
16 |   Y <- Y - Ymean
17 |   svdObj <- fast.svd(Y)
18 |   Up <- svdObj$u[, 1:(p-1)]
19 |   proj <- Up %*% t(Up)
20 |   D <- svdObj$d[1:(p-1)]
21 |   
22 |   Y <- proj %*% Y
23 |   Y <- Y + Ymean
24 |   YmeanOrtho <- ym - proj %*% ym
25 |   Up <- cbind(Up, YmeanOrtho / (sqrt(sum(YmeanOrtho ^ 2))))
26 |   singValues <- D
27 |   lamSphe <- 1e-8
28 |   
29 |   Y <- t(Up) %*% Y
30 |   
31 |   ## spherizing
32 |   if (spherize) {
33 |     Y <- Up %*% Y
34 |     Y <- Y - Ymean
35 |     C <- diag(1 / sqrt(D + lamSphe))
36 |     IC <- solve(C)
37 |     Y <- C %*% t(Up[, 1:(p-1)]) %*% Y
38 |     Y <- rbind(Y, 1)
39 |     Y <- Y / sqrt(p)
40 |   }
41 |   return(Y)
42 | }
43 | 
44 | 
45 | #' Get projective projection
46 | #'
47 | #' @param Y High dimensional data to project
48 | #' @param p Dimensionality to project to
49 | #' @param spherize Spherize dataset or not
50 | #' 
51 | #' @import corpcor
52 | #'
53 | #' @return projection
54 | getProjectiveProjection <- function(Y, p, spherize=F) {
55 |   L <- nrow(Y)
56 |   N <- ncol(Y)
57 |   
58 |   Ymean <- apply(Y, 1, mean)
59 |   ym <- matrix(Ymean, ncol=1)
60 |   Y <- Y - Ymean
61 |   svdObj <- fast.svd(Y)
62 |   Up <- svdObj$u[, 1:(p-1)]
63 |   proj <- Up %*% t(Up)
64 |   D <- svdObj$d[1:(p-1)]
65 |   
66 |   Y <- proj %*% Y
67 |   Y <- Y + Ymean
68 |   YmeanOrtho <- ym - proj %*% ym
69 |   Up <- cbind(Up, YmeanOrtho / (sqrt(sum(YmeanOrtho ^ 2))))
70 | 
71 |   return(Up)
72 | }


--------------------------------------------------------------------------------
/R/proportionsLiverBrainLung.R:
--------------------------------------------------------------------------------
 1 | #' GSE19830 proportions
 2 | #'
 3 | #' tissue proportions from GSE19830 experiment:
 4 | #' 3 tissues (liver, brain and lung) were mixed in different proportions:
 5 | #'
 6 | #' GSM495209-GSM495211 pure liver samples
 7 | #' GSM495212-GSM495214 pure brain samples
 8 | #' GSM495215-GSM495217 pure lung samples
 9 | #' GSM495218-GSM495220 5 % Liver / 25 % Brain / 70 % Lung
10 | #' GSM495221-GSM495223 70 % Liver / 5 % Brain / 25 % Lung
11 | #' GSM495224-GSM495226 25 % Liver / 70 % Brain / 5 % Lung
12 | #' GSM495227-GSM495229 70 % Liver / 25 % Brain / 5 % Lung
13 | #' GSM495230-GSM495232 45 % Liver / 45 % Brain / 10 % Lung
14 | #' GSM495233-GSM495235 55 % Liver / 20 % Brain / 25 % Lung
15 | #' GSM495236-GSM495238 50 % Liver / 30 % Brain / 20 % Lung
16 | #' GSM495239-GSM495241 55 % Liver / 30 % Brain / 15 % Lung
17 | #' GSM495242-GSM495244 50 % Liver / 40 % Brain / 10 % Lung
18 | #' GSM495245-GSM495247 60 % Liver / 35 % Brain / 5 % Lung
19 | #' GSM495248-GSM495250 65 % Liver / 34 % Brain / 1 % Lung
20 | #'
21 | #' @docType data
22 | #' 
23 | #' @name proportionsLiverBrainLung
24 | #'
25 | #' @usage data(proportionsLiverBrainLung)
26 | #'
27 | #' @format An object of class \code{'matrix'}
28 | #'
29 | #' @keywords datasets
30 | #'
31 | #' @references Shen-Orr SS, Tibshirani R, Khatri P, et al. cell type-specific gene expression differences in complex tissues. Nature methods. 2010;7(4):287-289. doi:10.1038/nmeth.1439.
32 | #' (\href{http://www.ncbi.nlm.nih.gov/pubmed/20208531}{PubMed})
33 | #'
34 | #' @source \href{http://qtlarchive.org/db/q?pg=projdetails&proj=moore_2013b}{QTL Archive}
35 | #' @examples
36 | #' data("proportionsLiverBrainLung")
37 | #' mixedProportions <- proportionsLiverBrainLung[, 10:42]
38 | #' barplot(mixedProportions,
39 | #'         main='Proprotions of tissues in samples',
40 | #'         col=c('#00BA38','#F8766D', '#619CFF'),
41 | #'         legend = rownames(mixedProportions))
42 | NULL
43 | 


--------------------------------------------------------------------------------
/R/sisal.R:
--------------------------------------------------------------------------------
  1 | #' SISAL algorithm
  2 | #'
  3 | #' Sisal alogorithm for simplex endpoints identification
  4 | #'
  5 | #' Implementation of method
  6 | #'
  7 | #'
  8 | #' @param Y gene expression matrix
  9 | #' @param p number of endpoints
 10 | #' @param iters number of iterations to perform
 11 | #' @param tau noise points penalty coefficient
 12 | #' @param mu regularization
 13 | #' @param spherize spherize or not
 14 | #' @param tol numeric tolerance
 15 | #' @param m0 starting points for SISAL algorithm, default points are getting from VCA
 16 | #' @param verbose verbosity
 17 | #' @param returnPlot logical, is it needed to return dataframe or not
 18 | #' @param nonNeg logical, force simplex corners to non-negative space
 19 | #'
 20 | #' @import corpcor
 21 | #' @import dplyr
 22 | #'
 23 | #' @return list containing the results of the sisal algorithm including original endpoints and their projection.
 24 | sisal <- function(Y, p, iters = 80, tau = 1,
 25 |                   mu = p * 1000 / ncol(Y),
 26 |                   spherize = F, tol = 1e-2, m0 = NULL, verbose=F,
 27 |                   returnPlot = T, nonNeg = F) {
 28 |     rnames <- rownames(Y)
 29 |     L <- nrow(Y)
 30 |     N <- ncol(Y)
 31 | 
 32 |     if (L < p) stop("Insufficient number of columns in y")
 33 | 
 34 |     # local stuff
 35 | 
 36 |     slack <- 1e-3
 37 |     energyDecreasing <- 0
 38 |     fValBack <- Inf
 39 |     lamSphe <- 1e-8
 40 |     lamQuad <- 1e-6
 41 |     ALiters <- 4
 42 |     flaged <- 0
 43 | 
 44 |     ## At first we are getting the affine set
 45 |     Ymean <- apply(Y, 1, mean)
 46 |     ym <- matrix(Ymean, ncol=1)
 47 |     Y <- Y - Ymean
 48 |     svdObj <- fast.svd(Y)
 49 |     Up <- svdObj$u[, 1:(p-1)]
 50 |     proj <- Up %*% t(Up)
 51 |     D <- svdObj$d[1:(p-1)]
 52 |     
 53 |     Y <- proj %*% Y
 54 |     
 55 |     Y <- Y + Ymean
 56 |     YmeanOrtho <- ym - proj %*% ym
 57 |     Up <- cbind(Up, YmeanOrtho / (sqrt(sum(YmeanOrtho ^ 2))))
 58 |     singValues <- D
 59 |     
 60 |     Y <- t(Up) %*% Y
 61 |     
 62 |     ## spherizing
 63 |     if (spherize) {
 64 |       Y <- Up %*% Y
 65 |       Y <- Y - Ymean
 66 |       C <- diag(1 / sqrt(D + lamSphe))
 67 |       IC <- solve(C)
 68 |       Y <- C %*% t(Up[, 1:(p-1)]) %*% Y
 69 |       Y <- rbind(Y, 1)
 70 |       Y <- Y / sqrt(p)
 71 |     }
 72 | 
 73 |     ## Init
 74 | 
 75 |     if (is.null(m0)) {
 76 |         Mvca <- vca(Y, p, verbose=verbose)
 77 |         M <- Mvca
 78 |         Ym <- apply(M, 1, mean)
 79 |         dQ <- M - Ym
 80 |         M <- M + p * dQ
 81 |     } else {
 82 |         M <- m0
 83 |         M <- M - Ymean
 84 |         M <- Up[, 1:(p-1)] %*% t(Up[, 1:(p - 1)]) %*% M
 85 |         M <- M + Ymean
 86 |         M <- t(Up) %*% M
 87 |         if (spherize) {
 88 |             M <- Up %*% M - Ymean
 89 |             M <- C %*% t(Up[, 1:(p - 1)]) %*% M
 90 |             M[p, ] <- 1
 91 |             M <- M / sqrt(p)
 92 |         }
 93 |     }
 94 | 
 95 |     if (returnPlot) {
 96 |         toPlot <- data.frame(
 97 |             x = Y[1, ],
 98 |             y = Y[2, ],
 99 |             type = "data point",
100 |             iter = NA,
101 |             tau = NA
102 |         )
103 |         starting <- data.frame(
104 |             x = M[1, ],
105 |             y = M[2, ],
106 |             type = "sisal",
107 |             iter = 0,
108 |             tau = tau
109 |         )
110 |         toPlot <- rbind(toPlot, starting)
111 |     }
112 | 
113 | 
114 |     Q0 <- solve(M)
115 |     Q <- Q0
116 | 
117 |     AAT <- kronecker(Y %*% t(Y), diag(nrow=p))
118 |     B <- kronecker(diag(nrow=p), matrix(1, nrow=1, ncol=p))
119 |     qm <- rowSums(solve(Y %*% t(Y)) %*% Y)
120 |     qm <- matrix(qm, ncol=1)
121 | 
122 |     H <- lamQuad * diag(nrow=p^2)
123 |     FF <- H + mu * AAT
124 |     IFF <- solve(FF)
125 | 
126 | 
127 |     G <- IFF %*% t(B) %*% solve(B %*% IFF %*% t(B))
128 |     qmAux <- G %*% qm
129 |     G <- IFF - G %*% B %*% IFF
130 | 
131 |     Z <- Q %*% Y
132 |     Bk <- 0 * Z
133 |     
134 |     fmin = Inf
135 |     Qmin = NULL
136 |     
137 |     for (k in 1:iters) {
138 |         IQ <- solve(Q)
139 |         g <- -t(IQ)
140 |         dim(g) <- c(nrow(g) * ncol(g), 1)
141 | 
142 |         q0 <- Q
143 |         dim(q0) <- c(nrow(Q) * ncol(Q), 1)
144 |         Q0 <- Q
145 | 
146 |         baux <- H %*% q0 - g
147 | 
148 |         if (verbose) {
149 |             if (spherize) {
150 |                 M <- IQ * sqrt(p)
151 |                 M <- M[1:(p-1), ]
152 |                 M <- Up[, 1:(p-1)] %*% IC %*% M
153 |                 M <- M + Ymean
154 |                 M <- t(Up) %*% M
155 |             } else {
156 |                 M <- IQ
157 |             }
158 |             message(sprintf("Iteration %d, simplex volume = %4f", k, abs(det(M)) / factorial(nrow(M))))
159 |         }
160 | 
161 |         if (k == iters) {
162 |             ALiters <- 100
163 |         }
164 | 
165 |         while (T) {
166 |             q <- Q
167 |             dim(q) <- c(nrow(Q) * ncol(Q), 1)
168 | 
169 |             f0val <- -log(abs(det(Q))) + tau * sum(hinge(Q %*% Y))
170 |             f0quad <- t(q - q0) %*% g + 0.5 * t(q - q0) %*% H %*% (q - q0) + tau * sum(hinge(Q %*% Y))
171 |             
172 |             for (i in 2:ALiters) {
173 |                 dqAux <- Z + Bk
174 |                 dtzB <- dqAux %*% t(Y)
175 |                 dim(dtzB) <- c(nrow(dtzB) * ncol(dtzB), 1)
176 |                 b <- baux + mu * dtzB
177 |                 q <- G %*% b + qmAux
178 |                 Q <- matrix(q, nrow=p, ncol=p)
179 | 
180 |                 Z <- softNeg(Q %*% Y - Bk, tau / mu)
181 | 
182 |                 Bk <- Bk - (Q %*% Y - Z)
183 | 
184 |             }
185 |             
186 |             fquad_tmp <- t(q - q0) %*% g + 0.5 * t(q - q0) %*% H %*% (q - q0) + tau * sum(hinge(Q %*% Y))
187 |             fval_tmp <- -log(abs(det(Q))) + tau * sum(hinge(Q %*% Y))
188 |             
189 |             fquad <- t(q - q0) %*% g + 0.5 * t(q - q0) %*% H %*% (q - q0) + tau * sum(hinge(Q %*% Y))
190 |             fval <- -log(abs(det(Q))) + tau * sum(hinge(Q %*% Y))
191 |             
192 |             # if (fval < fmin) {
193 |             #   fmin <- fval
194 |             #   Qmin <- Q
195 |             # }
196 | 
197 |             # message(sprintf("f0 quad: %4f , f0 val %4f ", f0quad, f0val))
198 |             # message(sprintf("f temp quad: %4f , f tmp val %4f ", fquad_tmp, fval_tmp))
199 |             # message(sprintf("f quad: %4f , f val %4f ", fquad, fval))
200 |             # stop("kek")
201 | 
202 |             if (f0quad >= fquad) {
203 |                 while (f0val - fval < 0) {
204 |                     Q <- (Q + Q0) / 2
205 |                     fval <- -log(abs(det(Q))) + tau * sum(hinge(Q %*% Y))
206 |                 }
207 |               
208 |                 break
209 |             }
210 | 
211 |         }
212 | 
213 |         if (returnPlot) {
214 |             M <- solve(Q)
215 |             toAdd <- data.frame(
216 |                 x = M[1, ],
217 |                 y = M[2, ],
218 |                 type = "sisal",
219 |                 iter = k,
220 |                 tau = tau
221 |             )
222 |             toPlot <- rbind(toPlot, toAdd)
223 |             toPlot <- tbl_df(toPlot)
224 |         }
225 | 
226 |     }
227 |     
228 |     if (nonNeg) {
229 |       qorig <- Up %*% solve(Q)
230 |       
231 |       if (any(qorig < 0)) {
232 |         diff <- Ymean - qorig
233 |         
234 |         shift <- sapply(1:p, function(i) {
235 |           tmp <- qorig[, i] / diff[, i]
236 |           ifelse(any(qorig[, i] < 0), min(tmp[qorig[, i] < 0]), 0)
237 |         })
238 |         
239 |         shift <- -shift + (0.00001)
240 |         qorig <- qorig + diff %*% diag(shift)
241 |         Q <- solve(t(Up) %*% qorig)
242 |         q <- Q
243 |         dim(q) <- c(nrow(Q) * ncol(Q), 1)
244 |       }
245 |     }
246 | 
247 |     distanceToEndpoints <- apply(M, 2, function(x) {
248 |         shifted <- Y - x
249 |         dds <- sqrt(colSums(shifted^2))
250 |         return(dds)
251 |     })
252 |     
253 |     endpointsProjection <- M
254 | 
255 |     if (spherize) {
256 |         M <- solve(Q)
257 |         M <- M * sqrt(p)
258 |         M <- M[1:(p - 1), ]
259 |         M <- Up[, 1:(p - 1)] %*% IC %*% M
260 |         M <- M + Ymean
261 |     } else {
262 |         M <- Up %*% solve(Q)
263 |     }
264 | 
265 |     colnames(M) <- paste0("Pure gene ", 1:p)
266 |     rownames(M) <- rnames
267 | 
268 |     retVal <- list(
269 |         endpoints = M,
270 |         endpointsProjection = endpointsProjection,
271 |         projection = Up,
272 |         shift = Ymean,
273 |         singValues = singValues,
274 |         distances = distanceToEndpoints,
275 |         reduced = Y,
276 |         q = Q
277 |     )
278 |     if (returnPlot) retVal$plotObj <- toPlot
279 |     return(retVal)
280 | 
281 | }
282 | 


--------------------------------------------------------------------------------
/R/softNeg.R:
--------------------------------------------------------------------------------
 1 | #' Soft negative score
 2 | #'
 3 | #' @param Y matrix
 4 | #' @param tau coefficient to penalize for volume
 5 | #'
 6 | #' @return score value
 7 | softNeg <- function(Y, tau) {
 8 |     z <- pmax(abs(Y + tau / 2) - tau / 2, 0)
 9 |     z <- z / (z + tau / 2) * (Y + tau / 2)
10 |     return(z)
11 | }
12 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | # # utils
 2 | # 
 3 | # write.table.mine <- function(data, fn, ...) write.table(data, fn, sep = "\t", quote = FALSE,
 4 | #     col.names = NA, ...)
 5 | # read.table.mine <- function(fn, ...) read.table(fn, sep = "\t", header = 1, row.names = 1,
 6 | #     ...)
 7 | # norm.length <- function(r) r/sqrt(sum(r^2))
 8 | # norm.length.matrix <- function(m) t(apply(m, 1, norm.length))
 9 | # norm.relative <- function(r) (r - min(r))/(max(r) - min(r))
10 | # norm.relative.matrix <- function(m) t(apply(m, 1, norm.relative))
11 | # similarity.cosine <- function(x, y) crossprod(x, y)/sqrt(crossprod(x) * crossprod(y))
12 | # space.metric <- function(x, y) 1 - similarity.cosine(x, y)
13 | 
14 | #' linearizeDataset
15 | #'
16 | #' @param ge gene expression matrix
17 | #'
18 | #' @return gene expression matrix in linear scale
19 | linearizeDataset <- function(ge) {
20 |     if (is_logscale(ge))
21 |         return(2^ge - 1)
22 |     return(ge)
23 | }
24 | 
25 | #' logDataset
26 | #'
27 | #' @param ge gene expression matrix
28 | #'
29 | #' @return gene expression matrix in log scale
30 | logDataset <- function(ge) {
31 |     if (is_logscale(ge))
32 |         return(ge)
33 |     return(log2(ge + 1))
34 | }
35 | 
36 | 
37 | #' is_logscale
38 | #'
39 | #' @param x gene expression matrix
40 | #'
41 | #' @return logical, whether x is in the log scale
42 | is_logscale <- function(x) {
43 |     qx <- quantile(as.numeric(x), na.rm = T)
44 |     if (qx[5] - qx[1] > 100 || qx[5] > 100) {
45 |         return(FALSE)
46 |     } else {
47 |         return(TRUE)
48 |     }
49 | }
50 | 
51 | 
52 | # r2Profiler <- function(r2Table,
53 | #                        rCheck = seq(0, 1, 0.1),
54 | #                        kCheck=c(1, 3, 5, 10)) {
55 | #         # rTemp <- (r2Table + t(r2Table)) / 2
56 | #         results <- do.call(cbind, lapply(rCheck, function(rTreshold) {
57 | #             rMask <- (r2Table > rTreshold)
58 | #             rMaskSums <- rowSums(rMask)
59 | #             sapply(kCheck, function(kn) {
60 | #                 sum(rMaskSums > kn)
61 | #             })
62 | #         }))
63 | #         colnames(results) <- rCheck
64 | #         rownames(results) <- kCheck
65 | #         return(results)
66 | # }
67 | # 
68 | # visualizeProfilerResults <- function(results) {
69 | #     requireNamespace("ggplot2")
70 | #     requireNamespace("reshape2")
71 | #     melted <- reshape2::melt(t(results))
72 | #     colnames(melted) <- c("R2", "K", "candidates")
73 | # 
74 | #     pl <- ggplot2::ggplot(data=melted, ggplot2::aes(x=R2, y=log10(candidates),
75 | #                                   group=as.factor(K), color=as.factor(K))) +
76 | #         ggplot2::geom_point() + ggplot2::geom_line() +
77 | #         ggplot2::geom_hline(yintercept = log10(500), color="green") +
78 | #         ggplot2::geom_hline(yintercept = log10(1000), color="red") +
79 | #         ggplot2::theme_bw()
80 | #     pl
81 | # }
82 | # 
83 | # r2Filtering <- function(r2Table, r2val, k) {
84 | #     if (r2val > 1) stop("Value of R^2 to filter should be less or equal")
85 | #     if (r2val < 0) warning("Negative values of R^2 might be not meaningful")
86 | #     if (k <= 0) stop("Number of neighbours should be positive")
87 | #     rSums <- rowSums(r2Table > r2val)
88 | #     filteredGenes <- rownames(r2Table)[rSums > k]
89 | #     filteredOutGenes<- rownames(r2Table)[rSums <= k]
90 | #     subset <- r2Table[filteredGenes, filteredGenes]
91 | #     return(list(filteredGenes=filteredGenes,
92 | #                 filteredOutGenes=filteredOutGenes,
93 | #                 r2Filtered=subset))
94 | # }


--------------------------------------------------------------------------------
/R/vca.R:
--------------------------------------------------------------------------------
 1 | #' Title
 2 | #'
 3 | #' @param R matrix describing points (possibly lying in a simplex) in high dimensional space
 4 | #' @param p number endpoints to find
 5 | #' @param SNR signal to noise ratio, NULL by default
 6 | #' @param verbose verbosity, deafult value is FALSE
 7 | #'
 8 | #' @return matrix of columns from R which are considered to be endpoints
 9 | vca <- function(R, p, SNR=NULL, verbose=F) {
10 |     L <- nrow(R)
11 |     N <- ncol(R)
12 | 
13 |     if (p < 0 || p > L) {
14 |         stop("p is out of range (negative or too big)")
15 |     }
16 |     SNRth <- 15 + 10 * log10(p)
17 | 
18 |     if (!is.null(SNR) && (SNR < SNRth)) {
19 |         if (verbose) message("Select the projective projection")
20 |         d <- p - 1
21 | 
22 |         if (exists("xp")) {
23 |             Ud <- Ud[, 1:d]
24 |         } else {
25 |             rm <- apply(R, 1, mean)
26 |             R0 <- R - rm
27 |             svdObj <- svd(R0 %*% t(R0), nu = p, nv = p)
28 |             Ud <- svdObj$u
29 |             xp <- t(Ud) %*% R0
30 |         }
31 | 
32 |         Rp <- Ud %*% xp[1:d, ] + rm
33 |         x <- xp[1:d, ]
34 |         c <- sqrt(max(colSums(x^2)))
35 |         y <- rbind(x, rep(c, N))
36 |     } else {
37 |         if (verbose) message("Select projection to p-1")
38 |         d <- p
39 |         Ud <- svd(R %*% t(R) / N, nu = d, nv = d)$u
40 | 
41 |         xp <- t(Ud) %*% R
42 |         Rp <- Ud %*% xp[1:d, ]
43 | 
44 |         x <- xp
45 |         u <- rowMeans(x)
46 |         y <- x / matrix(kronecker(colSums(x * u), rep(1, d)), nrow=d)
47 |     }
48 | 
49 |     ## VCA itself
50 | 
51 |     indice <- rep(0, p)
52 |     A <- matrix(0, nrow=p, ncol=p)
53 |     A[p, 1] <- 1
54 | 
55 |     for (i in 1:p) {
56 |         w <- matrix(runif(p), ncol=1)
57 |         f <- w - A %*% pseudoinverse(A) %*% w;
58 |         f <- f / sqrt(sum(f^2))
59 | 
60 |         v <- t(f) %*% y
61 |         indice[i] <- which.max(abs(v))
62 |         A[, i] <- y[, indice[i]]
63 |     }
64 |     Ae = Rp[, indice]
65 |     return(Ae)
66 | }
67 | 
68 | 
69 | #' #' Signal To Noise estimation
70 | #' #'
71 | #' #' @param R matrix containing points in high dimensional space
72 | #' #' @param rM vector of feature means
73 | #' #' @param x projection of R (shifted to zero) to lower dimensional space produced by SVD
74 | #' #'
75 | #' #' @return numeric, signal to noise ratio
76 | #' #'
77 | #' #' @examples
78 | #' estimateSnr <- function(R, rM, x) {
79 | #'     L <- nrow(R)
80 | #'     N <- ncol(R)
81 | #'     p <- nrow(x)
82 | #' 
83 | #'     py <- sum(R^2) / N
84 | #'     px <- sum(x^2) / N + crossprod(rM)
85 | #'     # message(sprintf("Technical: L = %d, N = %d, p = %d, py = %f, px = %f, py - px = %f",
86 | #'     #                 L, N, p, py, px, py -px))
87 | #'     # message(sprintf("SNR estimated = %f", 10 * log10( (px - p * py / L) / (py - px) )))
88 | #'     return(10 * log10( (px - p * py / L) / (py - px) ))
89 | #' }
90 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
  1 | Linseed tutorial
  2 | ================
  3 | 
  4 | Linseed (LINear Subspace identification for gene Expresion Deconvolution) is a package that provides tools and interface to explore gene expression datasets in linear space.
  5 | 
  6 | ⚠️ Notice
  7 | 
  8 | The current version of this tool is stable and should work as expected. We will continue to maintain it to ensure it remains functional, addressing only critical bugs as needed. However, no new features will be added in this repository.
  9 | 
 10 | For enhanced functionality and continued development of deconvolution methods, please refer to our new tool, [DualSimplex](https://github.com/artyomovlab/dualsimplex), which represents the next step in this line of work.
 11 | 
 12 | 
 13 | Installing the package
 14 | ----------------------
 15 | 
 16 | You can install the package using `devtools::install_github`:
 17 | 
 18 | ``` r
 19 | devtools::install_github("ctlab/linseed")
 20 | ```
 21 | 
 22 | Current build was tested using rhub:
 23 | 
 24 | Getting started with linseed
 25 | ----------------------------
 26 | 
 27 | To start working with gene expression data, we need to create a new LinseedObject, in this tutorial we will use GSE19830 (mixture of Liver, Brain and Lung), we will take only mixed samples (10-42) and will take only 10000 most expressed genes.
 28 | 
 29 | ``` r
 30 | library(linseed)
 31 | lo <- LinseedObject$new("GSE19830", samples=10:42, topGenes=10000)
 32 | ```
 33 | 
 34 | Coolinearity networks
 35 | ---------------------
 36 | 
 37 | To build a coolinearity network we first have to evaluate all pairwise collinearity coefficients, all pairwise spearman correlation and then run significance test which will calculate p value for each each by shuffling network weights randomly.
 38 | 
 39 | ``` r
 40 | lo$calculatePairwiseLinearity()
 41 | lo$calculateSpearmanCorrelation()
 42 | lo$calculateSignificanceLevel(100)
 43 | lo$significancePlot(0.01)
 44 | ```
 45 | 
 46 | ![](Readme_files/figure-markdown_github/networks-1.png)
 47 | 
 48 | ``` r
 49 | lo$filterDatasetByPval(0.01)
 50 | ```
 51 | 
 52 |     ## Total number of genes is 10000
 53 | 
 54 |     ## The number of genes after filtering is 3297
 55 | 
 56 | ``` r
 57 | lo$svdPlot()
 58 | ```
 59 | 
 60 | ![](Readme_files/figure-markdown_github/networks-2.png)
 61 | 
 62 | To visualiaze what left after filtering we can call projection plot from our object. But we have to project the data to the simplex first.
 63 | 
 64 | ``` r
 65 | lo$setCellTypeNumber(3)
 66 | lo$project("full") # projecting full dataset
 67 | lo$projectionPlot(color="filtered")
 68 | ```
 69 | 
 70 | ![](Readme_files/figure-markdown_github/visi-1.png)
 71 | 
 72 | Deconvolution
 73 | -------------
 74 | 
 75 | To deconvolve the dataset, you first have to project (full or filtered dataset) to the simplex, and then find corners of it.
 76 | 
 77 | ``` r
 78 | lo$project("filtered")
 79 | lo$smartSearchCorners(dataset="filtered", error="norm")
 80 | ```
 81 | 
 82 |     ## Final vector is
 83 | 
 84 |     ## 4 1 5
 85 | 
 86 |     ## 
 87 | 
 88 | ``` r
 89 | lo$deconvolveByEndpoints()
 90 | plotProportions(lo$proportions)
 91 | ```
 92 | 
 93 | ![](Readme_files/figure-markdown_github/deconvolution-1.png)
 94 | 
 95 | We can also use tSNE to haave an idea of how data looks like when dimensionally reduced.
 96 | 
 97 | ``` r
 98 | # lets select 100 genes closest to the simplex corners 
 99 | lo$selectGenes(100)
100 | lo$tsnePlot()
101 | ```
102 | 
103 | ![](Readme_files/figure-markdown_github/rtsne-1.png)
104 | 
105 | To compare with actual proportions you can use `dotPlotProportions` function
106 | 
107 | ``` r
108 | data("proportionsLiverBrainLung")
109 | dotPlotPropotions(lo$proportions, proportionsLiverBrainLung[, 10:42], guess=TRUE)
110 | ```
111 | 
112 | ![](Readme_files/figure-markdown_github/proportions-1.png)
113 | 


--------------------------------------------------------------------------------
/Readme_files/figure-markdown_github/deconvolution-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/Readme_files/figure-markdown_github/deconvolution-1.png


--------------------------------------------------------------------------------
/Readme_files/figure-markdown_github/networks-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/Readme_files/figure-markdown_github/networks-1.png


--------------------------------------------------------------------------------
/Readme_files/figure-markdown_github/networks-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/Readme_files/figure-markdown_github/networks-2.png


--------------------------------------------------------------------------------
/Readme_files/figure-markdown_github/proportions-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/Readme_files/figure-markdown_github/proportions-1.png


--------------------------------------------------------------------------------
/Readme_files/figure-markdown_github/rtsne-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/Readme_files/figure-markdown_github/rtsne-1.png


--------------------------------------------------------------------------------
/Readme_files/figure-markdown_github/visi-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/Readme_files/figure-markdown_github/visi-1.png


--------------------------------------------------------------------------------
/data/datasetLiverBrainLung.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/data/datasetLiverBrainLung.RData


--------------------------------------------------------------------------------
/data/proportionsLiverBrainLung.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctlab/LinSeed/a54de83ab65f16611265e49ddd341f429a674e8d/data/proportionsLiverBrainLung.RData


--------------------------------------------------------------------------------
/man/LinseedObject.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/linseedObject.R
  3 | \docType{class}
  4 | \name{LinseedObject}
  5 | \alias{LinseedObject}
  6 | \title{Linseed Object}
  7 | \format{
  8 | \code{\link{R6Class}} object.
  9 | }
 10 | \value{
 11 | Object of \code{\link{R6Class}} -- an interface to work with gene expression data.
 12 | }
 13 | \description{
 14 | Linseed Object
 15 | 
 16 | Linseed Object
 17 | }
 18 | \details{
 19 | The Linseed Object Class. 
 20 | 
 21 | Provides an interface to perform 
 22 | collinear network construction,
 23 | linear subspace identification, 
 24 | simplex corner identification and gene expression deconvolution
 25 | }
 26 | \examples{
 27 | LinseedObject$new("GSE19830", samples=10:42, topGenes=10000)
 28 | 
 29 | }
 30 | \section{Public fields}{
 31 | \if{html}{\out{<div class="r6-fields">}}
 32 | \describe{
 33 | \item{\code{exp}}{List of two elements raw and normalized gene expression dataset}
 34 | 
 35 | \item{\code{name}}{Character, optional, dataset name}
 36 | 
 37 | \item{\code{cellTypeNumber}}{Identified cell type number, required for projection, 
 38 | corner detection and deconvolution}
 39 | 
 40 | \item{\code{projection}}{Projection of genes into space lower-dimensionality (presumably simplex)}
 41 | 
 42 | \item{\code{endpoints}}{Simplex corners (in normalized, non-reduced space)}
 43 | 
 44 | \item{\code{endpointsProjection}}{Simplex corners (in reduced space)}
 45 | 
 46 | \item{\code{distances}}{Stores distances for every gene to each corner in reduced space}
 47 | 
 48 | \item{\code{markers}}{List that stores signatures genes for deconvolution, can be set manually or can be obtained by \code{selectGenes(k)}}
 49 | 
 50 | \item{\code{signatures}}{Deconvolution signature matrix}
 51 | 
 52 | \item{\code{proportions}}{Deconvolution proportion matrix}
 53 | 
 54 | \item{\code{pairwise}}{Calculated pairwise collinearity measure}
 55 | 
 56 | \item{\code{spearman}}{Calculated spearman correlations}
 57 | 
 58 | \item{\code{genes}}{genes significance test result list.}
 59 | 
 60 | \item{\code{projectiveProjection}}{calculated projection operator}
 61 | 
 62 | \item{\code{Q}}{Q matrix obtained from sysal alogorithm}
 63 | }
 64 | \if{html}{\out{</div>}}
 65 | }
 66 | \section{Methods}{
 67 | \subsection{Public methods}{
 68 | \itemize{
 69 | \item \href{#method-LinseedObject-new}{\code{LinseedObject$new()}}
 70 | \item \href{#method-LinseedObject-svdPlot}{\code{LinseedObject$svdPlot()}}
 71 | \item \href{#method-LinseedObject-hysime}{\code{LinseedObject$hysime()}}
 72 | \item \href{#method-LinseedObject-setCellTypeNumber}{\code{LinseedObject$setCellTypeNumber()}}
 73 | \item \href{#method-LinseedObject-project}{\code{LinseedObject$project()}}
 74 | \item \href{#method-LinseedObject-projectionPlot}{\code{LinseedObject$projectionPlot()}}
 75 | \item \href{#method-LinseedObject-sisalCorners}{\code{LinseedObject$sisalCorners()}}
 76 | \item \href{#method-LinseedObject-selectGenes}{\code{LinseedObject$selectGenes()}}
 77 | \item \href{#method-LinseedObject-deconvolve}{\code{LinseedObject$deconvolve()}}
 78 | \item \href{#method-LinseedObject-deconvolutionError}{\code{LinseedObject$deconvolutionError()}}
 79 | \item \href{#method-LinseedObject-deconvolveByEndpoints}{\code{LinseedObject$deconvolveByEndpoints()}}
 80 | \item \href{#method-LinseedObject-calculateSpearmanCorrelation}{\code{LinseedObject$calculateSpearmanCorrelation()}}
 81 | \item \href{#method-LinseedObject-calculateSignificanceLevel}{\code{LinseedObject$calculateSignificanceLevel()}}
 82 | \item \href{#method-LinseedObject-calculatePairwiseLinearity}{\code{LinseedObject$calculatePairwiseLinearity()}}
 83 | \item \href{#method-LinseedObject-smartSearchCorners}{\code{LinseedObject$smartSearchCorners()}}
 84 | \item \href{#method-LinseedObject-filterDatasetByPval}{\code{LinseedObject$filterDatasetByPval()}}
 85 | \item \href{#method-LinseedObject-filterDataset}{\code{LinseedObject$filterDataset()}}
 86 | \item \href{#method-LinseedObject-tsnePlot}{\code{LinseedObject$tsnePlot()}}
 87 | \item \href{#method-LinseedObject-significancePlot}{\code{LinseedObject$significancePlot()}}
 88 | \item \href{#method-LinseedObject-clone}{\code{LinseedObject$clone()}}
 89 | }
 90 | }
 91 | \if{html}{\out{<hr>}}
 92 | \if{html}{\out{<a id="method-LinseedObject-new"></a>}}
 93 | \if{latex}{\out{\hypertarget{method-LinseedObject-new}{}}}
 94 | \subsection{Method \code{new()}}{
 95 | Constructor
 96 | \subsection{Usage}{
 97 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$new(...)}\if{html}{\out{</div>}}
 98 | }
 99 | 
100 | \subsection{Arguments}{
101 | \if{html}{\out{<div class="arguments">}}
102 | \describe{
103 | \item{\code{...}}{dataset matrix/GSE identifier as well as preprocessing parameters}
104 | }
105 | \if{html}{\out{</div>}}
106 | }
107 | }
108 | \if{html}{\out{<hr>}}
109 | \if{html}{\out{<a id="method-LinseedObject-svdPlot"></a>}}
110 | \if{latex}{\out{\hypertarget{method-LinseedObject-svdPlot}{}}}
111 | \subsection{Method \code{svdPlot()}}{
112 | Plot singular values elbow plot
113 | \subsection{Usage}{
114 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$svdPlot(dataset = "norm", components = 50)}\if{html}{\out{</div>}}
115 | }
116 | 
117 | \subsection{Arguments}{
118 | \if{html}{\out{<div class="arguments">}}
119 | \describe{
120 | \item{\code{dataset}}{matrix identifier, can be "norm"or "raw"}
121 | 
122 | \item{\code{components}}{number of components to plot}
123 | }
124 | \if{html}{\out{</div>}}
125 | }
126 | }
127 | \if{html}{\out{<hr>}}
128 | \if{html}{\out{<a id="method-LinseedObject-hysime"></a>}}
129 | \if{latex}{\out{\hypertarget{method-LinseedObject-hysime}{}}}
130 | \subsection{Method \code{hysime()}}{
131 | Evalutes number of cell types presented in mixture using (HySime) hyperspectral signal identification by minimum error
132 | \subsection{Usage}{
133 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$hysime(dataset = "filtered", error = "norm", set = FALSE)}\if{html}{\out{</div>}}
134 | }
135 | 
136 | \subsection{Arguments}{
137 | \if{html}{\out{<div class="arguments">}}
138 | \describe{
139 | \item{\code{dataset}}{dataset identifier, can be "filtered"or "full}
140 | 
141 | \item{\code{error}}{matrix identifier within the dataset}
142 | 
143 | \item{\code{set}}{wether to set identified cell type number to the object}
144 | }
145 | \if{html}{\out{</div>}}
146 | }
147 | }
148 | \if{html}{\out{<hr>}}
149 | \if{html}{\out{<a id="method-LinseedObject-setCellTypeNumber"></a>}}
150 | \if{latex}{\out{\hypertarget{method-LinseedObject-setCellTypeNumber}{}}}
151 | \subsection{Method \code{setCellTypeNumber()}}{
152 | Setter for sell type numbers
153 | \subsection{Usage}{
154 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$setCellTypeNumber(k)}\if{html}{\out{</div>}}
155 | }
156 | 
157 | \subsection{Arguments}{
158 | \if{html}{\out{<div class="arguments">}}
159 | \describe{
160 | \item{\code{k}}{number to set}
161 | }
162 | \if{html}{\out{</div>}}
163 | }
164 | }
165 | \if{html}{\out{<hr>}}
166 | \if{html}{\out{<a id="method-LinseedObject-project"></a>}}
167 | \if{latex}{\out{\hypertarget{method-LinseedObject-project}{}}}
168 | \subsection{Method \code{project()}}{
169 | Perform projection
170 | \subsection{Usage}{
171 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$project(dataset)}\if{html}{\out{</div>}}
172 | }
173 | 
174 | \subsection{Arguments}{
175 | \if{html}{\out{<div class="arguments">}}
176 | \describe{
177 | \item{\code{dataset}}{dataset identifier can be "filtered" or "full"}
178 | }
179 | \if{html}{\out{</div>}}
180 | }
181 | }
182 | \if{html}{\out{<hr>}}
183 | \if{html}{\out{<a id="method-LinseedObject-projectionPlot"></a>}}
184 | \if{latex}{\out{\hypertarget{method-LinseedObject-projectionPlot}{}}}
185 | \subsection{Method \code{projectionPlot()}}{
186 | Plot projected points
187 | \subsection{Usage}{
188 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$projectionPlot(dims = 1:2, color = NULL)}\if{html}{\out{</div>}}
189 | }
190 | 
191 | \subsection{Arguments}{
192 | \if{html}{\out{<div class="arguments">}}
193 | \describe{
194 | \item{\code{dims}}{dimensions to plot}
195 | 
196 | \item{\code{color}}{which points to color (corner, type, cluster, filtered)}
197 | }
198 | \if{html}{\out{</div>}}
199 | }
200 | }
201 | \if{html}{\out{<hr>}}
202 | \if{html}{\out{<a id="method-LinseedObject-sisalCorners"></a>}}
203 | \if{latex}{\out{\hypertarget{method-LinseedObject-sisalCorners}{}}}
204 | \subsection{Method \code{sisalCorners()}}{
205 | Identify corners in projected space
206 | \subsection{Usage}{
207 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$sisalCorners(...)}\if{html}{\out{</div>}}
208 | }
209 | 
210 | \subsection{Arguments}{
211 | \if{html}{\out{<div class="arguments">}}
212 | \describe{
213 | \item{\code{...}}{extra arguments for sisal algorithm}
214 | }
215 | \if{html}{\out{</div>}}
216 | }
217 | }
218 | \if{html}{\out{<hr>}}
219 | \if{html}{\out{<a id="method-LinseedObject-selectGenes"></a>}}
220 | \if{latex}{\out{\hypertarget{method-LinseedObject-selectGenes}{}}}
221 | \subsection{Method \code{selectGenes()}}{
222 | Take top genes using distances calculated
223 | \subsection{Usage}{
224 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$selectGenes(n)}\if{html}{\out{</div>}}
225 | }
226 | 
227 | \subsection{Arguments}{
228 | \if{html}{\out{<div class="arguments">}}
229 | \describe{
230 | \item{\code{n}}{how many genes to select}
231 | }
232 | \if{html}{\out{</div>}}
233 | }
234 | }
235 | \if{html}{\out{<hr>}}
236 | \if{html}{\out{<a id="method-LinseedObject-deconvolve"></a>}}
237 | \if{latex}{\out{\hypertarget{method-LinseedObject-deconvolve}{}}}
238 | \subsection{Method \code{deconvolve()}}{
239 | Solve the deconvolution of the dataset matrix
240 | \subsection{Usage}{
241 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$deconvolve(
242 |   dataset = "filtered",
243 |   error = "norm",
244 |   method = "dsa",
245 |   ...
246 | )}\if{html}{\out{</div>}}
247 | }
248 | 
249 | \subsection{Arguments}{
250 | \if{html}{\out{<div class="arguments">}}
251 | \describe{
252 | \item{\code{dataset}}{dataset identifier (filtered, full)}
253 | 
254 | \item{\code{error}}{matrix identifier within dataset}
255 | 
256 | \item{\code{method}}{which method to use ("dsa" by default, willl try CelLmix if something else)}
257 | 
258 | \item{\code{...}}{CellMix parameters}
259 | }
260 | \if{html}{\out{</div>}}
261 | }
262 | }
263 | \if{html}{\out{<hr>}}
264 | \if{html}{\out{<a id="method-LinseedObject-deconvolutionError"></a>}}
265 | \if{latex}{\out{\hypertarget{method-LinseedObject-deconvolutionError}{}}}
266 | \subsection{Method \code{deconvolutionError()}}{
267 | Calculate deconvolution error
268 | \subsection{Usage}{
269 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$deconvolutionError(dataset = "filtered", error = "norm")}\if{html}{\out{</div>}}
270 | }
271 | 
272 | \subsection{Arguments}{
273 | \if{html}{\out{<div class="arguments">}}
274 | \describe{
275 | \item{\code{dataset}}{dataset identifier (filtered, full)}
276 | 
277 | \item{\code{error}}{matrix identifier within dataset}
278 | }
279 | \if{html}{\out{</div>}}
280 | }
281 | }
282 | \if{html}{\out{<hr>}}
283 | \if{html}{\out{<a id="method-LinseedObject-deconvolveByEndpoints"></a>}}
284 | \if{latex}{\out{\hypertarget{method-LinseedObject-deconvolveByEndpoints}{}}}
285 | \subsection{Method \code{deconvolveByEndpoints()}}{
286 | Calculated and set the deconvolution solution based on found endpoints in projected space
287 | \subsection{Usage}{
288 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$deconvolveByEndpoints(dataset = "filtered", error = "norm")}\if{html}{\out{</div>}}
289 | }
290 | 
291 | \subsection{Arguments}{
292 | \if{html}{\out{<div class="arguments">}}
293 | \describe{
294 | \item{\code{dataset}}{dataset identifier (filtered, full)}
295 | 
296 | \item{\code{error}}{matrix identifier within dataset}
297 | }
298 | \if{html}{\out{</div>}}
299 | }
300 | }
301 | \if{html}{\out{<hr>}}
302 | \if{html}{\out{<a id="method-LinseedObject-calculateSpearmanCorrelation"></a>}}
303 | \if{latex}{\out{\hypertarget{method-LinseedObject-calculateSpearmanCorrelation}{}}}
304 | \subsection{Method \code{calculateSpearmanCorrelation()}}{
305 | Calculate correlation between rows of the matrix
306 | \subsection{Usage}{
307 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$calculateSpearmanCorrelation()}\if{html}{\out{</div>}}
308 | }
309 | 
310 | }
311 | \if{html}{\out{<hr>}}
312 | \if{html}{\out{<a id="method-LinseedObject-calculateSignificanceLevel"></a>}}
313 | \if{latex}{\out{\hypertarget{method-LinseedObject-calculateSignificanceLevel}{}}}
314 | \subsection{Method \code{calculateSignificanceLevel()}}{
315 | Significance test which will calculate p value for each each by shuffling network weights randomly
316 | \subsection{Usage}{
317 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$calculateSignificanceLevel(
318 |   iters = 1000,
319 |   spearmanThreshold = 0,
320 |   retVal = F
321 | )}\if{html}{\out{</div>}}
322 | }
323 | 
324 | \subsection{Arguments}{
325 | \if{html}{\out{<div class="arguments">}}
326 | \describe{
327 | \item{\code{iters}}{number of shuffles}
328 | 
329 | \item{\code{spearmanThreshold}}{threshold of correlation to consider (consider higher than this value)}
330 | 
331 | \item{\code{retVal}}{Wether to return pvalues}
332 | }
333 | \if{html}{\out{</div>}}
334 | }
335 | }
336 | \if{html}{\out{<hr>}}
337 | \if{html}{\out{<a id="method-LinseedObject-calculatePairwiseLinearity"></a>}}
338 | \if{latex}{\out{\hypertarget{method-LinseedObject-calculatePairwiseLinearity}{}}}
339 | \subsection{Method \code{calculatePairwiseLinearity()}}{
340 | Calculate all pairwise collinearity coefficients
341 | \subsection{Usage}{
342 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$calculatePairwiseLinearity(negToZero = T)}\if{html}{\out{</div>}}
343 | }
344 | 
345 | \subsection{Arguments}{
346 | \if{html}{\out{<div class="arguments">}}
347 | \describe{
348 | \item{\code{negToZero}}{wether to remove negative correlations}
349 | }
350 | \if{html}{\out{</div>}}
351 | }
352 | }
353 | \if{html}{\out{<hr>}}
354 | \if{html}{\out{<a id="method-LinseedObject-smartSearchCorners"></a>}}
355 | \if{latex}{\out{\hypertarget{method-LinseedObject-smartSearchCorners}{}}}
356 | \subsection{Method \code{smartSearchCorners()}}{
357 | The main interface to search corners in projected space with sisal
358 | \subsection{Usage}{
359 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$smartSearchCorners(
360 |   taus = 2^seq(0, -20, -1),
361 |   sisalIter = 100,
362 |   dataset = "filtered",
363 |   error = "norm"
364 | )}\if{html}{\out{</div>}}
365 | }
366 | 
367 | \subsection{Arguments}{
368 | \if{html}{\out{<div class="arguments">}}
369 | \describe{
370 | \item{\code{taus}}{taus parameter for sisal}
371 | 
372 | \item{\code{sisalIter}}{number of iterations}
373 | 
374 | \item{\code{dataset}}{dataset identifier (filtered, full)}
375 | 
376 | \item{\code{error}}{matrix identifier within the dataset}
377 | }
378 | \if{html}{\out{</div>}}
379 | }
380 | }
381 | \if{html}{\out{<hr>}}
382 | \if{html}{\out{<a id="method-LinseedObject-filterDatasetByPval"></a>}}
383 | \if{latex}{\out{\hypertarget{method-LinseedObject-filterDatasetByPval}{}}}
384 | \subsection{Method \code{filterDatasetByPval()}}{
385 | Remove features which have pvalue of correlation higher than threshold
386 | \subsection{Usage}{
387 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$filterDatasetByPval(pval = 0.001)}\if{html}{\out{</div>}}
388 | }
389 | 
390 | \subsection{Arguments}{
391 | \if{html}{\out{<div class="arguments">}}
392 | \describe{
393 | \item{\code{pval}}{pvalue threshold}
394 | }
395 | \if{html}{\out{</div>}}
396 | }
397 | }
398 | \if{html}{\out{<hr>}}
399 | \if{html}{\out{<a id="method-LinseedObject-filterDataset"></a>}}
400 | \if{latex}{\out{\hypertarget{method-LinseedObject-filterDataset}{}}}
401 | \subsection{Method \code{filterDataset()}}{
402 | Remove subset of genes
403 | \subsection{Usage}{
404 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$filterDataset(geneSubset)}\if{html}{\out{</div>}}
405 | }
406 | 
407 | \subsection{Arguments}{
408 | \if{html}{\out{<div class="arguments">}}
409 | \describe{
410 | \item{\code{geneSubset}}{vector of genes}
411 | }
412 | \if{html}{\out{</div>}}
413 | }
414 | }
415 | \if{html}{\out{<hr>}}
416 | \if{html}{\out{<a id="method-LinseedObject-tsnePlot"></a>}}
417 | \if{latex}{\out{\hypertarget{method-LinseedObject-tsnePlot}{}}}
418 | \subsection{Method \code{tsnePlot()}}{
419 | Build t-SNE dimensionality reduction plot
420 | \subsection{Usage}{
421 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$tsnePlot(dataset = "filtered", error = "norm")}\if{html}{\out{</div>}}
422 | }
423 | 
424 | \subsection{Arguments}{
425 | \if{html}{\out{<div class="arguments">}}
426 | \describe{
427 | \item{\code{dataset}}{dataset identifier (filtered, full)}
428 | 
429 | \item{\code{error}}{matrix identifier within the dataset}
430 | }
431 | \if{html}{\out{</div>}}
432 | }
433 | }
434 | \if{html}{\out{<hr>}}
435 | \if{html}{\out{<a id="method-LinseedObject-significancePlot"></a>}}
436 | \if{latex}{\out{\hypertarget{method-LinseedObject-significancePlot}{}}}
437 | \subsection{Method \code{significancePlot()}}{
438 | Visualize significance plot for genes, based on significance test. Color by pValue
439 | \subsection{Usage}{
440 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$significancePlot(threshold = 0.001)}\if{html}{\out{</div>}}
441 | }
442 | 
443 | \subsection{Arguments}{
444 | \if{html}{\out{<div class="arguments">}}
445 | \describe{
446 | \item{\code{threshold}}{pValue threshold}
447 | }
448 | \if{html}{\out{</div>}}
449 | }
450 | }
451 | \if{html}{\out{<hr>}}
452 | \if{html}{\out{<a id="method-LinseedObject-clone"></a>}}
453 | \if{latex}{\out{\hypertarget{method-LinseedObject-clone}{}}}
454 | \subsection{Method \code{clone()}}{
455 | The objects of this class are cloneable with this method.
456 | \subsection{Usage}{
457 | \if{html}{\out{<div class="r">}}\preformatted{LinseedObject$clone(deep = FALSE)}\if{html}{\out{</div>}}
458 | }
459 | 
460 | \subsection{Arguments}{
461 | \if{html}{\out{<div class="arguments">}}
462 | \describe{
463 | \item{\code{deep}}{Whether to make a deep clone.}
464 | }
465 | \if{html}{\out{</div>}}
466 | }
467 | }
468 | }
469 | 


--------------------------------------------------------------------------------
/man/collapseGenes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/geneCollapse.R
 3 | \name{collapseGenes}
 4 | \alias{collapseGenes}
 5 | \title{Collapse Genes}
 6 | \usage{
 7 | collapseGenes(ge, probes)
 8 | }
 9 | \arguments{
10 | \item{ge}{matrix, rows are probes, columns are samples}
11 | 
12 | \item{probes}{df, matrix or named vector}
13 | }
14 | \value{
15 | collapsed gene expression matrix with probes replaced with corresponding genes
16 | }
17 | \description{
18 | Collapses given dataset so every gene is presented by the highest expressed probe
19 | }
20 | 


--------------------------------------------------------------------------------
/man/datasetLiverBrainLung.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/datasetLiverBrainLung.R
 3 | \docType{data}
 4 | \name{datasetLiverBrainLung}
 5 | \alias{datasetLiverBrainLung}
 6 | \title{GSE19830 dataset}
 7 | \format{
 8 | An object of class \code{'data.frame'}
 9 | }
10 | \source{
11 | \href{http://qtlarchive.org/db/q?pg=projdetails&proj=moore_2013b}{QTL Archive}
12 | }
13 | \usage{
14 | data(datasetLiverBrainLung)
15 | }
16 | \description{
17 | Gene Expression data from GSE19830 experiment:
18 | 3 tissues (liver, brain and lung) were mixed in different proportions:
19 | }
20 | \details{
21 | GSM495209-GSM495211 pure lung samples
22 | GSM495212-GSM495214 pure brain samples
23 | GSM495215-GSM495217 pure liver samples
24 | GSM495218-GSM495220 5 % Liver / 25 % Brain / 70 % Lung
25 | GSM495221-GSM495223 70 % Liver / 5 % Brain / 25 % Lung
26 | GSM495224-GSM495226 25 % Liver / 70 % Brain / 5 % Lung
27 | GSM495227-GSM495229 70 % Liver / 25 % Brain / 5 % Lung
28 | GSM495230-GSM495232 45 % Liver / 45 % Brain / 10 % Lung
29 | GSM495233-GSM495235 55 % Liver / 20 % Brain / 25 % Lung
30 | GSM495236-GSM495238 50 % Liver / 30 % Brain / 20 % Lung
31 | GSM495239-GSM495241 55 % Liver / 30 % Brain / 15 % Lung
32 | GSM495242-GSM495244 50 % Liver / 40 % Brain / 10 % Lung
33 | GSM495245-GSM495247 60 % Liver / 35 % Brain / 5 % Lung
34 | GSM495248-GSM495250 65 % Liver / 34 % Brain / 1 % Lung
35 | }
36 | \examples{
37 | data("datasetLiverBrainLung")
38 | mixedSamples <- datasetLiverBrainLung[, 10:42]
39 | clustered <- preprocessDataset(mixedSamples)
40 | }
41 | \references{
42 | Shen-Orr SS, Tibshirani R, Khatri P, et al. cell type-specific gene expression differences in complex tissues. Nature methods. 2010;7(4):287-289. doi:10.1038/nmeth.1439.
43 | (\href{http://www.ncbi.nlm.nih.gov/pubmed/20208531}{PubMed})
44 | }
45 | \keyword{datasets}
46 | 


--------------------------------------------------------------------------------
/man/dotPlotPropotions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotProportions.R
 3 | \name{dotPlotPropotions}
 4 | \alias{dotPlotPropotions}
 5 | \title{Proportions dot plot}
 6 | \usage{
 7 | dotPlotPropotions(
 8 |   predicted,
 9 |   actual,
10 |   guess = FALSE,
11 |   main = NULL,
12 |   showR2 = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{predicted}{matrix of predicted proportions}
17 | 
18 | \item{actual}{matrix of actual proportions}
19 | 
20 | \item{guess}{if True will function will try to guess how to reorder rows of predicted proportions to match rows of actual proportions}
21 | 
22 | \item{main}{plot title}
23 | 
24 | \item{showR2}{calculate and show R squared statistics}
25 | }
26 | \value{
27 | ggplot dot plot. X axis is true proportion, Y axix is predicted proportion
28 | }
29 | \description{
30 | Proportions dot plot
31 | }
32 | 


--------------------------------------------------------------------------------
/man/estimateAdditiveNoise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimateNoise.R
 3 | \name{estimateAdditiveNoise}
 4 | \alias{estimateAdditiveNoise}
 5 | \title{Estimate additive noise}
 6 | \usage{
 7 | estimateAdditiveNoise(Y, verbose)
 8 | }
 9 | \arguments{
10 | \item{Y}{normalized gene expression matrix}
11 | 
12 | \item{verbose}{verbosity}
13 | }
14 | \value{
15 | list with two elements, w -- estimated noise and Rw estimated noise correlation matrix
16 | }
17 | \description{
18 | Additive noise estimation subroutine
19 | }
20 | \examples{
21 | n_genes <- 50
22 | n_samples <-  40
23 | n_values <-  n_genes * n_samples
24 | expression_matrix <- matrix(abs(rexp(n_genes*4, rate=2)), ncol=4) \%*\%
25 |                      matrix(abs(runif(n_samples * 4, 0, 1)), nrow =4)
26 | noise_matrix <- matrix(rnorm(n_genes*n_samples, 0, 0.02), nrow = n_genes)
27 | noisy_matrix <-  expression_matrix + noise_matrix
28 | noise_estimation_result <-  estimateAdditiveNoise(noisy_matrix, verbose = FALSE)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/estimateNoise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimateNoise.R
 3 | \name{estimateNoise}
 4 | \alias{estimateNoise}
 5 | \title{Noise estimation}
 6 | \usage{
 7 | estimateNoise(Y, noiseType = "additive", verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{Y}{normalized gene expression data matrix, columns are genes and rows are samples}
11 | 
12 | \item{noiseType}{character, describing noise type. Two possible values are "additive" and "possion"}
13 | 
14 | \item{verbose}{logical, default value is FALSE}
15 | }
16 | \value{
17 | list with two elements, w -- estimated noise and Rw estimated noise correlation matrix
18 | }
19 | \description{
20 | Estimates noise using multiple regression approach. Implements method described in
21 | J. M. Bioucas-Dias and J. M. P. Nascimento, "Hyperspectral Subspace Identification," in IEEE Transactions on Geoscience and Remote Sensing, vol. 46, no. 8, pp. 2435-2445, Aug. 2008.
22 | }
23 | \details{
24 | Based on MATLAB original code from http://www.lx.it.pt/~bioucas/code.htm
25 | }
26 | \examples{
27 | n_genes <- 50
28 | n_samples <-  40
29 | n_values <-  n_genes * n_samples
30 | expression_matrix <- matrix(abs(rexp(n_genes*4, rate=2)), ncol=4) \%*\%
31 |                      matrix(abs(runif(n_samples * 4, 0, 1)), nrow =4)
32 | noise_matrix <- matrix(rnorm(n_genes*n_samples, 0, 0.02), nrow = n_genes)
33 | noisy_matrix <-  expression_matrix + noise_matrix
34 | noise_estimation_result <-  estimateNoise(noisy_matrix)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/fastDSA.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dsa.R
 3 | \name{fastDSA}
 4 | \alias{fastDSA}
 5 | \title{Fast DSA algorithm implementation}
 6 | \usage{
 7 | fastDSA(dataset, genes)
 8 | }
 9 | \arguments{
10 | \item{dataset}{gene expression matrix}
11 | 
12 | \item{genes}{list with putative signatures for DSA algorithm}
13 | }
14 | \value{
15 | deconvolution results, list with H and W matrices
16 | }
17 | \description{
18 | Runs DSA implementation using .fccnls for solving least-squares with multiple right-hand-sides
19 | }
20 | 


--------------------------------------------------------------------------------
/man/generateMixedData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/generateData.R
 3 | \name{generateMixedData}
 4 | \alias{generateMixedData}
 5 | \title{Generate mixed data with or without noise}
 6 | \usage{
 7 | generateMixedData(
 8 |   samples = 40,
 9 |   genes = 12000,
10 |   cellTypes = 3,
11 |   bias = NULL,
12 |   spearmanThreshold = 0.5,
13 |   pureGenes = 0,
14 |   noiseDeviation = 0,
15 |   sampleLogMean = 4,
16 |   sampleLogSd = 3,
17 |   removeAngles = F,
18 |   cutCoef = 0.85,
19 |   removeBorders = F,
20 |   borderShift = 0.25
21 | )
22 | }
23 | \arguments{
24 | \item{samples}{number of samples}
25 | 
26 | \item{genes}{number of genes}
27 | 
28 | \item{cellTypes}{number of cell types}
29 | 
30 | \item{bias}{if not null bias should be a number in between 0 and 1, one of cell types presented in mix will be more abundunt and one cell type will be less abundunt}
31 | 
32 | \item{spearmanThreshold}{Spearman treshold that has to be between generated samples}
33 | 
34 | \item{pureGenes}{number of genes which are not noisy}
35 | 
36 | \item{noiseDeviation}{standart deviation of normally distributed noise value}
37 | 
38 | \item{sampleLogMean}{average log expression of pure samples}
39 | 
40 | \item{sampleLogSd}{standard deviation of log expression of pure samples}
41 | 
42 | \item{removeAngles}{if TRUE there will be no signature genes in the mix (simplex corners will be removed)}
43 | 
44 | \item{cutCoef}{if removeAngles == T how much to cut angles}
45 | 
46 | \item{removeBorders}{if TRUE where will be no genes close to simplex border}
47 | 
48 | \item{borderShift}{number in between 0 and 1, every value in basis is guaranteed to be at least borderShift}
49 | }
50 | \value{
51 | list(data, proportions, basis,correlations_within_basis)
52 | }
53 | \description{
54 | Generates mixed data with or without noise under assumption of linear model
55 | }
56 | 


--------------------------------------------------------------------------------
/man/getProjectiveProjection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projectiveProjection.R
 3 | \name{getProjectiveProjection}
 4 | \alias{getProjectiveProjection}
 5 | \title{Get projective projection}
 6 | \usage{
 7 | getProjectiveProjection(Y, p, spherize = F)
 8 | }
 9 | \arguments{
10 | \item{Y}{High dimensional data to project}
11 | 
12 | \item{p}{Dimensionality to project to}
13 | 
14 | \item{spherize}{Spherize dataset or not}
15 | }
16 | \value{
17 | projection
18 | }
19 | \description{
20 | Get projective projection
21 | }
22 | 


--------------------------------------------------------------------------------
/man/guessOrder.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotProportions.R
 3 | \name{guessOrder}
 4 | \alias{guessOrder}
 5 | \title{guess the order}
 6 | \usage{
 7 | guessOrder(predicted, actual)
 8 | }
 9 | \arguments{
10 | \item{predicted}{predicted propotions}
11 | 
12 | \item{actual}{actual proportions}
13 | }
14 | \value{
15 | numeric, correct order of predicted proportions
16 | }
17 | \description{
18 | Function tries to guess ordering for rows of predicted proportions to match rows of actual proportions
19 | }
20 | 


--------------------------------------------------------------------------------
/man/hinge.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hinge.R
 3 | \name{hinge}
 4 | \alias{hinge}
 5 | \title{Hinge function}
 6 | \usage{
 7 | hinge(Y)
 8 | }
 9 | \arguments{
10 | \item{Y}{matrix describing genes (possibly in reduced space)}
11 | }
12 | \value{
13 | matrix -Y with negative values replaced with zeroes
14 | }
15 | \description{
16 | Hinge function
17 | }
18 | 


--------------------------------------------------------------------------------
/man/hysime.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hysime.R
 3 | \name{hysime}
 4 | \alias{hysime}
 5 | \title{HySime}
 6 | \usage{
 7 | hysime(Y, W, Rn, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{Y}{gene expression matrix, columns are genes, rows are samples}
11 | 
12 | \item{W}{estimated noise matrix}
13 | 
14 | \item{Rn}{estimated noise correlation matrix}
15 | 
16 | \item{verbose}{verbosity, default valie is FALSE}
17 | }
18 | \value{
19 | list
20 | }
21 | \description{
22 | Evalutes number of cell types presented in mixture using (HySime) hyperspectral signal identification by minimum error
23 | }
24 | \details{
25 | Original paper is J. M. Bioucas-Dias and J. M. P. Nascimento, "Hyperspectral Subspace Identification," in IEEE Transactions on Geoscience and Remote Sensing, vol. 46, no. 8, pp. 2435-2445, Aug. 2008.
26 | 
27 | Based on MATLAB original code from http://www.lx.it.pt/~bioucas/code.htm
28 | }
29 | 


--------------------------------------------------------------------------------
/man/is_logscale.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{is_logscale}
 4 | \alias{is_logscale}
 5 | \title{is_logscale}
 6 | \usage{
 7 | is_logscale(x)
 8 | }
 9 | \arguments{
10 | \item{x}{gene expression matrix}
11 | }
12 | \value{
13 | logical, whether x is in the log scale
14 | }
15 | \description{
16 | is_logscale
17 | }
18 | 


--------------------------------------------------------------------------------
/man/linearizeDataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{linearizeDataset}
 4 | \alias{linearizeDataset}
 5 | \title{linearizeDataset}
 6 | \usage{
 7 | linearizeDataset(ge)
 8 | }
 9 | \arguments{
10 | \item{ge}{gene expression matrix}
11 | }
12 | \value{
13 | gene expression matrix in linear scale
14 | }
15 | \description{
16 | linearizeDataset
17 | }
18 | 


--------------------------------------------------------------------------------
/man/logDataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{logDataset}
 4 | \alias{logDataset}
 5 | \title{logDataset}
 6 | \usage{
 7 | logDataset(ge)
 8 | }
 9 | \arguments{
10 | \item{ge}{gene expression matrix}
11 | }
12 | \value{
13 | gene expression matrix in log scale
14 | }
15 | \description{
16 | logDataset
17 | }
18 | 


--------------------------------------------------------------------------------
/man/plotProportions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotProportions.R
 3 | \name{plotProportions}
 4 | \alias{plotProportions}
 5 | \title{Draw a plot of estimated proportions}
 6 | \usage{
 7 | plotProportions(..., pnames = NULL, point_size = 2, line_size = 1)
 8 | }
 9 | \arguments{
10 | \item{...}{matricies, data frames, NMF objects of estimated proportions or paths to file}
11 | 
12 | \item{pnames}{experiment titles}
13 | 
14 | \item{point_size}{point size for plot}
15 | 
16 | \item{line_size}{line size for plot}
17 | }
18 | \value{
19 | ggplot object
20 | }
21 | \description{
22 | Draws a plot of estimated proprotions
23 | If ggplot2 and reshape2 are installed will use them and return ggplot object
24 | Otherwise will use standart R functions
25 | }
26 | 


--------------------------------------------------------------------------------
/man/preprocessDataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/preprocess.R
 3 | \name{preprocessDataset}
 4 | \alias{preprocessDataset}
 5 | \title{Preprocess Dataset}
 6 | \usage{
 7 | preprocessDataset(
 8 |   dataset,
 9 |   annotation = NULL,
10 |   geneSymbol = "Gene symbol",
11 |   samples = NULL,
12 |   topGenes = 10000
13 | )
14 | }
15 | \arguments{
16 | \item{dataset}{matrix, data.frame, path to file or GSE accession with expression data}
17 | 
18 | \item{annotation}{dataframe, matrix, named vector with annotation to probes}
19 | 
20 | \item{geneSymbol}{column from annotation to collapse the genes, deafult value is 'Gene Symbol'}
21 | 
22 | \item{samples}{character vector of samples. If column were not in samples, it would be excluded from analysis.
23 | Default value is NULL, which takes every sample from dataset}
24 | 
25 | \item{topGenes}{integer How many genes include in analysis. We suppose to include only expressed genes. Default value is 10000}
26 | }
27 | \value{
28 | clustered dataset, matrix, first column identifies cluster of the row
29 | }
30 | \description{
31 | Preprocesses given dataset. Preprocessing consists of 3 major steps:
32 | 1) If needed, probes corresponding to the same genes are collapsed, only most expressed probe is taken for further analysis.
33 |    It's common technique in microarray data analysis.
34 | 2) If needed, only highly expressed genes are taken for further analysis. (Say hello to noize reduction)
35 | 3) All genes are clustered with Kmeans using cosine simillarity as distance.
36 | }
37 | 


--------------------------------------------------------------------------------
/man/preprocessGSE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/preprocess.R
 3 | \name{preprocessGSE}
 4 | \alias{preprocessGSE}
 5 | \title{Preprocess GSE Dataset}
 6 | \usage{
 7 | preprocessGSE(geoAccesion, annotate = TRUE, normalize = TRUE, ...)
 8 | }
 9 | \arguments{
10 | \item{geoAccesion}{e.g 'GSE19830'}
11 | 
12 | \item{annotate}{annotate with feature data from provided geo platform}
13 | 
14 | \item{normalize}{quantile normalize GEO dataset}
15 | 
16 | \item{...}{arguments further passed to preprocessDataset}
17 | }
18 | \value{
19 | clustered dataset, matrix, first column identifies cluster of the row
20 | }
21 | \description{
22 | Downloads GSE dataset by GEO accession and performs preprocessing
23 | }
24 | 


--------------------------------------------------------------------------------
/man/projectiveProjection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projectiveProjection.R
 3 | \name{projectiveProjection}
 4 | \alias{projectiveProjection}
 5 | \title{Projective projection}
 6 | \usage{
 7 | projectiveProjection(Y, p, spherize = F)
 8 | }
 9 | \arguments{
10 | \item{Y}{High dimensional data to project}
11 | 
12 | \item{p}{Dimensionality to project to}
13 | 
14 | \item{spherize}{Spherize dataset or not}
15 | }
16 | \value{
17 | matrix with coordinates of projected points.
18 | }
19 | \description{
20 | Projective projection
21 | }
22 | 


--------------------------------------------------------------------------------
/man/proportionsLiverBrainLung.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/proportionsLiverBrainLung.R
 3 | \docType{data}
 4 | \name{proportionsLiverBrainLung}
 5 | \alias{proportionsLiverBrainLung}
 6 | \title{GSE19830 proportions}
 7 | \format{
 8 | An object of class \code{'matrix'}
 9 | }
10 | \source{
11 | \href{http://qtlarchive.org/db/q?pg=projdetails&proj=moore_2013b}{QTL Archive}
12 | }
13 | \usage{
14 | data(proportionsLiverBrainLung)
15 | }
16 | \description{
17 | tissue proportions from GSE19830 experiment:
18 | 3 tissues (liver, brain and lung) were mixed in different proportions:
19 | }
20 | \details{
21 | GSM495209-GSM495211 pure liver samples
22 | GSM495212-GSM495214 pure brain samples
23 | GSM495215-GSM495217 pure lung samples
24 | GSM495218-GSM495220 5 % Liver / 25 % Brain / 70 % Lung
25 | GSM495221-GSM495223 70 % Liver / 5 % Brain / 25 % Lung
26 | GSM495224-GSM495226 25 % Liver / 70 % Brain / 5 % Lung
27 | GSM495227-GSM495229 70 % Liver / 25 % Brain / 5 % Lung
28 | GSM495230-GSM495232 45 % Liver / 45 % Brain / 10 % Lung
29 | GSM495233-GSM495235 55 % Liver / 20 % Brain / 25 % Lung
30 | GSM495236-GSM495238 50 % Liver / 30 % Brain / 20 % Lung
31 | GSM495239-GSM495241 55 % Liver / 30 % Brain / 15 % Lung
32 | GSM495242-GSM495244 50 % Liver / 40 % Brain / 10 % Lung
33 | GSM495245-GSM495247 60 % Liver / 35 % Brain / 5 % Lung
34 | GSM495248-GSM495250 65 % Liver / 34 % Brain / 1 % Lung
35 | }
36 | \examples{
37 | data("proportionsLiverBrainLung")
38 | mixedProportions <- proportionsLiverBrainLung[, 10:42]
39 | barplot(mixedProportions,
40 |         main='Proprotions of tissues in samples',
41 |         col=c('#00BA38','#F8766D', '#619CFF'),
42 |         legend = rownames(mixedProportions))
43 | }
44 | \references{
45 | Shen-Orr SS, Tibshirani R, Khatri P, et al. cell type-specific gene expression differences in complex tissues. Nature methods. 2010;7(4):287-289. doi:10.1038/nmeth.1439.
46 | (\href{http://www.ncbi.nlm.nih.gov/pubmed/20208531}{PubMed})
47 | }
48 | \keyword{datasets}
49 | 


--------------------------------------------------------------------------------
/man/pureDsa.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dsa.R
 3 | \name{pureDsa}
 4 | \alias{pureDsa}
 5 | \title{DSA algorithm implementation for pure points}
 6 | \usage{
 7 | pureDsa(dataset, pure)
 8 | }
 9 | \arguments{
10 | \item{dataset}{gene expression matrix}
11 | 
12 | \item{pure}{matrix contains expression of signature genes}
13 | }
14 | \value{
15 | deconvolution results, list with H and W matrices
16 | }
17 | \description{
18 | Runs DSA implementation using fcnnls_c for solving least-squares with multiple right-hand-sides
19 | }
20 | 


--------------------------------------------------------------------------------
/man/runDSA.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dsa.R
 3 | \name{runDSA}
 4 | \alias{runDSA}
 5 | \title{Run DSA by clusters}
 6 | \usage{
 7 | runDSA(dataset, clustering, clusters)
 8 | }
 9 | \arguments{
10 | \item{dataset}{gene expression matrix}
11 | 
12 | \item{clustering}{numeric vector, clustering of the rows}
13 | 
14 | \item{clusters}{numeric vector, which clusters use as putative signatures}
15 | }
16 | \value{
17 | deconvolution results, list with H and W matrices
18 | }
19 | \description{
20 | Runs DSA with provided clusters as putative signatures
21 | }
22 | 


--------------------------------------------------------------------------------
/man/sampleFromSimplexUniformly.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/generateData.R
 3 | \name{sampleFromSimplexUniformly}
 4 | \alias{sampleFromSimplexUniformly}
 5 | \title{Generation of points uniformly distributed on k-dimensional standard simplex}
 6 | \usage{
 7 | sampleFromSimplexUniformly(n, k = 3, M = 1e+05)
 8 | }
 9 | \arguments{
10 | \item{n}{number of poitns}
11 | 
12 | \item{k}{dimensionality}
13 | 
14 | \item{M}{grid size}
15 | }
16 | \value{
17 | matrix where columns are points
18 | }
19 | \description{
20 | Generation of points uniformly distributed on k-dimensional standard simplex
21 | }
22 | 


--------------------------------------------------------------------------------
/man/sisal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sisal.R
 3 | \name{sisal}
 4 | \alias{sisal}
 5 | \title{SISAL algorithm}
 6 | \usage{
 7 | sisal(
 8 |   Y,
 9 |   p,
10 |   iters = 80,
11 |   tau = 1,
12 |   mu = p * 1000/ncol(Y),
13 |   spherize = F,
14 |   tol = 0.01,
15 |   m0 = NULL,
16 |   verbose = F,
17 |   returnPlot = T,
18 |   nonNeg = F
19 | )
20 | }
21 | \arguments{
22 | \item{Y}{gene expression matrix}
23 | 
24 | \item{p}{number of endpoints}
25 | 
26 | \item{iters}{number of iterations to perform}
27 | 
28 | \item{tau}{noise points penalty coefficient}
29 | 
30 | \item{mu}{regularization}
31 | 
32 | \item{spherize}{spherize or not}
33 | 
34 | \item{tol}{numeric tolerance}
35 | 
36 | \item{m0}{starting points for SISAL algorithm, default points are getting from VCA}
37 | 
38 | \item{verbose}{verbosity}
39 | 
40 | \item{returnPlot}{logical, is it needed to return dataframe or not}
41 | 
42 | \item{nonNeg}{logical, force simplex corners to non-negative space}
43 | }
44 | \value{
45 | list containing the results of the sisal algorithm including original endpoints and their projection.
46 | }
47 | \description{
48 | Sisal alogorithm for simplex endpoints identification
49 | }
50 | \details{
51 | Implementation of method
52 | }
53 | 


--------------------------------------------------------------------------------
/man/softNeg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/softNeg.R
 3 | \name{softNeg}
 4 | \alias{softNeg}
 5 | \title{Soft negative score}
 6 | \usage{
 7 | softNeg(Y, tau)
 8 | }
 9 | \arguments{
10 | \item{Y}{matrix}
11 | 
12 | \item{tau}{coefficient to penalize for volume}
13 | }
14 | \value{
15 | score value
16 | }
17 | \description{
18 | Soft negative score
19 | }
20 | 


--------------------------------------------------------------------------------
/man/vca.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vca.R
 3 | \name{vca}
 4 | \alias{vca}
 5 | \title{Title}
 6 | \usage{
 7 | vca(R, p, SNR = NULL, verbose = F)
 8 | }
 9 | \arguments{
10 | \item{R}{matrix describing points (possibly lying in a simplex) in high dimensional space}
11 | 
12 | \item{p}{number endpoints to find}
13 | 
14 | \item{SNR}{signal to noise ratio, NULL by default}
15 | 
16 | \item{verbose}{verbosity, deafult value is FALSE}
17 | }
18 | \value{
19 | matrix of columns from R which are considered to be endpoints
20 | }
21 | \description{
22 | Title
23 | }
24 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
2 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
2 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | #include <RcppArmadillo.h>
 5 | #include <Rcpp.h>
 6 | 
 7 | using namespace Rcpp;
 8 | 
 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
10 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
11 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
12 | #endif
13 | 
14 | // fcnnls_c
15 | arma::mat fcnnls_c(const arma::mat& C, const arma::mat& A);
16 | RcppExport SEXP _linseed_fcnnls_c(SEXP CSEXP, SEXP ASEXP) {
17 | BEGIN_RCPP
18 |     Rcpp::RObject rcpp_result_gen;
19 |     Rcpp::RNGScope rcpp_rngScope_gen;
20 |     Rcpp::traits::input_parameter< const arma::mat& >::type C(CSEXP);
21 |     Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP);
22 |     rcpp_result_gen = Rcpp::wrap(fcnnls_c(C, A));
23 |     return rcpp_result_gen;
24 | END_RCPP
25 | }
26 | // fcnnls_sum_to_one
27 | arma::mat fcnnls_sum_to_one(const arma::mat& C, const arma::mat& A, double delta);
28 | RcppExport SEXP _linseed_fcnnls_sum_to_one(SEXP CSEXP, SEXP ASEXP, SEXP deltaSEXP) {
29 | BEGIN_RCPP
30 |     Rcpp::RObject rcpp_result_gen;
31 |     Rcpp::RNGScope rcpp_rngScope_gen;
32 |     Rcpp::traits::input_parameter< const arma::mat& >::type C(CSEXP);
33 |     Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP);
34 |     Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
35 |     rcpp_result_gen = Rcpp::wrap(fcnnls_sum_to_one(C, A, delta));
36 |     return rcpp_result_gen;
37 | END_RCPP
38 | }
39 | // pairwiseR2
40 | arma::mat pairwiseR2(const arma::mat& X);
41 | RcppExport SEXP _linseed_pairwiseR2(SEXP XSEXP) {
42 | BEGIN_RCPP
43 |     Rcpp::RObject rcpp_result_gen;
44 |     Rcpp::RNGScope rcpp_rngScope_gen;
45 |     Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP);
46 |     rcpp_result_gen = Rcpp::wrap(pairwiseR2(X));
47 |     return rcpp_result_gen;
48 | END_RCPP
49 | }
50 | 
51 | static const R_CallMethodDef CallEntries[] = {
52 |     {"_linseed_fcnnls_c", (DL_FUNC) &_linseed_fcnnls_c, 2},
53 |     {"_linseed_fcnnls_sum_to_one", (DL_FUNC) &_linseed_fcnnls_sum_to_one, 3},
54 |     {"_linseed_pairwiseR2", (DL_FUNC) &_linseed_pairwiseR2, 1},
55 |     {NULL, NULL, 0}
56 | };
57 | 
58 | RcppExport void R_init_linseed(DllInfo *dll) {
59 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
60 |     R_useDynamicSymbols(dll, FALSE);
61 | }
62 | 


--------------------------------------------------------------------------------
/src/fcnnls.cpp:
--------------------------------------------------------------------------------
  1 | #include "fcnnls.h"
  2 | 
  3 | arma::mat cssls(const arma::mat& CtC,
  4 |                 const arma::mat& CtA,
  5 |                 bool pseudo) {
  6 |     mat K(CtC.n_rows, CtC.n_cols);
  7 |     K.fill(0);
  8 |     if (pseudo) {
  9 |         K = pinv(CtC) * CtA;
 10 |     } else {
 11 |         K = solve(CtC, diagmat(ones(CtC.n_rows))) * CtA;
 12 |     }
 13 |     return K;
 14 | }
 15 | 
 16 | arma::mat cssls(const arma::mat& CtC,
 17 |                 const arma::mat& CtA,
 18 |                 const arma::umat& Pset,
 19 |                 bool pseudo) {
 20 |     mat K(CtA.n_rows, CtA.n_cols);
 21 |     K.fill(0);
 22 | 
 23 |     int l = Pset.n_rows;
 24 |     int RHSc = Pset.n_cols;
 25 |     rowvec lvec(l);
 26 |     for (int i = 1; i <= l; i++) {
 27 |         lvec[i - 1] = 1 << (l - i);
 28 |     }
 29 | 
 30 |     mat codedPset = lvec * Pset;
 31 |     vec sortedPset = arma::sort(codedPset.row(0).t());
 32 |     uvec sortedEset = sort_index(codedPset.row(0).t());
 33 |     vec breaks = diff(sortedPset);
 34 |     uvec bbreaks = find(breaks) + 1;
 35 |     uvec breakIdx(bbreaks.n_elem + 2);
 36 |     breakIdx[0] = 0;
 37 |     if (bbreaks.n_elem > 0) {
 38 |         breakIdx.subvec(1, bbreaks.n_elem) = bbreaks;
 39 |     }
 40 | 
 41 |     breakIdx[bbreaks.n_elem + 1] = RHSc;
 42 | 
 43 |     for (unsigned int k = 0; k < breakIdx.n_elem - 1; k++) {
 44 |         uvec cols2solve = sortedEset.subvec(breakIdx[k], breakIdx[k + 1] - 1);
 45 |         uvec vars = find(Pset.col(sortedEset[breakIdx[k]]));
 46 |         if (vars.n_elem == 0) break;
 47 |         //K.submat(vars, cols2solve) = solve(CtC.submat(vars, vars), diagmat(ones(vars.n_elem))) * CtA.submat(vars, cols2solve);
 48 |         K.submat(vars, cols2solve) = pinv(CtC.submat(vars, vars)) * CtA.submat(vars, cols2solve);
 49 |     }
 50 |     return K;
 51 | }
 52 | 
 53 | arma::mat fcnnls_c(const arma::mat& C,
 54 |                    const arma::mat& A) {
 55 |     int l_var = C.n_cols;
 56 |     if (C.n_rows != A.n_rows) {
 57 |         throw Rcpp::exception("Wrong argument sizes", "fcnnls.cpp", 4);
 58 |     }
 59 |     int pRHS = A.n_cols;
 60 |     mat W = zeros(l_var, pRHS);
 61 | 
 62 |     int iter = 0;
 63 |     int max_iter = 3 * l_var;
 64 | 
 65 |     mat CtC = C.t() * C;
 66 |     mat CtA = C.t() * A;
 67 |     mat K = cssls(CtC, CtA, false);
 68 |     umat Pset = K > zeros(K.n_rows, K.n_cols);
 69 |     K.elem(find(K <= zeros(K.n_rows, K.n_cols))).zeros();
 70 |     mat D = K;
 71 |     uvec Fset = find(any(Pset < ones(Pset.n_rows, Pset.n_cols)));
 72 |     while (Fset.n_elem > 0) {
 73 | 
 74 |         K.cols(Fset) = cssls(CtC, CtA.cols(Fset), Pset.cols(Fset), false);
 75 |         uvec Hset = Fset.elem( find(any(K.cols(Fset) < zeros(K.n_rows, Fset.n_elem))) );
 76 | 
 77 |         if (Hset.n_elem > 0) {
 78 |             int nHset = Hset.n_elem;
 79 |             mat alpha(l_var, nHset, fill::zeros);
 80 |             while (Hset.n_elem > 0 && iter < max_iter) {
 81 |                 iter++;
 82 |                 alpha.cols(0, nHset - 1).fill(datum::inf);
 83 |                 mat twos(Pset.n_rows, Hset.n_elem);
 84 |                 twos.fill(2);
 85 |                 uvec ij = find(Pset.cols(Hset) + (K.cols(Hset) < 0) == twos);
 86 |                 uvec is(ij.n_elem), js(ij.n_elem);
 87 |                 for (unsigned int i = 0; i < ij.n_elem; i++) {
 88 |                     is[i] = ij[i] % Pset.n_rows;
 89 |                     js[i] = ij[i] / Pset.n_rows;
 90 |                 }
 91 | 
 92 |                 uvec hIdx = ij;
 93 |                 uvec negIdx = Hset.elem(js) * l_var + is;
 94 | 
 95 |                 alpha.elem(hIdx) = D.elem(negIdx) / (D.elem(negIdx) - K.elem(negIdx));
 96 | 
 97 |                 urowvec minIdx = index_min(alpha.cols(0, nHset - 1));
 98 |                 rowvec alphaMin = arma::min(alpha.cols(0, nHset - 1));
 99 |                 for (int i = 0; i < l_var; i ++) {
100 |                     alpha.cols(0, nHset-1).row(i) = alphaMin;
101 |                 }
102 | 
103 | 
104 |                 D.cols(Hset) = D.cols(Hset) - alpha.cols(0, nHset - 1) % (D.cols(Hset) - K.cols(Hset));
105 |                 uvec idx2zero = Hset * l_var + minIdx.t();
106 |                 D.elem(idx2zero).zeros();
107 |                 Pset.elem(idx2zero).zeros();
108 |                 K.cols(Hset) = cssls(CtC, CtA.cols(Hset), Pset.cols(Hset), false);
109 |                 Hset = find(any(K < zeros(K.n_rows, K.n_cols)));
110 |                 nHset = Hset.n_elem;
111 |             }
112 |         }
113 | 
114 |         // if (iter == max_iter) {
115 | 
116 |         //     return K;
117 |         // }
118 |         W.cols(Fset) = CtA.cols(Fset) - CtC * K.cols(Fset);
119 |         mat tmp = (ones(l_var, Fset.n_elem) - Pset.cols(Fset)) % W.cols(Fset);
120 |         uvec Jset = find(all(tmp <= zeros(l_var, Fset.n_elem)));
121 |         uvec FsetDiff(Fset.n_elem - Jset.n_elem);
122 |         uvec toDiff = Fset.elem(Jset);
123 |         std::set_difference(Fset.begin(), Fset.end(),
124 |                             toDiff.begin(), toDiff.end(),
125 |                             FsetDiff.begin());
126 | 
127 |         Fset = FsetDiff;
128 |         if (Fset.n_elem > 0) {
129 |             tmp = (ones(l_var, Fset.n_elem) - Pset.cols(Fset)) % W.cols(Fset);
130 |             urowvec mxidx = index_max(tmp);
131 |             Pset.elem(Fset * l_var + mxidx.t()).ones();
132 |             // Pset = K >= zeros(K.n_rows, K.n_cols);
133 |             D.cols(Fset) = K.cols(Fset);
134 |         }
135 | 
136 |     }
137 |     return K;
138 | 
139 | }
140 | 
141 | 
142 | arma::mat fcnnls_sum_to_one(const arma::mat& C,
143 |                             const arma::mat& A,
144 |                             double delta = 1) {
145 |     mat C_copy(C.n_rows + 1, C.n_cols);
146 |     mat A_copy(A.n_rows + 1, A.n_cols);
147 |     C_copy.rows(0, C.n_rows - 1) = C * delta;
148 |     A_copy.rows(0, A.n_rows - 1) = A * delta;
149 |     C_copy.row(C.n_rows).fill(1);
150 |     A_copy.row(A.n_rows).fill(1);
151 |     return(fcnnls_c(C_copy, A_copy));
152 | }
153 | 


--------------------------------------------------------------------------------
/src/fcnnls.h:
--------------------------------------------------------------------------------
 1 | // [[Rcpp::depends(BH)]]
 2 | // [[Rcpp::depends(RcppArmadillo)]]
 3 | #include <RcppArmadillo.h>
 4 | #include <random>
 5 | #include <math.h>
 6 | #include <algorithm>
 7 | #include <boost/math/distributions/students_t.hpp>
 8 | #include <boost/math/distributions/fisher_f.hpp>
 9 | using namespace boost::math;
10 | using namespace Rcpp;
11 | using namespace arma;
12 | 
13 | arma::mat cssls(const arma::mat& CtC, const arma::mat& CtA, bool pseudo);
14 | arma::mat cssls(const arma::mat& CtC, const arma::mat& CtA, const arma::umat& Pset, bool pseudo);
15 | // [[Rcpp::export]]
16 | arma::mat fcnnls_c(const arma::mat& C, const arma::mat& A);
17 | // [[Rcpp::export]]
18 | arma::mat fcnnls_sum_to_one(const arma::mat& C, const arma::mat& A, double delta);
19 | 


--------------------------------------------------------------------------------
/src/pairwiseR2.cpp:
--------------------------------------------------------------------------------
 1 | // You should have received a copy of the GNU General Public License
 2 | // along with RcppArmadillo.  If not, see <http://www.gnu.org/licenses/>.
 3 | // [[Rcpp::depends(BH)]]
 4 | // [[Rcpp::depends(RcppArmadillo)]]
 5 | #include <RcppArmadillo.h>
 6 | #include <math.h>
 7 | 
 8 | using namespace Rcpp;
 9 | using namespace arma;
10 | 
11 | // [[Rcpp::export]]
12 | arma::mat pairwiseR2(const arma::mat& X) {
13 |   int genes_count = X.n_cols;
14 | 
15 |   mat Y = square(X);
16 |   rowvec Ysum = sum(Y);
17 |   mat Z = 2 * X.t() * X;
18 |   
19 |   mat r2(genes_count, genes_count, fill::zeros);
20 |   vec tsss(genes_count);
21 |   
22 |   for (int i = 0; i < genes_count; i++) {
23 |     double mean_x_intercept = mean(X.col(i));
24 |     vec x_int = X.col(i) - mean_x_intercept;
25 |     double tss = dot(x_int, x_int);
26 |     tsss[i] = tss;
27 |   }
28 |   
29 |   for (int i = 0; i < genes_count; i++) {
30 |     vec rss = Ysum[i] + (Ysum - Z.row(i)).t();
31 |     r2.col(i) += (ones(genes_count) - rss / tsss) / 2.0;
32 |     r2.row(i) += ((ones(genes_count) - rss / tsss) / 2.0).t();
33 |   }
34 |   
35 |   return r2;
36 | }
37 | 
38 | 


--------------------------------------------------------------------------------
/vignettes/linseedTutorial.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Linseed Tutorial"
 3 | author: "Konstantin Zaitsev"
 4 | date: "`r Sys.Date()`"
 5 | output: html_document
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Linseed Tutorial}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   \usepackage[utf8]{inputenc}
10 | ---
11 | # Linseed tutorial
12 | 
13 | Linseed (LINear Subspace identification for gene Expresion Deconvolution) is a package that provides tools and interface to explore gene expression datasets in linear space.
14 | 
15 | ## Installing the package
16 | 
17 | You can install the package using `devtools::install_github`:
18 | 
19 | ```{r eval=FALSE}
20 | devtools::install_github("ctlab/linseed")
21 | ```
22 | 
23 | Current build was tested using rhub: 
24 | 
25 | ## Getting started with linseed
26 | 
27 | To start working with gene expression data, we need to create a new LinseedObject, in this tutorial we will use GSE19830 (mixture of Liver, Brain and Lung), we will take only mixed samples (10-42) and will take only 10000 most expressed genes.
28 | 
29 | ```{r warning=FALSE, message=FALSE}
30 | library(linseed)
31 | lo <- LinseedObject$new("GSE19830", samples=10:42, topGenes=10000)
32 | ```
33 | 
34 | ## Coolinearity networks
35 | 
36 | To build a coolinearity network we first have to evaluate all pairwise collinearity coefficients, all pairwise spearman correlation and then run significance test which will calculate p value for each each by shuffling network weights randomly.
37 | 
38 | ```{r networks, warning=FALSE}
39 | lo$calculatePairwiseLinearity()
40 | lo$calculateSpearmanCorrelation()
41 | lo$calculateSignificanceLevel(100)
42 | lo$significancePlot(0.01)
43 | 
44 | lo$filterDatasetByPval(0.01)
45 | lo$svdPlot()
46 | 
47 | ```
48 | 
49 | To visualiaze what left after filtering we can call projection plot from our object. But we have to project the data to the simplex first.
50 | 
51 | ```{r visi, warning=FALSE, message=FALSE}
52 | lo$setCellTypeNumber(3)
53 | lo$project("full") # projecting full dataset
54 | lo$projectionPlot(color="filtered")
55 | ```
56 | 
57 | 
58 | ## Deconvolution
59 | 
60 | To deconvolve the dataset, you first have to project (full or filtered dataset) to the simplex, and then find corners of it.
61 | 
62 | ```{r deconvolution}
63 | data <- get("filtered", lo$exp)
64 | Y <- t(data$norm)
65 | if (is.null(lo$cellTypeNumber)) stop("Set cell type number first")
66 | lo$project("filtered")
67 | lo$smartSearchCorners(dataset="filtered", error="norm")
68 | lo$deconvolveByEndpoints()
69 | plotProportions(lo$proportions)
70 | ```
71 | 
72 | We can also use tSNE to haave an idea of how data looks like when dimensionally reduced.
73 | 
74 | ```{r rtsne}
75 | # lets select 100 genes closest to 
76 | lo$selectGenes(100)
77 | lo$tsnePlot()
78 | 
79 | ```
80 | 
81 | To compare with actual proportions you can use `dotPlotProportions` function
82 | 
83 | ```{r proportions}
84 | data("proportionsLiverBrainLung")
85 | dotPlotPropotions(lo$proportions, proportionsLiverBrainLung[, 10:42], guess=TRUE)
86 | 
87 | ```
88 | 


--------------------------------------------------------------------------------