├── .github
├── .gitignore
└── workflows
│ └── pkgdown.yaml
├── vignettes
├── .gitignore
└── APL.Rmd
├── tests
├── testthat.R
└── testthat
│ ├── testdata
│ ├── smoke.rda
│ ├── countries.rda
│ ├── smoke_scRNAseq.rda
│ ├── AP_coordinates
│ │ ├── example3
│ │ │ ├── genes_order.txt
│ │ │ ├── AP_coordinates_samples.txt
│ │ │ ├── AP_coordinates_genes.txt
│ │ │ └── gene_ranking.txt
│ │ ├── example1
│ │ │ ├── AP_coordinates_samples.txt
│ │ │ ├── genes_order.txt
│ │ │ ├── AP_coordinates_genes.txt
│ │ │ └── gene_ranking.txt
│ │ ├── example2
│ │ │ ├── AP_coordinates_samples.txt
│ │ │ ├── genes_order.txt
│ │ │ ├── AP_coordinates_genes.txt
│ │ │ └── gene_ranking.txt
│ │ ├── notes.txt
│ │ └── input_data.txt
│ └── input_data.tsv
│ ├── test-convert.R
│ ├── test-CA.R
│ └── test-apl.R
├── NEWS.md
├── _pkgdown.yml
├── man
├── figures
│ └── fig_AP.png
├── scree_plot.Rd
├── is.empty.Rd
├── rm_zeros.Rd
├── pipe.Rd
├── comp_ft_residuals.Rd
├── inertia_rows.Rd
├── subset_dims.Rd
├── recompute.Rd
├── as.list-cacomp-method.Rd
├── cacomp_names.Rd
├── clip_residuals.Rd
├── cacomp_slot.Rd
├── show.cacomp.Rd
├── check_cacomp.Rd
├── random_direction_cutoff.Rd
├── calc_residuals.Rd
├── plot_enrichment.Rd
├── comp_std_residuals.Rd
├── var_rows.Rd
├── comp_NB_residuals.Rd
├── permutation_cutoff.Rd
├── apl_ggplot.Rd
├── apl_plotly.Rd
├── ca_coords.Rd
├── elbow_method.Rd
├── apl_coords.Rd
├── apl_topGO.Rd
├── apl.Rd
├── run_cacomp.Rd
├── ca_3Dplot.Rd
├── cacomp-class.Rd
├── apl_score.Rd
├── as.cacomp.Rd
├── ca_biplot.Rd
├── pick_dims.Rd
├── cacomp.Rd
└── runAPL.Rd
├── .lintr
├── .gitignore
├── .Rbuildignore
├── R
├── utils-pipe.R
├── import_packages.R
├── generic_methods.R
├── convert.R
└── constructor.R
├── NAMESPACE
├── DESCRIPTION
└── README.md
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(APL)
3 | test_check("APL")
4 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # Changes in version 0.99.0 (2021-12-06)
2 | + Submitted to Bioconductor
3 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://vingronlab.github.io/APL/
2 | template:
3 | bootstrap: 5
4 |
5 |
--------------------------------------------------------------------------------
/man/figures/fig_AP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/man/figures/fig_AP.png
--------------------------------------------------------------------------------
/tests/testthat/testdata/smoke.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/tests/testthat/testdata/smoke.rda
--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 | indentation_linter(indent = 4L),
3 | commented_code_linter = NULL
4 | )
5 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/countries.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/tests/testthat/testdata/countries.rda
--------------------------------------------------------------------------------
/tests/testthat/testdata/smoke_scRNAseq.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/tests/testthat/testdata/smoke_scRNAseq.rda
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .Rproj
6 | .ipynb_checkpoints
7 | .ipynb*
8 | .ipynb*/
9 | APL.Rproj
10 | /doc/
11 | /Meta/
12 | inst/doc
13 | docs
14 | /renv/
15 | renv.lock
16 | .Rprofile
17 | .editorconfig
18 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^renv$
2 | ^renv\.lock$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ^LICENSE\.md$
6 | ^.git
7 | ^.git/*
8 | ^.Rhistory
9 | ^.gitignore
10 | ^doc$
11 | ^docs$
12 | ^Meta$
13 | ^README\.Rmd$
14 | ^_pkgdown\.yml$
15 | ^pkgdown$
16 | ^\.github$
17 | ^.lintr
18 | ^renv
19 | ^renv.lock
20 | .editorconfig
--------------------------------------------------------------------------------
/man/scree_plot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{scree_plot}
4 | \alias{scree_plot}
5 | \title{Scree Plot}
6 | \usage{
7 | scree_plot(df)
8 | }
9 | \arguments{
10 | \item{df}{A data frame with columns "dims" and "inertia".}
11 | }
12 | \value{
13 | Returns a ggplot object.
14 | }
15 | \description{
16 | Plots a scree plot.
17 | }
18 |
--------------------------------------------------------------------------------
/man/is.empty.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \name{is.empty}
4 | \alias{is.empty}
5 | \title{Helper function to check if object is empty.}
6 | \usage{
7 | is.empty(x)
8 | }
9 | \arguments{
10 | \item{x}{object}
11 | }
12 | \value{
13 | TRUE if x has length 0 and is not NULL. FALSE otherwise
14 | }
15 | \description{
16 | Helper function to check if object is empty.
17 | }
18 |
--------------------------------------------------------------------------------
/man/rm_zeros.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{rm_zeros}
4 | \alias{rm_zeros}
5 | \title{removes 0-only rows and columns in a matrix.}
6 | \usage{
7 | rm_zeros(obj)
8 | }
9 | \arguments{
10 | \item{obj}{A matrix.}
11 | }
12 | \value{
13 | Input matrix with rows & columns consisting of only 0 removed.
14 | }
15 | \description{
16 | removes 0-only rows and columns in a matrix.
17 | }
18 |
--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
1 | #' Pipe operator
2 | #'
3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #'
5 | #' @name %>%
6 | #' @rdname pipe
7 | #' @keywords internal
8 | #' @export
9 | #' @param lhs A value or the magrittr placeholder.
10 | #' @param rhs A function call using the magrittr semantics.
11 | #' @return \code{magrittr::\link[magrittr:pipe]{\%>\%}}
12 | #' @importFrom magrittr %>%
13 | #' @usage lhs \%>\% rhs
14 | #' @examples
15 | #' x <- 1:100
16 | #' x %>% head()
17 | NULL
18 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/genes_order.txt:
--------------------------------------------------------------------------------
1 | "11" "Burundi"
2 | "28" "Central African Republic"
3 | "37" "Congo, Dem. Rep."
4 | "14" "Burkina Faso"
5 | "26" "Bhutan"
6 | "19" "Bosnia and Herzegovina"
7 | "1" "Afghanistan"
8 | "15" "Bangladesh"
9 | "36" "Cameroon"
10 | "25" "Brunei Darussalam"
11 | "6" "Argentina"
12 | "12" "Belgium"
13 | "29" "Canada"
14 | "2" "Angola"
15 | "17" "Bahrain"
16 | "24" "Barbados"
17 | "5" "United Arab Emirates"
18 | "8" "Australia"
19 | "18" "Bahamas, The"
20 | "13" "Benin"
21 |
--------------------------------------------------------------------------------
/R/import_packages.R:
--------------------------------------------------------------------------------
1 |
2 | #' @import methods
3 | #' @import SummarizedExperiment org.Hs.eg.db org.Mm.eg.db
4 | #' @importFrom stats as.formula na.omit quantile runif var
5 | #' @importFrom utils head setTxtProgressBar txtProgressBar
6 | #' @importFrom ggplot2 ggplot aes geom_point guide_colorbar
7 | #' @importFrom topGO showSigOfNodes score
8 | #' @importFrom viridisLite viridis
9 | #' @importFrom rlang .data
10 | #' @importFrom RSpectra svds
11 | #' @importClassesFrom SeuratObject Seurat
12 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment
13 | NULL
14 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-pipe.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \arguments{
10 | \item{lhs}{A value or the magrittr placeholder.}
11 |
12 | \item{rhs}{A function call using the magrittr semantics.}
13 | }
14 | \value{
15 | \code{magrittr::\link[magrittr:pipe]{\%>\%}}
16 | }
17 | \description{
18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
19 | }
20 | \examples{
21 | x <- 1:100
22 | x \%>\% head()
23 | }
24 | \keyword{internal}
25 |
--------------------------------------------------------------------------------
/man/comp_ft_residuals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{comp_ft_residuals}
4 | \alias{comp_ft_residuals}
5 | \title{Compute Freeman-Tukey residuals}
6 | \usage{
7 | comp_ft_residuals(mat)
8 | }
9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 | }
13 | \value{
14 | A named list. The elements are:
15 | \itemize{
16 | \item "S": standardized residual matrix.
17 | \item "tot": grand total of the original matrix.
18 | \item "rowm": row masses.
19 | \item "colm": column masses.
20 | }
21 | }
22 | \description{
23 | Computes Freeman-Tukey residuals
24 | }
25 |
--------------------------------------------------------------------------------
/man/inertia_rows.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{inertia_rows}
4 | \alias{inertia_rows}
5 | \title{Find most variable rows}
6 | \usage{
7 | inertia_rows(mat, top = 5000, ...)
8 | }
9 | \arguments{
10 | \item{mat}{A matrix with genes in rows and cells in columns.}
11 |
12 | \item{top}{Number of genes to select.}
13 |
14 | \item{...}{Further arguments for `comp_std_residuals`}
15 | }
16 | \value{
17 | Returns a matrix, which consists of the top variable rows of mat.
18 | }
19 | \description{
20 | Calculates the contributing inertia of each row, which is defined as the sum of squares of its Pearson residuals, and selects the
21 | rows with the largest inertias, e.g. 5,000.
22 | }
23 |
--------------------------------------------------------------------------------
/man/subset_dims.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{subset_dims}
4 | \alias{subset_dims}
5 | \title{Subset dimensions of a caobj}
6 | \usage{
7 | subset_dims(caobj, dims)
8 | }
9 | \arguments{
10 | \item{caobj}{A caobj.}
11 |
12 | \item{dims}{Integer. Number of dimensions.}
13 | }
14 | \value{
15 | Returns caobj.
16 | }
17 | \description{
18 | Subsets the dimensions according to user input.
19 | }
20 | \examples{
21 | # Simulate scRNAseq data.
22 | cnts <- data.frame(cell_1 = rpois(10, 5),
23 | cell_2 = rpois(10, 10),
24 | cell_3 = rpois(10, 20))
25 | rownames(cnts) <- paste0("gene_", 1:10)
26 | cnts <- as.matrix(cnts)
27 |
28 | # Run correspondence analysis.
29 | ca <- cacomp(cnts)
30 | ca <- subset_dims(ca, 2)
31 | }
32 |
--------------------------------------------------------------------------------
/man/recompute.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/convert.R
3 | \name{recompute}
4 | \alias{recompute}
5 | \title{Recompute missing values of cacomp object.}
6 | \usage{
7 | recompute(calist, mat, ...)
8 | }
9 | \arguments{
10 | \item{calist}{A list with std_coords_cols, the prin_coords_rows and D.}
11 |
12 | \item{mat}{A matrix from which the cacomp object is derived.}
13 |
14 | \item{...}{Further arguments forwarded to cacomp.}
15 | }
16 | \value{
17 | A cacomp object with additional calculated row_masses, col_masses,
18 | std_coords_rows, U and V.
19 | }
20 | \description{
21 | The caobj needs to have the std_coords_cols, the prin_coords_rows and D
22 | calculated. From this the remainder will be calculated.
23 | Future updates might extend this functionality.
24 | }
25 |
--------------------------------------------------------------------------------
/man/as.list-cacomp-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/generic_methods.R
3 | \name{as.list,cacomp-method}
4 | \alias{as.list,cacomp-method}
5 | \title{Convert cacomp object to list.}
6 | \usage{
7 | \S4method{as.list}{cacomp}(x)
8 | }
9 | \arguments{
10 | \item{x}{A cacomp object.}
11 | }
12 | \value{
13 | The cacomp object converted to a list.
14 | }
15 | \description{
16 | Convert cacomp object to list.
17 | }
18 | \examples{
19 |
20 | # Simulate counts
21 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
22 | x = sample(1:100, 50, replace = TRUE))
23 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
24 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
25 |
26 | # Run correspondence analysis
27 | ca <- cacomp(obj = cnts, princ_coords = 3)
28 | ca_list <- as.list(ca)
29 | }
30 |
--------------------------------------------------------------------------------
/man/cacomp_names.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \name{cacomp_names}
4 | \alias{cacomp_names}
5 | \title{Prints slot names of cacomp object}
6 | \usage{
7 | cacomp_names(caobj)
8 | }
9 | \arguments{
10 | \item{caobj}{a cacomp object}
11 | }
12 | \value{
13 | Prints slot names of cacomp object
14 | }
15 | \description{
16 | Prints slot names of cacomp object
17 | }
18 | \examples{
19 | # Simulate scRNAseq data.
20 | cnts <- data.frame(cell_1 = rpois(10, 5),
21 | cell_2 = rpois(10, 10),
22 | cell_3 = rpois(10, 20))
23 | rownames(cnts) <- paste0("gene_", 1:10)
24 | cnts <- as.matrix(cnts)
25 |
26 | # Run correspondence analysis.
27 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
28 |
29 | # show slot names:
30 | cacomp_names(ca)
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/clip_residuals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{clip_residuals}
4 | \alias{clip_residuals}
5 | \title{Perform clipping of residuals}
6 | \usage{
7 | clip_residuals(S, cutoff = sqrt(ncol(S)))
8 | }
9 | \arguments{
10 | \item{S}{Matrix of residuals.}
11 |
12 | \item{cutoff}{Value above/below which clipping should happen.}
13 | }
14 | \value{
15 | Matrix of clipped residuals.
16 | }
17 | \description{
18 | Clips Pearson or negative-binomial residuals above or below a determined
19 | value. For Pearson (Poisson) residuals the cutoff is set to 1 by default, for NB to
20 | sqrt(n).
21 | }
22 | \references{
23 | Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for
24 | normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021).
25 | https://doi.org/10.1186/s13059-021-02451-7
26 | }
27 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/AP_coordinates_samples.txt:
--------------------------------------------------------------------------------
1 | "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20"
2 | "1" -0.497520162742696 -0.497520162756415 -0.497520162738791 -0.497520162737093 -0.497520162736328 1.63637085711038 1.61927262333547 1.47122333782444 -0.497520162737096 1.1890857588906 -0.49752016274269 4.133891377865 -0.497520162738793 -0.49752016273709 -0.497520162736324 -0.497520162742692 -0.497520162756414 -0.49752016273879 -0.497520162737097 -0.497520162736327
3 | "2" 8.25962218436963 7.47376630424699 8.157375687224 7.26316523278324 7.8674906965723 4.21515861914502 4.20135613303697 4.07689426627507 4.2804018285755 3.81263706429037 4.38511077992584 5.3344076850863 4.41474200626891 6.81688703644307 4.61353926316727 3.13790937563177 2.74310982704723 3.2117318008343 2.80911936673341 3.33629235548098
4 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/AP_coordinates_samples.txt:
--------------------------------------------------------------------------------
1 | "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20"
2 | "1" -0.819303761786799 -0.524462196212651 -0.690540545321318 -0.601573222546409 -0.295026840676528 1.49053923705725 0.676946241142394 1.80151507767079 0.99509136392699 2.11134102092629 -0.155429394800191 1.62784456619983 -0.437020255007118 1.41419203571708 -0.820028450041812 -0.556025131641147 -0.505372951386226 -0.691354836861871 -0.633727014767599 -1.03355840189376
3 | "2" 3.38201692874111 3.5330668253809 3.29387869717513 3.60420157310076 3.10443939196198 1.40095669441291 2.91822188207523 1.21287091464517 2.28344185385098 1.00775719856003 1.67522416950889 3.39087497643017 1.77453373365817 3.26728018371152 1.3720051021511 0.37867034651444 1.079639384027 0.46422153612581 1.02304571831716 0.77749296941889
4 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/AP_coordinates_samples.txt:
--------------------------------------------------------------------------------
1 | "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20"
2 | "1" -0.860825655533233 -0.195215644898544 -1.04961066415878 -0.702979515229828 -0.665958384755548 0.890576563883286 0.576142490585286 1.35906065068609 0.970463082293897 1.96830121597133 0.0534435764244335 2.44349727159309 -0.354694129476072 1.90549741395287 -0.96235062777159 -0.434932370721346 -0.722789419983387 -0.587746131595263 -0.812890461978997 -1.04575882446561
3 | "2" 3.66488214776354 3.50435250102428 3.74970406575467 3.43908812256832 3.53122953340069 1.9670830311872 2.20724230116108 1.16326507594813 1.55545115496683 0.561458398050811 1.47478062061375 3.95575532693773 1.37741852892094 3.0984596705426 1.18476841980351 1.47948801613432 1.10965278138218 1.39956763662195 0.949002349607891 1.01565019749943
4 |
--------------------------------------------------------------------------------
/man/cacomp_slot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \name{cacomp_slot}
4 | \alias{cacomp_slot}
5 | \title{Access slots in a cacomp object}
6 | \usage{
7 | cacomp_slot(caobj, slot)
8 | }
9 | \arguments{
10 | \item{caobj}{a cacomp object}
11 |
12 | \item{slot}{slot to return}
13 | }
14 | \value{
15 | Chosen slot of the cacomp object
16 | }
17 | \description{
18 | Access slots in a cacomp object
19 | }
20 | \examples{
21 | # Simulate scRNAseq data.
22 | cnts <- data.frame(cell_1 = rpois(10, 5),
23 | cell_2 = rpois(10, 10),
24 | cell_3 = rpois(10, 20))
25 | rownames(cnts) <- paste0("gene_", 1:10)
26 | cnts <- as.matrix(cnts)
27 |
28 | # Run correspondence analysis.
29 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
30 |
31 | # access left singular vectors
32 | cacomp_slot(ca, "U")
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/notes.txt:
--------------------------------------------------------------------------------
1 | ## Input data: 39 rows x 20 columns
2 |
3 | ## Analyses:
4 |
5 | #1. 39 genes and 19 dimensions
6 | #2. 39 genes and 4 dimensions
7 | #3. 20 genes and 4 dimensions
8 |
9 | # Sample IDs which I used to compute the AP:
10 | 6, 7, 8, 10, 12 (counting of samples starts from 1)
11 |
12 | # Output:
13 |
14 | - "AP.jpg" - Association Plot
15 | - "2D.jpg" - 2D correspondence analysis
16 | - "AP_coordinates_genes.txt" - coordinates of genes in the AP - the order of genes is changed, see: genes_order.txt
17 | - "AP_coordinates_samples.txt" - coordinates of samples in the AP
18 | - "genes_order.txt" - the order of genes in the AP_coordinates_genes.txt file
19 | - "gene_ranking.txt" - gene ranking according to the AP and Salpha scores (calculated based on 10 permutations) #There are also gene coordinates from AP, so probably this will be the best file to use for you
20 |
21 |
--------------------------------------------------------------------------------
/man/show.cacomp.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/generic_methods.R
3 | \name{show.cacomp}
4 | \alias{show.cacomp}
5 | \alias{show,cacomp-method}
6 | \title{Prints cacomp object}
7 | \usage{
8 | show.cacomp(object)
9 |
10 | \S4method{show}{cacomp}(object)
11 | }
12 | \arguments{
13 | \item{object}{cacomp object to print}
14 | }
15 | \value{
16 | prints summary information about cacomp object.
17 | }
18 | \description{
19 | Provides more user friendly printing of cacomp objects.
20 | }
21 | \examples{
22 | # Simulate scRNAseq data.
23 | cnts <- data.frame(cell_1 = rpois(10, 5),
24 | cell_2 = rpois(10, 10),
25 | cell_3 = rpois(10, 20))
26 | rownames(cnts) <- paste0("gene_", 1:10)
27 | cnts <- as.matrix(cnts)
28 |
29 | # Run correspondence analysis.
30 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
31 |
32 | ca
33 | }
34 |
--------------------------------------------------------------------------------
/man/check_cacomp.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \name{check_cacomp}
4 | \alias{check_cacomp}
5 | \title{Check if cacomp object was correctly created.}
6 | \usage{
7 | check_cacomp(object)
8 | }
9 | \arguments{
10 | \item{object}{A cacomp object.}
11 | }
12 | \value{
13 | TRUE if it is a valid cacomp object. FALSE otherwise.
14 | }
15 | \description{
16 | Checks if the slots in a cacomp object are of the correct size
17 | and whether they are coherent.
18 | }
19 | \examples{
20 | # Simulate scRNAseq data.
21 | cnts <- data.frame(cell_1 = rpois(10, 5),
22 | cell_2 = rpois(10, 10),
23 | cell_3 = rpois(10, 20))
24 | rownames(cnts) <- paste0("gene_", 1:10)
25 | cnts <- as.matrix(cnts)
26 |
27 | # Run correspondence analysis.
28 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
29 |
30 | check_cacomp(ca)
31 | }
32 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/genes_order.txt:
--------------------------------------------------------------------------------
1 | "11" "Burundi"
2 | "28" "Central African Republic"
3 | "37" "Congo, Dem. Rep."
4 | "14" "Burkina Faso"
5 | "26" "Bhutan"
6 | "19" "Bosnia and Herzegovina"
7 | "1" "Afghanistan"
8 | "15" "Bangladesh"
9 | "36" "Cameroon"
10 | "25" "Brunei Darussalam"
11 | "6" "Argentina"
12 | "12" "Belgium"
13 | "29" "Canada"
14 | "2" "Angola"
15 | "17" "Bahrain"
16 | "24" "Barbados"
17 | "5" "United Arab Emirates"
18 | "8" "Australia"
19 | "18" "Bahamas, The"
20 | "13" "Benin"
21 | "32" "Channel Islands"
22 | "31" "Switzerland"
23 | "34" "China"
24 | "9" "Austria"
25 | "35" "Cote d'Ivoire"
26 | "16" "Bulgaria"
27 | "21" "Belize"
28 | "7" "Armenia"
29 | "33" "Chile"
30 | "27" "Botswana"
31 | "3" "Albania"
32 | "39" "Colombia"
33 | "23" "Brazil"
34 | "20" "Belarus"
35 | "38" "Congo, Rep."
36 | "4" "Arab World"
37 | "30" "Central Europe and the Baltics"
38 | "10" "Azerbaijan"
39 | "22" "Bolivia"
40 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/genes_order.txt:
--------------------------------------------------------------------------------
1 | "11" "Burundi"
2 | "28" "Central African Republic"
3 | "37" "Congo, Dem. Rep."
4 | "14" "Burkina Faso"
5 | "26" "Bhutan"
6 | "19" "Bosnia and Herzegovina"
7 | "1" "Afghanistan"
8 | "15" "Bangladesh"
9 | "36" "Cameroon"
10 | "25" "Brunei Darussalam"
11 | "6" "Argentina"
12 | "12" "Belgium"
13 | "29" "Canada"
14 | "2" "Angola"
15 | "17" "Bahrain"
16 | "24" "Barbados"
17 | "5" "United Arab Emirates"
18 | "8" "Australia"
19 | "18" "Bahamas, The"
20 | "13" "Benin"
21 | "32" "Channel Islands"
22 | "31" "Switzerland"
23 | "34" "China"
24 | "9" "Austria"
25 | "35" "Cote d'Ivoire"
26 | "16" "Bulgaria"
27 | "21" "Belize"
28 | "7" "Armenia"
29 | "33" "Chile"
30 | "27" "Botswana"
31 | "3" "Albania"
32 | "39" "Colombia"
33 | "23" "Brazil"
34 | "20" "Belarus"
35 | "38" "Congo, Rep."
36 | "4" "Arab World"
37 | "30" "Central Europe and the Baltics"
38 | "10" "Azerbaijan"
39 | "22" "Bolivia"
40 |
--------------------------------------------------------------------------------
/man/random_direction_cutoff.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{random_direction_cutoff}
4 | \alias{random_direction_cutoff}
5 | \title{Random direction association plot coordinates}
6 | \usage{
7 | random_direction_cutoff(caobj, dims = caobj@dims, reps = 100)
8 | }
9 | \arguments{
10 | \item{caobj}{A "cacomp" object with principal row coordinates and
11 | standardized column coordinates calculated.}
12 |
13 | \item{dims}{Integer. Number of CA dimensions to retain. Needs to be the same
14 | as in caobj!}
15 |
16 | \item{reps}{Integer. Number of permutations to perform.}
17 | }
18 | \value{
19 | List with permuted apl coordinates ("apl_perm") and a list of saved ca
20 | components ("saved_ca") that allow for quick recomputation of the CA results.
21 | For random_direction_cutoff this saved_ca is empty.
22 | }
23 | \description{
24 | Calculates matrix of apl coordinates for random directions
25 | }
26 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/AP_coordinates_genes.txt:
--------------------------------------------------------------------------------
1 | "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20"
2 | "1" 0.948319976974246 0.728090107961784 0.553508142645055 0.607844782388438 0.468089712504795 -0.243828891270645 0.496370015470564 0.451986328251284 0.406323772624122 -0.553295604715508 -0.550113810211719 -0.545073205518662 -0.544067971091048 0.060629751367586 -0.525559867564279 -0.523369968018623 -0.502737028569807 -0.513959001164854 -0.58523132701876 0.366070107530785
3 | "2" 0.878605308254436 0.668167474732221 0.5351916277987 0.276188810756928 0.421166403205462 0.799543631883141 0.428934591316306 0.253593171895212 0.225418771598896 0.378417445994173 0.353331876483964 0.349284132679116 0.339318272163175 0.364354451424873 0.502829969179454 0.307988692363962 0.458179660842991 0.353727220541737 0.250337111778779 0.196005494211658
4 | "3" 1 2 4 3 6 11 5 7 8 19 18 17 16 10 15 14 12 13 20 9
5 |
--------------------------------------------------------------------------------
/man/calc_residuals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{calc_residuals}
4 | \alias{calc_residuals}
5 | \title{Calculate residuals for Correspondence analysis}
6 | \usage{
7 | calc_residuals(mat, residuals = "pearson", clip = FALSE, cutoff = NULL)
8 | }
9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 |
13 | \item{residuals}{character string. Specifies which kind of residuals should
14 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
15 | negative-binomial.}
16 |
17 | \item{clip}{logical. Whether residuals should be clipped if they are
18 | higher/lower than a specified cutoff}
19 |
20 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
21 | -cutoff are clipped to cutoff.}
22 | }
23 | \value{
24 | A named list. The elements are:
25 | \itemize{
26 | \item "S": standardized residual matrix.
27 | \item "tot": grand total of the original matrix.
28 | \item "rowm": row masses.
29 | \item "colm": column masses.
30 | }
31 | }
32 | \description{
33 | \code{calc_residuals} provides optional residuals as the basis for Correspondence
34 | Analysis
35 | }
36 |
--------------------------------------------------------------------------------
/man/plot_enrichment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.R
3 | \name{plot_enrichment}
4 | \alias{plot_enrichment}
5 | \title{Generates plot for results from apl_topGO}
6 | \usage{
7 | plot_enrichment(genenr, ntop = 10)
8 | }
9 | \arguments{
10 | \item{genenr}{data.frame. gene enrichment results table.}
11 |
12 | \item{ntop}{numeric. Number of elements to plot.}
13 | }
14 | \value{
15 | Returns a ggplot plot.
16 | }
17 | \description{
18 | Plots the results from the data frame generated via apl_topGO.
19 | }
20 | \examples{
21 | library(SeuratObject)
22 | set.seed(1234)
23 | cnts <- SeuratObject::LayerData(pbmc_small, assay = "RNA", layer = "counts")
24 | cnts <- as.matrix(cnts)
25 |
26 | # Run CA on example from Seurat
27 |
28 | ca <- cacomp(pbmc_small,
29 | princ_coords = 3,
30 | return_input = FALSE,
31 | assay = "RNA",
32 | slot = "counts")
33 |
34 | grp <- which(Idents(pbmc_small) == 2)
35 | ca <- apl_coords(ca, group = grp)
36 | ca <- apl_score(ca,
37 | mat = cnts)
38 |
39 | enr <- apl_topGO(ca,
40 | ontology = "BP",
41 | organism = "hs")
42 |
43 | plot_enrichment(enr)
44 | }
45 |
--------------------------------------------------------------------------------
/man/comp_std_residuals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{comp_std_residuals}
4 | \alias{comp_std_residuals}
5 | \title{Compute Standardized Residuals}
6 | \usage{
7 | comp_std_residuals(mat, clip = FALSE, cutoff = NULL)
8 | }
9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 |
13 | \item{clip}{logical. Whether residuals should be clipped if they are
14 | higher/lower than a specified cutoff}
15 |
16 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
17 | -cutoff are clipped to cutoff.}
18 | }
19 | \value{
20 | A named list. The elements are:
21 | \itemize{
22 | \item "S": standardized residual matrix.
23 | \item "tot": grand total of the original matrix.
24 | \item "rowm": row masses.
25 | \item "colm": column masses.
26 | }
27 | }
28 | \description{
29 | `comp_std_residuals` computes the standardized residual matrix S based on
30 | the Poisson model,
31 | which is the basis for correspondence analysis and serves
32 | as input for singular value decomposition (SVD).
33 | }
34 | \details{
35 | Calculates the standardized residual matrix S from the proportion matrix P and
36 | the expected values E according to \eqn{S = \frac{(P-E)}{\sqrt{E}}}.
37 | }
38 |
--------------------------------------------------------------------------------
/man/var_rows.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{var_rows}
4 | \alias{var_rows}
5 | \title{Find most variable rows}
6 | \usage{
7 | var_rows(mat, residuals = "pearson", top = 5000, ...)
8 | }
9 | \arguments{
10 | \item{mat}{A numeric matrix. For sequencing data, a count matrix of
11 | gene expression values with genes in rows and samples/cells in columns.
12 | Should contain row and column names.}
13 |
14 | \item{residuals}{character string. Specifies which kind of residuals should
15 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
16 | negative-binomial.}
17 |
18 | \item{top}{Integer. Number of most variable rows to retain. Default 5000.}
19 |
20 | \item{...}{Further arguments for `calc_residuals`.}
21 | }
22 | \value{
23 | Returns a matrix, which consists of the top variable rows of mat.
24 | }
25 | \description{
26 | Calculates the variance of the chi-square component matrix and selects the
27 | rows with the highest variance, e.g. 5,000.
28 | }
29 | \examples{
30 | set.seed(1234)
31 |
32 | # Simulate counts
33 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
34 | x = sample(1:20, 50, replace = TRUE))
35 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
36 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
37 |
38 | # Choose top 5000 most variable genes
39 | cnts <- var_rows(mat = cnts, top = 5000)
40 |
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/man/comp_NB_residuals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{comp_NB_residuals}
4 | \alias{comp_NB_residuals}
5 | \title{Compute Negative-Binomial residuals}
6 | \usage{
7 | comp_NB_residuals(mat, theta = 100, clip = FALSE, cutoff = NULL, freq = TRUE)
8 | }
9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 |
13 | \item{theta}{Overdispersion parameter. By default set to 100 as described in
14 | Lause and Berens, 2021 (see references).}
15 |
16 | \item{clip}{logical. Whether residuals should be clipped if they are
17 | higher/lower than a specified cutoff}
18 |
19 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
20 | -cutoff are clipped to cutoff.}
21 |
22 | \item{freq}{logical. Whether a table of frequencies (as used in CA) should
23 | be used.}
24 | }
25 | \value{
26 | A named list. The elements are:
27 | \itemize{
28 | \item "S": standardized residual matrix.
29 | \item "tot": grand total of the original matrix.
30 | \item "rowm": row masses.
31 | \item "colm": column masses.
32 | }
33 | }
34 | \description{
35 | Computes the residuals based on the negative binomial model. By default a
36 | theta of 100 is used to capture technical variation.
37 | }
38 | \references{
39 | Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for
40 | normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021).
41 | https://doi.org/10.1186/s13059-021-02451-7
42 | }
43 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export("%>%")
4 | export(apl)
5 | export(apl_coords)
6 | export(apl_score)
7 | export(apl_topGO)
8 | export(as.cacomp)
9 | export(ca_3Dplot)
10 | export(ca_biplot)
11 | export(ca_coords)
12 | export(cacomp)
13 | export(cacomp_names)
14 | export(cacomp_slot)
15 | export(check_cacomp)
16 | export(new_cacomp)
17 | export(pick_dims)
18 | export(plot_enrichment)
19 | export(runAPL)
20 | export(run_APL)
21 | export(show.cacomp)
22 | export(subset_dims)
23 | export(var_rows)
24 | exportClasses(cacomp)
25 | exportMethods(as.cacomp)
26 | exportMethods(as.list)
27 | exportMethods(ca_3Dplot)
28 | exportMethods(ca_biplot)
29 | exportMethods(cacomp)
30 | exportMethods(pick_dims)
31 | exportMethods(runAPL)
32 | exportMethods(show)
33 | import(SummarizedExperiment)
34 | import(methods)
35 | import(org.Hs.eg.db)
36 | import(org.Mm.eg.db)
37 | importClassesFrom(SeuratObject,Seurat)
38 | importClassesFrom(SingleCellExperiment,SingleCellExperiment)
39 | importFrom(RSpectra,svds)
40 | importFrom(ggplot2,aes)
41 | importFrom(ggplot2,geom_point)
42 | importFrom(ggplot2,ggplot)
43 | importFrom(ggplot2,guide_colorbar)
44 | importFrom(magrittr,"%>%")
45 | importFrom(rlang,.data)
46 | importFrom(stats,as.formula)
47 | importFrom(stats,na.omit)
48 | importFrom(stats,quantile)
49 | importFrom(stats,runif)
50 | importFrom(stats,var)
51 | importFrom(topGO,score)
52 | importFrom(topGO,showSigOfNodes)
53 | importFrom(utils,head)
54 | importFrom(utils,setTxtProgressBar)
55 | importFrom(utils,txtProgressBar)
56 | importFrom(viridisLite,viridis)
57 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/gene_ranking.txt:
--------------------------------------------------------------------------------
1 | "Gene_name" "x-coordinate" "y-coordinate" "Gene_score"
2 | "13" "Benin" 0.366070107530785 0.196005494211658 -0.535770235162441
3 | "36" "Cameroon" 0.406323772624122 0.225418771598896 -0.630849920112816
4 | "14" "Burkina Faso" 0.607844782388438 0.276188810756928 -0.662926798494357
5 | "15" "Bangladesh" 0.451986328251284 0.253593171895212 -0.714820520538758
6 | "26" "Bhutan" 0.468089712504795 0.421166403205462 -1.46973787199919
7 | "1" "Afghanistan" 0.496370015470564 0.428934591316306 -1.47719975807851
8 | "2" "Angola" 0.060629751367586 0.364354451424873 -1.61580051736753
9 | "18" "Bahamas, The" -0.58523132701876 0.250337111778779 -1.73705672652319
10 | "37" "Congo, Dem. Rep." 0.553508142645055 0.5351916277987 -1.90896059049833
11 | "24" "Barbados" -0.523369968018623 0.307988692363962 -1.94045589708557
12 | "29" "Canada" -0.544067971091048 0.339318272163175 -2.10530434398202
13 | "8" "Australia" -0.513959001164854 0.353727220541737 -2.14149234698424
14 | "12" "Belgium" -0.545073205518662 0.349284132679116 -2.1521634719399
15 | "6" "Argentina" -0.550113810211719 0.353331876483964 -2.17582813956946
16 | "25" "Brunei Darussalam" -0.553295604715508 0.378417445994173 -2.29443107925458
17 | "28" "Central African Republic" 0.728090107961784 0.668167474732221 -2.34621343037817
18 | "5" "United Arab Emirates" -0.502737028569807 0.458179660842991 -2.610866211436
19 | "17" "Bahrain" -0.525559867564279 0.502829969179454 -2.83912946185094
20 | "11" "Burundi" 0.948319976974246 0.878605308254436 -3.0942285009405
21 | "19" "Bosnia and Herzegovina" -0.243828891270645 0.799543631883141 -3.9226069065289
22 |
--------------------------------------------------------------------------------
/man/permutation_cutoff.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{permutation_cutoff}
4 | \alias{permutation_cutoff}
5 | \title{Calculates permuted association plot coordinates}
6 | \usage{
7 | permutation_cutoff(
8 | caobj,
9 | mat,
10 | group = caobj@group,
11 | dims = caobj@dims,
12 | reps = 10,
13 | store_perm = FALSE,
14 | python = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{caobj}{A "cacomp" object with principal row coordinates and
19 | standardized column coordinates calculated.}
20 |
21 | \item{mat}{A numeric matrix. For sequencing data, a count matrix of gene
22 | expression values with genes in rows and samples/cells in columns.
23 | Should contain row and column names.}
24 |
25 | \item{group}{Vector of indices of the columns to calculate centroid/x-axis
26 | direction.}
27 |
28 | \item{dims}{Integer. Number of CA dimensions to retain. Needs to be the same
29 | as in caobj!}
30 |
31 | \item{reps}{Integer. Number of permutations to perform.}
32 |
33 | \item{store_perm}{Logical. Whether permuted data should be stored in the CA
34 | object.}
35 |
36 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
37 | decomposition from the python package torch.
38 | This implementation dramatically speeds up computation compared to `svd()`
39 | in R.}
40 | }
41 | \value{
42 | List with permuted APL coordinates ("apl_perm") and a list of saved CA
43 | components ("saved_ca") that allow for quick recomputation of the CA results.
44 | For random_direction_cutoff, "saved_ca" is empty.
45 | }
46 | \description{
47 | Calculates matrix of apl coordinates when permuting the original data.
48 | }
49 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/AP_coordinates_genes.txt:
--------------------------------------------------------------------------------
1 | "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36" "V37" "V38" "V39"
2 | "1" 0.898371653273147 0.700255713010355 0.525371758267359 0.493483081784703 0.479794305974388 -0.168193760838336 0.457564161368761 0.387472726288257 0.366533471653081 -0.397712533701071 -0.412819211563902 -0.403162104224234 -0.391598006862012 0.160505779017826 -0.397474395358172 -0.366573604244055 -0.388969467656746 -0.376470789433401 -0.41782033093511 0.371710481815176 -0.323128564526515 -0.347327954758956 0.293464674324616 -0.31603451736632 0.295935094579746 -0.180573442611186 -0.101016809262888 0.0297001312294838 -0.220920626787251 -0.109912034733154 0.147679647753861 -0.0808501374404857 -0.199897338107552 -0.132301031889015 0.189674099277819 -0.084231483403504 -0.136815218930296 0.079954993980841 0.0763269801211523
3 | "2" 1.17850669951975 0.922917979522332 0.775111250568603 0.51544431851946 0.61849271694774 0.708710350044667 0.598566346331496 0.542474410165313 0.485011904246648 0.479440965862673 0.42956789512184 0.422269123623575 0.428641657689617 0.476151034497615 0.554450026348923 0.404918219307577 0.490699703280908 0.420725523089288 0.423291048139206 0.349564824608867 0.390771868446349 0.444724590999482 0.349093065452325 0.427284462274932 0.338276212911539 0.445145288122006 0.333203336269937 0.412599888090165 0.310567596541093 0.354448484192017 0.426469719370387 0.313739024288438 0.27950479406108 0.423946651564449 0.32272720918723 0.244146587721156 0.34092997551302 0.281767557818549 0.217444942061676
4 | "3" 1 2 3 4 5 24 6 7 9 36 38 37 34 13 35 31 33 32 39 8 29 30 11 28 10 25 20 17 27 21 14 18 26 22 12 19 23 15 16
5 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/AP_coordinates_genes.txt:
--------------------------------------------------------------------------------
1 | "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36" "V37" "V38" "V39"
2 | "1" 1.17484147858886 0.918251524726067 0.707352964776721 0.659334062031513 0.591111717263373 -0.202596679230447 0.566050278113453 0.515545901239115 0.51122370357559 -0.541832206823089 -0.528375373721855 -0.523093044060456 -0.516913310908317 0.179903144134825 -0.515622126246806 -0.495322587859203 -0.47617120932336 -0.486265584873309 -0.538012266428231 0.46660339350845 -0.415806421614723 -0.46359904965256 0.379581633138169 -0.419212344064118 0.375609622422642 -0.252645734099141 -0.129368951687118 0.0583393083734729 -0.29032497061507 -0.149448409679685 0.192689599496444 -0.108851566434673 -0.255731102123635 -0.176260871721386 0.213539414949807 -0.0826348047718997 -0.175778317867741 0.118643553602509 0.115239525292288
3 | "2" 0.892727782400543 0.696805849208181 0.607676905606093 0.209459617449081 0.500936560797782 0.689328293173901 0.467427881465475 0.382994747014415 0.308047942336173 0.263238202574069 0.251646208394254 0.25318026478414 0.25644196137184 0.45844562830569 0.426290375045695 0.2109356616869 0.380529288289148 0.281257079798341 0.234151041985703 0.166135088099633 0.277531027121789 0.315119718788519 0.232809016489976 0.314917636729549 0.230118794618802 0.394133471715775 0.31724996724156 0.382411643439824 0.239732963941796 0.329957129605282 0.399734524035285 0.268540654620658 0.226333867336864 0.386214534970047 0.265348050842096 0.182134958546392 0.308285912900536 0.236851843036183 0.182489665038921
4 | "3" 1 2 3 4 5 24 6 7 8 39 37 36 35 14 34 33 31 32 38 9 28 30 10 29 11 25 20 17 27 21 13 19 26 23 12 18 22 15 16
5 |
--------------------------------------------------------------------------------
/man/apl_ggplot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{apl_ggplot}
4 | \alias{apl_ggplot}
5 | \title{Plot Association Plot with ggplot}
6 | \usage{
7 | apl_ggplot(
8 | rows,
9 | rows_group = NULL,
10 | cols,
11 | cols_group = NULL,
12 | rows_scored = NULL,
13 | rows_color = "#0066FF",
14 | rows_high_color = "#FF0000",
15 | cols_color = "#601A4A",
16 | cols_high_color = "#EE442F",
17 | score_color = "rainbow",
18 | row_labs = FALSE,
19 | col_labs = FALSE,
20 | show_score = FALSE,
21 | show_cols = FALSE,
22 | show_rows = TRUE
23 | )
24 | }
25 | \arguments{
26 | \item{rows}{Row APL-coordinates}
27 |
28 | \item{rows_group}{Row AP-coordinates to highlight}
29 |
30 | \item{cols}{Column AP-coordinates}
31 |
32 | \item{cols_group}{Column AP-coordinates for the group to be highlighted.}
33 |
34 | \item{rows_scored}{Row AP-coordinates of rows above a score cutoff.}
35 |
36 | \item{rows_color}{Color for rows}
37 |
38 | \item{rows_high_color}{Color for rows to be highlighted.}
39 |
40 | \item{cols_color}{Column points color.}
41 |
42 | \item{cols_high_color}{Color for column points to be highlighted.}
43 |
44 | \item{score_color}{Color scheme for row points with a score.}
45 |
46 | \item{row_labs}{Logical. Whether rows indicated by rows_idx
47 | should be labeled with text. Default FALSE.}
48 |
49 | \item{col_labs}{Logical. Whether columns indicated by cols_idx
50 | should be labeled with text. Default FALSE.}
51 |
52 | \item{show_score}{Logical. Whether the S-alpha score should be shown in
53 | the plot.}
54 |
55 | \item{show_cols}{Logical. Whether column points should be plotted.}
56 |
57 | \item{show_rows}{Logical. Whether row points should be plotted.}
58 | }
59 | \value{
60 | ggplot Association Plot
61 | }
62 | \description{
63 | Uses ggplot to plot an Association Plot
64 | }
65 |
--------------------------------------------------------------------------------
/man/apl_plotly.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{apl_plotly}
4 | \alias{apl_plotly}
5 | \title{Plot Association Plot with plotly}
6 | \usage{
7 | apl_plotly(
8 | rows,
9 | rows_group = NULL,
10 | cols,
11 | cols_group,
12 | rows_scored = NULL,
13 | rows_color = "#0066FF",
14 | rows_high_color = "#FF0000",
15 | cols_color = "#601A4A",
16 | cols_high_color = "#EE442F",
17 | score_color = "rainbow",
18 | row_labs = FALSE,
19 | col_labs = FALSE,
20 | show_score = FALSE,
21 | show_cols = FALSE,
22 | show_rows = TRUE
23 | )
24 | }
25 | \arguments{
26 | \item{rows}{Row APL-coordinates}
27 |
28 | \item{rows_group}{Row AP-coordinates to highlight}
29 |
30 | \item{cols}{Column AP-coordinates}
31 |
32 | \item{cols_group}{Column AP-coordinates for the group to be highlighted.}
33 |
34 | \item{rows_scored}{Row AP-coordinates of rows above a score cutoff.}
35 |
36 | \item{rows_color}{Color for rows}
37 |
38 | \item{rows_high_color}{Color for rows to be highlighted.}
39 |
40 | \item{cols_color}{Column points color.}
41 |
42 | \item{cols_high_color}{Color for column points to be highlighted.}
43 |
44 | \item{score_color}{Color scheme for row points with a score.}
45 |
46 | \item{row_labs}{Logical. Whether rows indicated by rows_idx
47 | should be labeled with text. Default FALSE.}
48 |
49 | \item{col_labs}{Logical. Whether columns indicated by cols_idx
50 | should be labeled with text. Default FALSE.}
51 |
52 | \item{show_score}{Logical. Whether the S-alpha score should be shown in
53 | the plot.}
54 |
55 | \item{show_cols}{Logical. Whether column points should be plotted.}
56 |
57 | \item{show_rows}{Logical. Whether row points should be plotted.}
58 | }
59 | \value{
60 | Interactive plotly Association Plot
61 | }
62 | \description{
63 | Uses plotly to generate an interactive Association Plot
64 | }
65 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 | release:
9 | types: [published]
10 | workflow_dispatch:
11 |
12 | name: pkgdown
13 |
14 | jobs:
15 | pkgdown:
16 | runs-on: ubuntu-latest
17 | # Only restrict concurrency for non-PR jobs
18 | concurrency:
19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 | env:
21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 | permissions:
23 | contents: write
24 | steps:
25 | - uses: actions/checkout@v3
26 |
27 | - uses: r-lib/actions/setup-pandoc@v2
28 |
29 | - uses: r-lib/actions/setup-r@v2
30 | with:
31 | use-public-rspm: true
32 |
33 | - uses: r-lib/actions/setup-r-dependencies@v2
34 | with:
35 | extra-packages:
36 | any::pkgdown
37 | local::.
38 | any::remotes
39 | needs: website
40 |
41 | - name: Install Miniconda
42 | run: |
43 | Rscript -e "remotes::install_github('rstudio/reticulate')"
44 | Rscript -e "reticulate::install_miniconda()"
45 |
46 | - name: Install Python dependencies
47 | run: |
48 | Rscript -e "reticulate::conda_create('r-reticulate', packages = c('python==3.6.11', 'numpy', 'pytorch'))"
49 |
50 | - name: Build site
51 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
52 | shell: Rscript {0}
53 |
54 | - name: Deploy to GitHub pages 🚀
55 | if: github.event_name != 'pull_request'
56 | uses: JamesIves/github-pages-deploy-action@v4.4.1
57 | with:
58 | clean: false
59 | branch: gh-pages
60 | folder: docs
61 |
--------------------------------------------------------------------------------
/man/ca_coords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{ca_coords}
4 | \alias{ca_coords}
5 | \title{Calculate correspondence analysis row and column coordinates.}
6 | \usage{
7 | ca_coords(caobj, dims = NULL, princ_coords = 3, princ_only = FALSE)
8 | }
9 | \arguments{
10 | \item{caobj}{A "cacomp" object as outputted from `cacomp()`.}
11 |
12 | \item{dims}{Integer indicating the number of dimensions to use for the
13 | calculation of coordinates.
14 | All elements of caobj (where applicable) will be reduced to the given
15 | number of dimensions. Default NULL (keeps all dimensions).}
16 |
17 | \item{princ_coords}{Integer. Number indicating whether principal
18 | coordinates should be calculated for the rows (=1), columns (=2), both (=3)
19 | or none (=0).
20 | Default 3.}
21 |
22 | \item{princ_only}{Logical, whether only principal coordinates should be
23 | calculated.
24 | Or, in other words, whether the standardized coordinates are already
25 | calculated and stored in `caobj`. Default `FALSE`.}
26 | }
27 | \value{
28 | Returns input object with coordinates added.
29 | std_coords_rows/std_coords_cols: Standardized coordinates of rows/columns.
30 | prin_coords_rows/prin_coords_cols: Principal coordinates of rows/columns.
31 | }
32 | \description{
33 | `ca_coords` calculates the standardized and principal
34 | coordinates of the rows and columns in CA space.
35 | }
36 | \details{
37 | Takes a "cacomp" object and calculates standardized and principal
38 | coordinates for the visualization of CA results in a biplot or
39 | to subsequently calculate coordinates in an Association Plot.
40 | }
41 | \examples{
42 | # Simulate scRNAseq data.
43 | cnts <- data.frame(cell_1 = rpois(10, 5),
44 | cell_2 = rpois(10, 10),
45 | cell_3 = rpois(10, 20))
46 | rownames(cnts) <- paste0("gene_", 1:10)
47 | cnts <- as.matrix(cnts)
48 |
49 | # Run correspondence analysis.
50 | ca <- cacomp(obj = cnts, princ_coords = 1)
51 | ca <- ca_coords(ca, princ_coords = 3)
52 | }
53 |
--------------------------------------------------------------------------------
/man/elbow_method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{elbow_method}
4 | \alias{elbow_method}
5 | \title{Runs elbow method}
6 | \usage{
7 | elbow_method(obj, mat, reps, python = FALSE, return_plot = FALSE)
8 | }
9 | \arguments{
10 | \item{obj}{A "cacomp" object as outputted from `cacomp()`}
11 |
12 | \item{mat}{A numeric matrix. For sequencing data, a count matrix of gene
13 | expression values with genes in rows and samples/cells in columns.
14 | Should contain row and column names.}
15 |
16 | \item{reps}{Integer. Number of permutations to perform when choosing
17 | "elbow_rule".}
18 |
19 | \item{python}{A logical value indicating whether to use singular value
20 | decomposition from the python package torch.
21 | This implementation dramatically speeds up computation compared to `svd()`
22 | in R.}
23 |
24 | \item{return_plot}{TRUE/FALSE. Whether a plot should be returned when
25 | choosing "elbow_rule".}
26 | }
27 | \value{
28 | `elbow_method` (for `return_plot=TRUE`) returns a list with two elements:
29 | "dims" contains the number of dimensions and "plot" a ggplot. If
30 | `return_plot=FALSE` it just returns the number of picked dimensions.
31 | }
32 | \description{
33 | Helper function for pick_dims() to run the elbow method.
34 | }
35 | \examples{
36 |
37 | # Get example data from Seurat
38 | library(SeuratObject)
39 | set.seed(2358)
40 | cnts <- as.matrix(SeuratObject::LayerData(pbmc_small,
41 | assay = "RNA",
42 | layer = "data"))
43 | # Run correspondence analysis.
44 | ca <- cacomp(obj = cnts)
45 |
46 | # pick dimensions with the elbow rule. Returns list.
47 | pd <- pick_dims(obj = ca,
48 | mat = cnts,
49 | method = "elbow_rule",
50 | return_plot = TRUE,
51 | reps = 10)
52 | pd$plot
53 | ca_sub <- subset_dims(ca, dims = pd$dims)
54 |
55 | }
56 | \references{
57 | Ciampi, Antonio, González Marcos, Ana and Castejón Limas, Manuel. \cr
58 | Correspondence analysis and 2-way clustering. (2005), SORT 29(1).
59 | }
60 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: APL
2 | Type: Package
3 | Title: Association Plots
4 | Version: 1.10.2
5 | Authors@R:
6 | c(person(given = "Clemens",
7 | family = "Kohl",
8 | role = c("cre", "aut"),
9 | email = "kohl.clemens@gmail.com"),
10 | person(given = "Elzbieta",
11 | family = "Gralinska",
12 | role = c("aut"),
13 | email = "gralinska@molgen.mpg.de"),
14 | person(given = "Martin",
15 | family = "Vingron",
16 | role = c("aut"),
17 | email = "vingron@molgen.mpg.de"))
18 | Description: APL is a package developed for computation of Association Plots
19 | (AP), a method for visualization and analysis of single cell transcriptomics
20 | data. The main focus of APL is the identification of genes characteristic for
21 | individual clusters of cells from input data. The package performs
22 | correspondence analysis (CA) and allows to identify cluster-specific
23 | genes using Association Plots. Additionally, APL computes the
24 | cluster-specificity scores for all genes which allows to rank the genes by
25 | their specificity for a selected cell cluster of interest.
26 | biocViews:
27 | StatisticalMethod,
28 | DimensionReduction,
29 | SingleCell,
30 | Sequencing,
31 | RNASeq,
32 | GeneExpression
33 | License: GPL (>= 3)
34 | Encoding: UTF-8
35 | RoxygenNote: 7.3.2
36 | VignetteBuilder: knitr
37 | Imports:
38 | Matrix,
39 | RSpectra,
40 | ggrepel,
41 | ggplot2,
42 | viridisLite,
43 | plotly,
44 | SeuratObject,
45 | SingleCellExperiment,
46 | magrittr,
47 | SummarizedExperiment,
48 | topGO,
49 | methods,
50 | stats,
51 | utils,
52 | org.Hs.eg.db,
53 | org.Mm.eg.db,
54 | rlang
55 | Depends: R (>= 4.4.0)
56 | Suggests:
57 | BiocStyle,
58 | knitr,
59 | rmarkdown,
60 | scRNAseq,
61 | scater,
62 | scran,
63 | sparseMatrixStats,
64 | testthat
65 | Config/testthat/edition: 3
66 | Collate:
67 | 'constructor.R'
68 | 'CA.R'
69 | 'apl.R'
70 | 'convert.R'
71 | 'generic_methods.R'
72 | 'import_packages.R'
73 | 'plot.R'
74 | 'utils-pipe.R'
75 | URL: https://vingronlab.github.io/APL/
76 |
--------------------------------------------------------------------------------
/man/apl_coords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{apl_coords}
4 | \alias{apl_coords}
5 | \title{Calculate Association Plot coordinates}
6 | \usage{
7 | apl_coords(caobj, group, calc_rows = TRUE, calc_cols = TRUE)
8 | }
9 | \arguments{
10 | \item{caobj}{A "cacomp" object with principal row coordinates and
11 | standardized column coordinates calculated.}
12 |
13 | \item{group}{Numeric/Character. Vector of indices or column names of
14 | the columns to calculate centroid/x-axis direction.}
15 |
16 | \item{calc_rows}{TRUE/FALSE. Whether apl row coordinates should
17 | be calculated. Default TRUE.}
18 |
19 | \item{calc_cols}{TRUE/FALSE. Whether apl column coordinates should
20 | be calculated. Default TRUE.}
21 | }
22 | \value{
23 | Returns input "cacomp" object and adds components "apl_rows" and/or
24 | "apl_cols" for row and column coordinates.
25 | In "group" the indices of the columns used to calculate the
26 | centroid are saved.
27 | }
28 | \description{
29 | Calculates the Association Plot coordinates for either the rows,
30 | columns or both (default).
31 | }
32 | \details{
33 | Coordinates (x,y) of row vector \eqn{\vec{r}} are defined as
34 | \deqn{x(\vec{r}) := \left|\vec{r}\right|\cos(\phi(\vec{r}))}
35 | \deqn{y(\vec{r}) := \left|\vec{r}\right|\sin(\phi(\vec{r}))}
36 | The x-direction is determined by calculating the centroid of the columns
37 | selected with the indices in "group".
38 | }
39 | \examples{
40 | set.seed(1234)
41 | # Simulate scRNAseq data
42 | cnts <- data.frame(cell_1 = rpois(10, 5),
43 | cell_2 = rpois(10, 10),
44 | cell_3 = rpois(10, 20),
45 | cell_4 = rpois(10, 20))
46 | rownames(cnts) <- paste0("gene_", 1:10)
47 | cnts <- as.matrix(cnts)
48 |
49 | # Run correspondence analysis
50 | ca <- cacomp(obj = cnts, princ_coords = 3, dims = 3)
51 | # Calculate APL coordinates
52 | ca <- apl_coords(ca, group = 3:4)
53 | }
54 | \references{
55 | Association Plots: Visualizing associations in high-dimensional
56 | correspondence analysis biplots
57 | Elzbieta Gralinska, Martin Vingron
58 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096
59 | }
60 |
--------------------------------------------------------------------------------
/tests/testthat/test-convert.R:
--------------------------------------------------------------------------------
1 | # context("test conversion to and from cacomp")
2 |
3 | load("./testdata/smoke.rda") # fixture: `smoke` matrix (saved together with smoke_ca and smoke_prin, see test-CA.R)
4 | load("./testdata/smoke_scRNAseq.rda") # fixture: `seu` and `sce` objects (generation code is kept commented out below)
5 | set.seed(2358)
6 |
7 | d <- min(nrow(smoke), ncol(smoke)) - 1 # one less than the smaller dimension of `smoke`; passed as `dims` below
8 | ca <- cacomp(smoke, top = nrow(smoke), dims = d, princ_coords = 3) # reference object that every test below compares against
9 |
10 | test_that("check recompute function", {
11 |
12 | calist <- APL::as.list(ca) # list form of the reference object; subsets of it are fed to recompute()
13 |
14 | calist_sub <- calist[c("D",
15 | "std_coords_cols",
16 | "std_coords_rows",
17 | "params")]
18 | expect_equal(recompute(calist_sub, smoke), ca) # case 1: D + standardized row/column coordinates
19 |
20 | calist_sub <- calist[c("std_coords_cols",
21 | "std_coords_rows",
22 | "prin_coords_rows",
23 | "params")]
24 | expect_equal(recompute(calist_sub, smoke), ca) # case 2: standardized coordinates + principal row coordinates, no D
25 |
26 | calist_sub <- calist[c("V",
27 | "U",
28 | "D",
29 | "params")]
30 | expect_equal(recompute(calist_sub, smoke), ca) # case 3: the V, U and D elements only
31 |
32 | calist_sub <- calist[c("std_coords_rows",
33 | "V",
34 | "params")]
35 | expect_equal(recompute(calist_sub, smoke), ca) # case 4: standardized row coordinates + V
36 |
37 | calist_sub <- calist[c("std_coords_cols",
38 | "std_coords_rows",
39 | "prin_coords_rows",
40 | "params")]
41 | expect_error(recompute(calist_sub, smoke[1:3, ]), "mat does not have have the correct number of rows.") # row-subsetted mat must be rejected
42 | expect_error(recompute(calist_sub, smoke[, 1:3]), "mat does not have have the correct number of columns.") # column-subsetted mat must be rejected
43 |
44 | })
45 |
46 | # d <- min(nrow(smoke), ncol(smoke)) - 1
47 | # seu <- SeuratObject::CreateSeuratObject(smoke)
48 | # seu <- cacomp(seu,
49 | # princ_coords = 3,
50 | # return_input = TRUE,
51 | # dims = d,
52 | # assay = "RNA",
53 | # slot = "counts")
54 | #
55 | # sce <- SingleCellExperiment::SingleCellExperiment(list(counts = smoke))
56 | # sce <- cacomp(
57 | # sce,
58 | # dims = 3,
59 | # princ_coords = 3,
60 | # return_input = TRUE,
61 | # assay = "counts"
62 | # )
63 | # save(seu, sce, file = "./tests/testthat/testdata/smoke_scRNAseq.rda") # (lines 46-63: code that produced the smoke_scRNAseq.rda fixture, kept for reference)
64 |
65 | test_that("check Seurat integration", {
66 | expect_equal(as.cacomp(seu, assay = "RNA", slot = "counts"), ca) # cacomp extracted from the Seurat fixture must equal the reference
67 | })
68 |
69 | test_that("check SingleCellExperiment integration", {
70 | expect_equal(as.cacomp(sce, assay = "counts"), ca) # cacomp extracted from the SingleCellExperiment fixture must equal the reference
71 | })
72 |
--------------------------------------------------------------------------------
/tests/testthat/test-CA.R:
--------------------------------------------------------------------------------
1 | # context("Correspondence Analysis")
2 |
3 |
4 | # library(ca)
5 | # data(smoke)
6 | #
7 | # smoke_ca <- ca(smoke)
8 | #
9 | # smoke_prin <- cacoord(smoke_ca,
10 | # type = c("principal"),
11 | # dim = NA,
12 | # rows = TRUE,
13 | # cols = TRUE)
14 | #
15 | # smoke <- as.matrix(smoke)
16 | # save(smoke, smoke_ca, smoke_prin, file = "./tests/testthat/testdata/smoke.rda") # (lines 4-16: code that produced the smoke.rda fixture, kept for reference)
17 |
18 | load("./testdata/smoke.rda") # fixture: smoke, smoke_ca, smoke_prin (see the commented code above)
19 | d <- min(nrow(smoke), ncol(smoke)) - 1 # one less than the smaller dimension of `smoke`; passed as `dims` below
20 | # suppressWarnings(ca_python <- cacomp(obj = smoke, top = nrow(smoke), dims = d, princ_coords = 3, coords = TRUE, python = TRUE))
21 | ca_svd <- cacomp(obj = smoke, top = nrow(smoke), dims = d, princ_coords = 3, coords = TRUE, python = FALSE) # CA result under test (python = FALSE)
22 | cac <- ca_coords(ca_svd, princ_coords = 3) # coordinates recomputed via ca_coords(); checked in the last test
23 |
24 |
25 | # test_that("CA with torch svd results", {
26 | #
27 | # expect_equal(ca_python@dims, length(smoke_ca$sv))
28 | #
29 | # expect_equal(as.numeric(ca_python@D), smoke_ca$sv)
30 | # expect_equal(ca_python@std_coords_cols, smoke_ca$colcoord)
31 | # expect_equal(ca_python@std_coords_rows, smoke_ca$rowcoord)
32 | #
33 | # expect_equal(ca_python@prin_coords_cols, smoke_prin$columns)
34 | # expect_equal(ca_python@prin_coords_rows, smoke_prin$rows)
35 | #
36 | # expect_equal(as.numeric(ca_python@row_masses), smoke_ca$rowmass)
37 | # expect_equal(as.numeric(ca_python@row_inertia), smoke_ca$rowinertia)
38 | #
39 | # expect_equal(as.numeric(ca_python@col_masses), smoke_ca$colmass)
40 | # expect_equal(as.numeric(ca_python@col_inertia), smoke_ca$colinertia)
41 | #
42 | # })
43 |
44 |
45 | test_that("CA with R svd results", {
46 | expect_equal(ca_svd@dims, length(smoke_ca$sv)) # number of dimensions must match the number of reference singular values
47 |
48 | expect_equal(abs(as.numeric(ca_svd@D)), abs(smoke_ca$sv))
49 | expect_equal(abs(ca_svd@std_coords_cols), abs(smoke_ca$colcoord)) # abs(): only magnitudes are compared -- presumably because the sign of SVD components is arbitrary (TODO confirm)
50 | expect_equal(abs(ca_svd@std_coords_rows), abs(smoke_ca$rowcoord))
51 |
52 | expect_equal(abs(ca_svd@prin_coords_cols), abs(smoke_prin$columns))
53 | expect_equal(abs(ca_svd@prin_coords_rows), abs(smoke_prin$rows))
54 |
55 | expect_equal(as.numeric(ca_svd@row_masses), smoke_ca$rowmass) # masses and inertias are compared without abs()
56 | expect_equal(as.numeric(ca_svd@row_inertia), smoke_ca$rowinertia)
57 |
58 | expect_equal(as.numeric(ca_svd@col_masses), smoke_ca$colmass)
59 | expect_equal(as.numeric(ca_svd@col_inertia), smoke_ca$colinertia)
60 |
61 | })
62 |
63 | test_that("CA coord function", {
64 |
65 | expect_equal(abs(cac@std_coords_cols), abs(smoke_ca$colcoord)) # same reference values as above, now for the ca_coords() output
66 | expect_equal(abs(cac@std_coords_rows), abs(smoke_ca$rowcoord))
67 |
68 | expect_equal(abs(cac@prin_coords_cols), abs(smoke_prin$columns))
69 | expect_equal(abs(cac@prin_coords_rows), abs(smoke_prin$rows))
70 |
71 | })
72 |
73 | # TODO: add a cacomp test for a 2x2 matrix (--> only 1 dim --> error) to cover the error handling.
74 |
--------------------------------------------------------------------------------
/man/apl_topGO.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{apl_topGO}
4 | \alias{apl_topGO}
5 | \title{Run Gene overrepresentation analysis with topGO}
6 | \usage{
7 | apl_topGO(
8 | caobj,
9 | ontology,
10 | organism = "hs",
11 | ngenes = 1000,
12 | score_cutoff = 0,
13 | use_coords = FALSE,
14 | return_plot = FALSE,
15 | top_res = 15
16 | )
17 | }
18 | \arguments{
19 | \item{caobj}{A "cacomp" object with principal row coordinates and
20 | standardized column coordinates calculated.}
21 |
22 | \item{ontology}{Character string. Chooses GO sets for 'BP'
23 | (biological processes), 'CC' (cellular component) or 'MF' (molecular function).}
24 |
25 | \item{organism}{Character string. Either 'hs' (homo sapiens), 'mm'
26 | (mus musculus) or the name of the organism package such as 'org.*.eg.db'.}
27 |
28 | \item{ngenes}{Numeric. Number of top ranked genes to test for
29 | overrepresentation.}
30 |
31 | \item{score_cutoff}{numeric. S-alpha score cutoff. Only genes with a score
32 | larger will be tested.}
33 |
34 | \item{use_coords}{Logical. Whether the x-coordinates of the row APL
35 | coordinates should be used for ranking.
36 | Only recommended when no S-alpha score (see apl_score()) can be calculated.}
37 |
38 | \item{return_plot}{Logical. Whether a plot of significant gene sets should
39 | be additionally returned.}
40 |
41 | \item{top_res}{Numeric. Number of top scoring genes to plot.}
42 | }
43 | \value{
44 | A data.frame containing the gene sets with the highest overrepresentation.
45 | }
46 | \description{
47 | This function uses the Kolmogorov-Smirnov test as implemented by the package
48 | topGO to test for overrepresentation in Gene Ontology gene sets.
49 | }
50 | \details{
51 | For a chosen group of cells/samples,
52 | the top 'ngenes' group specific genes are used for gene overrepresentation
53 | analysis.
54 | The genes are ranked either by the precomputed APL score, or, if
55 | not available, by their APL x-coordinates.
56 | }
57 | \examples{
58 | library(SeuratObject)
59 | set.seed(1234)
60 | cnts <- SeuratObject::LayerData(pbmc_small, assay = "RNA", layer = "counts")
61 | cnts <- as.matrix(cnts)
62 |
63 | # Run CA on example from Seurat
64 |
65 | ca <- cacomp(pbmc_small,
66 | princ_coords = 3,
67 | return_input = FALSE,
68 | assay = "RNA",
69 | slot = "counts")
70 |
71 | grp <- which(Idents(pbmc_small) == 2)
72 | ca <- apl_coords(ca, group = grp)
73 | ca <- apl_score(ca,
74 | mat = cnts)
75 |
76 | enr <- apl_topGO(ca,
77 | ontology = "BP",
78 | organism = "hs")
79 |
80 | plot_enrichment(enr)
81 | }
82 | \references{
83 | Adrian Alexa and Jorg Rahnenfuhrer \cr
84 | topGO: Enrichment Analysis for Gene Ontology. \cr
85 | R package version 2.42.0.
86 | }
87 |
--------------------------------------------------------------------------------
/man/apl.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{apl}
4 | \alias{apl}
5 | \title{Association Plot}
6 | \usage{
7 | apl(
8 | caobj,
9 | type = "ggplot",
10 | rows_idx = NULL,
11 | cols_idx = caobj@group,
12 | row_labs = FALSE,
13 | col_labs = FALSE,
14 | show_score = FALSE,
15 | show_cols = FALSE,
16 | show_rows = TRUE,
17 | score_cutoff = 0,
18 | score_color = "rainbow"
19 | )
20 | }
21 | \arguments{
22 | \item{caobj}{An object of class "cacomp" and "APL" with apl
23 | coordinates calculated.}
24 |
25 | \item{type}{"ggplot"/"plotly". For a static plot a string "ggplot",
26 | for an interactive plot "plotly". Default "ggplot".}
27 |
28 | \item{rows_idx}{numeric/character vector.
29 | Indices or names of the rows that should be labelled. Default NULL.}
30 |
31 | \item{cols_idx}{numeric/character vector.
32 | Indices or names of the columns that should be labelled.
33 | Default is only to label columns making up the centroid: caobj@group.}
34 |
35 | \item{row_labs}{Logical. Whether labels for rows indicated by rows_idx
36 | should be labeled with text. Default FALSE.}
37 |
38 | \item{col_labs}{Logical. Whether labels for columns indicated by cols_idx
39 | should be labeled with text. Default FALSE.}
40 |
41 | \item{show_score}{Logical. Whether the S-alpha score should be shown in
42 | the plot.}
43 |
44 | \item{show_cols}{Logical. Whether column points should be plotted.}
45 |
46 | \item{show_rows}{Logical. Whether row points should be plotted.}
47 |
48 | \item{score_cutoff}{Numeric. Rows (genes) with a score >= score_cutoff will
49 | be colored according to their score if show_score = TRUE.}
50 |
51 | \item{score_color}{Either "rainbow" or "viridis".}
52 | }
53 | \value{
54 | Either a ggplot or plotly object.
55 | }
56 | \description{
57 | Plot an Association Plot for the chosen columns.
58 | }
59 | \details{
60 | For an interactive plot type="plotly" can be chosen, otherwise a static plot
61 | will be returned.
62 | The row and column coordinates have to be already calculated by
63 | `apl_coords()`.
64 | }
65 | \examples{
66 | set.seed(1234)
67 |
68 | # Simulate counts
69 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
70 | x = sample(1:100, 50, replace = TRUE))
71 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
72 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
73 |
74 | # Run correspondence analysis
75 | ca <- cacomp(obj = cnts, princ_coords = 3)
76 |
77 | # Calculate APL coordinates for arbitrary group
78 | ca <- apl_coords(ca, group = 1:10)
79 |
80 | # plot results
81 | # Note:
82 | # Due to random gene expression & group, no highly
83 | # associated genes are visible.
84 | apl(ca, type = "ggplot")
85 | }
86 | \references{
87 | Association Plots: Visualizing associations in high-dimensional
88 | correspondence analysis biplots \cr
89 | Elzbieta Gralinska, Martin Vingron \cr
90 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096 \cr
91 | }
92 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/gene_ranking.txt:
--------------------------------------------------------------------------------
1 | "Gene_name" "x-coordinate" "y-coordinate" "Gene_score"
2 | "14" "Burkina Faso" 0.659334062031513 0.209459617449081 0.0932180194135293
3 | "13" "Benin" 0.46660339350845 0.166135088099633 0.0175825284826763
4 | "35" "Cote d'Ivoire" 0.375609622422642 0.230118794618802 -0.246342918731435
5 | "34" "China" 0.379581633138169 0.232809016489976 -0.249641893125213
6 | "36" "Cameroon" 0.51122370357559 0.308047942336173 -0.321351492425428
7 | "22" "Bolivia" 0.115239525292288 0.182489665038921 -0.37798359928269
8 | "38" "Congo, Rep." 0.213539414949807 0.265348050842096 -0.50362884062404
9 | "15" "Bangladesh" 0.515545901239115 0.382994747014415 -0.519591435321639
10 | "10" "Azerbaijan" 0.118643553602509 0.236851843036183 -0.521506703995547
11 | "4" "Arab World" -0.0826348047718997 0.182134958546392 -0.574899247963387
12 | "1" "Afghanistan" 0.566050278113453 0.467427881465475 -0.69728833429889
13 | "26" "Bhutan" 0.591111717263373 0.500936560797782 -0.762792328085333
14 | "39" "Colombia" -0.108851566434673 0.268540654620658 -0.834648617567197
15 | "23" "Brazil" -0.255731102123635 0.226333867336864 -0.867453948024074
16 | "3" "Albania" 0.192689599496444 0.399734524035285 -0.887691094408197
17 | "37" "Congo, Dem. Rep." 0.707352964776721 0.607676905606093 -0.935043069273576
18 | "33" "Chile" -0.29032497061507 0.239732963941796 -0.938262164832479
19 | "28" "Central African Republic" 0.918251524726067 0.696805849208181 -0.965037362320114
20 | "7" "Armenia" 0.0583393083734729 0.382411643439824 -0.975222047613535
21 | "21" "Belize" -0.129368951687118 0.31724996724156 -0.986814879273313
22 | "30" "Central Europe and the Baltics" -0.175778317867741 0.308285912900536 -1.00899668774541
23 | "27" "Botswana" -0.149448409679685 0.329957129605282 -1.04123856341249
24 | "2" "Angola" 0.179903144134825 0.45844562830569 -1.05915872379313
25 | "24" "Barbados" -0.495322587859203 0.2109356616869 -1.06542800242783
26 | "32" "Channel Islands" -0.415806421614723 0.277531027121789 -1.16590216174509
27 | "18" "Bahamas, The" -0.538012266428231 0.234151041985703 -1.17086294608751
28 | "12" "Belgium" -0.523093044060456 0.25318026478414 -1.20737487038153
29 | "6" "Argentina" -0.528375373721855 0.251646208394254 -1.2085110360082
30 | "29" "Canada" -0.516913310908317 0.25644196137184 -1.21001067306841
31 | "20" "Belarus" -0.176260871721386 0.386214534970047 -1.2201004757536
32 | "11" "Burundi" 1.17484147858886 0.892727782400543 -1.23797453560338
33 | "8" "Australia" -0.486265584873309 0.281257079798341 -1.24643189718138
34 | "25" "Brunei Darussalam" -0.541832206823089 0.263238202574069 -1.25329807944329
35 | "9" "Austria" -0.419212344064118 0.314917636729549 -1.27035457580852
36 | "31" "Switzerland" -0.46359904965256 0.315119718788519 -1.31528745777646
37 | "16" "Bulgaria" -0.252645734099141 0.394133471715775 -1.31788820894164
38 | "5" "United Arab Emirates" -0.47617120932336 0.380529288289148 -1.5046450383604
39 | "17" "Bahrain" -0.515622126246806 0.426290375045695 -1.66777652749158
40 | "19" "Bosnia and Herzegovina" -0.202596679230447 0.689328293173901 -2.06567563520669
41 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/gene_ranking.txt:
--------------------------------------------------------------------------------
1 | "Gene_name" "x-coordinate" "y-coordinate" "Gene_score"
2 | "14" "Burkina Faso" 0.493483081784703 0.51544431851946 0.143971887195826
3 | "13" "Benin" 0.371710481815176 0.349564824608867 0.13467843966127
4 | "11" "Burundi" 0.898371653273147 1.17850669951975 0.0992527774200379
5 | "28" "Central African Republic" 0.700255713010355 0.922917979522332 0.0744457972915453
6 | "35" "Cote d'Ivoire" 0.295935094579746 0.338276212911539 0.0665576060174207
7 | "26" "Bhutan" 0.479794305974388 0.61849271694774 0.0604083110606367
8 | "34" "China" 0.293464674324616 0.349093065452325 0.0567525214399028
9 | "1" "Afghanistan" 0.457564161368761 0.598566346331496 0.0516897900148708
10 | "36" "Cameroon" 0.366533471653081 0.485011904246648 0.0376578124354463
11 | "15" "Bangladesh" 0.387472726288257 0.542474410165313 0.0196330347472275
12 | "37" "Congo, Dem. Rep." 0.525371758267359 0.775111250568603 -0.000213740285928798
13 | "38" "Congo, Rep." 0.189674099277819 0.32272720918723 -0.0291599597207467
14 | "22" "Bolivia" 0.0763269801211523 0.217444942061676 -0.0711175424522714
15 | "10" "Azerbaijan" 0.079954993980841 0.281767557818549 -0.111105247040777
16 | "3" "Albania" 0.147679647753861 0.426469719370387 -0.141499873132641
17 | "2" "Angola" 0.160505779017826 0.476151034497615 -0.16236152379775
18 | "4" "Arab World" -0.084231483403504 0.244146587721156 -0.249781791095949
19 | "7" "Armenia" 0.0297001312294838 0.412599888090165 -0.25007456914523
20 | "39" "Colombia" -0.0808501374404857 0.313739024288438 -0.293589510498607
21 | "21" "Belize" -0.101016809262888 0.333203336269937 -0.32695449431724
22 | "27" "Botswana" -0.109912034733154 0.354448484192017 -0.350255576565193
23 | "30" "Central Europe and the Baltics" -0.136815218930296 0.34092997551302 -0.367992164201472
24 | "23" "Brazil" -0.199897338107552 0.27950479406108 -0.38942324993874
25 | "20" "Belarus" -0.132301031889015 0.423946651564449 -0.419769717266815
26 | "33" "Chile" -0.220920626787251 0.310567596541093 -0.43150952604888
27 | "16" "Bulgaria" -0.180573442611186 0.445145288122006 -0.482416446456921
28 | "32" "Channel Islands" -0.323128564526515 0.390771868446349 -0.588102175923988
29 | "9" "Austria" -0.31603451736632 0.427284462274932 -0.605766497084201
30 | "24" "Barbados" -0.366573604244055 0.404918219307577 -0.641139537859345
31 | "19" "Bosnia and Herzegovina" -0.168193760838336 0.708710350044667 -0.64875430280551
32 | "31" "Switzerland" -0.347327954758956 0.444724590999482 -0.648885693351841
33 | "8" "Australia" -0.376470789433401 0.420725523089288 -0.661755300069471
34 | "29" "Canada" -0.391598006862012 0.428641657689617 -0.682250270252899
35 | "12" "Belgium" -0.403162104224234 0.422269123623575 -0.689493295650689
36 | "6" "Argentina" -0.412819211563902 0.42956789512184 -0.704099535716694
37 | "18" "Bahamas, The" -0.41782033093511 0.423291048139206 -0.704844466381713
38 | "5" "United Arab Emirates" -0.388969467656746 0.490699703280908 -0.721701895420701
39 | "25" "Brunei Darussalam" -0.397712533701071 0.479440965862673 -0.722810664949909
40 | "17" "Bahrain" -0.397474395358172 0.554450026348923 -0.773434482774928
41 |
--------------------------------------------------------------------------------
/tests/testthat/test-apl.R:
--------------------------------------------------------------------------------
1 |
2 | # tab <- read.delim(file = "/home/kohl/PhD/gits/APL/tests/testthat/testdata/input_data.tsv")
3 | # mat <- as.matrix(tab[,-1])
4 | # rownames(mat) <- tab$Country.Name
5 | # save(mat, file = "/home/kohl/PhD/gits/APL/tests/testthat/testdata/countries.rda")
6 |
7 | load("./testdata/countries.rda")
8 |
9 | grp <- c(6, 7, 8, 10, 12)
10 |
11 |
12 | ca <- cacomp(mat, princ_coords = 3, dims = 19, top = 39)
13 | ca <- apl_coords(ca, group = grp)
14 |
15 |
16 |
17 | test_that("Example 1, 39 genes and 19 dimensions", {
18 |
19 | samples1 <- read.delim(file = "./testdata/AP_coordinates/example1/AP_coordinates_samples.txt")
20 | samples1 <- t(samples1)
21 | rownames(samples1) <- colnames(mat)
22 | colnames(samples1) <- c("x", "y")
23 |
24 | genes1 <- read.delim(file = "./testdata/AP_coordinates/example1/gene_ranking.txt")
25 | ord <- order(as.numeric(rownames(genes1)))
26 | rwnms <- rownames(mat)[as.numeric(rownames(genes1))[ord]]
27 |
28 | genes1_sort <- as.matrix(genes1[ord, c("x.coordinate","y.coordinate")])
29 | dimnames(genes1_sort) <- list(rwnms, c("x", "y"))
30 |
31 | ca <- cacomp(mat, princ_coords = 3, dims = 19, top = 39)
32 | ca <- apl_coords(ca, group = grp)
33 |
34 | expect_equal(ca@apl_cols, samples1, tolerance = 1e-6)
35 | expect_equal(ca@apl_rows, genes1_sort, tolerance = 1e-6)
36 |
37 | })
38 |
39 | test_that("Example 2, 39 genes and 4 dimensions",{
40 | samples2 <- read.delim(file = "./testdata/AP_coordinates/example2/AP_coordinates_samples.txt")
41 | samples2 <- t(samples2)
42 | rownames(samples2) <- colnames(mat)
43 | colnames(samples2) <- c("x", "y")
44 |
45 | genes2 <- read.delim(file = "./testdata/AP_coordinates/example2/gene_ranking.txt")
46 | ord <- order(as.numeric(rownames(genes2)))
47 | rwnms <- rownames(mat)[as.numeric(rownames(genes2))[ord]]
48 |
49 | genes2_sort <- as.matrix(genes2[ord, c("x.coordinate","y.coordinate")])
50 | dimnames(genes2_sort) <- list(rwnms, c("x", "y"))
51 |
52 | ca <- cacomp(mat, princ_coords = 3, dims = 4, top = 39)
53 | ca <- apl_coords(ca, group = grp)
54 |
55 | expect_equal(ca@apl_cols, samples2, tolerance = 1e-8)
56 | expect_equal(ca@apl_rows, genes2_sort, tolerance = 1e-8)
57 | })
58 |
59 |
60 | test_that("Example 3, 20 genes and 4 dimensions",{
61 | samples3 <- read.delim(file = "./testdata/AP_coordinates/example3/AP_coordinates_samples.txt")
62 | samples3 <- t(samples3)
63 | rownames(samples3) <- colnames(mat)
64 | colnames(samples3) <- c("x", "y")
65 |
66 | genes3 <- read.delim(file = "./testdata/AP_coordinates/example3/gene_ranking.txt")
67 | ord <- order(as.numeric(rownames(genes3)))
68 | rwnms <- rownames(mat)[as.numeric(rownames(genes3))[ord]]
69 |
70 | genes3_sort <- as.matrix(genes3[ord, c("x.coordinate","y.coordinate")])
71 | dimnames(genes3_sort) <- list(rwnms, c("x", "y"))
72 |
73 | ca <- cacomp(mat, princ_coords = 3, dims = 4, top = 20)
74 | ca <- apl_coords(ca, group = grp)
75 |
76 | expect_equal(ca@apl_cols, samples3, tolerance = 1e-8)
77 | expect_equal(ca@apl_rows[order(rownames(ca@apl_rows)),], genes3_sort[order(rownames(genes3_sort)),], tolerance = 1e-8)
78 | })
79 |
80 |
--------------------------------------------------------------------------------
/man/run_cacomp.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{run_cacomp}
4 | \alias{run_cacomp}
5 | \title{Internal function for `cacomp`}
6 | \usage{
7 | run_cacomp(
8 | obj,
9 | coords = TRUE,
10 | princ_coords = 3,
11 | python = FALSE,
12 | dims = 100,
13 | top = 5000,
14 | inertia = TRUE,
15 | rm_zeros = TRUE,
16 | residuals = "pearson",
17 | cutoff = NULL,
18 | clip = FALSE,
19 | ...
20 | )
21 | }
22 | \arguments{
23 | \item{obj}{A numeric matrix or Seurat/SingleCellExperiment object. For
24 | sequencing a count matrix, gene expression values with genes in rows and
25 | samples/cells in columns.
26 | Should contain row and column names.}
27 |
28 | \item{coords}{Logical. Indicates whether CA standard coordinates should be
29 | calculated.}
30 |
31 | \item{princ_coords}{Integer. Number indicating whether principal
32 | coordinates should be calculated for the rows (=1), columns (=2),
33 | both (=3) or none (=0).}
34 |
35 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
36 | decomposition from the python package torch.
37 | This implementation dramatically speeds up computation compared to `svd()`
38 | in R when calculating the full SVD. This parameter only works when dims==NULL
39 | or dims==rank(mat), where calculating a full SVD is required.}
40 |
41 | \item{dims}{Integer. Number of CA dimensions to retain. If NULL:
42 | (0.2 * min(nrow(A), ncol(A)) - 1 ).}
43 |
44 | \item{top}{Integer. Number of most variable rows to retain.
45 | Set NULL to keep all.}
46 |
47 | \item{inertia}{Logical. Whether total, row and column inertias should be
48 | calculated and returned.}
49 |
50 | \item{rm_zeros}{Logical. Whether rows & cols containing only 0s should be
51 | removed. Keeping zero only rows/cols might lead to unexpected results.}
52 |
53 | \item{residuals}{character string. Specifies which kind of residuals should
54 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
55 | negative-binomial.}
56 |
57 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
58 | -cutoff are clipped to cutoff.}
59 |
60 | \item{clip}{logical. Whether residuals should be clipped if they are
61 | higher/lower than a specified cutoff}
62 |
63 | \item{...}{Arguments forwarded to methods.}
64 | }
65 | \value{
66 | Returns a named list of class "cacomp" with components
67 | U, V and D: The results from the SVD.
68 | row_masses and col_masses: Row and columns masses.
69 | top_rows: How many of the most variable rows/genes were retained for the
70 | analysis.
71 | tot_inertia, row_inertia and col_inertia: Only if inertia = TRUE. Total,
72 | row and column inertia respectively.
73 | }
74 | \description{
75 | `run_cacomp` performs correspondence analysis on a matrix and returns the
76 | transformed data.
77 | }
78 | \details{
79 | The calculation is performed according to the work of Michael Greenacre.
80 | When working with large matrices,
81 | CA coordinates and
82 | principal coordinates should only be computed when needed to save
83 | computational time.
84 | }
85 | \references{
86 | Greenacre, M. Correspondence Analysis in Practice, Third Edition, 2017.
87 | }
88 |
--------------------------------------------------------------------------------
/man/ca_3Dplot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.R
3 | \name{ca_3Dplot}
4 | \alias{ca_3Dplot}
5 | \alias{ca_3Dplot,cacomp-method}
6 | \alias{ca_3Dplot,Seurat-method}
7 | \alias{ca_3Dplot,SingleCellExperiment-method}
8 | \title{Plot of the first 3D CA projection of the data.}
9 | \usage{
10 | ca_3Dplot(
11 | obj,
12 | xdim = 1,
13 | ydim = 2,
14 | zdim = 3,
15 | princ_coords = 1,
16 | row_labels = NULL,
17 | col_labels = NULL,
18 | ...
19 | )
20 |
21 | \S4method{ca_3Dplot}{cacomp}(
22 | obj,
23 | xdim = 1,
24 | ydim = 2,
25 | zdim = 3,
26 | princ_coords = 1,
27 | row_labels = NULL,
28 | col_labels = NULL,
29 | ...
30 | )
31 |
32 | \S4method{ca_3Dplot}{Seurat}(
33 | obj,
34 | xdim = 1,
35 | ydim = 2,
36 | zdim = 3,
37 | princ_coords = 1,
38 | row_labels = NULL,
39 | col_labels = NULL,
40 | ...,
41 | assay = SeuratObject::DefaultAssay(obj),
42 | slot = "counts"
43 | )
44 |
45 | \S4method{ca_3Dplot}{SingleCellExperiment}(
46 | obj,
47 | xdim = 1,
48 | ydim = 2,
49 | zdim = 3,
50 | princ_coords = 1,
51 | row_labels = NULL,
52 | col_labels = NULL,
53 | ...,
54 | assay = "counts"
55 | )
56 | }
57 | \arguments{
58 | \item{obj}{An object of class "cacomp", or alternatively an object of
59 | class "Seurat" or "SingleCellExperiment" with a dim. reduction named "CA"
60 | saved.}
61 |
62 | \item{xdim}{Integer. The dimension for the x-axis. Default 1.}
63 |
64 | \item{ydim}{Integer. The dimension for the y-axis. Default 2.}
65 |
66 | \item{zdim}{Integer. The dimension for the z-axis. Default 3.}
67 |
68 | \item{princ_coords}{Integer. If 1 then principal coordinates are used for
69 | the rows, if 2 for the columns. Default 1 (rows).}
70 |
71 | \item{row_labels}{Numeric vector. Indices for the rows for which a label
72 | should be added (label should be stored in rownames). Default NULL.}
73 |
74 | \item{col_labels}{Numeric vector. Indices for the columns for which
75 | a label should be added (label should be stored in colnames).
76 | Default NULL (no columns).}
77 |
78 | \item{...}{Further arguments.}
79 |
80 | \item{assay}{SingleCellExperiment assay to obtain counts from.}
81 |
82 | \item{slot}{Seurat slot from assay to get count matrix from.}
83 | }
84 | \value{
85 | Plot of class "plotly".
86 | }
87 | \description{
88 | Plots the first 3 dimensions of the rows and columns in the same plot.
89 | }
90 | \details{
91 | Depending on whether `princ_coords` is set to 1 or 2 either the principal
92 | coordinates of either the rows (1) or the columns (2)
93 | are chosen. For the other the standardized coordinates are plotted
94 | (asymmetric biplot).
95 | Labels for rows and columns should be stored in the row- and column
96 | names respectively.
97 | }
98 | \examples{
99 | # Simulate counts
100 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
101 | x = sample(1:100, 50, replace = TRUE))
102 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
103 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
104 |
105 | # Run correspondence analysis
106 | ca <- cacomp(obj = cnts, princ_coords = 3)
107 |
108 | ca_3Dplot(ca)
109 | }
110 |
--------------------------------------------------------------------------------
/man/cacomp-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \docType{class}
4 | \name{cacomp-class}
5 | \alias{cacomp-class}
6 | \alias{new_cacomp}
7 | \title{An S4 class that contains all elements needed for CA.}
8 | \usage{
9 | new_cacomp(...)
10 | }
11 | \arguments{
12 | \item{...}{slot names and objects for new cacomp object.}
13 | }
14 | \value{
15 | cacomp object
16 | }
17 | \description{
18 | This class contains elements necessary to compute CA coordinates or
19 | Association Plot coordinates,
20 | as well as other informative data such as row/column inertia,
21 | gene-wise APL-scores, etc. ...
22 |
23 | Creates new cacomp object.
24 | }
25 | \section{Slots}{
26 |
27 | \describe{
28 | \item{\code{U}}{class "matrix". Left singular vectors of the original input matrix.}
29 |
30 | \item{\code{V}}{class "matrix". Right singular vectors of the original input matrix.}
31 |
32 | \item{\code{D}}{class "numeric". Singular values of the original input matrix.}
33 |
34 | \item{\code{std_coords_rows}}{class "matrix". Standardized CA coordinates of the
35 | rows.}
36 |
37 | \item{\code{std_coords_cols}}{class "matrix". Standardized CA coordinates of the
38 | columns.}
39 |
40 | \item{\code{prin_coords_rows}}{class "matrix". Principal CA coordinates of the rows.}
41 |
42 | \item{\code{prin_coords_cols}}{class "matrix". Principal CA coordinates of the
43 | columns.}
44 |
45 | \item{\code{apl_rows}}{class "matrix". Association Plot coordinates of the rows
46 | for the direction defined in slot "group"}
47 |
48 | \item{\code{apl_cols}}{class "matrix". Association Plot coordinates of the columns
49 | for the direction defined in slot "group"}
50 |
51 | \item{\code{APL_score}}{class "data.frame". Contains rows sorted by the APL score.
52 | Columns: Rowname (gene name in the case of gene expression data),
53 | APL score calculated for the direction defined in slot "group",
54 | the original row number and the rank of the row as determined by the score.}
55 |
56 | \item{\code{dims}}{class "numeric". Number of dimensions in CA space.}
57 |
58 | \item{\code{group}}{class "numeric". Indices of the chosen columns for APL
59 | calculations.}
60 |
61 | \item{\code{row_masses}}{class "numeric". Row masses of the frequency table.}
62 |
63 | \item{\code{col_masses}}{class "numeric". Column masses of the frequency table.}
64 |
65 | \item{\code{top_rows}}{class "numeric". Number of most variable rows chosen.}
66 |
67 | \item{\code{tot_inertia}}{class "numeric". Total inertia in CA space.}
68 |
69 | \item{\code{row_inertia}}{class "numeric". Row-wise inertia in CA space.}
70 |
71 | \item{\code{col_inertia}}{class "numeric". Column-wise inertia in CA space.}
72 |
73 | \item{\code{permuted_data}}{class "list". Storage slot for permuted data.}
74 |
75 | \item{\code{params}}{class "list". List of parameters.}
76 | }}
77 |
78 | \examples{
79 | set.seed(1234)
80 |
81 | # Simulate counts
82 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
83 | x = sample(1:20, 50, replace = TRUE))
84 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
85 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
86 |
87 | res <- APL:::comp_std_residuals(mat=cnts)
88 | SVD <- svd(res$S)
89 | names(SVD) <- c("D", "U", "V")
90 | SVD <- SVD[c(2, 1, 3)]
91 |
92 | ca <- new_cacomp(U = SVD$U,
93 | V = SVD$V,
94 | D = SVD$D,
95 | row_masses = res$rowm,
96 | col_masses = res$colm)
97 | }
98 |
--------------------------------------------------------------------------------
/man/apl_score.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{apl_score}
4 | \alias{apl_score}
5 | \title{Find rows most highly associated with a condition}
6 | \usage{
7 | apl_score(
8 | caobj,
9 | mat = NULL,
10 | dims = caobj@dims,
11 | group = caobj@group,
12 | reps = 10,
13 | quant = 0.99,
14 | python = FALSE,
15 | store_perm = TRUE,
16 | method = "permutation"
17 | )
18 | }
19 | \arguments{
20 | \item{caobj}{A "cacomp" object with principal row coordinates and
21 | standardized column coordinates calculated.}
22 |
23 | \item{mat}{A numeric matrix. For sequencing a count matrix, gene expression
24 | values with genes in rows and samples/cells in columns.
25 | Should contain row and column names.}
26 |
27 | \item{dims}{Integer. Number of CA dimensions to retain. Needs to be the same
28 | as in caobj!}
29 |
30 | \item{group}{Vector of indices of the columns to calculate centroid/x-axis
31 | direction.}
32 |
33 | \item{reps}{Integer. Number of permutations to perform.}
34 |
35 | \item{quant}{Numeric. Single number between 0 and 1 indicating the quantile
36 | used to calculate the cutoff. Default 0.99.}
37 |
38 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
39 | decomposition from the python package torch.
40 | This implementation dramatically speeds up computation compared to `svd()`
41 | in R.}
42 |
43 | \item{store_perm}{Logical. Whether permuted data should be stored in the CA
44 | object.}
45 |
46 | \item{method}{Method to calculate the cutoff. Either "random" for random
47 | direction method or "permutation" for the permutation method.}
48 | }
49 | \value{
50 | Returns the input "cacomp" object with "APL_score" component added.
51 | APL_score contains a data frame with ranked rows, their score and their
52 | original row number.
53 | }
54 | \description{
55 | Ranks rows by a calculated score which balances the association of the row
56 | with the condition and how associated it is with other conditions.
57 | }
58 | \details{
59 | The score is calculated by permuting the values of each row to determine the
60 | cutoff angle of the 99\% quantile.
61 | \deqn{S_{alpha}(x,y)=x-\frac{y}{\tan\alpha}}
62 | By default the permutation is repeated 10 times (for the random direction
63 | method at least 300 repetitions are recommended!), but for very large matrices
64 | this can be reduced.
65 | The method "permutation" permutes the columns in each row and calculates
66 | AP-coordinates for each such permutation. The cutoff is then taken by the
67 | quantile specified by "quant". The "random" method in contrast calculates
68 | AP-coordinates for the original data, but by looking into random directions.
69 |
70 | If store_perm is TRUE the permuted data is stored in the cacomp object and
71 | can be used for future scoring.
72 | }
73 | \examples{
74 | set.seed(1234)
75 |
76 | # Simulate counts
77 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
78 | x = sample(1:20, 50, replace = TRUE))
79 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
80 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
81 |
82 | # Run correspondence analysis.
83 | ca <- cacomp(obj = cnts, princ_coords = 3)
84 |
85 | # Calculate APL coordinates:
86 | ca <- apl_coords(ca, group = 1:10)
87 |
88 | # Rank genes by S-alpha score
89 | ca <- apl_score(ca, mat = cnts)
90 | }
91 | \references{
92 | Association Plots: Visualizing associations in high-dimensional
93 | correspondence analysis biplots \cr
94 | Elzbieta Gralinska, Martin Vingron \cr
95 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096
96 | }
97 |
--------------------------------------------------------------------------------
/R/generic_methods.R:
--------------------------------------------------------------------------------
1 | #' @include constructor.R
2 | NULL
3 |
4 | #' Prints cacomp object
5 | #'
6 | #' @description Provides more user friendly printing of cacomp objects.
7 | #'
8 | #' @param object cacomp object to print
9 | #' @returns prints summary information about cacomp object.
10 | #' @export
11 | #' @examples
12 | #' # Simulate scRNAseq data.
13 | #' cnts <- data.frame(cell_1 = rpois(10, 5),
14 | #' cell_2 = rpois(10, 10),
15 | #' cell_3 = rpois(10, 20))
16 | #' rownames(cnts) <- paste0("gene_", 1:10)
17 | #' cnts <- as.matrix(cnts)
18 | #'
19 | #' # Run correspondence analysis.
20 | #' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
21 | #'
22 | #' ca
23 | show.cacomp <- function(object) {
24 |
25 |   # Comma-separated names of the slots in `slots` that are filled, or ""
26 |   # if none is. Replaces three copies of a paste0()/ifelse() construct.
27 |   filled_slots <- function(slots) {
28 |     is_filled <- vapply(slots,
29 |                         function(s) !is.empty(slot(object, s)),
30 |                         logical(1))
31 |     paste(slots[is_filled], collapse = ", ")
32 |   }
33 |
34 |   if (!is.empty(object@V) && !is.empty(object@U) && !is.empty(object@D)) {
35 |     cat("cacomp object with",
36 |         nrow(object@V),
37 |         "columns,",
38 |         nrow(object@U),
39 |         "rows and",
40 |         length(object@D),
41 |         "dimensions.")
42 |   } else {
43 |     cat("Incomplete cacomp object.",
44 |         "Consider running as.cacomp(object, recompute=TRUE).")
45 |   }
46 |
47 |   cat("\nCalc. standard coord.: ",
48 |       filled_slots(c("std_coords_rows", "std_coords_cols")))
49 |
50 |   cat("\nCalc. principal coord.:",
51 |       filled_slots(c("prin_coords_rows", "prin_coords_cols")))
52 |
53 |   cat("\nCalc. APL coord.: ",
54 |       filled_slots(c("apl_rows", "apl_cols")))
55 |
56 |   if (!is.empty(object@D)) {
57 |     prinInertia <- object@D^2
58 |     percentInertia <- prinInertia / sum(prinInertia) * 100
59 |     # Report at most the first two dimensions, but never index past the
60 |     # end of D: a one-dimensional object used to print "NA% Dim2".
61 |     shown <- seq_len(min(2L, length(percentInertia)))
62 |     cat("\nExplained inertia: ",
63 |         paste0(round(percentInertia[shown], 1),
64 |                "% Dim",
65 |                shown,
66 |                collapse = ", "))
67 |   }
68 |
69 |   # Terminate the output even when D is empty, so the console prompt does
70 |   # not end up on the last printed line.
71 |   cat("\n")
72 | }
73 |
74 | #' @rdname show.cacomp
75 | #' @export
76 | # S4 `show` method for cacomp objects: delegates to show.cacomp().
77 | setMethod(f = "show", signature = signature(object = "cacomp"),
78 |           definition = function(object) show.cacomp(object))
79 |
80 | #' Convert cacomp object to list.
81 | #'
82 | #' @param x A cacomp object.
83 | #' @return A named list with one element per slot of `x`; a slot that
84 | #' itself holds a cacomp object is converted to a list as well.
85 | #' @export
86 | #' @examples
87 | #' # Simulate counts
88 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
89 | #'                x = sample(1:100, 50, replace = TRUE))
90 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
91 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
92 | #'
93 | #' # Run correspondence analysis
94 | #' ca <- cacomp(obj = cnts, princ_coords = 3)
95 | #' ca_list <- as.list(ca)
96 | setMethod("as.list", signature(x = "cacomp"), function(x) {
97 |   slot_names <- slotNames(class(x))
98 |   contents <- lapply(slot_names, function(nm) {
99 |     value <- slot(x, nm)
100 |     # Nested cacomp objects are flattened recursively.
101 |     if (inherits(value, "cacomp")) as.list(value) else value
102 |   })
103 |
104 |   names(contents) <- slot_names
105 |   contents
106 | })
107 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://bioconductor.org/checkResults/release/bioc-LATEST/APL)
4 | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/APL)
5 | [](http://bioconductor.org/packages/stats/bioc/APL/)
6 | [](https://support.bioconductor.org/tag/APL)
7 | [](https://bioconductor.org/packages/release/bioc/html/APL.html#since)
8 | [](http://bioconductor.org/checkResults/devel/bioc-LATEST/APL/)
9 | [](https://bioconductor.org/packages/release/bioc/html/APL.html#since)
10 |
11 |
12 |
13 |
14 | # APL
15 |
16 | `APL` is a package developed for computation of Association Plots, a method for visualization and analysis of single cell transcriptomics data. The main focus of `APL` is the identification of genes characteristic for individual clusters of cells from input data.
17 |
18 | When working with the `APL` package, please cite:
19 | ```
20 | Gralinska, E., Kohl, C., Fadakar, B. S., & Vingron, M. (2022).
21 | Visualizing Cluster-specific Genes from Single-cell Transcriptomics Data Using Association Plots.
22 | Journal of Molecular Biology, 434(11), 167525.
23 | ```
24 |
25 | ## Installation
26 |
27 | `APL` can be installed from GitHub:
28 |
29 | library(devtools)
30 | install_github("VingronLab/APL")
31 |
32 |
33 | To additionally build the package vignette, run instead:
34 |
35 | install_github("VingronLab/APL", build_vignettes = TRUE, dependencies = TRUE)
36 |
37 |
38 | Building the vignette will however take considerable time.
39 |
40 | **The vignette can also be found under the link: https://vingronlab.github.io/APL/ (hyperlink in the GitHub repository description).**
41 |
42 | To install `APL` from Bioconductor, run:
43 |
44 | if (!requireNamespace("BiocManager", quietly = TRUE))
45 | install.packages("BiocManager")
46 |
47 | BiocManager::install("APL")
48 |
49 |
50 | ## Pytorch installation
51 |
52 | In order to speed up the singular value decomposition, we highly recommend the installation of `pytorch`.
53 | Users can instead also opt to use the slower R native SVD. For this, please set the argument `python = FALSE` wherever applicable in the package vignette.
54 |
55 | ### Install pytorch with reticulate
56 |
57 | library(reticulate)
58 | install_miniconda()
59 | conda_install(envname = "r-reticulate", packages = "numpy")
60 | conda_install(envname = "r-reticulate", packages = "pytorch")
61 |
62 | ### Manually install pytorch with conda
63 |
64 | Download the appropriate Miniconda installer for your system from [the conda website](https://docs.conda.io/en/latest/miniconda.html).
65 | Follow the installation instructions on their website and make sure the R package `reticulate` is also installed before proceeding.
66 | Once installed, list all available conda environments via
67 | `conda info --envs`
68 | One of the environments should have `r-reticulate` in its name. Depending on where
69 | you installed it and your system, the exact path might be different.
70 | Activate the environment and install pytorch into it.
71 |
72 | conda activate ~/.local/share/r-miniconda/envs/r-reticulate # change path accordingly.
73 | conda install numpy
74 | conda install pytorch
75 |
76 |
77 | ## Feature overview
78 |
79 | Please run
80 |
81 | vignette("APL")
82 |
83 | after installation with `build_vignettes = TRUE` for an introduction to the package.
84 |
--------------------------------------------------------------------------------
/man/as.cacomp.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/convert.R
3 | \name{as.cacomp}
4 | \alias{as.cacomp}
5 | \alias{as.cacomp,cacomp-method}
6 | \alias{as.cacomp,list-method}
7 | \alias{as.cacomp,Seurat-method}
8 | \alias{as.cacomp,SingleCellExperiment-method}
9 | \title{Create cacomp object from Seurat/SingleCellExperiment container}
10 | \usage{
11 | as.cacomp(obj, ...)
12 |
13 | \S4method{as.cacomp}{cacomp}(obj, ...)
14 |
15 | \S4method{as.cacomp}{list}(obj, ..., mat = NULL)
16 |
17 | \S4method{as.cacomp}{Seurat}(obj, ..., assay = "RNA", slot = "counts")
18 |
19 | \S4method{as.cacomp}{SingleCellExperiment}(obj, ..., assay = "counts")
20 | }
21 | \arguments{
22 | \item{obj}{An object of class "Seurat" or "SingleCellExperiment"
23 | with a dim. reduction named "CA" saved. For obj "cacomp" input is returned.}
24 |
25 | \item{...}{Further arguments.}
26 |
27 | \item{mat}{Original input matrix.}
28 |
29 | \item{assay}{Character. The assay from which to extract the count matrix,
30 | e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
31 | SingleCellExperiments.}
32 |
33 | \item{slot}{character. Slot of the Seurat assay to use. Default "counts".}
34 | }
35 | \value{
36 | A cacomp object.
37 | }
38 | \description{
39 | Converts the values stored in the Seurat/SingleCellExperiment dimensional
40 | reduction slot "CA" to a cacomp object.
41 | If recompute = TRUE additional parameters are recomputed from the saved
42 | values without rerunning SVD (need to specify assay to work).
43 |
44 | as.cacomp.cacomp returns input without any calculations.
45 |
46 | Recomputes missing values and returns cacomp object from a list.
47 | If you have a *complete* cacomp object in list form,
48 | use do.call(new_cacomp, obj).
49 |
50 | as.cacomp.Seurat: Converts the values stored in the Seurat DimReduc slot
51 | "CA" to a cacomp object.
52 |
53 | as.cacomp.SingleCellExperiment: Converts the values stored in the
54 | SingleCellExperiment reducedDim slot "CA" to a cacomp object.
55 | }
56 | \details{
57 | By default extracts std_coords_cols, D, prin_coords_rows, top_rows and dims
58 | from obj and outputs a cacomp object.
59 | If recompute = TRUE the following are additionally recalculated
60 | (doesn't run SVD):
61 | U, V, std_coords_rows, row_masses, col_masses.
62 | }
63 | \examples{
64 | #########
65 | # lists #
66 | #########
67 |
68 | # Simulate counts
69 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
70 | x = sample(1:100, 50, replace = TRUE))
71 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
72 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
73 |
74 | # Run correspondence analysis
75 | ca <- cacomp(obj = cnts, princ_coords = 3)
76 | ca_list <- as.list(ca)
77 |
78 | # Only keep subset of elements for demonstration
79 | ca_list <- ca_list[c("U", "std_coords_rows", "std_coords_cols", "params")]
80 |
81 | # convert (incomplete) list to cacomp object.
82 | ca <- as.cacomp(ca_list, mat = cnts)
83 |
84 | ##########
85 | # Seurat #
86 | ##########
87 | library(SeuratObject)
88 | set.seed(1234)
89 |
90 | # Simulate counts
91 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
92 | x = sample(1:100, 50, replace = TRUE))
93 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
94 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
95 |
96 | seu <- CreateSeuratObject(counts = cnts)
97 | seu <- cacomp(seu, return_input = TRUE)
98 |
99 | ca <- as.cacomp(seu, assay = "RNA", slot = "counts")
100 |
101 | ########################
102 | # SingleCellExperiment #
103 | ########################
104 | library(SingleCellExperiment)
105 | set.seed(1234)
106 |
107 | # Simulate counts
108 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
109 | x = sample(1:100, 50, replace = TRUE))
110 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
111 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
112 |
113 | sce <- SingleCellExperiment(assays=list(counts=cnts))
114 | sce <- cacomp(sce, return_input = TRUE)
115 |
116 | ca <- as.cacomp(sce, assay = "counts")
117 | }
118 |
--------------------------------------------------------------------------------
/man/ca_biplot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.R
3 | \name{ca_biplot}
4 | \alias{ca_biplot}
5 | \alias{ca_biplot,cacomp-method}
6 | \alias{ca_biplot,Seurat-method}
7 | \alias{ca_biplot,SingleCellExperiment-method}
8 | \title{Plot of 2D CA projection of the data.}
9 | \usage{
10 | ca_biplot(
11 | obj,
12 | xdim = 1,
13 | ydim = 2,
14 | princ_coords = 1,
15 | row_labels = NULL,
16 | col_labels = NULL,
17 | type = "ggplot",
18 | col_metadata = NULL,
19 | row_metadata = NULL,
20 | show_all = TRUE,
21 | ...
22 | )
23 |
24 | \S4method{ca_biplot}{cacomp}(
25 | obj,
26 | xdim = 1,
27 | ydim = 2,
28 | princ_coords = 1,
29 | row_labels = NULL,
30 | col_labels = NULL,
31 | type = "ggplot",
32 | col_metadata = NULL,
33 | row_metadata = NULL,
34 | show_all = TRUE,
35 | ...
36 | )
37 |
38 | \S4method{ca_biplot}{Seurat}(
39 | obj,
40 | xdim = 1,
41 | ydim = 2,
42 | princ_coords = 1,
43 | row_labels = NULL,
44 | col_labels = NULL,
45 | type = "ggplot",
46 | col_metadata = NULL,
47 | row_metadata = NULL,
48 | show_all = TRUE,
49 | ...,
50 | assay = SeuratObject::DefaultAssay(obj),
51 | slot = "counts"
52 | )
53 |
54 | \S4method{ca_biplot}{SingleCellExperiment}(
55 | obj,
56 | xdim = 1,
57 | ydim = 2,
58 | princ_coords = 1,
59 | row_labels = NULL,
60 | col_labels = NULL,
61 | type = "ggplot",
62 | col_metadata = NULL,
63 | row_metadata = NULL,
64 | show_all = TRUE,
65 | ...,
66 | assay = "counts"
67 | )
68 | }
69 | \arguments{
70 | \item{obj}{An object of class "cacomp" with the relevant standardized and
71 | principal coordinates calculated,
72 | or alternatively an object of class "Seurat" or "SingleCellExperiment"
73 | with a dim. reduction named "CA" saved.}
74 |
75 | \item{xdim}{Integer. The dimension for the x-axis. Default 1.}
76 |
77 | \item{ydim}{Integer. The dimension for the y-axis. Default 2.}
78 |
79 | \item{princ_coords}{Integer. If 1 then principal coordinates are used for
80 | the rows,
81 | if 2 for the columns. Default 1 (rows).}
82 |
83 | \item{row_labels}{Numeric vector. Indices for the rows for which a label
84 | should be added
85 | (label should be stored in rownames). Default NULL.}
86 |
87 | \item{col_labels}{Numeric vector. Indices for the columns for which a label
88 | should be added
89 | (label should be stored in colnames).
90 | Default NULL (no columns).}
91 |
92 | \item{type}{String. Type of plot to draw. Either "ggplot" or "plotly".
93 | Default "ggplot".}
94 |
95 | \item{col_metadata}{named vector of additional metadata to color points.
96 | The names of the elements in col_metadata should correspond to the column
97 | names in 'obj'. If NULL columns will be in a single color. Can also specify
98 | a metadata column for Seurat/SingleCellExperiment objects.}
99 |
100 | \item{row_metadata}{named vector of additional metadata to color points.
101 | The names of the elements in row_metadata should correspond to the row
102 | names in 'obj'. If NULL rows will be in a single color. Can also specify
103 | a metadata column for Seurat/SingleCellExperiment objects.}
104 |
105 | \item{show_all}{logical. If FALSE cells/genes that are not in col_metadata/
106 | row_metadata are not plotted. If *_metadata is NULL, the cell or genes
107 | respectively will still be plotted.}
108 |
109 | \item{...}{Further arguments.}
110 |
111 | \item{assay}{SingleCellExperiment assay for recomputation}
112 |
113 | \item{slot}{Seurat assay slot from which to get matrix.}
114 | }
115 | \value{
116 | Plot of class "plotly" or "ggplot".
117 | }
118 | \description{
119 | Plots the first 2 dimensions of the rows and columns in the same plot.
120 | }
121 | \details{
122 | Choosing type "plotly" will generate an interactive html plot with the
123 | package plotly.
124 | Type "ggplot" generates a static plot.
125 | Depending on whether `princ_coords` is set to 1 or 2, the principal
126 | coordinates of either the rows (1) or the columns (2)
127 | are chosen. For the other the standard coordinates are plotted
128 | (asymmetric biplot).
129 | Labels for rows and columns should be stored in the row and column names
130 | respectively.
131 | }
132 | \examples{
133 | # Simulate counts
134 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
135 | x = sample(1:100, 50, replace = TRUE))
136 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
137 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
138 |
139 | # Run correspondence analysis
140 | ca <- cacomp(obj = cnts, princ_coords = 3)
141 |
142 | ca_biplot(ca)
143 | }
144 |
--------------------------------------------------------------------------------
/man/pick_dims.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{pick_dims}
4 | \alias{pick_dims}
5 | \alias{pick_dims,cacomp-method}
6 | \alias{pick_dims,Seurat-method}
7 | \alias{pick_dims,SingleCellExperiment-method}
8 | \title{Compute statistics to help choose the number of dimensions}
9 | \usage{
10 | pick_dims(
11 | obj,
12 | mat = NULL,
13 | method = "scree_plot",
14 | reps = 3,
15 | python = FALSE,
16 | return_plot = FALSE,
17 | ...
18 | )
19 |
20 | \S4method{pick_dims}{cacomp}(
21 | obj,
22 | mat = NULL,
23 | method = "scree_plot",
24 | reps = 3,
25 | python = FALSE,
26 | return_plot = FALSE,
27 | ...
28 | )
29 |
30 | \S4method{pick_dims}{Seurat}(
31 | obj,
32 | mat = NULL,
33 | method = "scree_plot",
34 | reps = 3,
35 | python = FALSE,
36 | return_plot = FALSE,
37 | ...,
38 | assay = SeuratObject::DefaultAssay(obj),
39 | slot = "counts"
40 | )
41 |
42 | \S4method{pick_dims}{SingleCellExperiment}(
43 | obj,
44 | mat = NULL,
45 | method = "scree_plot",
46 | reps = 3,
47 | python = FALSE,
48 | return_plot = FALSE,
49 | ...,
50 | assay = "counts"
51 | )
52 | }
53 | \arguments{
54 | \item{obj}{A "cacomp" object as outputted from \code{cacomp()},
55 | a "Seurat" object with a "CA" DimReduc object stored,
56 | or a "SingleCellExperiment" object with a "CA" dim. reduction stored.}
57 |
58 | \item{mat}{A numeric matrix. For sequencing a count matrix, gene expression
59 | values with genes in rows and samples/cells in columns.
60 | Should contain row and column names.}
61 |
62 | \item{method}{String. Either "scree_plot", "avg_inertia", "maj_inertia" or
63 | "elbow_rule" (see Details section). Default "scree_plot".}
64 |
65 | \item{reps}{Integer. Number of permutations to perform when choosing
66 | "elbow_rule". Default 3.}
67 |
68 | \item{python}{DEPRECATED. A logical value indicating whether to use singular value
69 | decomposition from the python package torch.
70 | This implementation dramatically speeds up computation compared to \code{svd()}
71 | in R.}
72 |
73 | \item{return_plot}{TRUE/FALSE. Whether a plot should be returned when
74 | choosing "elbow_rule". Default FALSE.}
75 |
76 | \item{...}{Arguments forwarded to methods.}
77 |
78 | \item{assay}{Character. The assay from which to extract the count matrix
79 | for SVD, e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
80 | SingleCellExperiments.}
81 |
82 | \item{slot}{Character. Data slot of the Seurat assay.
83 | E.g. "data" or "counts". Default "counts".}
84 | }
85 | \value{
86 | For \code{avg_inertia}, \code{maj_inertia} and \code{elbow_rule} (when \code{return_plot=FALSE})
87 | returns an integer, indicating the suggested number of dimensions to use.
88 | \itemize{
89 | \item \code{scree_plot} returns a ggplot object.
90 | \item \code{elbow_rule} (for \code{return_plot=TRUE}) returns a list with two elements:
91 | "dims" contains the number of dimensions and "plot" a ggplot.
92 | }
93 | }
94 | \description{
95 | Allow the user to choose from 4 different methods ("avg_inertia",
96 | "maj_inertia", "scree_plot" and "elbow_rule")
97 | to estimate the number of dimensions that best represent the data.
98 | }
99 | \details{
100 | \itemize{
101 | \item "avg_inertia" calculates the number of dimensions in which the inertia is
102 | above the average inertia.
103 | \item "maj_inertia" calculates the number of dimensions which cumulatively
104 | explain up to 80\% of the total inertia.
105 | \item "scree_plot" plots a scree plot.
106 | \item "elbow_rule" formalization of the commonly used elbow rule. Permutes the
107 | rows for each column and reruns \code{cacomp()} for a total of \code{reps} times.
108 | The number of relevant dimensions is obtained from the point where the
109 | line for the explained inertia of the permuted data intersects with the
110 | actual data.
111 | }
112 | }
113 | \examples{
114 | # Simulate counts
115 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
116 | x = sample(1:20, 50, replace = TRUE))
117 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
118 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
119 |
120 | # Run correspondence analysis.
121 | ca <- cacomp(obj = cnts)
122 |
123 | # pick dimensions with the elbow rule. Returns list.
124 |
125 | set.seed(2358)
126 | pd <- pick_dims(obj = ca,
127 | mat = cnts,
128 | method = "elbow_rule",
129 | return_plot = TRUE,
130 | reps = 10)
131 | pd$plot
132 | ca_sub <- subset_dims(ca, dims = pd$dims)
133 |
134 | # pick dimensions which explain cumulatively >80\% of total inertia.
135 | # Returns vector.
136 | pd <- pick_dims(obj = ca,
137 | method = "maj_inertia")
138 | ca_sub <- subset_dims(ca, dims = pd)
139 |
140 | ################################
141 | # pick_dims for Seurat objects #
142 | ################################
143 | library(SeuratObject)
144 | set.seed(1234)
145 |
146 | # Simulate counts
147 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
148 | x = sample(1:20, 50, replace = TRUE))
149 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
150 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
151 |
152 | # Create Seurat object
153 | seu <- CreateSeuratObject(counts = cnts)
154 |
155 | # run CA and save in dim. reduction slot.
156 | seu <- cacomp(seu, return_input = TRUE, assay = "RNA", slot = "counts")
157 |
158 | # pick dimensions
159 | pd <- pick_dims(obj = seu,
160 | method = "maj_inertia",
161 | assay = "RNA",
162 | slot = "counts")
163 |
164 | ##############################################
165 | # pick_dims for SingleCellExperiment objects #
166 | ##############################################
167 | library(SingleCellExperiment)
168 | set.seed(1234)
169 |
170 | # Simulate counts
171 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
172 | x = sample(1:20, 50, replace = TRUE))
173 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
174 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
175 |
176 | # Create SingleCellExperiment object
177 | sce <- SingleCellExperiment(assays=list(counts=cnts))
178 |
179 | # run CA and save in dim. reduction slot.
180 | sce <- cacomp(sce, return_input = TRUE, assay = "counts")
181 |
182 | # pick dimensions
183 | pd <- pick_dims(obj = sce,
184 | method = "maj_inertia",
185 | assay = "counts")
186 | }
187 |
--------------------------------------------------------------------------------
/man/cacomp.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CA.R
3 | \name{cacomp}
4 | \alias{cacomp}
5 | \alias{cacomp,matrix-method}
6 | \alias{cacomp,dgCMatrix-method}
7 | \alias{cacomp,Seurat-method}
8 | \alias{cacomp,SingleCellExperiment-method}
9 | \title{Correspondence Analysis}
10 | \usage{
11 | cacomp(
12 | obj,
13 | coords = TRUE,
14 | princ_coords = 3,
15 | python = FALSE,
16 | dims = NULL,
17 | top = 5000,
18 | inertia = TRUE,
19 | rm_zeros = TRUE,
20 | residuals = "pearson",
21 | cutoff = NULL,
22 | clip = FALSE,
23 | ...
24 | )
25 |
26 | \S4method{cacomp}{matrix}(
27 | obj,
28 | coords = TRUE,
29 | princ_coords = 3,
30 | python = FALSE,
31 | dims = NULL,
32 | top = 5000,
33 | inertia = TRUE,
34 | rm_zeros = TRUE,
35 | residuals = "pearson",
36 | cutoff = NULL,
37 | clip = FALSE,
38 | ...
39 | )
40 |
41 | \S4method{cacomp}{dgCMatrix}(
42 | obj,
43 | coords = TRUE,
44 | princ_coords = 3,
45 | python = FALSE,
46 | dims = NULL,
47 | top = 5000,
48 | inertia = TRUE,
49 | rm_zeros = TRUE,
50 | residuals = "pearson",
51 | cutoff = NULL,
52 | clip = FALSE,
53 | ...
54 | )
55 |
56 | \S4method{cacomp}{Seurat}(
57 | obj,
58 | coords = TRUE,
59 | princ_coords = 3,
60 | python = FALSE,
61 | dims = NULL,
62 | top = 5000,
63 | inertia = TRUE,
64 | rm_zeros = TRUE,
65 | residuals = "pearson",
66 | cutoff = NULL,
67 | clip = FALSE,
68 | ...,
69 | assay = SeuratObject::DefaultAssay(obj),
70 | slot = "counts",
71 | return_input = FALSE
72 | )
73 |
74 | \S4method{cacomp}{SingleCellExperiment}(
75 | obj,
76 | coords = TRUE,
77 | princ_coords = 3,
78 | python = FALSE,
79 | dims = NULL,
80 | top = 5000,
81 | inertia = TRUE,
82 | rm_zeros = TRUE,
83 | residuals = "pearson",
84 | cutoff = NULL,
85 | clip = FALSE,
86 | ...,
87 | assay = "counts",
88 | return_input = FALSE
89 | )
90 | }
91 | \arguments{
92 | \item{obj}{A numeric matrix or Seurat/SingleCellExperiment object.
93 | For sequencing a count matrix, gene expression values with genes in rows
94 | and samples/cells in columns.
95 | Should contain row and column names.}
96 |
97 | \item{coords}{Logical. Indicates whether CA standard coordinates should be
98 | calculated.}
99 |
100 | \item{princ_coords}{Integer. Number indicating whether principal
101 | coordinates should be calculated for the rows (=1), columns (=2),
102 | both (=3) or none (=0).}
103 |
104 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
105 | decomposition from the python package torch.
106 | This implementation dramatically speeds up computation compared to `svd()`
107 | in R when calculating the full SVD. This parameter only works when dims==NULL
108 | or dims==rank(mat), where calculating a full SVD is required.}
109 |
110 | \item{dims}{Integer. Number of CA dimensions to retain. If NULL:
111 | (0.2 * min(nrow(A), ncol(A)) - 1 ).}
112 |
113 | \item{top}{Integer. Number of most variable rows to retain.
114 | Set NULL to keep all.}
115 |
116 | \item{inertia}{Logical. Whether total, row and column inertias should be
117 | calculated and returned.}
118 |
119 | \item{rm_zeros}{Logical. Whether rows & cols containing only 0s should be
120 | removed. Keeping zero only rows/cols might lead to unexpected results.}
121 |
122 | \item{residuals}{character string. Specifies which kind of residuals should
123 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
124 | negative-binomial.}
125 |
126 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
127 | -cutoff are clipped to cutoff.}
128 |
129 | \item{clip}{logical. Whether residuals should be clipped if they are
130 | higher/lower than a specified cutoff}
131 |
132 | \item{...}{Other parameters}
133 |
134 | \item{assay}{Character. The assay from which to extract the count matrix for
135 | SVD, e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
136 | SingleCellExperiments.}
137 |
138 | \item{slot}{character. The slot of the Seurat assay. Default "counts".}
139 |
140 | \item{return_input}{Logical. If TRUE returns the input
141 | (SingleCellExperiment/Seurat object) with the CA results saved in the
142 | reducedDim/DimReduc slot "CA".
143 | Otherwise returns a "cacomp". Default FALSE.}
144 | }
145 | \value{
146 | Returns a named list of class "cacomp" with components
147 | U, V and D: The results from the SVD.
148 | row_masses and col_masses: Row and columns masses.
149 | top_rows: How many of the most variable rows were retained for the analysis.
150 | tot_inertia, row_inertia and col_inertia: Only if inertia = TRUE.
151 | Total, row and column inertia respectively.
152 |
153 | If return_input = TRUE with Seurat container: Returns input obj of class
154 | "Seurat" with a new Dimensional Reduction Object named "CA".
155 | Standard coordinates of the cells are saved as embeddings,
156 | the principal coordinates of the genes as loadings and
157 | the singular values (= square root of principal inertias/eigenvalues)
158 | are stored as stdev.
159 | To recompute a regular "cacomp" object without rerunning cacomp use
160 | `as.cacomp()`.
161 |
162 | If return_input = TRUE for SingleCellExperiment input returns a
163 | SingleCellExperiment object with a matrix of standardized coordinates of
164 | the columns in
165 | reducedDim(obj, "CA"). Additionally, the matrix contains the following
166 | attributes:
167 | "prin_coords_rows": Principal coordinates of the rows.
168 | "singval": Singular values. For the explained inertia of each principal
169 | axis calculate singval^2.
170 | "percInertia": Percent explained inertia of each principal axis.
171 | To recompute a regular "cacomp" object from a SingleCellExperiment without
172 | rerunning cacomp use `as.cacomp()`.
173 | }
174 | \description{
175 | `cacomp` performs correspondence analysis on a matrix or
176 | Seurat/SingleCellExperiment object and returns the transformed data.
177 |
178 | `cacomp.seurat` performs correspondence analysis on an assay from a Seurat
179 | container and stores the standardized coordinates of the columns (= cells)
180 | and the principal coordinates of the rows (= genes) as a DimReduc Object in
181 | the Seurat container.
182 |
183 | `cacomp.SingleCellExperiment` performs correspondence analysis on an assay
184 | from a SingleCellExperiment and stores the standardized coordinates
185 | of the columns (= cells) and the principal coordinates of the rows
186 | (= genes) as a matrix in the SingleCellExperiment container.
187 | }
188 | \details{
189 | The calculation is performed according to the work of Michael Greenacre.
190 | Singular value decomposition can be performed either with the base R
191 | function `svd` or preferably by the faster pytorch implementation
192 | (python = TRUE). When working with large matrices, CA coordinates and
193 | principal coordinates should only be computed when needed to save
194 | computational time.
195 | }
196 | \examples{
197 | # Simulate scRNAseq data.
198 | cnts <- data.frame(cell_1 = rpois(10, 5),
199 | cell_2 = rpois(10, 10),
200 | cell_3 = rpois(10, 20))
201 | rownames(cnts) <- paste0("gene_", 1:10)
202 | cnts <- as.matrix(cnts)
203 |
204 | # Run correspondence analysis.
205 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
206 |
207 | ###########
208 | # Seurat #
209 | ###########
210 | library(SeuratObject)
211 | set.seed(1234)
212 |
213 | # Simulate counts
214 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
215 | x = sample(1:20, 50, replace = TRUE))
216 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
217 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
218 |
219 | # Create Seurat object
220 | seu <- CreateSeuratObject(counts = cnts)
221 |
222 | # Run CA and save in dim. reduction slot
223 | seu <- cacomp(seu, return_input = TRUE, assay = "RNA", slot = "counts")
224 |
225 | # Run CA and return cacomp object
226 | ca <- cacomp(seu, return_input = FALSE, assay = "RNA", slot = "counts")
227 |
228 | ########################
229 | # SingleCellExperiment #
230 | ########################
231 | library(SingleCellExperiment)
232 | set.seed(1234)
233 |
234 | # Simulate counts
235 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
236 | x = sample(1:20, 50, replace = TRUE))
237 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
238 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
239 | logcnts <- log2(cnts + 1)
240 |
241 | # Create SingleCellExperiment object
242 | sce <- SingleCellExperiment(assays=list(counts=cnts, logcounts=logcnts))
243 |
244 | # run CA and save in dim. reduction slot.
245 | sce <- cacomp(sce, return_input = TRUE, assay = "counts") # on counts
246 | sce <- cacomp(sce, return_input = TRUE, assay = "logcounts") # on logcounts
247 |
248 | # run CA and return cacomp object.
249 | ca <- cacomp(sce, return_input = FALSE, assay = "counts")
250 | }
251 | \references{
252 | Greenacre, M. Correspondence Analysis in Practice, Third Edition, 2017.
253 | }
254 |
--------------------------------------------------------------------------------
/man/runAPL.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apl.R
3 | \name{run_APL}
4 | \alias{run_APL}
5 | \alias{runAPL}
6 | \alias{runAPL,matrix-method}
7 | \alias{runAPL,SingleCellExperiment-method}
8 | \alias{runAPL,Seurat-method}
9 | \alias{runAPL,dgCMatrix-method}
10 | \title{Compute and plot Association Plot}
11 | \usage{
12 | run_APL(
13 | obj,
14 | group,
15 | caobj = NULL,
16 | dims = NULL,
17 | nrow = 10,
18 | top = 5000,
19 | clip = FALSE,
20 | score = TRUE,
21 | score_method = "permutation",
22 | mark_rows = NULL,
23 | mark_cols = NULL,
24 | reps = 3,
25 | python = FALSE,
26 | row_labs = TRUE,
27 | col_labs = TRUE,
28 | type = "plotly",
29 | show_cols = FALSE,
30 | show_rows = TRUE,
31 | score_cutoff = 0,
32 | score_color = "rainbow",
33 | pd_method = "elbow_rule",
34 | pd_reps = 1,
35 | pd_use = TRUE
36 | )
37 |
38 | runAPL(
39 | obj,
40 | group,
41 | caobj = NULL,
42 | dims = NULL,
43 | nrow = 10,
44 | top = 5000,
45 | clip = FALSE,
46 | score = TRUE,
47 | score_method = "permutation",
48 | mark_rows = NULL,
49 | mark_cols = caobj@group,
50 | reps = 3,
51 | python = FALSE,
52 | row_labs = TRUE,
53 | col_labs = TRUE,
54 | type = "plotly",
55 | show_cols = FALSE,
56 | show_rows = TRUE,
57 | score_cutoff = 0,
58 | score_color = "rainbow",
59 | pd_method = "elbow_rule",
60 | pd_reps = 1,
61 | pd_use = TRUE,
62 | ...
63 | )
64 |
65 | \S4method{runAPL}{matrix}(
66 | obj,
67 | group,
68 | caobj = NULL,
69 | dims = NULL,
70 | nrow = 10,
71 | top = 5000,
72 | clip = FALSE,
73 | score = TRUE,
74 | score_method = "permutation",
75 | mark_rows = NULL,
76 | mark_cols = NULL,
77 | reps = 3,
78 | python = FALSE,
79 | row_labs = TRUE,
80 | col_labs = TRUE,
81 | type = "plotly",
82 | show_cols = FALSE,
83 | show_rows = TRUE,
84 | score_cutoff = 0,
85 | score_color = "rainbow",
86 | pd_method = "elbow_rule",
87 | pd_reps = 1,
88 | pd_use = TRUE,
89 | ...
90 | )
91 |
92 | \S4method{runAPL}{SingleCellExperiment}(
93 | obj,
94 | group,
95 | caobj = NULL,
96 | dims = NULL,
97 | nrow = 10,
98 | top = 5000,
99 | clip = FALSE,
100 | score = TRUE,
101 | score_method = "permutation",
102 | mark_rows = NULL,
103 | mark_cols = NULL,
104 | reps = 3,
105 | python = FALSE,
106 | row_labs = TRUE,
107 | col_labs = TRUE,
108 | type = "plotly",
109 | show_cols = FALSE,
110 | show_rows = TRUE,
111 | score_cutoff = 0,
112 | score_color = "rainbow",
113 | pd_method = "elbow_rule",
114 | pd_reps = 1,
115 | pd_use = TRUE,
116 | ...,
117 | assay = "counts"
118 | )
119 |
120 | \S4method{runAPL}{Seurat}(
121 | obj,
122 | group,
123 | caobj = NULL,
124 | dims = NULL,
125 | nrow = 10,
126 | top = 5000,
127 | clip = FALSE,
128 | score = TRUE,
129 | score_method = "permutation",
130 | mark_rows = NULL,
131 | mark_cols = NULL,
132 | reps = 3,
133 | python = FALSE,
134 | row_labs = TRUE,
135 | col_labs = TRUE,
136 | type = "plotly",
137 | show_cols = FALSE,
138 | show_rows = TRUE,
139 | score_cutoff = 0,
140 | score_color = "rainbow",
141 | pd_method = "elbow_rule",
142 | pd_reps = 1,
143 | pd_use = TRUE,
144 | ...,
145 | assay = SeuratObject::DefaultAssay(obj),
146 | slot = "counts"
147 | )
148 |
149 | \S4method{runAPL}{dgCMatrix}(
150 | obj,
151 | group,
152 | caobj = NULL,
153 | dims = NULL,
154 | nrow = 10,
155 | top = 5000,
156 | clip = FALSE,
157 | score = TRUE,
158 | score_method = "permutation",
159 | mark_rows = NULL,
160 | mark_cols = NULL,
161 | reps = 3,
162 | python = FALSE,
163 | row_labs = TRUE,
164 | col_labs = TRUE,
165 | type = "plotly",
166 | show_cols = FALSE,
167 | show_rows = TRUE,
168 | score_cutoff = 0,
169 | score_color = "rainbow",
170 | pd_method = "elbow_rule",
171 | pd_reps = 1,
172 | pd_use = TRUE,
173 | ...
174 | )
175 | }
176 | \arguments{
177 | \item{obj}{A numeric matrix. For sequencing usually a count matrix,
178 | gene expression values with genes in rows and samples/cells in columns.
179 | Should contain row and column names.}
180 |
181 | \item{group}{Numeric/Character. Vector of indices or column names of
182 | the columns to calculate centroid/x-axis direction.}
183 |
184 | \item{caobj}{A "cacomp" object as returned by `cacomp()`. If not supplied,
185 | it will be calculated. Default NULL.}
186 |
187 | \item{dims}{Integer. Number of CA dimensions to retain. If NULL:
188 | (0.2 * min(nrow(A), ncol(A)) - 1 ).}
189 |
190 | \item{nrow}{Integer. The top nrow scored row labels will be added to the
191 | plot if score = TRUE. Default 10.}
192 |
193 | \item{top}{Integer. Number of most variable rows to retain.
194 | Set NULL to keep all.}
195 |
196 | \item{clip}{logical. Whether residuals should be clipped if they are
197 | higher/lower than a specified cutoff}
198 |
199 | \item{score}{Logical. Whether rows should be scored and ranked. Ignored when
200 | a vector is supplied to mark_rows. Default TRUE.}
201 |
202 | \item{score_method}{Method to calculate the cutoff. Either "random" for random
203 | direction method or "permutation" for the permutation method.}
204 |
205 | \item{mark_rows}{Character vector. Names of rows that should be highlighted
206 | in the plot. If not NULL, score is ignored. Default NULL.}
207 |
208 | \item{mark_cols}{Character vector. Names of cols that should be highlighted
209 | in the plot.}
210 |
211 | \item{reps}{Integer. Number of permutations during scoring. Default 3.}
212 |
213 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
214 | decomposition from the python package torch.
215 | This implementation dramatically speeds up computation compared to `svd()`
216 | in R when calculating the full SVD. This parameter only works when dims==NULL
217 | or dims==rank(mat), where calculating a full SVD is demanded.}
218 |
219 | \item{row_labs}{Logical. Whether labels for rows indicated by rows_idx
220 | should be labeled with text. Default TRUE.}
221 |
222 | \item{col_labs}{Logical. Whether labels for columns indicated by cols_idx
223 | should be labeled with text. Default TRUE.}
224 |
225 | \item{type}{"ggplot"/"plotly". For a static plot a string "ggplot",
226 | for an interactive plot "plotly". Default "plotly".}
227 |
228 | \item{show_cols}{Logical. Whether column points should be plotted.}
229 |
230 | \item{show_rows}{Logical. Whether row points should be plotted.}
231 |
232 | \item{score_cutoff}{Numeric. Rows (genes) with a score >= score_cutoff will
233 | be colored according to their score if show_score = TRUE.}
234 |
235 | \item{score_color}{Either "rainbow" or "viridis".}
236 |
237 | \item{pd_method}{Which method to use for pick_dims (\link[APL]{pick_dims}).}
238 |
239 | \item{pd_reps}{Number of repetitions performed when using "elbow_rule" in
240 | `pick_dims`.
241 | (\link[APL]{pick_dims})}
242 |
243 | \item{pd_use}{Whether to use `pick_dims` (\link[APL]{pick_dims}) to determine
244 | the number of dimensions. Ignored when `dims` is set by the user.}
245 |
246 | \item{...}{Arguments forwarded to methods.}
247 |
248 | \item{assay}{Character. The assay from which to extract the count matrix for
249 | SVD, e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
250 | SingleCellExperiments.}
251 |
252 | \item{slot}{character. The Seurat assay slot from which to extract the
253 | count matrix.}
254 | }
255 | \value{
256 | Association Plot (plotly or ggplot object, depending on `type`).
257 | }
258 | \description{
259 | Computes singular value decomposition and coordinates for
260 | the Association Plot.
261 |
262 | runAPL.SingleCellExperiment: Computes singular value decomposition and
263 | coordinates for the Association Plot from SingleCellExperiment objects with
264 | reducedDim(obj, "CA") slot (optional).
265 |
266 | runAPL.Seurat: Computes singular value decomposition and coordinates for
267 | the Association Plot from Seurat objects, optionally with a DimReduc Object
268 | in the "CA" slot.
269 | }
270 | \details{
271 | The function is a wrapper that calls `cacomp()`, `apl_coords()`,
272 | `apl_score()` and finally `apl()` for ease of use.
273 | The chosen defaults are most useful for genomics experiments, but for more
274 | fine grained control the functions
275 | can be also run individually for the same results.
276 | If score = FALSE, nrow and reps are ignored. If mark_rows is not NULL score
277 | is treated as if FALSE.
278 | }
279 | \examples{
280 | set.seed(1234)
281 |
282 | # Simulate counts
283 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
284 | x = sample(1:100, 50, replace = TRUE))
285 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
286 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
287 |
288 | # (nonsensical) APL
289 | APL:::run_APL(obj = cnts,
290 | group = 1:10,
291 | dims = 10,
292 | top = 500,
293 | score = TRUE,
294 | show_cols = TRUE,
295 | type = "ggplot")
296 | set.seed(1234)
297 |
298 | # Simulate counts
299 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
300 | x = sample(1:100, 50, replace = TRUE))
301 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
302 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
303 |
304 | # (nonsensical) APL
305 | runAPL(obj = cnts,
306 | group = 1:10,
307 | dims = 10,
308 | top = 500,
309 | score = TRUE,
310 | show_cols = TRUE,
311 | type = "ggplot")
312 |
313 | ########################
314 | # SingleCellExperiment #
315 | ########################
316 | library(SingleCellExperiment)
317 | set.seed(1234)
318 |
319 | # Simulate counts
320 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
321 | x = sample(1:100, 50, replace = TRUE))
322 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
323 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
324 |
325 | sce <- SingleCellExperiment(assays=list(counts=cnts))
326 |
327 | # (nonsensical) APL
328 | runAPL(obj = sce,
329 | group = 1:10,
330 | dims = 10,
331 | top = 500,
332 | score = TRUE,
333 | show_cols = TRUE,
334 | type = "ggplot",
335 | assay = "counts")
336 |
337 | ###########
338 | # Seurat #
339 | ###########
340 | library(SeuratObject)
341 | set.seed(1234)
342 |
343 | # Simulate counts
344 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
345 | x = sample(1:100, 50, replace = TRUE))
346 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
347 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
348 |
349 | seu <- CreateSeuratObject(counts = cnts)
350 |
351 | # (nonsensical) APL
352 | runAPL(obj = seu,
353 | group = 1:10,
354 | dims = 10,
355 | top = 500,
356 | score = TRUE,
357 | show_cols = TRUE,
358 | type = "ggplot",
359 | assay = "RNA",
360 | slot = "counts")
361 | set.seed(1234)
362 |
363 | # Simulate counts
364 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
365 | x = sample(seq(0.01,0.1,by=0.01), 50, replace = TRUE))
366 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
367 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
368 | cnts <- Matrix::Matrix(cnts)
369 |
370 | # (nonsensical) APL
371 | runAPL(obj = cnts,
372 | group = 1:10,
373 | dims = 10,
374 | top = 500,
375 | score = TRUE,
376 | show_cols = TRUE,
377 | type = "ggplot")
378 | }
379 | \references{
380 | Association Plots: Visualizing associations in high-dimensional
381 | correspondence analysis biplots \cr
382 | Elzbieta Gralinska, Martin Vingron \cr
383 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096 \cr
384 | }
385 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/input_data.tsv:
--------------------------------------------------------------------------------
1 | Country.Name UEM.M.2015 UEM.F.2015 UEM.M.2010 UEM.F.2010 UEM.M.2000 AGR.M.2015 AGR.F.2015 AGR.M.2010 AGR.F.2010 AGR.M.2000 IND.M.2015 IND.F.2015 IND.M.2010 IND.F.2010 IND.M.2000 SRV.M.2015 SRV.F.2015 SRV.M.2010 SRV.F.2010 SRV.M.2000
2 | Afghanistan 10.6840000152588 14.4270000457764 10.878999710083 14.8149995803833 10.9519996643066 37.8530144169035 58.5918341132492 45.9080105940595 62.1560903177743 56.5846601919507 14.2539401104799 18.7687265630838 12.5339775963636 13.8681186532326 8.56641753038635 37.2090454573578 8.21329616999908 30.6799036698646 9.16164689261626 23.8960300748246
3 | Angola 6.88299989700317 7.68100023269653 7.94799995422363 10.9219999313354 4.83599996566772 40.7833840176982 53.0400341192767 39.1699653458642 49.4373997474282 28.2151745299088 14.9266557558941 1.08105549079682 13.1395026410404 1.03508630812801 14.7837272601248 37.4069621054705 38.1979088365961 39.7416102887755 38.6055150750009 52.1651000594079
4 | Albania 17.068000793457 17.0979995727539 12.7620000839233 15.8809995651245 16.3409996032715 30.5040491882837 39.6420792220923 29.8886125356392 44.6554113386461 36.4117418774221 18.2201596562287 11.516746063801 24.1370091778299 8.9527849210664 14.9247657984299 34.207791943833 31.743176722583 33.2123782026077 30.5108025707205 32.3233288513279
5 | Arab World 8.19390895838925 20.0161802509865 6.9908812598603 17.9412175476643 11.1299650216614 17.8642307423098 24.6449124655064 19.672914073688 28.4233914901091 24.9142486487184 25.777529922088 7.68758538616177 26.2659536398542 8.43280133428351 20.4083797382762 48.164425070158 47.6515339471448 47.0699117483611 45.2025348058708 43.5474153392269
6 | United Arab Emirates 1.37399995326996 4.70300006866455 2.1010000705719 5.88299989700317 2.18600010871887 2.31771090704107 0.01238861024668 3.72016195063638 0.019764569755163 8.76217828936375 40.5599425192177 5.87315420755638 40.0132808644816 7.06065721129623 35.2687931646872 55.7483480313275 89.4114548308246 54.1655578145382 87.0365757151367 53.7830284372302
7 | Argentina 6.96500015258789 8.85099983215332 6.66699981689453 9.19600009918213 14.0200004577637 0.365627555611086 0.06562727729574 1.75839367093423 0.347779307279997 0.844323589764976 31.7854085496292 7.89076856900818 30.6925555436508 8.5755299723227 27.1559227405518 60.8839658493927 83.1926073368043 60.8811191525665 81.879780130292 57.980610592186
8 | Armenia 17.3859996795654 19.2520008087158 17.0849990844727 21.2849998474121 8.03800010681152 25.7987005495129 32.3646065908367 26.2351360596215 37.2101543676712 35.7070045046545 19.1168791699127 6.08355419438523 22.1424509312325 4.58436151580181 20.8854902026784 37.6984206010089 42.2998380210259 34.53658523013 36.9212698606949 35.3695034318197
9 | Australia 6.03900003433228 6.07299995422363 5.07600021362305 5.3769998550415 6.45499992370605 3.10729019538592 1.73858872516368 3.80265547127396 2.16024308609024 5.57902401508923 27.5719157008897 7.19386885826237 29.5175676108789 8.14987892307449 28.4984844159813 63.2817929492897 84.9945403349258 61.6037798726544 84.3128745262128 59.4674911991661
10 | Austria 6.08500003814697 5.30800008773804 4.98400020599365 4.62900018692017 4.75899982452393 4.52200730190182 3.98558638106129 5.13371435288384 4.761874049772 5.52207305574734 35.4895407112927 11.494661811554 34.6456851937484 10.9390539332283 40.2555119165146 53.9034532921234 79.2117521711732 55.2375498854336 79.6700745586658 49.4643666358969
11 | Azerbaijan 4.08799982070923 5.86700010299683 4.40000009536743 6.92500019073486 10.956000328064 29.713537216494 39.5932818623105 30.9801342769566 41.4435055627308 32.5553774537565 20.8925119064255 5.53219626749309 19.8570758926669 5.67385216944981 12.2551252604439 45.3069096495127 49.0075226649218 44.7627897350092 45.9576416332684 44.2334961085458
12 | Burundi 2.01500010490417 1.12600004673004 2.26600003242493 1.2940000295639 2.41199994087219 85.6976800111714 95.0287901554352 85.4850005552875 95.0992833942153 85.1367445108122 2.88761797307329 0.438011808713489 3.12944258389122 0.494517047143879 3.56293790449487 9.39872119741325 3.40719813645502 9.11955985760446 3.11318728236891 8.88733950787196
13 | Belgium 9.09200000762939 7.76800012588501 8.10799980163574 8.51599979400635 5.30800008773804 1.42180112941731 0.672371253574105 1.58973162095871 0.814207588746452 2.30101566108169 29.7769163685032 7.71059487282257 31.493225719076 9.21518329887166 34.2321058130084 59.7083746724182 83.8490362765479 58.8090432965048 81.4536972711161 58.1588782124085
14 | Benin 2.39499998092651 2.77600002288818 0.935000002384186 1.14600002765656 1.11300003528595 47.5951266358878 32.3668421884252 50.655895980188 36.3584999831141 52.9193774275583 19.9416773156035 16.3141878637436 19.7050198902156 17.3538199968234 19.6221478104001 30.0691715814223 48.5429717793437 28.7040841272121 45.1426679893845 26.3454747267556
15 | Burkina Faso 4.01800012588501 9.38799953460693 3.55900001525879 6.20900011062622 2.43199992179871 34.1551937764624 18.7358433777082 53.6433784810451 35.8900629300036 82.7396173319413 30.0481246462594 29.7533551454923 18.0942595783406 22.1506209877055 4.65301797583635 31.7796407441471 42.1228002139059 24.7033619253556 35.7503159716647 10.1753666313856
16 | Bangladesh 3.16100001335144 7.46700000762939 3.0090000629425 4.44000005722046 3.25200009346008 34.6034815201925 59.2229696385192 39.7973485445393 62.675890312812 58.7560268489109 20.9695164069447 14.2167704153356 18.629062068294 12.4447785665381 10.9305893542175 41.2650342014641 19.0941847602169 38.5645930241373 20.4383732543633 27.0613837034115
17 | Bulgaria 9.77600002288818 8.41600036621094 10.8570003509521 9.60599994659424 16.5620002746582 8.19955738637818 3.96467134065307 7.32755480918923 4.70139213588005 12.8653049039204 32.6728182406129 20.8271166440149 36.3132923914448 21.7587389804456 31.3760254931073 49.3516269314502 66.7922112124146 45.50215159828 63.9329620453712 39.1966701240408
18 | Bahrain 0.404000014066696 3.74499988555908 0.442999988794327 3.7039999961853 0.649999976158142 1.34454602355564 0.049090049140406 1.34501502172946 0.048148000719368 2.00388946068692 41.9538195811717 8.85738478740921 42.4152643759212 8.70804749340221 32.3414069024451 56.2976359246658 87.3485265400852 55.7977176542525 87.5398024900401 65.0047053187919
19 | Bahamas, The 11.0880002975464 13.003999710083 14.6370000839233 15.0539999008179 5.99100017547607 3.73963857520798 0.334934592819595 4.11961855830491 0.411138638252244 6.36628931391194 22.2626752120574 2.65859776388191 23.4440948961179 3.04276573813465 25.5290841214494 62.9096829474301 84.0024662738985 57.7984333005678 81.4920941278947 62.1145646179444
20 | Bosnia and Herzegovina 25.7479991912842 30.6539993286133 25.6830005645752 29.9309997558594 23.1609992980957 12.9220756101012 12.2645342501349 13.3049723182051 15.9484049210435 17.859688956593 28.9226391057519 11.8345884639975 29.2905576493788 11.420546200109 28.3451389220266 32.4080282049924 45.2461862008465 31.7214666328724 42.700047786528 30.6334048553668
21 | Belarus 7.54400014877319 4.26200008392334 7.49100017547607 4.74100017547607 13.7880001068115 11.8639536963483 6.34934412521728 13.0927986322389 7.57785336336403 16.543219949139 38.8537077692056 19.4702369406733 39.4782149693279 20.8055177737547 33.0459227840703 41.7383375039438 69.9184170241287 39.9379862229572 66.8756305043265 36.6228571599792
22 | Belize 5.43100023269653 11.0649995803833 5.87599992752075 12.956000328064 7.23000001907349 24.4943157494942 4.06966580918732 25.4520712042915 3.3929751088723 32.2097451258239 18.6499530790336 7.1112424673151 19.2295335020512 8.06201532266494 18.4751463455362 51.4247291350152 77.7540934153397 49.4423953661366 75.5890061274627 42.085112048461
23 | Bolivia 2.56399989128113 3.79699993133545 2.15499997138977 3.07699990272522 2.32699990272522 26.8367967963018 27.2244867839196 28.8975426966287 29.6516538190697 39.243059350225 29.5728002294542 10.5688616462612 27.7214461247441 10.2670537996148 24.4514592318484 41.0264012245186 58.4096516384838 41.2260112072374 57.0042924785903 33.9784815152013
24 | Brazil 7.21299982070923 10.0550003051758 5.60599994659424 10.6359996795654 7.90399980545044 12.940075137032 4.51883693563766 15.6609081743726 6.23760723941117 19.1080773755702 27.6254742288144 10.4875868271713 27.7962016710379 11.2670129831341 25.0206406583239 52.2214499285586 74.9376786916657 50.9359449860937 71.8593822284931 47.9663599499601
25 | Barbados 12.3529996871948 10.3219995498657 10.9020004272461 10.4720001220703 7.43800020217896 3.70133286667484 1.39897675570793 3.35364864953735 1.69476504366585 4.60403384875786 25.4220117051662 8.68979786731415 25.4713355332621 7.82922348686829 27.5890294337571 58.5236578306314 79.5892253994942 60.273017089364 80.0040108137672 60.3689373980461
26 | Brunei Darussalam 7.26999998092651 8.79800033569336 5.87599992752075 7.96700000762939 4.51399993896484 0.88278962052145 0.416793126941428 0.739814653136596 0.517225438453593 1.32439078715342 21.4187750082203 9.38103791508986 23.8933776338316 9.63493464281319 28.2056091354989 70.4284321845913 81.4041722916177 69.4908102540112 81.8808413922143 65.9550429571979
27 | Bhutan 1.89600002765656 3.24499988555908 2.75699996948242 4.05399990081787 1.22200000286102 51.2946567369433 64.1505020656227 53.3552892864206 63.1852415861599 57.4966984192026 9.74761331001144 8.93629186962433 6.38011310587837 6.44853085332855 8.03460234643553 37.0617289897961 23.6682080246491 37.5066248150085 26.312230862237 33.2457129361251
28 | Botswana 14.8100004196167 20.8439998626709 14.6619997024536 21.5359992980957 14.7600002288818 22.4254149825805 13.954411034723 24.444216036958 15.4111149645212 18.8320735006111 21.7737120057491 7.88631252920792 20.934264970185 8.10533121264347 22.6900350968499 40.9908725920536 57.3144831835424 39.9586663791761 54.9475545247397 43.7178895478333
29 | Central African Republic 3.94400000572205 3.76900005340576 4.01599979400635 3.82100009918213 4.08400011062622 71.9738010565036 79.6860033622247 72.024476007845 79.9747624084491 72.8395712908062 7.24358285330538 2.58572700343874 8.32085266296868 3.06811015187016 8.81372135423733 16.8386174585611 13.9592684337679 15.6386724505547 13.1361277991158 14.2627090737827
30 | Canada 7.45900011062622 6.28499984741211 8.78899955749512 7.23299980163574 6.94199991226196 2.04423078993088 0.951207238142733 2.22463620162913 1.03064131823743 3.01880148379753 27.9075889553813 8.0135693270031 27.9798870385329 7.92601219261315 30.9808692188154 62.589182350411 84.7502214648194 61.0064795944466 83.8103474616218 59.0583298288602
31 | Central Europe and the Baltics 7.74020473088915 7.90567223682592 9.96677631874455 9.81944761452723 11.9682940640283 11.5688662629283 9.20035554902194 12.5609959883593 11.5927911055313 18.8309308218994 37.5896041916351 17.2087211486188 36.9529718789418 16.9165246743873 34.4635746890898 43.1009681723622 65.6852633119561 40.519477755266 61.6714565500074 34.7367773326694
32 | Switzerland 4.7039999961853 4.91200017929077 4.48400020599365 5.18800020217896 2.29699993133545 3.74513286376686 2.73092733490052 3.94958681010094 2.53811727798916 5.38441248659413 28.5783177465053 9.60103545797122 30.8402042936878 10.1572098204503 34.1344974023386 62.9715985474426 82.7560354408853 60.7252522330816 82.1166736035794 58.1850690024549
33 | Channel Islands 7.89499998092651 8.32600021362305 8.26599979400635 8.56599998474121 8.1899995803833 4.83551250100136 2.26984829940678 5.48477595630134 2.82531052199306 6.66540624060059 32.3868794983221 10.8028644985235 34.0048767603197 12.3161600462105 38.8741895091103 54.8816839621708 78.6012854584717 52.2443474893725 76.2934456827455 46.2704029187691
34 | Chile 6.06400012969971 7.13700008392334 7.44299983978271 9.90900039672852 9.43200016021729 11.9759002436269 4.20947977911087 13.0718252667967 4.63157810402245 17.678873983313 29.7561067219155 10.4248002147113 28.8065144113413 9.25324648677127 26.3833645031928 52.2039938006014 78.2287172654227 50.6786613647715 76.2061724349536 46.5057613532769
35 | China 5.11499977111816 4.01200008392334 5.0019998550415 3.92300009727478 3.61199998855591 29.4561000646645 24.4548620502283 37.2563142762865 32.2261471314498 51.6157732707462 29.2663293402916 26.0108278071535 27.8448646052482 26.8381491674875 20.0246066346865 36.1616224951424 45.522308227869 29.8968212634238 37.0136656785436 24.7476182675561
36 | Cote d'Ivoire 2.5220000743866 3.94799995422363 6.76200008392334 6.61499977111816 4.58099985122681 47.171552058119 34.9715730777324 47.0068703791369 39.4560959337257 49.0663591009697 12.8105589435857 10.092183615497 10.7410180171417 10.3769416600407 12.4483623080556 37.4968613092001 50.988243352547 35.4891796519704 43.5519635257042 33.9042805597209
37 | Cameroon 3.06299996376038 4.00600004196167 3.41300010681152 4.89900016784668 8.4680004119873 40.5565032880577 49.8170461303385 48.0365796469669 58.1961060694681 57.8253400413932 16.9232616438237 10.3491132673038 13.7481937637596 9.33986928783898 10.8117600427362 39.4572351043582 35.8278423913364 34.8022292458383 27.5640730802995 22.8958143210788
38 | Congo, Dem. Rep. 5.19600009918213 3.56200003623962 4.72800016403198 3.19199991226196 3.53399991989136 56.5562725027826 72.0112166573572 59.4430603404656 75.3756770074007 62.6701049092212 14.5125959359357 3.36857936816469 13.1199070101871 3.16562158440437 12.3968455083798 23.7360780772033 21.0582020988296 22.7090343024846 18.2667010343166 21.3990533423936
39 | Congo, Rep. 9.0930004119873 10.7670001983643 12.4320001602173 15.5559997558594 18.6159992218018 31.500185315006 31.4938938062558 32.0253676721182 32.3673868397949 32.7342728923526 21.7085907386955 19.7463702674023 20.7273456288856 19.4052308695495 19.0975692997385 37.6982252682246 37.9927340259931 34.8152848685518 32.6713825347961 29.5521585861072
40 | Colombia 6.36199998855591 10.8400001525879 8.63500022888184 14.1890001296997 17.3519992828369 20.9589929996166 6.43735180191803 23.9476804943024 6.00934413433163 26.6986108578931 22.5227476712479 12.4815080181137 20.8759896481155 13.6113411987855 15.8882524271435 50.1553216889453 70.2411379016403 46.5413296287003 66.1903165830769 40.061139008512
41 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/input_data.txt:
--------------------------------------------------------------------------------
1 | Country.Name UEM.M.2015 UEM.F.2015 UEM.M.2010 UEM.F.2010 UEM.M.2000 AGR.M.2015 AGR.F.2015 AGR.M.2010 AGR.F.2010 AGR.M.2000 IND.M.2015 IND.F.2015 IND.M.2010 IND.F.2010 IND.M.2000 SRV.M.2015 SRV.F.2015 SRV.M.2010 SRV.F.2010 SRV.M.2000
2 | Afghanistan 10.6840000152588 14.4270000457764 10.878999710083 14.8149995803833 10.9519996643066 37.8530144169035 58.5918341132492 45.9080105940595 62.1560903177743 56.5846601919507 14.2539401104799 18.7687265630838 12.5339775963636 13.8681186532326 8.56641753038635 37.2090454573578 8.21329616999908 30.6799036698646 9.16164689261626 23.8960300748246
3 | Angola 6.88299989700317 7.68100023269653 7.94799995422363 10.9219999313354 4.83599996566772 40.7833840176982 53.0400341192767 39.1699653458642 49.4373997474282 28.2151745299088 14.9266557558941 1.08105549079682 13.1395026410404 1.03508630812801 14.7837272601248 37.4069621054705 38.1979088365961 39.7416102887755 38.6055150750009 52.1651000594079
4 | Albania 17.068000793457 17.0979995727539 12.7620000839233 15.8809995651245 16.3409996032715 30.5040491882837 39.6420792220923 29.8886125356392 44.6554113386461 36.4117418774221 18.2201596562287 11.516746063801 24.1370091778299 8.9527849210664 14.9247657984299 34.207791943833 31.743176722583 33.2123782026077 30.5108025707205 32.3233288513279
5 | Arab World 8.19390895838925 20.0161802509865 6.9908812598603 17.9412175476643 11.1299650216614 17.8642307423098 24.6449124655064 19.672914073688 28.4233914901091 24.9142486487184 25.777529922088 7.68758538616177 26.2659536398542 8.43280133428351 20.4083797382762 48.164425070158 47.6515339471448 47.0699117483611 45.2025348058708 43.5474153392269
6 | United Arab Emirates 1.37399995326996 4.70300006866455 2.1010000705719 5.88299989700317 2.18600010871887 2.31771090704107 0.01238861024668 3.72016195063638 0.019764569755163 8.76217828936375 40.5599425192177 5.87315420755638 40.0132808644816 7.06065721129623 35.2687931646872 55.7483480313275 89.4114548308246 54.1655578145382 87.0365757151367 53.7830284372302
7 | Argentina 6.96500015258789 8.85099983215332 6.66699981689453 9.19600009918213 14.0200004577637 0.365627555611086 0.06562727729574 1.75839367093423 0.347779307279997 0.844323589764976 31.7854085496292 7.89076856900818 30.6925555436508 8.5755299723227 27.1559227405518 60.8839658493927 83.1926073368043 60.8811191525665 81.879780130292 57.980610592186
8 | Armenia 17.3859996795654 19.2520008087158 17.0849990844727 21.2849998474121 8.03800010681152 25.7987005495129 32.3646065908367 26.2351360596215 37.2101543676712 35.7070045046545 19.1168791699127 6.08355419438523 22.1424509312325 4.58436151580181 20.8854902026784 37.6984206010089 42.2998380210259 34.53658523013 36.9212698606949 35.3695034318197
9 | Australia 6.03900003433228 6.07299995422363 5.07600021362305 5.3769998550415 6.45499992370605 3.10729019538592 1.73858872516368 3.80265547127396 2.16024308609024 5.57902401508923 27.5719157008897 7.19386885826237 29.5175676108789 8.14987892307449 28.4984844159813 63.2817929492897 84.9945403349258 61.6037798726544 84.3128745262128 59.4674911991661
10 | Austria 6.08500003814697 5.30800008773804 4.98400020599365 4.62900018692017 4.75899982452393 4.52200730190182 3.98558638106129 5.13371435288384 4.761874049772 5.52207305574734 35.4895407112927 11.494661811554 34.6456851937484 10.9390539332283 40.2555119165146 53.9034532921234 79.2117521711732 55.2375498854336 79.6700745586658 49.4643666358969
11 | Azerbaijan 4.08799982070923 5.86700010299683 4.40000009536743 6.92500019073486 10.956000328064 29.713537216494 39.5932818623105 30.9801342769566 41.4435055627308 32.5553774537565 20.8925119064255 5.53219626749309 19.8570758926669 5.67385216944981 12.2551252604439 45.3069096495127 49.0075226649218 44.7627897350092 45.9576416332684 44.2334961085458
12 | Burundi 2.01500010490417 1.12600004673004 2.26600003242493 1.2940000295639 2.41199994087219 85.6976800111714 95.0287901554352 85.4850005552875 95.0992833942153 85.1367445108122 2.88761797307329 0.438011808713489 3.12944258389122 0.494517047143879 3.56293790449487 9.39872119741325 3.40719813645502 9.11955985760446 3.11318728236891 8.88733950787196
13 | Belgium 9.09200000762939 7.76800012588501 8.10799980163574 8.51599979400635 5.30800008773804 1.42180112941731 0.672371253574105 1.58973162095871 0.814207588746452 2.30101566108169 29.7769163685032 7.71059487282257 31.493225719076 9.21518329887166 34.2321058130084 59.7083746724182 83.8490362765479 58.8090432965048 81.4536972711161 58.1588782124085
14 | Benin 2.39499998092651 2.77600002288818 0.935000002384186 1.14600002765656 1.11300003528595 47.5951266358878 32.3668421884252 50.655895980188 36.3584999831141 52.9193774275583 19.9416773156035 16.3141878637436 19.7050198902156 17.3538199968234 19.6221478104001 30.0691715814223 48.5429717793437 28.7040841272121 45.1426679893845 26.3454747267556
15 | Burkina Faso 4.01800012588501 9.38799953460693 3.55900001525879 6.20900011062622 2.43199992179871 34.1551937764624 18.7358433777082 53.6433784810451 35.8900629300036 82.7396173319413 30.0481246462594 29.7533551454923 18.0942595783406 22.1506209877055 4.65301797583635 31.7796407441471 42.1228002139059 24.7033619253556 35.7503159716647 10.1753666313856
16 | Bangladesh 3.16100001335144 7.46700000762939 3.0090000629425 4.44000005722046 3.25200009346008 34.6034815201925 59.2229696385192 39.7973485445393 62.675890312812 58.7560268489109 20.9695164069447 14.2167704153356 18.629062068294 12.4447785665381 10.9305893542175 41.2650342014641 19.0941847602169 38.5645930241373 20.4383732543633 27.0613837034115
17 | Bulgaria 9.77600002288818 8.41600036621094 10.8570003509521 9.60599994659424 16.5620002746582 8.19955738637818 3.96467134065307 7.32755480918923 4.70139213588005 12.8653049039204 32.6728182406129 20.8271166440149 36.3132923914448 21.7587389804456 31.3760254931073 49.3516269314502 66.7922112124146 45.50215159828 63.9329620453712 39.1966701240408
18 | Bahrain 0.404000014066696 3.74499988555908 0.442999988794327 3.7039999961853 0.649999976158142 1.34454602355564 0.049090049140406 1.34501502172946 0.048148000719368 2.00388946068692 41.9538195811717 8.85738478740921 42.4152643759212 8.70804749340221 32.3414069024451 56.2976359246658 87.3485265400852 55.7977176542525 87.5398024900401 65.0047053187919
19 | Bahamas, The 11.0880002975464 13.003999710083 14.6370000839233 15.0539999008179 5.99100017547607 3.73963857520798 0.334934592819595 4.11961855830491 0.411138638252244 6.36628931391194 22.2626752120574 2.65859776388191 23.4440948961179 3.04276573813465 25.5290841214494 62.9096829474301 84.0024662738985 57.7984333005678 81.4920941278947 62.1145646179444
20 | Bosnia and Herzegovina 25.7479991912842 30.6539993286133 25.6830005645752 29.9309997558594 23.1609992980957 12.9220756101012 12.2645342501349 13.3049723182051 15.9484049210435 17.859688956593 28.9226391057519 11.8345884639975 29.2905576493788 11.420546200109 28.3451389220266 32.4080282049924 45.2461862008465 31.7214666328724 42.700047786528 30.6334048553668
21 | Belarus 7.54400014877319 4.26200008392334 7.49100017547607 4.74100017547607 13.7880001068115 11.8639536963483 6.34934412521728 13.0927986322389 7.57785336336403 16.543219949139 38.8537077692056 19.4702369406733 39.4782149693279 20.8055177737547 33.0459227840703 41.7383375039438 69.9184170241287 39.9379862229572 66.8756305043265 36.6228571599792
22 | Belize 5.43100023269653 11.0649995803833 5.87599992752075 12.956000328064 7.23000001907349 24.4943157494942 4.06966580918732 25.4520712042915 3.3929751088723 32.2097451258239 18.6499530790336 7.1112424673151 19.2295335020512 8.06201532266494 18.4751463455362 51.4247291350152 77.7540934153397 49.4423953661366 75.5890061274627 42.085112048461
23 | Bolivia 2.56399989128113 3.79699993133545 2.15499997138977 3.07699990272522 2.32699990272522 26.8367967963018 27.2244867839196 28.8975426966287 29.6516538190697 39.243059350225 29.5728002294542 10.5688616462612 27.7214461247441 10.2670537996148 24.4514592318484 41.0264012245186 58.4096516384838 41.2260112072374 57.0042924785903 33.9784815152013
24 | Brazil 7.21299982070923 10.0550003051758 5.60599994659424 10.6359996795654 7.90399980545044 12.940075137032 4.51883693563766 15.6609081743726 6.23760723941117 19.1080773755702 27.6254742288144 10.4875868271713 27.7962016710379 11.2670129831341 25.0206406583239 52.2214499285586 74.9376786916657 50.9359449860937 71.8593822284931 47.9663599499601
25 | Barbados 12.3529996871948 10.3219995498657 10.9020004272461 10.4720001220703 7.43800020217896 3.70133286667484 1.39897675570793 3.35364864953735 1.69476504366585 4.60403384875786 25.4220117051662 8.68979786731415 25.4713355332621 7.82922348686829 27.5890294337571 58.5236578306314 79.5892253994942 60.273017089364 80.0040108137672 60.3689373980461
26 | Brunei Darussalam 7.26999998092651 8.79800033569336 5.87599992752075 7.96700000762939 4.51399993896484 0.88278962052145 0.416793126941428 0.739814653136596 0.517225438453593 1.32439078715342 21.4187750082203 9.38103791508986 23.8933776338316 9.63493464281319 28.2056091354989 70.4284321845913 81.4041722916177 69.4908102540112 81.8808413922143 65.9550429571979
27 | Bhutan 1.89600002765656 3.24499988555908 2.75699996948242 4.05399990081787 1.22200000286102 51.2946567369433 64.1505020656227 53.3552892864206 63.1852415861599 57.4966984192026 9.74761331001144 8.93629186962433 6.38011310587837 6.44853085332855 8.03460234643553 37.0617289897961 23.6682080246491 37.5066248150085 26.312230862237 33.2457129361251
28 | Botswana 14.8100004196167 20.8439998626709 14.6619997024536 21.5359992980957 14.7600002288818 22.4254149825805 13.954411034723 24.444216036958 15.4111149645212 18.8320735006111 21.7737120057491 7.88631252920792 20.934264970185 8.10533121264347 22.6900350968499 40.9908725920536 57.3144831835424 39.9586663791761 54.9475545247397 43.7178895478333
29 | Central African Republic 3.94400000572205 3.76900005340576 4.01599979400635 3.82100009918213 4.08400011062622 71.9738010565036 79.6860033622247 72.024476007845 79.9747624084491 72.8395712908062 7.24358285330538 2.58572700343874 8.32085266296868 3.06811015187016 8.81372135423733 16.8386174585611 13.9592684337679 15.6386724505547 13.1361277991158 14.2627090737827
30 | Canada 7.45900011062622 6.28499984741211 8.78899955749512 7.23299980163574 6.94199991226196 2.04423078993088 0.951207238142733 2.22463620162913 1.03064131823743 3.01880148379753 27.9075889553813 8.0135693270031 27.9798870385329 7.92601219261315 30.9808692188154 62.589182350411 84.7502214648194 61.0064795944466 83.8103474616218 59.0583298288602
31 | Central Europe and the Baltics 7.74020473088915 7.90567223682592 9.96677631874455 9.81944761452723 11.9682940640283 11.5688662629283 9.20035554902194 12.5609959883593 11.5927911055313 18.8309308218994 37.5896041916351 17.2087211486188 36.9529718789418 16.9165246743873 34.4635746890898 43.1009681723622 65.6852633119561 40.519477755266 61.6714565500074 34.7367773326694
32 | Switzerland 4.7039999961853 4.91200017929077 4.48400020599365 5.18800020217896 2.29699993133545 3.74513286376686 2.73092733490052 3.94958681010094 2.53811727798916 5.38441248659413 28.5783177465053 9.60103545797122 30.8402042936878 10.1572098204503 34.1344974023386 62.9715985474426 82.7560354408853 60.7252522330816 82.1166736035794 58.1850690024549
33 | Channel Islands 7.89499998092651 8.32600021362305 8.26599979400635 8.56599998474121 8.1899995803833 4.83551250100136 2.26984829940678 5.48477595630134 2.82531052199306 6.66540624060059 32.3868794983221 10.8028644985235 34.0048767603197 12.3161600462105 38.8741895091103 54.8816839621708 78.6012854584717 52.2443474893725 76.2934456827455 46.2704029187691
34 | Chile 6.06400012969971 7.13700008392334 7.44299983978271 9.90900039672852 9.43200016021729 11.9759002436269 4.20947977911087 13.0718252667967 4.63157810402245 17.678873983313 29.7561067219155 10.4248002147113 28.8065144113413 9.25324648677127 26.3833645031928 52.2039938006014 78.2287172654227 50.6786613647715 76.2061724349536 46.5057613532769
35 | China 5.11499977111816 4.01200008392334 5.0019998550415 3.92300009727478 3.61199998855591 29.4561000646645 24.4548620502283 37.2563142762865 32.2261471314498 51.6157732707462 29.2663293402916 26.0108278071535 27.8448646052482 26.8381491674875 20.0246066346865 36.1616224951424 45.522308227869 29.8968212634238 37.0136656785436 24.7476182675561
36 | Cote d'Ivoire 2.5220000743866 3.94799995422363 6.76200008392334 6.61499977111816 4.58099985122681 47.171552058119 34.9715730777324 47.0068703791369 39.4560959337257 49.0663591009697 12.8105589435857 10.092183615497 10.7410180171417 10.3769416600407 12.4483623080556 37.4968613092001 50.988243352547 35.4891796519704 43.5519635257042 33.9042805597209
37 | Cameroon 3.06299996376038 4.00600004196167 3.41300010681152 4.89900016784668 8.4680004119873 40.5565032880577 49.8170461303385 48.0365796469669 58.1961060694681 57.8253400413932 16.9232616438237 10.3491132673038 13.7481937637596 9.33986928783898 10.8117600427362 39.4572351043582 35.8278423913364 34.8022292458383 27.5640730802995 22.8958143210788
38 | Congo, Dem. Rep. 5.19600009918213 3.56200003623962 4.72800016403198 3.19199991226196 3.53399991989136 56.5562725027826 72.0112166573572 59.4430603404656 75.3756770074007 62.6701049092212 14.5125959359357 3.36857936816469 13.1199070101871 3.16562158440437 12.3968455083798 23.7360780772033 21.0582020988296 22.7090343024846 18.2667010343166 21.3990533423936
39 | Congo, Rep. 9.0930004119873 10.7670001983643 12.4320001602173 15.5559997558594 18.6159992218018 31.500185315006 31.4938938062558 32.0253676721182 32.3673868397949 32.7342728923526 21.7085907386955 19.7463702674023 20.7273456288856 19.4052308695495 19.0975692997385 37.6982252682246 37.9927340259931 34.8152848685518 32.6713825347961 29.5521585861072
40 | Colombia 6.36199998855591 10.8400001525879 8.63500022888184 14.1890001296997 17.3519992828369 20.9589929996166 6.43735180191803 23.9476804943024 6.00934413433163 26.6986108578931 22.5227476712479 12.4815080181137 20.8759896481155 13.6113411987855 15.8882524271435 50.1553216889453 70.2411379016403 46.5413296287003 66.1903165830769 40.061139008512
41 |
--------------------------------------------------------------------------------
/R/convert.R:
--------------------------------------------------------------------------------
1 | #' @include constructor.R
2 | NULL
3 |
4 |
5 | #' Recompute missing values of cacomp object.
6 | #'
7 | #' @description
8 | #' The caobj needs to have the std_coords_cols, the prin_coords_rows and D
9 | #' calculated. From this the remainder will be calculated.
10 | #' Future updates might extend this functionality.
11 | #'
12 | #' @return
13 | #' A cacomp object with additional calculated row_masses, col_masses,
14 | #' std_coords_rows, U and V.
15 | #'
16 | #' @param calist A list with std_coords_cols, the prin_coords_rows and D.
17 | #' @param mat A matrix from which the cacomp object is derived.
18 | #' @param ... Further arguments forwarded to cacomp.
19 | recompute <- function(calist, mat, ...){
20 |   # Validate inputs; is() returns a scalar, so the short-circuit `||` is safe.
21 |   stopifnot(is(calist, "list"))
22 |   stopifnot(is(mat, "matrix") || is(mat, "Matrix"))
23 |
24 |   if(is.null(calist$params)){
25 |     warning("No parameters provided for recalculation!")
26 |     calist$params <- list()
27 |
28 |   }
29 |
30 |   # Honour the stored rm_zeros parameter. Testing exists("rm_zeros") is not
31 |   # usable here: it always finds the function rm_zeros() itself.
32 |   if (isTRUE(calist$params$rm_zeros)){
33 |     mat <- rm_zeros(mat)
34 |   }
35 |
36 |   # Fallback so `top` is defined even when std_coords_rows is unavailable.
37 |   top <- nrow(mat)
38 |
39 |   # take stock of what we have
40 |   # NB: each flag below is TRUE when the element is *missing* (NULL).
41 |   std_rows <- is.null(calist$std_coords_rows)
42 |   std_cols <- is.null(calist$std_coords_cols)
43 |   prin_rows <- is.null(calist$prin_coords_rows)
44 |   prin_cols <- is.null(calist$prin_coords_cols)
45 |
46 |   sp_rows <- std_rows & prin_rows
47 |   sp_cols <- std_cols & prin_cols
48 |
49 |   d <- is.null(calist$D)
50 |   v <- is.null(calist$V)
51 |   u <- is.null(calist$U)
52 |
53 |   # mat <- var_rows(mat = mat,
54 |   # top = nrow(mat))
55 |   res <- comp_std_residuals(mat=mat)
56 |
57 |   S <- res$S
58 |   tot <- res$tot
59 |   rowm <- res$rowm
60 |   colm <- res$colm
61 |   # If U/V are given but the std. coordinates are not, derive them directly.
62 |   if(std_rows & !u) {
63 |     calist$std_coords_rows <- sweep(x = calist$U,
64 |                                     MARGIN = 1,
65 |                                     STATS = sqrt(rowm),
66 |                                     FUN = "/")
67 |     std_rows <- FALSE
68 |   }
69 |   if(std_cols & !v){
70 |     calist$std_coords_cols <- sweep(x = calist$V,
71 |                                     MARGIN = 1,
72 |                                     STATS = sqrt(colm),
73 |                                     FUN = "/")
74 |     std_cols <- FALSE
75 |   }
76 |
77 |   call_svd <- FALSE
78 |   done <- FALSE
79 |   # Fill in missing column coordinates / D; flag the SVD fallback otherwise.
80 |   while (isFALSE(done)){
81 |     if (std_cols){
82 |       if (d){
83 |         if(prin_cols){
84 |           call_svd <- TRUE
85 |           done <- TRUE
86 |         } else {
87 |           # check if we can get D with row coords, otherwise call cacomp
88 |           if(std_rows | prin_rows){
89 |             call_svd <- TRUE
90 |             done <- TRUE
91 |
92 |           } else {
93 |             calist$D <- calist$prin_coords_rows[1,]/calist$std_coords_rows[1,]
94 |             d <- FALSE
95 |           }
96 |         }
97 |       } else if (prin_cols){
98 |         call_svd <- TRUE
99 |         done <- TRUE
100 |
101 |       } else {
102 |         # calculate std_coords
103 |         calist$std_coords_cols <- sweep(calist$prin_coords_cols,
104 |                                         2,
105 |                                         calist$D,
106 |                                         "/")
107 |         std_cols <- FALSE
108 |       }
109 |     } else if (d) {
110 |       if (prin_cols) {
111 |         # check if we can get d through rows, otherwise cacomp
112 |         if(std_rows | prin_rows){
113 |           call_svd <- TRUE
114 |           done <- TRUE
115 |
116 |         } else {
117 |           calist$D <- calist$prin_coords_rows[1,]/calist$std_coords_rows[1,]
118 |           d <- FALSE
119 |         }
120 |       } else {
121 |         # calculate d from col coordinates
122 |
123 |         # calist$D <- colMeans(sweep(calist$prin_coords_cols,
124 |         # 1,
125 |         # calist$std_coords_cols,
126 |         # "/"))
127 |         calist$D <- calist$prin_coords_cols[1,]/calist$std_coords_cols[1,]
128 |         d <- FALSE
129 |
130 |       }
131 |     } else if (prin_cols){
132 |       # calculate prin_cols with D and std
133 |       calist$prin_coords_cols <- sweep(calist$std_coords_cols,
134 |                                        2,
135 |                                        calist$D,
136 |                                        "*")
137 |       prin_cols <- FALSE
138 |
139 |     } else {
140 |       # all calculated
141 |       done <- TRUE
142 |     }
143 |   }
144 |
145 |   # Same procedure for the row coordinates.
146 |   done <- FALSE
147 |   while (isFALSE(done)){
148 |     if (std_rows){
149 |       if (d){
150 |         if(prin_rows){
151 |           call_svd <- TRUE
152 |           done <- TRUE
153 |
154 |         } else {
155 |           # check if we can get D with col coords, otherwise call cacomp
156 |           if(std_cols | prin_cols){
157 |             call_svd <- TRUE
158 |             done <- TRUE
159 |
160 |           } else {
161 |             calist$D <- calist$prin_coords_cols[1,]/calist$std_coords_cols[1,]
162 |             d <- FALSE
163 |           }
164 |         }
165 |       } else if (prin_rows){
166 |         call_svd <- TRUE
167 |         done <- TRUE
168 |       } else {
169 |         # calculate std_coords
170 |         calist$std_coords_rows <- sweep(calist$prin_coords_rows,
171 |                                         2,
172 |                                         calist$D,
173 |                                         "/")
174 |         std_rows <- FALSE
175 |       }
176 |     } else if (d) {
177 |       if (prin_rows) {
178 |         # check if we can get d through cols, otherwise cacomp
179 |         if(std_cols | prin_cols){
180 |           call_svd <- TRUE
181 |           done <- TRUE
182 |
183 |         } else {
184 |           calist$D <- calist$prin_coords_cols[1,]/calist$std_coords_cols[1,]
185 |           d <- FALSE
186 |         }
187 |       } else {
188 |         # calculate d from row coordinates
189 |
190 |         # calist$D <- colMeans(sweep(calist$prin_coords_rows, 1, calist$std_coords_rows, "/"))
191 |         calist$D <- calist$prin_coords_rows[1,]/calist$std_coords_rows[1,]
192 |         d <- FALSE
193 |
194 |       }
195 |     } else if (prin_rows){
196 |       # calculate prin_rows with D and std
197 |       calist$prin_coords_rows <- sweep(calist$std_coords_rows, 2, calist$D, "*")
198 |       prin_rows <- FALSE
199 |
200 |     } else {
201 |       # all calculated
202 |       done <- TRUE
203 |     }
204 |   }
205 |
206 |   if (!is.null(calist$std_coords_rows)) top <- nrow(calist$std_coords_rows)
207 |
208 |   if (!is.null(calist$std_coords_rows) |
209 |       !is.null(calist$std_coords_cols) |
210 |       !is.null(calist$D)){
211 |
212 |     dims <- min(ncol(calist$std_coords_rows),
213 |                 ncol(calist$std_coords_cols),
214 |                 length(calist$D),
215 |                 na.rm = TRUE)
216 |   }
217 |
218 |   # if (calist$params$clip)
219 |   if(isTRUE(call_svd)){
220 |     message("Calling cacomp to recompute from matrix.")
221 |     ca <- cacomp(mat,
222 |                  princ_coords = 3,
223 |                  top = top,
224 |                  residuals = calist$params$residuals,
225 |                  clip = calist$params$clip,
226 |                  cutoff = calist$params$cutoff,
227 |                  rm_zeros = calist$params$rm_zeros,
228 |                  dims = min(nrow(mat), ncol(mat)) - 1,
229 |                  ...)
230 |     return(ca)
231 |   } else {
232 |     # Everything could be derived; check that mat matches the coordinates.
233 |     if (nrow(mat) != nrow(calist$std_coords_rows)){
234 |       stop("mat does not have have the correct number of rows.")
235 |     }
236 |
237 |     if (ncol(mat) != nrow(calist$std_coords_cols)){
238 |       stop("mat does not have have the correct number of columns.")
239 |     }
240 |     # Divisions above can yield NA/Inf entries; these are set to 0.
241 |     calist$std_coords_rows[is.na(calist$std_coords_rows)] <- 0
242 |     calist$std_coords_cols[is.na(calist$std_coords_cols)] <- 0
243 |     calist$std_coords_rows[is.infinite(calist$std_coords_rows)] <- 0
244 |     calist$std_coords_cols[is.infinite(calist$std_coords_cols)] <- 0
245 |     # Order the masses by the row names of the coordinate matrices.
246 |     ordidx <- match(rownames(calist$prin_coords_rows), names(rowm))
247 |     calist$row_masses <- rowm[ordidx]
248 |
249 |     ordidx <- match(rownames(calist$std_coords_cols), names(colm))
250 |     calist$col_masses <- colm[ordidx]
251 |
252 |     if (u) calist$U <- sweep(calist$std_coords_rows,
253 |                              1,
254 |                              sqrt(calist$row_masses),
255 |                              "*")
256 |     if (v) calist$V <- sweep(calist$std_coords_cols,
257 |                              1,
258 |                              sqrt(calist$col_masses),
259 |                              "*")
260 |
261 |     calist$tot_inertia <- sum(calist$D^2)
262 |     calist$row_inertia <- Matrix::rowSums(S^2)
263 |     calist$col_inertia <- Matrix::colSums(S^2)
264 |
265 |     calist$top_rows <- nrow(mat)
266 |     calist$dims <- length(calist$D)
267 |   }
268 |
269 |   ca <- do.call(new_cacomp, calist)
270 |   return(ca)
271 | }
272 |
273 |
274 | #' Create cacomp object from Seurat/SingleCellExperiment container
275 | #'
276 | #' @description
277 | #' Converts the values stored in the Seurat/SingleCellExperiment dimensional
278 | #' reduction slot "CA" to a cacomp object.
279 | #' If recompute = TRUE additional parameters are recomputed from the saved
280 | #' values without rerunning SVD (need to specify assay to work).
281 | #'
282 | #' @details
283 | #' By default extracts std_coords_cols, D, prin_coords_rows, top_rows and dims
284 | #' from obj and outputs a cacomp object.
285 | #' If recompute = TRUE the following are additionally recalculated
286 | #' (doesn't run SVD):
287 | #' U, V, std_coords_rows, row_masses, col_masses.
288 | #'
289 | #' @return
290 | #' A cacomp object.
291 | #'
292 | #' @param obj An object of class "Seurat" or "SingleCellExperiment"
293 | #' with a dim. reduction named "CA" saved. For obj "cacomp" input is returned.
294 | #' @param assay Character. The assay from which to extract the count matrix,
295 | #' e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
296 | #' SingleCellExperiments.
297 | #' @param ... Further arguments.
298 | #' @export
299 | setGeneric(name = "as.cacomp",
300 |            def = function(obj, ...) standardGeneric("as.cacomp")
301 | )
302 |
303 | #' @description as.cacomp.cacomp returns input without any calculations.
304 | #' @rdname as.cacomp
305 | #' @export
306 | setMethod(f = "as.cacomp",
307 |           signature=(obj="cacomp"),
308 |           definition = function(obj, ...) {stopifnot(is(obj, "cacomp")); obj}
309 | )
310 |
311 |
312 | #' @description Recomputes missing values and returns cacomp object from a list.
313 | #' If you have a *complete* cacomp object in list form,
314 | #' use do.call(new_cacomp, obj).
315 | #' @param mat Original input matrix.
316 | #' @rdname as.cacomp
317 | #' @export
318 | #' @examples
319 | #' #########
320 | #' # lists #
321 | #' #########
322 | #'
323 | #' # Simulate counts
324 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
325 | #' x = sample(1:100, 50, replace = TRUE))
326 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
327 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
328 | #'
329 | #' # Run correspondence analysis
330 | #' ca <- cacomp(obj = cnts, princ_coords = 3)
331 | #' ca_list <- as.list(ca)
332 | #'
333 | #' # Only keep subset of elements for demonstration
334 | #' ca_list <- ca_list[c("U", "std_coords_rows", "std_coords_cols", "params")]
335 | #'
336 | #' # convert (incomplete) list to cacomp object.
337 | #' ca <- as.cacomp(ca_list, mat = cnts)
338 | setMethod(f = "as.cacomp",
339 |           signature=(obj="list"),
340 |           function(obj, ..., mat = NULL) {
341 |   # A complete list can be converted directly; keep any error for reporting.
342 |   try_obj <- try(do.call(new_cacomp, obj), silent = TRUE)
343 |   if (is(try_obj, "cacomp")) return(try_obj)
344 |   if (!is(try_obj, "try-error")) stop("Unexpected output from try().")
345 |   # Incomplete list: the missing elements have to be recomputed from `mat`.
346 |   if (is.null(mat)) stop("Cannot build a cacomp object from `obj` alone (",
347 |                          conditionMessage(attr(try_obj, "condition")),
348 |                          ") and no `mat` was supplied to recompute from.")
349 |   obj <- recompute(calist = obj, mat = mat)
350 |   return(obj)
351 | })
352 |
353 | #' @description
354 | #' as.cacomp.Seurat: Converts the values stored in the Seurat DimReduc slot
355 | #' "CA" to a cacomp object.
356 | #' @param slot character. Slot of the Seurat assay to use. Default "counts".
357 | #' @rdname as.cacomp
358 | #' @export
359 | #' @examples
360 | #'
361 | #' ##########
362 | #' # Seurat #
363 | #' ##########
364 | #' library(SeuratObject)
365 | #' set.seed(1234)
366 | #'
367 | #' # Simulate counts
368 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
369 | #' x = sample(1:100, 50, replace = TRUE))
370 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
371 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
372 | #'
373 | #' seu <- CreateSeuratObject(counts = cnts)
374 | #' seu <- cacomp(seu, return_input = TRUE)
375 | #'
376 | #' ca <- as.cacomp(seu, assay = "RNA", slot = "counts")
377 | setMethod(f = "as.cacomp",
378 |           signature=(obj="Seurat"),
379 |           function(obj, ..., assay="RNA", slot = "counts") {
380 |   # Input checks: a Seurat object carrying a reduction named "CA" is needed.
381 |   stopifnot("obj doesn't belong to class 'Seurat'" = is(obj, "Seurat"))
382 |   stopifnot("obj doesn't contain a DimReduc object named 'CA'. Try running cacomp()." =
383 |               "CA" %in% names(obj@reductions))
384 |
385 |   if (is.null(assay)) {
386 |     assay <- SeuratObject::DefaultAssay(obj)
387 |   }
388 |
389 |   # Collect what is stored in the "CA" reduction.
390 |   parts <- list(
391 |     "std_coords_cols" = SeuratObject::Embeddings(obj, reduction = "CA"),
392 |     "D" = SeuratObject::Stdev(obj, reduction = "CA"),
393 |     "prin_coords_rows" = SeuratObject::Loadings(obj, reduction = "CA"),
394 |     "params" = obj@reductions$CA@misc
395 |   )
396 |   parts$top_rows <- nrow(parts$prin_coords_rows)
397 |   parts$dims <- length(parts$D)
398 |
399 |   # Label the dimensions Dim1, Dim2, ...
400 |   dim_label <- function(k) paste0("Dim", seq_len(k))
401 |   colnames(parts$std_coords_cols) <- dim_label(ncol(parts$std_coords_cols))
402 |   colnames(parts$prin_coords_rows) <- dim_label(ncol(parts$prin_coords_rows))
403 |   names(parts$D) <- dim_label(length(parts$D))
404 |   stopifnot("Assay is needed to recompute cacomp." = !is.null(assay))
405 |   counts_mat <- as.matrix(SeuratObject::LayerData(object = obj, assay = assay, layer = slot))
406 |   counts_mat <- counts_mat[rownames(parts$prin_coords_rows),]
407 |
408 |   ca_obj <- recompute(calist = parts, mat = counts_mat)
409 |   stopifnot(validObject(ca_obj))
410 |   ca_obj
411 | })
412 |
413 |
414 | #' @description
415 | #' as.cacomp.SingleCellExperiment: Converts the values stored in the
416 | #' SingleCellExperiment reducedDim slot "CA" to a cacomp object.
417 | #'
418 | #' @rdname as.cacomp
419 | #' @export
420 | #' @examples
421 | #'
422 | #' ########################
423 | #' # SingleCellExperiment #
424 | #' ########################
425 | #' library(SingleCellExperiment)
426 | #' set.seed(1234)
427 | #'
428 | #' # Simulate counts
429 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
430 | #' x = sample(1:100, 50, replace = TRUE))
431 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
432 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
433 | #'
434 | #' sce <- SingleCellExperiment(assays=list(counts=cnts))
435 | #' sce <- cacomp(sce, return_input = TRUE)
436 | #'
437 | #' ca <- as.cacomp(sce, assay = "counts")
438 | setMethod(f = "as.cacomp",
439 |           signature=(obj="SingleCellExperiment"),
440 |           function(obj, ..., assay="counts") {
441 |
442 |   # TODO: Change to principal coordinates or columns.
443 |   ca_embed <- SingleCellExperiment::reducedDim(obj, "CA")
444 |   sing_vals <- attr(ca_embed, "singval")
445 |   row_prin <- attr(ca_embed, "prin_coords_rows")
446 |
447 |   stopifnot("Attribute singval of dimension reduction slot CA is empty.\nThis can happen after subsetting the sce obj." =
448 |               !is.null(sing_vals))
449 |   stopifnot("Attribute prin_coords_rows of dimension reduction slot CA is empty.\nThis can happen after subsetting the sce obj." =
450 |               !is.null(row_prin))
451 |
452 |   if (is.null(assay)) {
453 |     assay <- "counts"
454 |   }
455 |   # The column coordinates are stored without the CA bookkeeping attributes.
456 |   std_cols <- ca_embed
457 |   for (att in c("prin_coords_rows", "singval", "percInertia", "params")) {
458 |     attr(std_cols, att) <- NULL
459 |   }
460 |
461 |   parts <- list("std_coords_cols" = std_cols,
462 |                 "D" = sing_vals,
463 |                 "prin_coords_rows" = row_prin,
464 |                 "params" = attr(ca_embed, "params"))
465 |   parts$top_rows <- nrow(parts$prin_coords_rows)
466 |   parts$dims <- length(parts$D)
467 |
468 |   stopifnot("Assay is needed to recompute cacomp." = !is.null(assay))
469 |   count_mat <- SummarizedExperiment::assay(obj, assay)
470 |   count_mat <- count_mat[rownames(parts$prin_coords_rows),]
471 |
472 |   ca_obj <- recompute(calist = parts, mat = count_mat)
473 |
474 |   stopifnot(validObject(ca_obj))
475 |   ca_obj
476 | })
477 |
478 |
--------------------------------------------------------------------------------
/R/constructor.R:
--------------------------------------------------------------------------------
1 |
2 | #' Helper function to check if object is empty.
3 | #' @param x object
4 | #' @return TRUE if x has length 0 and is not NULL. FALSE otherwise
5 | is.empty <- function(x) !is.null(x) && length(x) == 0L
6 |
7 |
8 | #' Check if cacomp object was correctly created.
9 | #'
10 | #' @description Checks if the slots in a cacomp object are of the correct size
11 | #' and whether they are coherent.
12 | #' @param object A cacomp object.
13 | #' @return TRUE if it is a valid cacomp object. Otherwise a character vector of error messages.
14 | #' @export
15 | #' @examples
16 | #' # Simulate scRNAseq data.
17 | #' cnts <- data.frame(cell_1 = rpois(10, 5),
18 | #' cell_2 = rpois(10, 10),
19 | #' cell_3 = rpois(10, 20))
20 | #' rownames(cnts) <- paste0("gene_", 1:10)
21 | #' cnts <- as.matrix(cnts)
22 | #'
23 | #' # Run correspondence analysis.
24 | #' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
25 | #'
26 | #' check_cacomp(ca)
27 | check_cacomp <- function(object) {
28 |   errors <- character()
29 |   # Expected sizes are read from the object's own top_rows and dims slots.
30 |   dim_rows <- object@top_rows
31 |   dims <- object@dims
32 |
33 |   # SVD results
34 |   if (isTRUE(!is.empty(object@U) &
35 |              nrow(object@U) != dim_rows)) {
36 |     msg <- paste0("Nr. of rows in U is ",
37 |                   nrow(object@U),
38 |                   ". Should be ",
39 |                   dim_rows,
40 |                   ".")
41 |     errors <- c(errors, msg)
42 |   }
43 |
44 |   if (isTRUE(!is.empty(object@U) &
45 |              ncol(object@U) != dims)) {
46 |     msg <- paste0("Nr. of columns in U is ",
47 |                   ncol(object@U),
48 |                   ". Should be ",
49 |                   dims,
50 |                   ".")
51 |     errors <- c(errors, msg)
52 |   }
53 |
54 |   if (isTRUE(!is.empty(object@V) &
55 |              ncol(object@V) != dims)) {
56 |     msg <- paste0("Nr. of columns in V is ",
57 |                   ncol(object@V),
58 |                   ". Should be ",
59 |                   dims,
60 |                   ".")
61 |     errors <- c(errors, msg)
62 |   }
63 |   # D is a plain numeric vector, so its size is length(), not ncol().
64 |   if (isTRUE(!is.empty(object@D) &
65 |              length(object@D) != dims)) {
66 |     msg <- paste0("Length of D is ", length(object@D), ". Should be ", dims, ".")
67 |     errors <- c(errors, msg)
68 |   }
69 |
70 |   # CA results
71 |
72 |   if (isTRUE(!is.empty(object@row_masses) &
73 |              length(object@row_masses) != dim_rows)) {
74 |
75 |     msg <- paste0("Length of row_masses is ",
76 |                   length(object@row_masses),
77 |                   ". Should be ",
78 |                   dim_rows,
79 |                   ".")
80 |     errors <- c(errors, msg)
81 |   }
82 |
83 |   if (isTRUE(!is.empty(object@col_masses) &
84 |              length(object@col_masses) != nrow(object@V))) {
85 |
86 |     msg <- paste0("Length of col_masses is ",
87 |                   length(object@col_masses),
88 |                   ". Should be ",
89 |                   nrow(object@V),
90 |                   ".")
91 |     errors <- c(errors, msg)
92 |   }
93 |
94 |   if (isTRUE(!is.empty(object@row_inertia) &
95 |              length(object@row_inertia) != dim_rows)){
96 |
97 |     msg <- paste0("Length of row_inertia is ",
98 |                   length(object@row_inertia),
99 |                   ". Should be ",
100 |                   dim_rows,
101 |                   ".")
102 |     errors <- c(errors, msg)
103 |   }
104 |
105 |   if (isTRUE(!is.empty(object@col_inertia) &
106 |              length(object@col_inertia) != nrow(object@V))) {
107 |
108 |     msg <- paste0("Length of col_inertia is ",
109 |                   length(object@col_inertia),
110 |                   ". Should be ",
111 |                   nrow(object@V),
112 |                   ".")
113 |     errors <- c(errors, msg)
114 |   }
115 |
116 |   if (isTRUE(!is.empty(object@tot_inertia) &
117 |              length(object@tot_inertia) != 1)) {
118 |
119 |     msg <- paste0("Length of tot_inertia is ",
120 |                   length(object@tot_inertia),
121 |                   ". Should be 1.")
122 |     errors <- c(errors, msg)
123 |   }
124 |
125 |   # standardized coordinates
126 |
127 |   if (isTRUE(!is.empty(object@std_coords_rows) &
128 |              nrow(object@std_coords_rows) != dim_rows)) {
129 |
130 |     msg <- paste0("Nr. of rows in std_coords_rows is ",
131 |                   nrow(object@std_coords_rows),
132 |                   ". Should be ",
133 |                   dim_rows,
134 |                   ".")
135 |     errors <- c(errors, msg)
136 |   }
137 |
138 |   if (isTRUE(!is.empty(object@std_coords_rows) &
139 |              ncol(object@std_coords_rows) != dims)) {
140 |
141 |     msg <- paste0("Nr. of columns in std_coords_rows is ",
142 |                   ncol(object@std_coords_rows),
143 |                   ". Should be ",
144 |                   dims,
145 |                   ".")
146 |     errors <- c(errors, msg)
147 |   }
148 |
149 |   if (isTRUE(!is.empty(object@std_coords_cols) &
150 |              nrow(object@std_coords_cols) != nrow(object@V))) {
151 |
152 |     msg <- paste0("Nr. of rows in std_coords_cols is ",
153 |                   nrow(object@std_coords_cols),
154 |                   ". Should be ",
155 |                   nrow(object@V),
156 |                   ".")
157 |     errors <- c(errors, msg)
158 |   }
159 |
160 |   if (isTRUE(!is.empty(object@std_coords_cols) &
161 |              ncol(object@std_coords_cols) != dims)) {
162 |
163 |     msg <- paste0("Nr. of columns in std_coords_cols is ",
164 |                   ncol(object@std_coords_cols),
165 |                   ". Should be ",
166 |                   dims,
167 |                   ".")
168 |     errors <- c(errors, msg)
169 |   }
170 |
171 |
172 |   # principal coordinates
173 |
174 |   if (isTRUE(!is.empty(object@prin_coords_rows) &
175 |              nrow(object@prin_coords_rows) != dim_rows)) {
176 |
177 |     msg <- paste0("Nr. of rows in prin_coords_rows is ",
178 |                   nrow(object@prin_coords_rows),
179 |                   ". Should be ",
180 |                   dim_rows,
181 |                   ".")
182 |     errors <- c(errors, msg)
183 |   }
184 |
185 |   if (isTRUE(!is.empty(object@prin_coords_rows) &
186 |              ncol(object@prin_coords_rows) != dims)) {
187 |
188 |     msg <- paste0("Nr. of columns in prin_coords_rows is ",
189 |                   ncol(object@prin_coords_rows),
190 |                   ". Should be ",
191 |                   dims,
192 |                   ".")
193 |     errors <- c(errors, msg)
194 |   }
195 |
196 |   if (isTRUE(!is.empty(object@prin_coords_cols) &
197 |              nrow(object@prin_coords_cols) != nrow(object@V))) {
198 |
199 |     msg <- paste0("Nr. of rows in prin_coords_cols is ",
200 |                   nrow(object@prin_coords_cols),
201 |                   ". Should be ",
202 |                   nrow(object@V),
203 |                   ".")
204 |     errors <- c(errors, msg)
205 |   }
206 |
207 |   if (isTRUE(!is.empty(object@prin_coords_cols) &
208 |              ncol(object@prin_coords_cols) != dims)) {
209 |
210 |     msg <- paste0("Nr. of columns in prin_coords_cols is ",
211 |                   ncol(object@prin_coords_cols),
212 |                   ". Should be ",
213 |                   dims,
214 |                   ".")
215 |     errors <- c(errors, msg)
216 |   }
217 |
218 |   # AP coordinates
219 |
220 |   if (isTRUE(!is.empty(object@apl_rows) &
221 |              nrow(object@apl_rows) != dim_rows)) {
222 |
223 |     msg <- paste0("Nr. of rows in apl_rows is ",
224 |                   nrow(object@apl_rows),
225 |                   ". Should be ",
226 |                   dim_rows,
227 |                   ".")
228 |     errors <- c(errors, msg)
229 |   }
230 |
231 |   if (isTRUE(!is.empty(object@apl_rows) &
232 |              ncol(object@apl_rows) != 2)) {
233 |
234 |     msg <- paste0("Nr. of columns in apl_rows is ",
235 |                   ncol(object@apl_rows),
236 |                   ". Should be 2.")
237 |     errors <- c(errors, msg)
238 |   }
239 |
240 |   if (isTRUE(!is.empty(object@apl_cols) &
241 |              nrow(object@apl_cols) != nrow(object@V))) {
242 |
243 |     msg <- paste0("Nr. of rows in apl_cols is ",
244 |                   nrow(object@apl_cols),
245 |                   ". Should be ",
246 |                   nrow(object@V),
247 |                   ".")
248 |     errors <- c(errors, msg)
249 |   }
250 |
251 |   if (isTRUE(!is.empty(object@apl_cols) &
252 |              ncol(object@apl_cols) != 2)) {
253 |
254 |     msg <- paste0("Nr. of columns in apl_cols is ",
255 |                   ncol(object@apl_cols),
256 |                   ". Should be 2.")
257 |     errors <- c(errors, msg)
258 |   }
259 |
260 |   # Salpha score
261 |   if (isTRUE(!is.empty(object@APL_score) &
262 |              ncol(object@APL_score) != 4)) {
263 |
264 |     msg <- paste0("Nr. of columns in APL_score is ",
265 |                   ncol(object@APL_score),
266 |                   ". Should be 4.")
267 |     errors <- c(errors, msg)
268 |   }
269 |   if (isTRUE(!is.empty(object@APL_score) &
270 |              nrow(object@APL_score) != dim_rows)) {
271 |
272 |     msg <- paste0("Nr. of rows in APL_score is ",
273 |                   nrow(object@APL_score),
274 |                   ". Should be ",
275 |                   dim_rows,
276 |                   ".")
277 |     errors <- c(errors, msg)
278 |   }
279 |   # TRUE when no problem was found, otherwise the collected messages.
280 |   if (length(errors) == 0) TRUE else errors
281 | }
282 |
283 | #' An S4 class that contains all elements needed for CA.
284 | #' @name cacomp-class
285 | #' @rdname cacomp-class
286 | #' @description
287 | #' This class contains elements necessary to compute CA coordinates or
288 | #' Association Plot coordinates,
289 | #' as well as other informative data such as row/column inertia,
290 | #' gene-wise APL-scores, etc. ...
291 | #'
292 | #' @slot U class "matrix". Left singular vectors of the original input matrix.
293 | #' @slot V class "matrix". Right singular vectors of the original input matrix.
294 | #' @slot D class "numeric". Singular values of the original input matrix.
295 | #' @slot std_coords_rows class "matrix". Standardized CA coordinates of the
296 | #' rows.
297 | #' @slot std_coords_cols class "matrix". Standardized CA coordinates of the
298 | #' columns.
299 | #' @slot prin_coords_rows class "matrix". Principal CA coordinates of the rows.
300 | #' @slot prin_coords_cols class "matrix". Principal CA coordinates of the
301 | #' columns.
302 | #' @slot apl_rows class "matrix". Association Plot coordinates of the rows
303 | #' for the direction defined in slot "group"
304 | #' @slot apl_cols class "matrix". Association Plot coordinates of the columns
305 | #' for the direction defined in slot "group"
306 | #' @slot APL_score class "data.frame". Contains rows sorted by the APL score.
307 | #' Columns: Rowname (gene name in the case of gene expression data),
308 | #' APL score calculated for the direction defined in slot "group",
309 | #' the original row number and the rank of the row as determined by the score.
310 | #' @slot dims class "numeric". Number of dimensions in CA space.
311 | #' @slot group class "numeric". Indices of the chosen columns for APL
312 | #' calculations.
313 | #' @slot row_masses class "numeric". Row masses of the frequency table.
314 | #' @slot col_masses class "numeric". Column masses of the frequency table.
315 | #' @slot top_rows class "numeric". Number of most variable rows chosen.
316 | #' @slot tot_inertia class "numeric". Total inertia in CA space.
317 | #' @slot row_inertia class "numeric". Row-wise inertia in CA space.
318 | #' @slot col_inertia class "numeric". Column-wise inertia in CA space.
319 | #' @slot permuted_data class "list". Storage slot for permuted data.
320 | #' @slot params class "list". List of parameters.
321 | #' @export
322 | setClass("cacomp", # S4 class; its slots are documented in the roxygen block above
323 |   slots = c( # `slots` replaces the deprecated `representation` argument
324 |     U = "matrix",
325 |     V = "matrix",
326 |     D = "numeric",
327 |     std_coords_rows = "matrix",
328 |     std_coords_cols = "matrix",
329 |     prin_coords_rows = "matrix",
330 |     prin_coords_cols = "matrix",
331 |     apl_rows = "matrix",
332 |     apl_cols = "matrix",
333 |     APL_score = "data.frame",
334 |     params = "list",
335 |     dims = "numeric",
336 |     group = "numeric",
337 |     row_masses = "numeric",
338 |     col_masses = "numeric",
339 |     top_rows = "numeric",
340 |     tot_inertia = "numeric",
341 |     row_inertia = "numeric",
342 |     col_inertia = "numeric",
343 |     permuted_data = "list"
344 |   ),
345 |   prototype = prototype( # every slot defaults to an empty object of its type
346 |     U = matrix(0, 0, 0),
347 |     V = matrix(0, 0, 0),
348 |     D = numeric(),
349 |     std_coords_rows = matrix(0, 0, 0),
350 |     std_coords_cols = matrix(0, 0, 0),
351 |     prin_coords_rows = matrix(0, 0, 0),
352 |     prin_coords_cols = matrix(0, 0, 0),
353 |     apl_rows = matrix(0, 0, 0),
354 |     apl_cols = matrix(0, 0, 0),
355 |     APL_score = data.frame(),
356 |     params = list(),
357 |     dims = numeric(),
358 |     group = numeric(),
359 |     row_masses = numeric(),
360 |     col_masses = numeric(),
361 |     top_rows = numeric(),
362 |     tot_inertia = numeric(),
363 |     row_inertia = numeric(),
364 |     col_inertia = numeric(),
365 |     permuted_data = list()),
366 |   validity = check_cacomp # validity function, defined elsewhere in the package
367 | )
368 |
369 | #' Create new "cacomp" object.
370 | #' @description Creates new cacomp object.
371 | #'
372 | #' @param ... slot names and objects for new cacomp object.
373 | #' @return cacomp object
374 | #' @rdname cacomp-class
375 | #' @export
376 | #' @examples
377 | #' set.seed(1234)
378 | #'
379 | #' # Simulate counts
380 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
381 | #' x = sample(1:20, 50, replace = TRUE))
382 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
383 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
384 | #'
385 | #' res <- APL:::comp_std_residuals(mat=cnts)
386 | #' SVD <- svd(res$S)
387 | #' names(SVD) <- c("D", "U", "V")
388 | #' SVD <- SVD[c(2, 1, 3)]
389 | #'
390 | #' ca <- new_cacomp(U = SVD$U,
391 | #' V = SVD$V,
392 | #' D = SVD$D,
393 | #' row_masses = res$rowm,
394 | #' col_masses = res$colm)
395 | new_cacomp <- function(...) {
396 |   new("cacomp", ...) # `...` carries the slot values straight to new()
397 | }
398 | #' Access slots in a cacomp object
399 | #'
400 | #' @param caobj a cacomp object
401 | #' @param slot slot to return
402 | #' @returns Chosen slot of the cacomp object
403 | #' @examples
404 | #' # Simulate scRNAseq data.
405 | #' cnts <- data.frame(cell_1 = rpois(10, 5),
406 | #' cell_2 = rpois(10, 10),
407 | #' cell_3 = rpois(10, 20))
408 | #' rownames(cnts) <- paste0("gene_", 1:10)
409 | #' cnts <- as.matrix(cnts)
410 | #'
411 | #' # Run correspondence analysis.
412 | #' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
413 | #'
414 | #' # access left singular vectors
415 | #' cacomp_slot(ca, "U")
416 | #'
417 | #' @export
418 | cacomp_slot <- function(caobj, slot) {
419 |   valid <- slotNames(caobj) # the only values `slot` may take
420 |   ok <- is.character(slot) && length(slot) == 1L && slot %in% valid
421 |   if (!ok) stop("`slot` must be one of: ", toString(valid), call. = FALSE)
422 |   slot(caobj, slot) # resolves to the accessor function, not the argument
423 | }
424 | #' Get the slot names of a cacomp object
425 | #'
426 | #' @param caobj a cacomp object
427 | #' @returns Character vector containing the slot names of the cacomp object
428 | #' @examples
429 | #' # Simulate scRNAseq data.
430 | #' cnts <- data.frame(cell_1 = rpois(10, 5),
431 | #' cell_2 = rpois(10, 10),
432 | #' cell_3 = rpois(10, 20))
433 | #' rownames(cnts) <- paste0("gene_", 1:10)
434 | #' cnts <- as.matrix(cnts)
435 | #'
436 | #' # Run correspondence analysis.
437 | #' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
438 | #'
439 | #' # show slot names:
440 | #' cacomp_names(ca)
441 | #'
442 | #' @export
443 | cacomp_names <- function(caobj) {
444 |   # slotNames() reports the slots declared for the class of the object.
445 |   slotNames(x = caobj)
446 | }
447 |
448 |
449 | # Left here for potential future inclusion:
450 | #
451 | #' #' Subset rows and columns of a cacomp object.
452 | #' #'
453 | #' #' @param x cacomp object
454 | #' #' @param i rows to subset to.
455 | #' #' @param j columns to subset to.
456 | #' #' @param drop Whether or not to coerce to the lowest possible dimension. Should
457 | #' #' be FALSE!
458 | #' #' @param ... Further arguments
459 | #' #'
460 | #' #' @returns
461 | #' #' Returns a cacomp object with rows and columns subsetted.
462 | #' #' @export
463 | #' setMethod(
464 | #' f = "[",
465 | #' signature="cacomp",
466 | #' definition=function(x, i=NULL, j=NULL,...,drop=FALSE){
467 | #' if (is.null(i)) i <- seq_len(nrow(x@U))
468 | #' if (is.null(j)) j <- seq_len(nrow(x@V))
469 | #'
470 | #' initialize(x,
471 | #' U = x@U[i,],
472 | #' V = x@V[j,],
473 | #' D = x@D,
474 | #' std_coords_rows = x@std_coords_rows[i,],
475 | #' std_coords_cols = x@std_coords_cols[j,],
476 | #' prin_coords_rows = if(!is.empty(x@prin_coords_rows)) x@prin_coords_rows[i,] else matrix(0, 0, 0),
477 | #' prin_coords_cols = if(!is.empty(x@prin_coords_cols)) x@prin_coords_cols[j,] else matrix(0, 0, 0),
478 | #' apl_rows = if(!is.empty(x@apl_rows)) x@apl_rows[i,] else matrix(0, 0, 0),
479 | #' apl_cols = if(!is.empty(x@apl_cols)) x@apl_cols[j,] else matrix(0, 0, 0),
480 | #' APL_score = if(!is.empty(x@APL_score)) x@APL_score[which(x@APL_score$Row_num %in% i),] else data.frame(),
481 | #' dims = x@dims,
482 | #' group = intersect(x@group, j),
483 | #' row_masses = x@row_masses[i],
484 | #' col_masses = x@col_masses[j],
485 | #' top_rows = min(length(i), x@top_rows),
486 | #' tot_inertia = sum(x@row_inertia[i]),
487 | #' row_inertia = x@row_inertia[i],
488 | #' col_inertia = x@col_inertia[j],
489 | #' permuted_data = list()
490 | #' )
491 | #' }
492 | #' )
--------------------------------------------------------------------------------
/vignettes/APL.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Analyzing data with APL"
3 | author:
4 | - name: Elzbieta Gralinska
5 | affiliation: Max Planck Institute for Molecular Genetics, Berlin, Germany
6 | email: gralinska@molgen.mpg.de
7 | - name: Clemens Kohl
8 | affiliation: Max Planck Institute for Molecular Genetics, Berlin, Germany
9 | email: kohl@molgen.mpg.de
10 | - name: Martin Vingron
11 | affiliation: Max Planck Institute for Molecular Genetics, Berlin, Germany
12 | email: vingron@molgen.mpg.de
13 | package: APL
14 | output:
15 | BiocStyle::html_document
16 | abstract: |
17 | This package performs correspondence analysis (CA) and allows the identification of cluster-specific genes using Association Plots (AP).
18 | Additionally, APL computes cluster-specificity scores for all genes, which makes it possible to rank the genes by their specificity for a selected cell cluster of interest.
19 | vignette: |
20 | %\VignetteIndexEntry{Analyzing data with APL}
21 | %\VignetteEncoding{UTF-8}
22 | %\VignetteEngine{knitr::rmarkdown}
23 | editor_options:
24 | markdown:
25 | wrap: sentence
26 | ---
27 |
28 | ```{r, echo = FALSE}
29 | knitr::opts_chunk$set(collapse = TRUE, comment = "#>", results = "hold")
30 | ```
31 |
32 | # Introduction
33 |
34 | "APL" is a package developed for computation of Association Plots, a method for visualization and analysis of single cell transcriptomics data.
35 | The main focus of "APL" is the identification of genes characteristic for individual clusters of cells from input data.
36 |
37 | When working with `r BiocStyle::Rpackage("APL")` package please cite:
38 |
39 | Gralinska, E., Kohl, C., Fadakar, B. S., & Vingron, M. (2022).
40 | Visualizing Cluster-specific Genes from Single-cell Transcriptomics Data Using Association Plots.
41 | Journal of Molecular Biology, 434(11), 167525.
42 |
43 | A citation can also be obtained in R by running `citation("APL")`.
44 | For a mathematical description of the method, please refer to the manuscript.
45 |
46 | # Installation
47 |
48 | To install the `r BiocStyle::Rpackage("APL")` from Bioconductor, run:
49 |
50 | ```{r bioc_install, eval=FALSE}
51 | if (!requireNamespace("BiocManager", quietly = TRUE)) {
52 | install.packages("BiocManager")
53 | }
54 |
55 | BiocManager::install("APL")
56 | ```
57 |
58 | Alternatively the package can also be installed from GitHub:
59 |
60 | ```{r git_install, eval=FALSE}
61 | library(devtools)
62 | install_github("VingronLab/APL")
63 | ```
64 |
65 | To additionally build the package vignette, run instead
66 |
67 | ```{r git_vignette, eval=FALSE}
68 | install_github("VingronLab/APL", build_vignettes = TRUE, dependencies = TRUE)
69 | ```
70 |
71 | Building the vignette will however take considerable time.
72 |
73 | ## Changes regarding python dependencies
74 |
75 | Previous versions of `r BiocStyle::Rpackage("APL")` used pytorch SVD to speed up the computation of the full SVD. This has been deprecated in favor of fast truncated SVD implementations starting with Version 1.10.1.
76 | Calling `runAPL` or `cacomp` with `python = TRUE` will not lead to an error, but only issue a warning.
77 | If you still want to perform a full SVD, set the number of dimensions to the rank of the matrix. Until a faster replacement is identified, this computation will be performed by the rather slow base R `svd` and should therefore not be done on very large matrices.
78 | The number of dimensions now defaults to half of the rank of the matrix.
79 |
80 | # Preprocessing
81 |
82 | ## Setup
83 |
84 | In this vignette we will use a small data set published by [Darmanis et al. (2015)](https://doi.org/10.1073/pnas.1507125112) consisting of 466 human adult cortical single cells sequenced on the Fluidigm platform as an example.
85 | To obtain the data necessary to follow the vignette we use the Bioconductor package `r BiocStyle::Biocpkg("scRNAseq")`.
86 |
87 | Besides the package `r BiocStyle::Rpackage("APL")` we will use Bioconductor packages to preprocess the data.
88 | Namely we will use `r BiocStyle::Biocpkg("SingleCellExperiment")`, `r BiocStyle::Biocpkg("scater")` and `r BiocStyle::Biocpkg("scran")`.
89 | However, the preprocessing could equally be performed with the single-cell RNA-seq analysis suite `r BiocStyle::CRANpkg("Seurat")`.
90 |
91 | The preprocessing steps are performed according to the recommendations published in [Orchestrating Single-Cell Analysis with Bioconductor](https://bioconductor.org/books/release/OSCA/) by Amezquita *et al.* (2022).
92 | For more information about the rationale behind them please refer to the book.
93 |
94 | ```{r setup, message=FALSE, warning=FALSE}
95 | library(APL)
96 | library(scRNAseq)
97 | library(SingleCellExperiment)
98 | library(scran)
99 | library(scater)
100 | set.seed(1234)
101 | ```
102 |
103 | ## Loading the data
104 |
105 | We start with the loading and preprocessing of the Darmanis data.
106 |
107 | ```{r load_data}
108 | darmanis <- DarmanisBrainData()
109 | darmanis
110 | ```
111 |
112 |
113 | ## Normalization, PCA & Clustering
114 |
115 | Association Plots from `r BiocStyle::Rpackage("APL")` should be computed based on the normalized expression data.
116 | Therefore, we first normalize the counts from the Darmanis data and calculate both PCA and UMAP for visualizations later.
117 |
118 | For now, `r BiocStyle::Rpackage("APL")` requires the data to be clustered beforehand. The darmanis data comes already annotated, so we will use the cell types stored in the `cell.type` metadata column instead of performing a clustering.
119 |
120 | ```{r preprocess}
121 | set.seed(100)
122 | clust <- quickCluster(darmanis)
123 | darmanis <- computeSumFactors(darmanis, cluster = clust, min.mean = 0.1)
124 | darmanis <- logNormCounts(darmanis)
125 |
126 | dec <- modelGeneVar(darmanis)
127 | top_darmanis <- getTopHVGs(dec, n = 5000)
128 | darmanis <- fixedPCA(darmanis, subset.row = top_darmanis)
129 | darmanis <- runUMAP(darmanis, dimred = "PCA")
130 |
131 | plotReducedDim(darmanis, dimred = "UMAP", colour_by = "cell.type")
132 | ```
133 |
134 | # Quick start
135 |
136 | The fastest way to compute the Association Plot for a selected cluster of cells from the input data is by using a wrapper function `runAPL()`.
137 | `runAPL()` automates most of the analysis steps for ease of use.
138 |
139 | For example, to generate an Association Plot for the oligodendrocytes we can use the following command:
140 |
141 | ```{r runAPL}
142 | runAPL(
143 | darmanis,
144 | assay = "logcounts",
145 | top = 5000,
146 | group = which(darmanis$cell.type == "oligodendrocytes"),
147 | type = "ggplot"
148 | )
149 | ```
150 |
151 | The generated Association Plot is computed based on the log-normalized count matrix.
152 | By default `runAPL` uses the top 5,000 most variable genes in the data, but the data can be subset to any number of genes by changing the value for the argument `top`.
153 | The dimensionality of the CA is determined automatically by the elbow rule described below (see [here](#dim_reduc)).
154 | This default behavior can be overridden by setting the dimensions manually (parameter `dims`).
155 | The cluster-specificity score ($S_\alpha$) for each gene is also calculated (`score = TRUE`).
156 | In order to better explore the data, `type` can be set to `"plotly"` to obtain an interactive plot.
157 | `runAPL` has many arguments to further customize the output and fine tune the calculations.
158 | Please refer to the documentation (`?runAPL`) for more information.
159 | The following sections in this vignette will discuss the choice of dimensionality and the $S_\alpha$-score.
160 |
161 | # Step-by-step way of computing Association Plots
162 |
163 | Alternatively, Association Plots can be computed step-by-step.
164 | This allows the Association Plots to be adjusted to the user's needs.
165 | Below we explain each step of the process of generating Association Plots.
166 |
167 | ## Correspondence Analysis
168 |
169 | The first step of Association Plot computations is correspondence analysis (CA).
170 | CA is a data dimensionality reduction method similar to PCA, however it allows for a simultaneous embedding of both cells and genes from the input data in the same space.
171 | In this example we perform CA on the log-normalized count matrix of the darmanis brain data.
172 |
173 | ```{r cacomp}
174 | # Computing CA on logcounts
175 | logcounts <- logcounts(darmanis)
176 | ca <- cacomp(
177 | obj = logcounts,
178 | top = 5000
179 | )
180 |
181 | # The above is equivalent to:
182 | # ca <- cacomp(obj = darmanis,
183 | # assay = "logcounts",
184 | # top = 5000)
185 | ```
186 |
187 | The function `cacomp` accepts as an input any matrix with non-negative entries, be it a single-cell RNA-seq, bulk RNA-seq or other data.
188 | For ease of use, `cacomp` accepts also `r BiocStyle::Biocpkg("SingleCellExperiment")` and `r BiocStyle::CRANpkg("Seurat")` objects, however for these we additionally have to specify via the `assay` and/or `slot` (for Seurat) parameter from where to extract the data.
189 | Importantly, in order to ensure the interpretability of the results `cacomp` (and related functions such as `runAPL`) requires that the input matrix contains both row and column names.
190 |
191 | When performing a feature selection before CA, we can set the argument `top` to the desired number of genes with the highest variance across cells from the input data to retain for further analysis.
192 | By default, only the top 5,000 most variable genes are kept as a good compromise between computational time and keeping the most relevant genes. If we want to ensure however that even marker genes of smaller clusters are kept, we can increase the number of genes.
193 |
194 |
195 |
196 | The output of `cacomp` is an object of class `cacomp`:
197 |
198 | ```{r print_cacomp}
199 | ca
200 | ```
201 |
202 | As can be seen in the summarized output, by default both types of coordinates in the CA space (principal and standardized) are calculated.
203 | Once the coordinates for the Association Plot are calculated, they will also be shown in the output of `cacomp`.
204 | Slots are accessed through an accessor function:
205 |
206 | ```{r std_coords}
207 | cacomp_slot(ca, "std_coords_cols")[1:5, 1:5]
208 | ```
209 |
210 | In the case of `r BiocStyle::Biocpkg("SingleCellExperiment")` and `r BiocStyle::CRANpkg("Seurat")` objects, we can alternatively set `return_input = TRUE` to get the input object back, with the CA results computed by "APL" and stored in the appropriate slot for dimension reduction.
211 | This also allows for using the plotting functions that come with these packages:
212 |
213 | ```{r ca_pbmc}
214 | darmanis <- cacomp(
215 | obj = darmanis,
216 | assay = "logcounts",
217 | top = 5000,
218 | return_input = TRUE
219 | )
220 |
221 | plotReducedDim(darmanis,
222 | dimred = "CA",
223 | ncomponents = c(1, 2),
224 | colour_by = "cell.type"
225 | )
226 | plotReducedDim(darmanis,
227 | dimred = "CA",
228 | ncomponents = c(3, 4),
229 | colour_by = "cell.type"
230 | )
231 | ```
232 |
233 | However, some functions such as apl_coords() require information that cannot be stored in the single-cell container objects.
234 | It is therefore often easier to work with a `cacomp` object instead.
235 | We can convert `r BiocStyle::CRANpkg("Seurat")` or `r BiocStyle::Biocpkg("SingleCellExperiment")` objects which have CA results stored to a `cacomp` object using the function `as.cacomp()`:
236 |
237 | ```{r convert}
238 | # Converting the object darmanis to cacomp
239 | ca <- as.cacomp(darmanis)
240 | ```
241 |
242 | ## Reducing the number of CA dimensions {#dim_reduc}
243 |
244 | When working with high-dimensional data, after singular value decomposition there will often be many dimensions which are representing the noise in the data.
245 | In order to minimize the noise, it is generally recommended to reduce the dimensionality of the data before generating Association Plots.
246 |
247 | The number of dimensions to retain can be computed using the function `pick_dims`.
248 | This function offers three standard methods which we implemented:
249 |
250 | - elbow rule (`method = "elbow_rule"`) - the number of dimensions to retain is calculated based on scree plots generated for randomized data, and corresponds to a point in the plot where the band of randomized singular values enters the band of the original singular values,
251 |
252 | - 80% rule (`method = "maj_inertia"`) - only those first dimensions are retained which in total account for >= 80% of total inertia,
253 |
254 | - average rule (`method = "avg_inertia"`) - only those dimensions are retained which account for more inertia than a single dimension on average.
255 |
256 | Additionally, the user can compute a scree plot to choose the number of dimensions by themselves:
257 |
258 | ```{r scree_plot}
259 | pick_dims(ca, method = "scree_plot") +
260 | xlim(c(0, 20))
261 | ```
262 |
263 | In the scree plot above we can see that the first dimension explains only \~1% of the total inertia and we observe the "jump" in the scree plot at roughly 5 dimensions.
264 | The first dimensions however explain only a small amount of the total inertia.
265 |
266 | Here we compute the number of dimensions using the elbow rule.
267 | For demonstration, only three data permutations are computed:
268 |
269 | ```{r pick_dims, results = "hide"}
270 | pd <- pick_dims(
271 | ca,
272 | mat = logcounts(darmanis),
273 | method = "elbow_rule",
274 | reps = 3
275 | )
276 | ```
277 |
278 | ```{r show_dims, message=FALSE}
279 | pd
280 | ```
281 |
282 | In this case the elbow rule leads to a higher number of dimensions.
283 |
284 | ```{r expl_inert}
285 | # Compute the amount of inertia explained by each of the dimensions
286 | D <- cacomp_slot(ca, "D")
287 | expl_inertia <- (D^2 / sum(D^2)) * 100
288 |
289 | # Compute the amount of inertia explained
290 | # by the number of dimensions defined by elbow rule
291 | sum(expl_inertia[seq_len(pd)])
292 | ```
293 |
294 | In this example the elbow rule suggests keeping `r pd` dimensions, which explain `r round(sum(expl_inertia[seq_len(pd)]),2)`% of the total inertia from the data.
295 |
296 | Finally, we can reduce the dimensionality of the data to the desired number of dimensions:
297 |
298 | ```{r subset_dims}
299 | ca <- subset_dims(ca, dims = pd)
300 | ```
301 |
302 | ## Association Plots
303 |
304 | When working with single-cell transcriptomics data we are often interested in which genes are associated to a cluster of cells.
305 | To reveal such genes we can compute an Association Plot for a selected cluster of cells.
306 | In the following example we want to generate an Association Plot for the cluster of endothelial cells:
307 |
308 | ```{r apl_platelets}
309 | # Specifying a cell cluster of interest
310 | endo <- which(darmanis$cell.type == "endothelial")
311 |
312 | # Calculate Association Plot coordinates for endothelial cells
313 | ca <- apl_coords(ca, group = endo)
314 | ```
315 |
316 | After computing the coordinates of genes and cells in the Association Plot we are able to plot the results using the `apl` function.
317 |
318 | ```{r apl_platelets_plot, fig.wide = TRUE}
319 | # endothelial marker genes
320 | marker_genes <- c("APOLD1", "TM4SF1", "SULT1B1", "ESM1", "SELE")
321 |
322 | # Plot APL
323 | apl(ca,
324 | row_labs = TRUE,
325 | rows_idx = marker_genes,
326 | type = "ggplot"
327 | ) # type = "plotly" for an interactive plot
328 | ```
329 |
330 | In the Association Plot all genes are represented by blue circles.
331 | The further to the right a gene is located the more associated it is with the chosen cluster of cells and the lower the y-axis value, the more specific it is for the selected cluster.
332 | Additionally, it is possible to highlight in the Association Plot any set of genes.
333 | In the example above we highlighted five genes (APOLD1, TM4SF1, SULT1B1, ESM1, SELE) which are known to be marker genes for endothelial cells.
334 | As we can see in the plot, they are located in the right part of the plot, which confirms their specificity for endothelial cells.
335 |
336 | By default we plot only the genes in the Association Plot.
337 | To also display the cells in the Association Plot, use the argument `show_cols = TRUE`.
338 | This way we can identify other cells which show similar expression profiles to the cells of interest.
339 | Cells that belong to the cluster of interest will be colored in red, and all remaining cells will be colored in violet.
340 | Furthermore, an interactive plot in which you can hover over genes to see their name can be created by setting `type = "plotly"`.
341 |
342 | ## Association Plots with the $S_\alpha$-scores
343 |
344 | The $S_\alpha$-score allows us to rank genes by their specificity for a selected cell cluster, and is computed for each gene from the Association Plot separately.
345 | The higher the $S_\alpha$-score of a gene, the more characteristic its expression for the investigated cell cluster.
346 | The $S_\alpha$-scores can be computed using the `apl_score` function.
347 | To display the $S_\alpha$-scores in the Association Plot, we can use the argument `show_score = TRUE` in the `apl` function:
348 |
349 | ```{r apl_score, results = "hide"}
350 | # Compute S-alpha score
351 | # For the calculation the input matrix is also required.
352 | ca <- apl_score(ca,
353 | mat = logcounts(darmanis),
354 | reps = 5
355 | )
356 | ```
357 |
358 | ```{r apl_plot_platelets, fig.wide = TRUE}
359 | apl(ca,
360 | show_score = TRUE,
361 | type = "ggplot"
362 | )
363 | ```
364 |
365 | By default, only genes that have a $S_\alpha$-score larger than 0 are colored as these tend to be genes of interest and we consider them as cluster-specific genes.
366 | This cutoff can be easily changed through the `score_cutoff` argument to `apl()`.
367 |
368 | The $S_\alpha$-scores are stored in the `"APL_score"` slot and can be accessed as follows:
369 |
370 | ```{r print_score}
371 | head(cacomp_slot(ca, "APL_score"))
372 | ```
373 |
374 | To see the expression of genes with the highest $S_\alpha$-scores (or any selected genes) across all cell types from the data we can use plotting functions provided by `r BiocStyle::Biocpkg("scater")`:
375 |
376 | ```{r seurat_apl, fig.wide = TRUE}
377 | scores <- cacomp_slot(ca, "APL_score")
378 |
379 | plotExpression(darmanis,
380 | features = head(scores$Rowname, 3),
381 | x = "cell.type",
382 | colour_by = "cell.type"
383 | )
384 |
385 | plotReducedDim(darmanis,
386 | dimred = "UMAP",
387 | colour_by = scores$Rowname[1]
388 | )
389 | ```
390 |
391 | As expected, the 3 most highly scored genes are over-expressed in the endothelial cells. Due to the small size of the data set and number of cells in the cluster (only 20 out of 466 cells are endothelial cells) some cluster specific genes are only expressed in a few cells. Most data sets nowadays are significantly larger so this should not be a major concern and it can further be mitigated by performing a more stringent feature selection before CA.
392 |
393 | ## Visualization of CA
394 |
395 | In addition to Association Plots, "APL" can also produce other kinds of output.
396 | For instance, we can use "APL" to generate a two- and three-dimensional correspondence analysis projection of the data.
397 | The so-called biplot visualizes both cells and genes from the input data and can be created using the function `ca_biplot`.
398 | Alternatively, a three-dimensional data projection plot can be generated using the function `ca_3Dplot`.
399 | To generate such biplots a `cacomp` object is required.
400 |
401 | ```{r biplot, fig.wide = TRUE}
402 | # Specifying a cell cluster of interest
403 | endo <- which(darmanis$cell.type == "endothelial")
404 |
405 | # Creating a static plot
406 | ca_biplot(ca, col_labels = endo, type = "ggplot")
407 |
408 | # Creating an interactive plot
409 | # ca_biplot(ca, type = "plotly", col_labels = endo)
410 |
411 | # 3D plot
412 | # ca_3Dplot(ca, col_labels = endo)
413 | ```
414 |
415 | The above described plots give us a quick overview of the first 2 dimensions of the data (more dimensions can be plotted).
416 | As shown in the commented-out code, to interactively explore the projection of the data `type = "plotly"` can be set.
417 |
418 | # APL and GO enrichment analysis
419 |
420 | After computing an Association Plot and identifying a set of genes specific for a selected cluster of cells we might be interested in conducting a Gene Ontology (GO) enrichment analysis of the identified gene set.
421 | To conduct a GO enrichment analysis of microglia-specific genes as identified using an Association Plot, we first need to compute the coordinates of the genes in the Association Plot for microglia cells, as well as the $S_\alpha$-score for each gene:
422 |
423 | ```{r cluster_three, results="hide"}
424 | # Get indices of microglia cells
425 | microglia <- which(darmanis$cell.type == "microglia")
426 |
427 | # Calculate Association Plot coordinates of the genes and the $S_\alpha$-scores
428 | ca <- apl_coords(ca, group = microglia)
429 |
430 | ca <- apl_score(ca,
431 | mat = logcounts(darmanis),
432 | reps = 5
433 | )
434 | ```
435 |
436 | Now we can conduct GO enrichment analysis as implemented in the package `r BiocStyle::Biocpkg("topGO")` using the most cluster-specific genes from the Association Plot.
437 | By default we use all genes with an $S_\alpha$-score higher than 0, but the cutoff may have to be adjusted depending on the dataset.
438 | In the example below we raise the cutoff to 1 in order to restrict the analysis to truly significant genes.
439 | In case that no $S_\alpha$-scores were calculated, one can also choose to use the `ngenes` (by default 1000) genes with the highest x-coordinates by setting `use_coords = TRUE`.
440 |
441 | ```{r topGO, message=FALSE}
442 | enr <- apl_topGO(ca,
443 | ontology = "BP",
444 | organism = "hs",
445 | score_cutoff = 1
446 | )
447 | head(enr)
448 | ```
449 |
450 | The function `plot_enrichment()` was implemented to visualize the `topGO` results in form of a dotplot.
451 |
452 | ```{r topGO_plot, message=FALSE}
453 | plot_enrichment(enr)
454 | ```
455 | Microglia cells are innate immune cells of the brain and as such the most highly scored genes are enriched in gene sets related to the immune response and microglia specific gene sets as one would expect.
456 |
457 |
458 | # Session info {.unnumbered}
459 |
460 | ```{r sessionInfo, echo=FALSE}
461 | sessionInfo()
462 | ```
463 |
--------------------------------------------------------------------------------