├── .github
    ├── .gitignore
    └── workflows
    │   └── pkgdown.yaml
├── vignettes
    ├── .gitignore
    └── APL.Rmd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── testdata
    │       ├── smoke.rda
    │       ├── countries.rda
    │       ├── smoke_scRNAseq.rda
    │       ├── AP_coordinates
    │       │   ├── example3
    │       │   │   ├── genes_order.txt
    │       │   │   ├── AP_coordinates_samples.txt
    │       │   │   ├── AP_coordinates_genes.txt
    │       │   │   └── gene_ranking.txt
    │       │   ├── example1
    │       │   │   ├── AP_coordinates_samples.txt
    │       │   │   ├── genes_order.txt
    │       │   │   ├── AP_coordinates_genes.txt
    │       │   │   └── gene_ranking.txt
    │       │   ├── example2
    │       │   │   ├── AP_coordinates_samples.txt
    │       │   │   ├── genes_order.txt
    │       │   │   ├── AP_coordinates_genes.txt
    │       │   │   └── gene_ranking.txt
    │       │   ├── notes.txt
    │       │   └── input_data.txt
    │       └── input_data.tsv
    │   ├── test-convert.R
    │   ├── test-CA.R
    │   └── test-apl.R
├── NEWS.md
├── _pkgdown.yml
├── man
    ├── figures
    │   └── fig_AP.png
    ├── scree_plot.Rd
    ├── is.empty.Rd
    ├── rm_zeros.Rd
    ├── pipe.Rd
    ├── comp_ft_residuals.Rd
    ├── inertia_rows.Rd
    ├── subset_dims.Rd
    ├── recompute.Rd
    ├── as.list-cacomp-method.Rd
    ├── cacomp_names.Rd
    ├── clip_residuals.Rd
    ├── cacomp_slot.Rd
    ├── show.cacomp.Rd
    ├── check_cacomp.Rd
    ├── random_direction_cutoff.Rd
    ├── calc_residuals.Rd
    ├── plot_enrichment.Rd
    ├── comp_std_residuals.Rd
    ├── var_rows.Rd
    ├── comp_NB_residuals.Rd
    ├── permutation_cutoff.Rd
    ├── apl_ggplot.Rd
    ├── apl_plotly.Rd
    ├── ca_coords.Rd
    ├── elbow_method.Rd
    ├── apl_coords.Rd
    ├── apl_topGO.Rd
    ├── apl.Rd
    ├── run_cacomp.Rd
    ├── ca_3Dplot.Rd
    ├── cacomp-class.Rd
    ├── apl_score.Rd
    ├── as.cacomp.Rd
    ├── ca_biplot.Rd
    ├── pick_dims.Rd
    ├── cacomp.Rd
    └── runAPL.Rd
├── .lintr
├── .gitignore
├── .Rbuildignore
├── R
    ├── utils-pipe.R
    ├── import_packages.R
    ├── generic_methods.R
    ├── convert.R
    └── constructor.R
├── NAMESPACE
├── DESCRIPTION
└── README.md


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(APL)
3 | test_check("APL")
4 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # Changes in version 0.99.0 (2021-12-06)
2 | + Submitted to Bioconductor
3 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://vingronlab.github.io/APL/
2 | template:
3 |   bootstrap: 5
4 | 
5 | 


--------------------------------------------------------------------------------
/man/figures/fig_AP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/man/figures/fig_AP.png


--------------------------------------------------------------------------------
/tests/testthat/testdata/smoke.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/tests/testthat/testdata/smoke.rda


--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 |     indentation_linter(indent = 4L),
3 |     commented_code_linter = NULL
4 |   )
5 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/countries.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/tests/testthat/testdata/countries.rda


--------------------------------------------------------------------------------
/tests/testthat/testdata/smoke_scRNAseq.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VingronLab/APL/HEAD/tests/testthat/testdata/smoke_scRNAseq.rda


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | .Rproj
 6 | .ipynb_checkpoints
 7 | .ipynb*
 8 | .ipynb*/
 9 | APL.Rproj
10 | /doc/
11 | /Meta/
12 | inst/doc
13 | docs
14 | /renv/
15 | renv.lock
16 | .Rprofile
17 | .editorconfig
18 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^renv$
 2 | ^renv\.lock$
 3 | ^.*\.Rproj$
 4 | ^\.Rproj\.user$
 5 | ^LICENSE\.md$
 6 | ^.git
 7 | ^.git/*
 8 | ^.Rhistory
 9 | ^.gitignore
10 | ^doc$
11 | ^docs$
12 | ^Meta$
13 | ^README\.Rmd$
14 | ^_pkgdown\.yml$
15 | ^pkgdown$
16 | ^\.github$
17 | ^.lintr
18 | ^renv
19 | ^renv.lock
20 | .editorconfig


--------------------------------------------------------------------------------
/man/scree_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{scree_plot}
 4 | \alias{scree_plot}
 5 | \title{Scree Plot}
 6 | \usage{
 7 | scree_plot(df)
 8 | }
 9 | \arguments{
10 | \item{df}{A data frame with columns "dims" and "inertia".}
11 | }
12 | \value{
13 | Returns a ggplot object.
14 | }
15 | \description{
16 | Plots a scree plot.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/is.empty.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/constructor.R
 3 | \name{is.empty}
 4 | \alias{is.empty}
 5 | \title{Helper function to check if object is empty.}
 6 | \usage{
 7 | is.empty(x)
 8 | }
 9 | \arguments{
10 | \item{x}{object}
11 | }
12 | \value{
13 | TRUE if x has length 0 and is not NULL. FALSE otherwise
14 | }
15 | \description{
16 | Helper function to check if object is empty.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/rm_zeros.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{rm_zeros}
 4 | \alias{rm_zeros}
 5 | \title{Removes 0-only rows and columns in a matrix.}
 6 | \usage{
 7 | rm_zeros(obj)
 8 | }
 9 | \arguments{
10 | \item{obj}{A matrix.}
11 | }
12 | \value{
13 | Input matrix with rows & columns consisting of only 0 removed.
14 | }
15 | \description{
16 | Removes 0-only rows and columns in a matrix.
17 | }
18 | 


--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
 1 | #' Pipe operator
 2 | #'
 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
 4 | #'
 5 | #' @name %>%
 6 | #' @rdname pipe
 7 | #' @keywords internal
 8 | #' @export
 9 | #' @param lhs A value or the magrittr placeholder.
10 | #' @param rhs A function call using the magrittr semantics.
11 | #' @return \code{magrittr::\link[magrittr:pipe]{\%>\%}}
12 | #' @importFrom magrittr %>%
13 | #' @usage lhs \%>\% rhs
14 | #' @examples
15 | #' x <- 1:100
16 | #' x %>% head()
17 | NULL
18 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/genes_order.txt:
--------------------------------------------------------------------------------
 1 | "11"	"Burundi"
 2 | "28"	"Central African Republic"
 3 | "37"	"Congo, Dem. Rep."
 4 | "14"	"Burkina Faso"
 5 | "26"	"Bhutan"
 6 | "19"	"Bosnia and Herzegovina"
 7 | "1"	"Afghanistan"
 8 | "15"	"Bangladesh"
 9 | "36"	"Cameroon"
10 | "25"	"Brunei Darussalam"
11 | "6"	"Argentina"
12 | "12"	"Belgium"
13 | "29"	"Canada"
14 | "2"	"Angola"
15 | "17"	"Bahrain"
16 | "24"	"Barbados"
17 | "5"	"United Arab Emirates"
18 | "8"	"Australia"
19 | "18"	"Bahamas, The"
20 | "13"	"Benin"
21 | 


--------------------------------------------------------------------------------
/R/import_packages.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' @import methods
 3 | #' @import SummarizedExperiment org.Hs.eg.db org.Mm.eg.db
 4 | #' @importFrom stats as.formula na.omit quantile runif var
 5 | #' @importFrom utils head setTxtProgressBar txtProgressBar
 6 | #' @importFrom ggplot2 ggplot aes geom_point guide_colorbar
 7 | #' @importFrom topGO showSigOfNodes score
 8 | #' @importFrom viridisLite viridis
 9 | #' @importFrom rlang .data
10 | #' @importFrom RSpectra svds
11 | #' @importClassesFrom SeuratObject Seurat
12 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment
13 | NULL
14 | 


--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-pipe.R
 3 | \name{\%>\%}
 4 | \alias{\%>\%}
 5 | \title{Pipe operator}
 6 | \usage{
 7 | lhs \%>\% rhs
 8 | }
 9 | \arguments{
10 | \item{lhs}{A value or the magrittr placeholder.}
11 | 
12 | \item{rhs}{A function call using the magrittr semantics.}
13 | }
14 | \value{
15 | \code{magrittr::\link[magrittr:pipe]{\%>\%}}
16 | }
17 | \description{
18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
19 | }
20 | \examples{
21 | x <- 1:100
22 | x \%>\% head()
23 | }
24 | \keyword{internal}
25 | 


--------------------------------------------------------------------------------
/man/comp_ft_residuals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{comp_ft_residuals}
 4 | \alias{comp_ft_residuals}
 5 | \title{Compute Freeman-Tukey residuals}
 6 | \usage{
 7 | comp_ft_residuals(mat)
 8 | }
 9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 | }
13 | \value{
14 | A named list. The elements are:
15 | \itemize{
16 | \item "S": standardized residual matrix.
17 | \item "tot": grand total of the original matrix.
18 | \item "rowm": row masses.
19 | \item "colm": column masses.
20 | }
21 | }
22 | \description{
23 | Computes Freeman-Tukey residuals
24 | }
25 | 


--------------------------------------------------------------------------------
/man/inertia_rows.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{inertia_rows}
 4 | \alias{inertia_rows}
 5 | \title{Find most variable rows}
 6 | \usage{
 7 | inertia_rows(mat, top = 5000, ...)
 8 | }
 9 | \arguments{
10 | \item{mat}{A matrix with genes in rows and cells in columns.}
11 | 
12 | \item{top}{Number of genes to select.}
13 | 
14 | \item{...}{Further arguments for `comp_std_residuals`}
15 | }
16 | \value{
17 | Returns a matrix, which consists of the top variable rows of mat.
18 | }
19 | \description{
20 | Calculates the contributing inertia of each row, which is defined as the sum of squares of its Pearson residuals, and selects the
21 | rows with the largest inertias, e.g. 5,000.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/subset_dims.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{subset_dims}
 4 | \alias{subset_dims}
 5 | \title{Subset dimensions of a caobj}
 6 | \usage{
 7 | subset_dims(caobj, dims)
 8 | }
 9 | \arguments{
10 | \item{caobj}{A caobj.}
11 | 
12 | \item{dims}{Integer. Number of dimensions.}
13 | }
14 | \value{
15 | Returns caobj.
16 | }
17 | \description{
18 | Subsets the dimensions according to user input.
19 | }
20 | \examples{
21 | # Simulate scRNAseq data.
22 | cnts <- data.frame(cell_1 = rpois(10, 5),
23 |                    cell_2 = rpois(10, 10),
24 |                    cell_3 = rpois(10, 20))
25 | rownames(cnts) <- paste0("gene_", 1:10)
26 | cnts <- as.matrix(cnts)
27 | 
28 | # Run correspondence analysis.
29 | ca <- cacomp(cnts)
30 | ca <- subset_dims(ca, 2)
31 | }
32 | 


--------------------------------------------------------------------------------
/man/recompute.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert.R
 3 | \name{recompute}
 4 | \alias{recompute}
 5 | \title{Recompute missing values of cacomp object.}
 6 | \usage{
 7 | recompute(calist, mat, ...)
 8 | }
 9 | \arguments{
10 | \item{calist}{A list with std_coords_cols, the prin_coords_rows and D.}
11 | 
12 | \item{mat}{The matrix from which the cacomp object is derived.}
13 | 
14 | \item{...}{Further arguments forwarded to cacomp.}
15 | }
16 | \value{
17 | A cacomp object with additional calculated row_masses, col_masses,
18 | std_coords_rows, U and V.
19 | }
20 | \description{
21 | The caobj needs to have the std_coords_cols, the prin_coords_rows and D
22 | calculated. From this the remainder will be calculated.
23 | Future updates might extend this functionality.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/as.list-cacomp-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/generic_methods.R
 3 | \name{as.list,cacomp-method}
 4 | \alias{as.list,cacomp-method}
 5 | \title{Convert cacomp object to list.}
 6 | \usage{
 7 | \S4method{as.list}{cacomp}(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A cacomp object.}
11 | }
12 | \value{
13 | The cacomp object converted to a list.
14 | }
15 | \description{
16 | Convert cacomp object to list.
17 | }
18 | \examples{
19 | 
20 | # Simulate counts
21 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
22 |                x = sample(1:100, 50, replace = TRUE))
23 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
24 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
25 | 
26 | # Run correspondence analysis
27 | ca <- cacomp(obj = cnts, princ_coords = 3)
28 | ca_list <- as.list(ca)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/cacomp_names.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/constructor.R
 3 | \name{cacomp_names}
 4 | \alias{cacomp_names}
 5 | \title{Prints slot names of cacomp object}
 6 | \usage{
 7 | cacomp_names(caobj)
 8 | }
 9 | \arguments{
10 | \item{caobj}{a cacomp object}
11 | }
12 | \value{
13 | Prints slot names of cacomp object
14 | }
15 | \description{
16 | Prints slot names of cacomp object
17 | }
18 | \examples{
19 | # Simulate scRNAseq data.
20 | cnts <- data.frame(cell_1 = rpois(10, 5),
21 |                    cell_2 = rpois(10, 10),
22 |                    cell_3 = rpois(10, 20))
23 | rownames(cnts) <- paste0("gene_", 1:10)
24 | cnts <- as.matrix(cnts)
25 | 
26 | # Run correspondence analysis.
27 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
28 | 
29 | # show slot names:
30 | cacomp_names(ca)
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/man/clip_residuals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{clip_residuals}
 4 | \alias{clip_residuals}
 5 | \title{Perform clipping of residuals}
 6 | \usage{
 7 | clip_residuals(S, cutoff = sqrt(ncol(S)))
 8 | }
 9 | \arguments{
10 | \item{S}{Matrix of residuals.}
11 | 
12 | \item{cutoff}{Value above/below which clipping should happen.}
13 | }
14 | \value{
15 | Matrix of clipped residuals.
16 | }
17 | \description{
18 | Clips Pearson or negative-binomial (NB) residuals above or below a determined
19 | value. For Pearson (Poisson) residuals the cutoff is set by default to 1, for
20 | NB residuals to sqrt(n).
21 | }
22 | \references{
23 | Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for
24 | normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021).
25 | https://doi.org/10.1186/s13059-021-02451-7
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/AP_coordinates_samples.txt:
--------------------------------------------------------------------------------
1 | "V1"	"V2"	"V3"	"V4"	"V5"	"V6"	"V7"	"V8"	"V9"	"V10"	"V11"	"V12"	"V13"	"V14"	"V15"	"V16"	"V17"	"V18"	"V19"	"V20"
2 | "1"	-0.497520162742696	-0.497520162756415	-0.497520162738791	-0.497520162737093	-0.497520162736328	1.63637085711038	1.61927262333547	1.47122333782444	-0.497520162737096	1.1890857588906	-0.49752016274269	4.133891377865	-0.497520162738793	-0.49752016273709	-0.497520162736324	-0.497520162742692	-0.497520162756414	-0.49752016273879	-0.497520162737097	-0.497520162736327
3 | "2"	8.25962218436963	7.47376630424699	8.157375687224	7.26316523278324	7.8674906965723	4.21515861914502	4.20135613303697	4.07689426627507	4.2804018285755	3.81263706429037	4.38511077992584	5.3344076850863	4.41474200626891	6.81688703644307	4.61353926316727	3.13790937563177	2.74310982704723	3.2117318008343	2.80911936673341	3.33629235548098
4 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/AP_coordinates_samples.txt:
--------------------------------------------------------------------------------
1 | "V1"	"V2"	"V3"	"V4"	"V5"	"V6"	"V7"	"V8"	"V9"	"V10"	"V11"	"V12"	"V13"	"V14"	"V15"	"V16"	"V17"	"V18"	"V19"	"V20"
2 | "1"	-0.819303761786799	-0.524462196212651	-0.690540545321318	-0.601573222546409	-0.295026840676528	1.49053923705725	0.676946241142394	1.80151507767079	0.99509136392699	2.11134102092629	-0.155429394800191	1.62784456619983	-0.437020255007118	1.41419203571708	-0.820028450041812	-0.556025131641147	-0.505372951386226	-0.691354836861871	-0.633727014767599	-1.03355840189376
3 | "2"	3.38201692874111	3.5330668253809	3.29387869717513	3.60420157310076	3.10443939196198	1.40095669441291	2.91822188207523	1.21287091464517	2.28344185385098	1.00775719856003	1.67522416950889	3.39087497643017	1.77453373365817	3.26728018371152	1.3720051021511	0.37867034651444	1.079639384027	0.46422153612581	1.02304571831716	0.77749296941889
4 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/AP_coordinates_samples.txt:
--------------------------------------------------------------------------------
1 | "V1"	"V2"	"V3"	"V4"	"V5"	"V6"	"V7"	"V8"	"V9"	"V10"	"V11"	"V12"	"V13"	"V14"	"V15"	"V16"	"V17"	"V18"	"V19"	"V20"
2 | "1"	-0.860825655533233	-0.195215644898544	-1.04961066415878	-0.702979515229828	-0.665958384755548	0.890576563883286	0.576142490585286	1.35906065068609	0.970463082293897	1.96830121597133	0.0534435764244335	2.44349727159309	-0.354694129476072	1.90549741395287	-0.96235062777159	-0.434932370721346	-0.722789419983387	-0.587746131595263	-0.812890461978997	-1.04575882446561
3 | "2"	3.66488214776354	3.50435250102428	3.74970406575467	3.43908812256832	3.53122953340069	1.9670830311872	2.20724230116108	1.16326507594813	1.55545115496683	0.561458398050811	1.47478062061375	3.95575532693773	1.37741852892094	3.0984596705426	1.18476841980351	1.47948801613432	1.10965278138218	1.39956763662195	0.949002349607891	1.01565019749943
4 | 


--------------------------------------------------------------------------------
/man/cacomp_slot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/constructor.R
 3 | \name{cacomp_slot}
 4 | \alias{cacomp_slot}
 5 | \title{Access slots in a cacomp object}
 6 | \usage{
 7 | cacomp_slot(caobj, slot)
 8 | }
 9 | \arguments{
10 | \item{caobj}{a cacomp object}
11 | 
12 | \item{slot}{slot to return}
13 | }
14 | \value{
15 | Chosen slot of the cacomp object
16 | }
17 | \description{
18 | Access slots in a cacomp object
19 | }
20 | \examples{
21 | # Simulate scRNAseq data.
22 | cnts <- data.frame(cell_1 = rpois(10, 5),
23 |                    cell_2 = rpois(10, 10),
24 |                    cell_3 = rpois(10, 20))
25 | rownames(cnts) <- paste0("gene_", 1:10)
26 | cnts <- as.matrix(cnts)
27 | 
28 | # Run correspondence analysis.
29 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
30 | 
31 | # access left singular vectors
32 | cacomp_slot(ca, "U")
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/notes.txt:
--------------------------------------------------------------------------------
 1 | ## Input data: 39 rows x 20 columns
 2 | 
 3 | ## Analyses:
 4 | 
 5 | #1. 39 genes and 19 dimensions
 6 | #2. 39 genes and 4 dimensions
 7 | #3. 20 genes and 4 dimensions
 8 | 
 9 | # Sample IDs which I used to compute the AP:
10 | 6, 7, 8, 10, 12 (counting of samples starts from 1)
11 | 
12 | # Output:
13 | 
14 | - "AP.jpg" - Association Plot
15 | - "2D.jpg" - 2D correspondence analysis
16 | - "AP_coordinates_genes.txt" - coordinates of genes in the AP - the order of genes is changed, see: genes_order.txt
17 | - "AP_coordinates_samples.txt" - coordinates of samples in the AP
18 | - "genes_order.txt" - the order of genes in AP_coordinates_genes.txt file
19 | - "gene_ranking.txt" - gene ranking according to the AP and Salpha scores (calculated based on 10 permutations). This file also contains the gene coordinates from the AP, so it is probably the best file to use.
20 | 
21 | 


--------------------------------------------------------------------------------
/man/show.cacomp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/generic_methods.R
 3 | \name{show.cacomp}
 4 | \alias{show.cacomp}
 5 | \alias{show,cacomp-method}
 6 | \title{Prints cacomp object}
 7 | \usage{
 8 | show.cacomp(object)
 9 | 
10 | \S4method{show}{cacomp}(object)
11 | }
12 | \arguments{
13 | \item{object}{cacomp object to print}
14 | }
15 | \value{
16 | prints summary information about cacomp object.
17 | }
18 | \description{
19 | Provides more user friendly printing of cacomp objects.
20 | }
21 | \examples{
22 | # Simulate scRNAseq data.
23 | cnts <- data.frame(cell_1 = rpois(10, 5),
24 |                    cell_2 = rpois(10, 10),
25 |                    cell_3 = rpois(10, 20))
26 | rownames(cnts) <- paste0("gene_", 1:10)
27 | cnts <- as.matrix(cnts)
28 | 
29 | # Run correspondence analysis.
30 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
31 | 
32 | ca
33 | }
34 | 


--------------------------------------------------------------------------------
/man/check_cacomp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/constructor.R
 3 | \name{check_cacomp}
 4 | \alias{check_cacomp}
 5 | \title{Check if cacomp object was correctly created.}
 6 | \usage{
 7 | check_cacomp(object)
 8 | }
 9 | \arguments{
10 | \item{object}{A cacomp object.}
11 | }
12 | \value{
13 | TRUE if it is a valid cacomp object. FALSE otherwise.
14 | }
15 | \description{
16 | Checks if the slots in a cacomp object are of the correct size
17 | and whether they are coherent.
18 | }
19 | \examples{
20 | # Simulate scRNAseq data.
21 | cnts <- data.frame(cell_1 = rpois(10, 5),
22 |                    cell_2 = rpois(10, 10),
23 |                    cell_3 = rpois(10, 20))
24 | rownames(cnts) <- paste0("gene_", 1:10)
25 | cnts <- as.matrix(cnts)
26 | 
27 | # Run correspondence analysis.
28 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
29 | 
30 | check_cacomp(ca)
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/genes_order.txt:
--------------------------------------------------------------------------------
 1 | "11"	"Burundi"
 2 | "28"	"Central African Republic"
 3 | "37"	"Congo, Dem. Rep."
 4 | "14"	"Burkina Faso"
 5 | "26"	"Bhutan"
 6 | "19"	"Bosnia and Herzegovina"
 7 | "1"	"Afghanistan"
 8 | "15"	"Bangladesh"
 9 | "36"	"Cameroon"
10 | "25"	"Brunei Darussalam"
11 | "6"	"Argentina"
12 | "12"	"Belgium"
13 | "29"	"Canada"
14 | "2"	"Angola"
15 | "17"	"Bahrain"
16 | "24"	"Barbados"
17 | "5"	"United Arab Emirates"
18 | "8"	"Australia"
19 | "18"	"Bahamas, The"
20 | "13"	"Benin"
21 | "32"	"Channel Islands"
22 | "31"	"Switzerland"
23 | "34"	"China"
24 | "9"	"Austria"
25 | "35"	"Cote d'Ivoire"
26 | "16"	"Bulgaria"
27 | "21"	"Belize"
28 | "7"	"Armenia"
29 | "33"	"Chile"
30 | "27"	"Botswana"
31 | "3"	"Albania"
32 | "39"	"Colombia"
33 | "23"	"Brazil"
34 | "20"	"Belarus"
35 | "38"	"Congo, Rep."
36 | "4"	"Arab World"
37 | "30"	"Central Europe and the Baltics"
38 | "10"	"Azerbaijan"
39 | "22"	"Bolivia"
40 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/genes_order.txt:
--------------------------------------------------------------------------------
 1 | "11"	"Burundi"
 2 | "28"	"Central African Republic"
 3 | "37"	"Congo, Dem. Rep."
 4 | "14"	"Burkina Faso"
 5 | "26"	"Bhutan"
 6 | "19"	"Bosnia and Herzegovina"
 7 | "1"	"Afghanistan"
 8 | "15"	"Bangladesh"
 9 | "36"	"Cameroon"
10 | "25"	"Brunei Darussalam"
11 | "6"	"Argentina"
12 | "12"	"Belgium"
13 | "29"	"Canada"
14 | "2"	"Angola"
15 | "17"	"Bahrain"
16 | "24"	"Barbados"
17 | "5"	"United Arab Emirates"
18 | "8"	"Australia"
19 | "18"	"Bahamas, The"
20 | "13"	"Benin"
21 | "32"	"Channel Islands"
22 | "31"	"Switzerland"
23 | "34"	"China"
24 | "9"	"Austria"
25 | "35"	"Cote d'Ivoire"
26 | "16"	"Bulgaria"
27 | "21"	"Belize"
28 | "7"	"Armenia"
29 | "33"	"Chile"
30 | "27"	"Botswana"
31 | "3"	"Albania"
32 | "39"	"Colombia"
33 | "23"	"Brazil"
34 | "20"	"Belarus"
35 | "38"	"Congo, Rep."
36 | "4"	"Arab World"
37 | "30"	"Central Europe and the Baltics"
38 | "10"	"Azerbaijan"
39 | "22"	"Bolivia"
40 | 


--------------------------------------------------------------------------------
/man/random_direction_cutoff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{random_direction_cutoff}
 4 | \alias{random_direction_cutoff}
 5 | \title{Random direction association plot coordinates}
 6 | \usage{
 7 | random_direction_cutoff(caobj, dims = caobj@dims, reps = 100)
 8 | }
 9 | \arguments{
10 | \item{caobj}{A "cacomp" object with principal row coordinates and
11 | standardized column coordinates calculated.}
12 | 
13 | \item{dims}{Integer. Number of CA dimensions to retain. Needs to be the same
14 | as in caobj!}
15 | 
16 | \item{reps}{Integer. Number of permutations to perform.}
17 | }
18 | \value{
19 | List with permuted apl coordinates ("apl_perm") and a list of saved ca
20 | components ("saved_ca") that allow for quick recomputation of the CA results.
21 |  For random_direction_cutoff this saved_ca is empty.
22 | }
23 | \description{
24 | Calculates matrix of apl coordinates for random directions
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/AP_coordinates_genes.txt:
--------------------------------------------------------------------------------
1 | "V1"	"V2"	"V3"	"V4"	"V5"	"V6"	"V7"	"V8"	"V9"	"V10"	"V11"	"V12"	"V13"	"V14"	"V15"	"V16"	"V17"	"V18"	"V19"	"V20"
2 | "1"	0.948319976974246	0.728090107961784	0.553508142645055	0.607844782388438	0.468089712504795	-0.243828891270645	0.496370015470564	0.451986328251284	0.406323772624122	-0.553295604715508	-0.550113810211719	-0.545073205518662	-0.544067971091048	0.060629751367586	-0.525559867564279	-0.523369968018623	-0.502737028569807	-0.513959001164854	-0.58523132701876	0.366070107530785
3 | "2"	0.878605308254436	0.668167474732221	0.5351916277987	0.276188810756928	0.421166403205462	0.799543631883141	0.428934591316306	0.253593171895212	0.225418771598896	0.378417445994173	0.353331876483964	0.349284132679116	0.339318272163175	0.364354451424873	0.502829969179454	0.307988692363962	0.458179660842991	0.353727220541737	0.250337111778779	0.196005494211658
4 | "3"	1	2	4	3	6	11	5	7	8	19	18	17	16	10	15	14	12	13	20	9
5 | 


--------------------------------------------------------------------------------
/man/calc_residuals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{calc_residuals}
 4 | \alias{calc_residuals}
 5 | \title{Calculate residuals for Correspondence analysis}
 6 | \usage{
 7 | calc_residuals(mat, residuals = "pearson", clip = FALSE, cutoff = NULL)
 8 | }
 9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 | 
13 | \item{residuals}{character string. Specifies which kind of residuals should
14 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
15 | negative-binomial.}
16 | 
17 | \item{clip}{logical. Whether residuals should be clipped if they are
18 | higher/lower than a specified cutoff}
19 | 
20 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
21 | -cutoff are clipped to cutoff.}
22 | }
23 | \value{
24 | A named list. The elements are:
25 | \itemize{
26 | \item "S": standardized residual matrix.
27 | \item "tot": grand total of the original matrix.
28 | \item "rowm": row masses.
29 | \item "colm": column masses.
30 | }
31 | }
32 | \description{
33 | \code{calc_residuals} provides optional residuals as the basis for Correspondence
34 | Analysis
35 | }
36 | 


--------------------------------------------------------------------------------
/man/plot_enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{plot_enrichment}
 4 | \alias{plot_enrichment}
 5 | \title{Generates plot for results from apl_topGO}
 6 | \usage{
 7 | plot_enrichment(genenr, ntop = 10)
 8 | }
 9 | \arguments{
10 | \item{genenr}{data.frame. gene enrichment results table.}
11 | 
12 | \item{ntop}{numeric. Number of elements to plot.}
13 | }
14 | \value{
15 | Returns a ggplot plot.
16 | }
17 | \description{
18 | Plots the results from the data frame generated via apl_topGO.
19 | }
20 | \examples{
21 | library(SeuratObject)
22 | set.seed(1234)
23 | cnts <- SeuratObject::LayerData(pbmc_small, assay = "RNA", layer = "counts")
24 | cnts <- as.matrix(cnts)
25 | 
26 | # Run CA on example from Seurat
27 | 
28 | ca <- cacomp(pbmc_small,
29 |              princ_coords = 3,
30 |              return_input = FALSE,
31 |              assay = "RNA",
32 |              slot = "counts")
33 | 
34 | grp <- which(Idents(pbmc_small) == 2)
35 | ca <- apl_coords(ca, group = grp)
36 | ca <- apl_score(ca,
37 |                 mat = cnts)
38 | 
39 | enr <- apl_topGO(ca,
40 |                  ontology = "BP",
41 |                  organism = "hs")
42 | 
43 | plot_enrichment(enr)
44 | }
45 | 


--------------------------------------------------------------------------------
/man/comp_std_residuals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{comp_std_residuals}
 4 | \alias{comp_std_residuals}
 5 | \title{Compute Standardized Residuals}
 6 | \usage{
 7 | comp_std_residuals(mat, clip = FALSE, cutoff = NULL)
 8 | }
 9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 | 
13 | \item{clip}{logical. Whether residuals should be clipped if they are
14 | higher/lower than a specified cutoff}
15 | 
16 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
17 | -cutoff are clipped to cutoff or -cutoff, respectively.}
18 | }
19 | \value{
20 | A named list. The elements are:
21 | \itemize{
22 | \item "S": standardized residual matrix.
23 | \item "tot": grand total of the original matrix.
24 | \item "rowm": row masses.
25 | \item "colm": column masses.
26 | }
27 | }
28 | \description{
29 | `comp_std_residuals` computes the standardized residual matrix S based on
30 | the Poisson model,
31 | which is the basis for correspondence analysis and serves
32 | as input for singular value decomposition (SVD).
33 | }
34 | \details{
35 | Calculates standardized residual matrix S from the proportion matrix P and
36 | the expected values E according to \eqn{S = \frac{P-E}{\sqrt{E}}}{S = (P-E) / sqrt(E)}.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/var_rows.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{var_rows}
 4 | \alias{var_rows}
 5 | \title{Find most variable rows}
 6 | \usage{
 7 | var_rows(mat, residuals = "pearson", top = 5000, ...)
 8 | }
 9 | \arguments{
10 | \item{mat}{A numeric matrix. For sequencing a count matrix,
11 | gene expression values with genes in rows and samples/cells in columns.
12 | Should contain row and column names.}
13 | 
14 | \item{residuals}{character string. Specifies which kind of residuals should
15 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
16 | negative-binomial.}
17 | 
18 | \item{top}{Integer. Number of most variable rows to retain. Default 5000.}
19 | 
20 | \item{...}{Further arguments for `calc_residuals`.}
21 | }
22 | \value{
23 | Returns a matrix, which consists of the top variable rows of mat.
24 | }
25 | \description{
26 | Calculates the variance of the chi-square component matrix and selects the
27 | rows with the highest variance, e.g. 5,000.
28 | }
29 | \examples{
30 | set.seed(1234)
31 | 
32 | # Simulate counts
33 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
34 |               x = sample(1:20, 50, replace = TRUE))
35 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
36 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
37 | 
38 | # Choose top 5000 most variable genes
39 | cnts <- var_rows(mat = cnts, top = 5000)
40 | 
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/man/comp_NB_residuals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{comp_NB_residuals}
 4 | \alias{comp_NB_residuals}
 5 | \title{Compute Negative-Binomial residuals}
 6 | \usage{
 7 | comp_NB_residuals(mat, theta = 100, clip = FALSE, cutoff = NULL, freq = TRUE)
 8 | }
 9 | \arguments{
10 | \item{mat}{A numerical matrix or coercible to one by `as.matrix()`.
11 | Should have row and column names.}
12 | 
13 | \item{theta}{Overdispersion parameter. By default set to 100 as described in
14 | Lause, Berens and Kobak, 2021 (see references).}
15 | 
16 | \item{clip}{logical. Whether residuals should be clipped if they are
17 | higher/lower than a specified cutoff}
18 | 
19 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
20 | -cutoff are clipped to cutoff or -cutoff, respectively.}
21 | 
22 | \item{freq}{logical. Whether a table of frequencies (as used in CA) should
23 | be used.}
24 | }
25 | \value{
26 | A named list. The elements are:
27 | \itemize{
28 | \item "S": standardized residual matrix.
29 | \item "tot": grand total of the original matrix.
30 | \item "rowm": row masses.
31 | \item "colm": column masses.
32 | }
33 | }
34 | \description{
35 | Computes the residuals based on the negative binomial model. By default a
36 | theta of 100 is used to capture technical variation.
37 | }
38 | \references{
39 | Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for
40 | normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021).
41 | https://doi.org/10.1186/s13059-021-02451-7
42 | }
43 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("%>%")
 4 | export(apl)
 5 | export(apl_coords)
 6 | export(apl_score)
 7 | export(apl_topGO)
 8 | export(as.cacomp)
 9 | export(ca_3Dplot)
10 | export(ca_biplot)
11 | export(ca_coords)
12 | export(cacomp)
13 | export(cacomp_names)
14 | export(cacomp_slot)
15 | export(check_cacomp)
16 | export(new_cacomp)
17 | export(pick_dims)
18 | export(plot_enrichment)
19 | export(runAPL)
20 | export(run_APL)
21 | export(show.cacomp)
22 | export(subset_dims)
23 | export(var_rows)
24 | exportClasses(cacomp)
25 | exportMethods(as.cacomp)
26 | exportMethods(as.list)
27 | exportMethods(ca_3Dplot)
28 | exportMethods(ca_biplot)
29 | exportMethods(cacomp)
30 | exportMethods(pick_dims)
31 | exportMethods(runAPL)
32 | exportMethods(show)
33 | import(SummarizedExperiment)
34 | import(methods)
35 | import(org.Hs.eg.db)
36 | import(org.Mm.eg.db)
37 | importClassesFrom(SeuratObject,Seurat)
38 | importClassesFrom(SingleCellExperiment,SingleCellExperiment)
39 | importFrom(RSpectra,svds)
40 | importFrom(ggplot2,aes)
41 | importFrom(ggplot2,geom_point)
42 | importFrom(ggplot2,ggplot)
43 | importFrom(ggplot2,guide_colorbar)
44 | importFrom(magrittr,"%>%")
45 | importFrom(rlang,.data)
46 | importFrom(stats,as.formula)
47 | importFrom(stats,na.omit)
48 | importFrom(stats,quantile)
49 | importFrom(stats,runif)
50 | importFrom(stats,var)
51 | importFrom(topGO,score)
52 | importFrom(topGO,showSigOfNodes)
53 | importFrom(utils,head)
54 | importFrom(utils,setTxtProgressBar)
55 | importFrom(utils,txtProgressBar)
56 | importFrom(viridisLite,viridis)
57 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example3/gene_ranking.txt:
--------------------------------------------------------------------------------
 1 | "Gene_name"	"x-coordinate"	"y-coordinate"	"Gene_score"
 2 | "13"	"Benin"	0.366070107530785	0.196005494211658	-0.535770235162441
 3 | "36"	"Cameroon"	0.406323772624122	0.225418771598896	-0.630849920112816
 4 | "14"	"Burkina Faso"	0.607844782388438	0.276188810756928	-0.662926798494357
 5 | "15"	"Bangladesh"	0.451986328251284	0.253593171895212	-0.714820520538758
 6 | "26"	"Bhutan"	0.468089712504795	0.421166403205462	-1.46973787199919
 7 | "1"	"Afghanistan"	0.496370015470564	0.428934591316306	-1.47719975807851
 8 | "2"	"Angola"	0.060629751367586	0.364354451424873	-1.61580051736753
 9 | "18"	"Bahamas, The"	-0.58523132701876	0.250337111778779	-1.73705672652319
10 | "37"	"Congo, Dem. Rep."	0.553508142645055	0.5351916277987	-1.90896059049833
11 | "24"	"Barbados"	-0.523369968018623	0.307988692363962	-1.94045589708557
12 | "29"	"Canada"	-0.544067971091048	0.339318272163175	-2.10530434398202
13 | "8"	"Australia"	-0.513959001164854	0.353727220541737	-2.14149234698424
14 | "12"	"Belgium"	-0.545073205518662	0.349284132679116	-2.1521634719399
15 | "6"	"Argentina"	-0.550113810211719	0.353331876483964	-2.17582813956946
16 | "25"	"Brunei Darussalam"	-0.553295604715508	0.378417445994173	-2.29443107925458
17 | "28"	"Central African Republic"	0.728090107961784	0.668167474732221	-2.34621343037817
18 | "5"	"United Arab Emirates"	-0.502737028569807	0.458179660842991	-2.610866211436
19 | "17"	"Bahrain"	-0.525559867564279	0.502829969179454	-2.83912946185094
20 | "11"	"Burundi"	0.948319976974246	0.878605308254436	-3.0942285009405
21 | "19"	"Bosnia and Herzegovina"	-0.243828891270645	0.799543631883141	-3.9226069065289
22 | 


--------------------------------------------------------------------------------
/man/permutation_cutoff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{permutation_cutoff}
 4 | \alias{permutation_cutoff}
 5 | \title{Calculates permuted association plot coordinates}
 6 | \usage{
 7 | permutation_cutoff(
 8 |   caobj,
 9 |   mat,
10 |   group = caobj@group,
11 |   dims = caobj@dims,
12 |   reps = 10,
13 |   store_perm = FALSE,
14 |   python = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{caobj}{A "cacomp" object with principal row coordinates and
19 | standardized column coordinates calculated.}
20 | 
21 | \item{mat}{A numeric matrix. For sequencing a count matrix, gene expression
22 | values with genes in rows and samples/cells in columns.
23 | Should contain row and column names.}
24 | 
25 | \item{group}{Vector of indices of the columns to calculate centroid/x-axis
26 | direction.}
27 | 
28 | \item{dims}{Integer. Number of CA dimensions to retain. Needs to be the same
29 | as in caobj!}
30 | 
31 | \item{reps}{Integer. Number of permutations to perform.}
32 | 
33 | \item{store_perm}{Logical. Whether permuted data should be stored in the CA
34 | object.}
35 | 
36 | \item{python}{DEPRECATED. A logical value indicating whether to use
37 | singular-value decomposition from the python package torch.
38 | This implementation dramatically speeds up computation compared to `svd()`
39 | in R.}
40 | }
41 | \value{
42 | List with permuted apl coordinates ("apl_perm") and a list of saved CA
43 | components ("saved_ca") that allow for quick recomputation of the CA results.
44 | For random_direction_cutoff this saved_ca is empty.
45 | }
46 | \description{
47 | Calculates matrix of apl coordinates when permuting the original data.
48 | }
49 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/AP_coordinates_genes.txt:
--------------------------------------------------------------------------------
1 | "V1"	"V2"	"V3"	"V4"	"V5"	"V6"	"V7"	"V8"	"V9"	"V10"	"V11"	"V12"	"V13"	"V14"	"V15"	"V16"	"V17"	"V18"	"V19"	"V20"	"V21"	"V22"	"V23"	"V24"	"V25"	"V26"	"V27"	"V28"	"V29"	"V30"	"V31"	"V32"	"V33"	"V34"	"V35"	"V36"	"V37"	"V38"	"V39"
2 | "1"	0.898371653273147	0.700255713010355	0.525371758267359	0.493483081784703	0.479794305974388	-0.168193760838336	0.457564161368761	0.387472726288257	0.366533471653081	-0.397712533701071	-0.412819211563902	-0.403162104224234	-0.391598006862012	0.160505779017826	-0.397474395358172	-0.366573604244055	-0.388969467656746	-0.376470789433401	-0.41782033093511	0.371710481815176	-0.323128564526515	-0.347327954758956	0.293464674324616	-0.31603451736632	0.295935094579746	-0.180573442611186	-0.101016809262888	0.0297001312294838	-0.220920626787251	-0.109912034733154	0.147679647753861	-0.0808501374404857	-0.199897338107552	-0.132301031889015	0.189674099277819	-0.084231483403504	-0.136815218930296	0.079954993980841	0.0763269801211523
3 | "2"	1.17850669951975	0.922917979522332	0.775111250568603	0.51544431851946	0.61849271694774	0.708710350044667	0.598566346331496	0.542474410165313	0.485011904246648	0.479440965862673	0.42956789512184	0.422269123623575	0.428641657689617	0.476151034497615	0.554450026348923	0.404918219307577	0.490699703280908	0.420725523089288	0.423291048139206	0.349564824608867	0.390771868446349	0.444724590999482	0.349093065452325	0.427284462274932	0.338276212911539	0.445145288122006	0.333203336269937	0.412599888090165	0.310567596541093	0.354448484192017	0.426469719370387	0.313739024288438	0.27950479406108	0.423946651564449	0.32272720918723	0.244146587721156	0.34092997551302	0.281767557818549	0.217444942061676
4 | "3"	1	2	3	4	5	24	6	7	9	36	38	37	34	13	35	31	33	32	39	8	29	30	11	28	10	25	20	17	27	21	14	18	26	22	12	19	23	15	16
5 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/AP_coordinates_genes.txt:
--------------------------------------------------------------------------------
1 | "V1"	"V2"	"V3"	"V4"	"V5"	"V6"	"V7"	"V8"	"V9"	"V10"	"V11"	"V12"	"V13"	"V14"	"V15"	"V16"	"V17"	"V18"	"V19"	"V20"	"V21"	"V22"	"V23"	"V24"	"V25"	"V26"	"V27"	"V28"	"V29"	"V30"	"V31"	"V32"	"V33"	"V34"	"V35"	"V36"	"V37"	"V38"	"V39"
2 | "1"	1.17484147858886	0.918251524726067	0.707352964776721	0.659334062031513	0.591111717263373	-0.202596679230447	0.566050278113453	0.515545901239115	0.51122370357559	-0.541832206823089	-0.528375373721855	-0.523093044060456	-0.516913310908317	0.179903144134825	-0.515622126246806	-0.495322587859203	-0.47617120932336	-0.486265584873309	-0.538012266428231	0.46660339350845	-0.415806421614723	-0.46359904965256	0.379581633138169	-0.419212344064118	0.375609622422642	-0.252645734099141	-0.129368951687118	0.0583393083734729	-0.29032497061507	-0.149448409679685	0.192689599496444	-0.108851566434673	-0.255731102123635	-0.176260871721386	0.213539414949807	-0.0826348047718997	-0.175778317867741	0.118643553602509	0.115239525292288
3 | "2"	0.892727782400543	0.696805849208181	0.607676905606093	0.209459617449081	0.500936560797782	0.689328293173901	0.467427881465475	0.382994747014415	0.308047942336173	0.263238202574069	0.251646208394254	0.25318026478414	0.25644196137184	0.45844562830569	0.426290375045695	0.2109356616869	0.380529288289148	0.281257079798341	0.234151041985703	0.166135088099633	0.277531027121789	0.315119718788519	0.232809016489976	0.314917636729549	0.230118794618802	0.394133471715775	0.31724996724156	0.382411643439824	0.239732963941796	0.329957129605282	0.399734524035285	0.268540654620658	0.226333867336864	0.386214534970047	0.265348050842096	0.182134958546392	0.308285912900536	0.236851843036183	0.182489665038921
4 | "3"	1	2	3	4	5	24	6	7	8	39	37	36	35	14	34	33	31	32	38	9	28	30	10	29	11	25	20	17	27	21	13	19	26	23	12	18	22	15	16
5 | 


--------------------------------------------------------------------------------
/man/apl_ggplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{apl_ggplot}
 4 | \alias{apl_ggplot}
 5 | \title{Plot Association Plot with ggplot}
 6 | \usage{
 7 | apl_ggplot(
 8 |   rows,
 9 |   rows_group = NULL,
10 |   cols,
11 |   cols_group = NULL,
12 |   rows_scored = NULL,
13 |   rows_color = "#0066FF",
14 |   rows_high_color = "#FF0000",
15 |   cols_color = "#601A4A",
16 |   cols_high_color = "#EE442F",
17 |   score_color = "rainbow",
18 |   row_labs = FALSE,
19 |   col_labs = FALSE,
20 |   show_score = FALSE,
21 |   show_cols = FALSE,
22 |   show_rows = TRUE
23 | )
24 | }
25 | \arguments{
26 | \item{rows}{Row APL-coordinates}
27 | 
28 | \item{rows_group}{Row AP-coordinates to highlight}
29 | 
30 | \item{cols}{Column AP-coordinates}
31 | 
32 | \item{cols_group}{Column AP-coordinates for the group to be highlighted.}
33 | 
34 | \item{rows_scored}{Row AP-coordinates of rows above a score cutoff.}
35 | 
36 | \item{rows_color}{Color for rows}
37 | 
38 | \item{rows_high_color}{Color for rows to be highlighted.}
39 | 
40 | \item{cols_color}{Column points color.}
41 | 
42 | \item{cols_high_color}{Color for column points to be highlighted.}
43 | 
44 | \item{score_color}{Color scheme for row points with a score.}
45 | 
46 | \item{row_labs}{Logical. Whether rows indicated by rows_idx
47 | should be labeled with text. Default FALSE.}
48 | 
49 | \item{col_labs}{Logical. Whether columns indicated by cols_idx
50 | should be labeled with text. Default FALSE.}
51 | 
52 | \item{show_score}{Logical. Whether the S-alpha score should be shown in
53 | the plot.}
54 | 
55 | \item{show_cols}{Logical. Whether column points should be plotted.}
56 | 
57 | \item{show_rows}{Logical. Whether row points should be plotted.}
58 | }
59 | \value{
60 | ggplot Association Plot
61 | }
62 | \description{
63 | Uses ggplot to plot an Association Plot
64 | }
65 | 


--------------------------------------------------------------------------------
/man/apl_plotly.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{apl_plotly}
 4 | \alias{apl_plotly}
 5 | \title{Plot Association Plot with plotly}
 6 | \usage{
 7 | apl_plotly(
 8 |   rows,
 9 |   rows_group = NULL,
10 |   cols,
11 |   cols_group,
12 |   rows_scored = NULL,
13 |   rows_color = "#0066FF",
14 |   rows_high_color = "#FF0000",
15 |   cols_color = "#601A4A",
16 |   cols_high_color = "#EE442F",
17 |   score_color = "rainbow",
18 |   row_labs = FALSE,
19 |   col_labs = FALSE,
20 |   show_score = FALSE,
21 |   show_cols = FALSE,
22 |   show_rows = TRUE
23 | )
24 | }
25 | \arguments{
26 | \item{rows}{Row APL-coordinates}
27 | 
28 | \item{rows_group}{Row AP-coordinates to highlight}
29 | 
30 | \item{cols}{Column AP-coordinates}
31 | 
32 | \item{cols_group}{Column AP-coordinates for the group to be highlighted.}
33 | 
34 | \item{rows_scored}{Row AP-coordinates of rows above a score cutoff.}
35 | 
36 | \item{rows_color}{Color for rows}
37 | 
38 | \item{rows_high_color}{Color for rows to be highlighted.}
39 | 
40 | \item{cols_color}{Column points color.}
41 | 
42 | \item{cols_high_color}{Color for column points to be highlighted.}
43 | 
44 | \item{score_color}{Color scheme for row points with a score.}
45 | 
46 | \item{row_labs}{Logical. Whether rows indicated by rows_idx
47 | should be labeled with text. Default FALSE.}
48 | 
49 | \item{col_labs}{Logical. Whether columns indicated by cols_idx
50 | should be labeled with text. Default FALSE.}
51 | 
52 | \item{show_score}{Logical. Whether the S-alpha score should be shown in
53 | the plot.}
54 | 
55 | \item{show_cols}{Logical. Whether column points should be plotted.}
56 | 
57 | \item{show_rows}{Logical. Whether row points should be plotted.}
58 | }
59 | \value{
60 | Interactive plotly Association Plot
61 | }
62 | \description{
63 | Uses plotly to generate an interactive Association Plot
64 | }
65 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown
13 | 
14 | jobs:
15 |   pkgdown:
16 |     runs-on: ubuntu-latest
17 |     # Only restrict concurrency for non-PR jobs
18 |     concurrency:
19 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 |     env:
21 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |     permissions:
23 |       contents: write
24 |     steps:
25 |       - uses: actions/checkout@v3
26 | 
27 |       - uses: r-lib/actions/setup-pandoc@v2
28 | 
29 |       - uses: r-lib/actions/setup-r@v2
30 |         with:
31 |           use-public-rspm: true
32 | 
33 |       - uses: r-lib/actions/setup-r-dependencies@v2
34 |         with:
35 |           extra-packages: 
36 |             any::pkgdown
37 |             local::.
38 |             any::remotes
39 |           needs: website
40 | 
41 |       - name: Install Miniconda
42 |         run: |
43 |           Rscript -e "remotes::install_github('rstudio/reticulate')"
44 |           Rscript -e "reticulate::install_miniconda()"
45 | 
46 |       - name: Install Python dependencies
47 |         run: |
48 |           Rscript -e "reticulate::conda_create('r-reticulate', packages = c('python==3.6.11', 'numpy', 'pytorch'))"
49 | 
50 |       - name: Build site
51 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
52 |         shell: Rscript {0}
53 | 
54 |       - name: Deploy to GitHub pages 🚀
55 |         if: github.event_name != 'pull_request'
56 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
57 |         with:
58 |           clean: false
59 |           branch: gh-pages
60 |           folder: docs
61 | 


--------------------------------------------------------------------------------
/man/ca_coords.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{ca_coords}
 4 | \alias{ca_coords}
 5 | \title{Calculate correspondence analysis row and column coordinates.}
 6 | \usage{
 7 | ca_coords(caobj, dims = NULL, princ_coords = 3, princ_only = FALSE)
 8 | }
 9 | \arguments{
10 | \item{caobj}{A "cacomp" object as outputted from `cacomp()`.}
11 | 
12 | \item{dims}{Integer indicating the number of dimensions to use for the
13 | calculation of coordinates.
14 | All elements of caobj (where applicable) will be reduced to the given
15 | number of dimensions. Default NULL (keeps all dimensions).}
16 | 
17 | \item{princ_coords}{Integer. Number indicating whether principal
18 | coordinates should be calculated for the rows (=1), columns (=2), both (=3)
19 | or none (=0).
20 | Default 3.}
21 | 
22 | \item{princ_only}{Logical, whether only principal coordinates should be
23 | calculated.
24 | Or, in other words, whether the standardized coordinates are already
25 | calculated and stored in `caobj`. Default `FALSE`.}
26 | }
27 | \value{
28 | Returns input object with coordinates added.
29 | std_coords_rows/std_coords_cols: Standardized coordinates of rows/columns.
30 | prin_coords_rows/prin_coords_cols: Principal coordinates of rows/columns.
31 | }
32 | \description{
33 | `ca_coords` calculates the standardized and principal
34 | coordinates of the rows and columns in CA space.
35 | }
36 | \details{
37 | Takes a "cacomp" object and calculates standardized and principal
38 | coordinates for the visualization of CA results in a biplot or
39 | to subsequently calculate coordinates in an Association Plot.
40 | }
41 | \examples{
42 | # Simulate scRNAseq data.
43 | cnts <- data.frame(cell_1 = rpois(10, 5),
44 |                    cell_2 = rpois(10, 10),
45 |                    cell_3 = rpois(10, 20))
46 | rownames(cnts) <- paste0("gene_", 1:10)
47 | cnts <- as.matrix(cnts)
48 | 
49 | # Run correspondence analysis.
50 | ca <- cacomp(obj = cnts, princ_coords = 1)
51 | ca <- ca_coords(ca, princ_coords = 3)
52 | }
53 | 


--------------------------------------------------------------------------------
/man/elbow_method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{elbow_method}
 4 | \alias{elbow_method}
 5 | \title{Runs elbow method}
 6 | \usage{
 7 | elbow_method(obj, mat, reps, python = FALSE, return_plot = FALSE)
 8 | }
 9 | \arguments{
10 | \item{obj}{A "cacomp" object as outputted from `cacomp()`}
11 | 
12 | \item{mat}{A numeric matrix. For sequencing a count matrix, gene expression
13 | values with genes in rows and samples/cells in columns.
14 | Should contain row and column names.}
15 | 
16 | \item{reps}{Integer. Number of permutations to perform when choosing
17 | "elbow_rule".}
18 | 
19 | \item{python}{A logical value indicating whether to use singular value
20 | decomposition from the python package torch.
21 | This implementation dramatically speeds up computation compared to `svd()`
22 | in R.}
23 | 
24 | \item{return_plot}{TRUE/FALSE. Whether a plot should be returned when
25 | choosing "elbow_rule".}
26 | }
27 | \value{
28 | `elbow_method` (for `return_plot=TRUE`) returns a list with two elements:
29 | "dims" contains the number of dimensions and "plot" a ggplot. If
30 | `return_plot=FALSE` it just returns the number of picked dimensions.
31 | }
32 | \description{
33 | Helper function for pick_dims() to run the elbow method.
34 | }
35 | \examples{
36 | 
37 | # Get example data from Seurat
38 | library(SeuratObject)
39 | set.seed(2358)
40 | cnts <- as.matrix(SeuratObject::LayerData(pbmc_small,
41 |                                           assay = "RNA",
42 |                                           layer = "data"))
43 | # Run correspondence analysis.
44 | ca <- cacomp(obj = cnts)
45 | 
46 | # pick dimensions with the elbow rule. Returns list.
47 | pd <- pick_dims(obj = ca,
48 |                 mat = cnts,
49 |                 method = "elbow_rule",
50 |                 return_plot = TRUE,
51 |                 reps = 10)
52 | pd$plot
53 | ca_sub <- subset_dims(ca, dims = pd$dims)
54 | 
55 | }
56 | \references{
57 | Ciampi, Antonio, González Marcos, Ana and Castejón Limas, Manuel. \cr
58 | Correspondence analysis and 2-way clustering. (2005), SORT 29(1).
59 | }
60 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: APL
 2 | Type: Package
 3 | Title: Association Plots
 4 | Version: 1.10.2
 5 | Authors@R:
 6 |     c(person(given = "Clemens",
 7 |         family = "Kohl",
 8 |         role = c("cre", "aut"),
 9 |         email = "kohl.clemens@gmail.com"),
10 |     person(given = "Elzbieta",
11 |         family = "Gralinska",
12 |         role = c("aut"),
13 |         email = "gralinska@molgen.mpg.de"),
14 |     person(given = "Martin",
15 |         family = "Vingron",
16 |         role = c("aut"),
17 |         email = "vingron@molgen.mpg.de"))
18 | Description: APL is a package developed for computation of Association Plots
19 |   (AP), a method for visualization and analysis of single cell transcriptomics
20 |   data. The main focus of APL is the identification of genes characteristic for
21 |   individual clusters of cells from input data. The package performs
22 |   correspondence analysis (CA) and allows users to identify cluster-specific
23 |   genes using Association Plots. Additionally, APL computes
24 |   cluster-specificity scores for all genes, which allows ranking the genes by
25 |   their specificity for a selected cell cluster of interest.
26 | biocViews:
27 |   StatisticalMethod,
28 |   DimensionReduction,
29 |   SingleCell,
30 |   Sequencing,
31 |   RNASeq,
32 |   GeneExpression
33 | License: GPL (>= 3)
34 | Encoding: UTF-8
35 | RoxygenNote: 7.3.2
36 | VignetteBuilder: knitr
37 | Imports:
38 |     Matrix,
39 |     RSpectra,
40 |     ggrepel,
41 |     ggplot2,
42 |     viridisLite,
43 |     plotly,
44 |     SeuratObject,
45 |     SingleCellExperiment,
46 |     magrittr,
47 |     SummarizedExperiment,
48 |     topGO,
49 |     methods,
50 |     stats,
51 |     utils,
52 |     org.Hs.eg.db,
53 |     org.Mm.eg.db,
54 |     rlang
55 | Depends: R (>= 4.4.0)
56 | Suggests:
57 |     BiocStyle,
58 |     knitr,
59 |     rmarkdown,
60 |     scRNAseq,
61 |     scater,
62 |     scran,
63 |     sparseMatrixStats,
64 |     testthat
65 | Config/testthat/edition: 3
66 | Collate:
67 |     'constructor.R'
68 |     'CA.R'
69 |     'apl.R'
70 |     'convert.R'
71 |     'generic_methods.R'
72 |     'import_packages.R'
73 |     'plot.R'
74 |     'utils-pipe.R'
75 | URL: https://vingronlab.github.io/APL/
76 | 


--------------------------------------------------------------------------------
/man/apl_coords.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{apl_coords}
 4 | \alias{apl_coords}
 5 | \title{Calculate Association Plot coordinates}
 6 | \usage{
 7 | apl_coords(caobj, group, calc_rows = TRUE, calc_cols = TRUE)
 8 | }
 9 | \arguments{
10 | \item{caobj}{A "cacomp" object with principal row coordinates and
11 | standardized column coordinates calculated.}
12 | 
13 | \item{group}{Numeric/Character. Vector of indices or column names of
14 | the columns to calculate centroid/x-axis direction.}
15 | 
16 | \item{calc_rows}{TRUE/FALSE. Whether apl row coordinates should
17 | be calculated. Default TRUE.}
18 | 
19 | \item{calc_cols}{TRUE/FALSE. Whether apl column coordinates should
20 | be calculated. Default TRUE.}
21 | }
22 | \value{
23 | Returns input "cacomp" object and adds components "apl_rows" and/or
24 | "apl_cols" for row and column coordinates.
25 | In "group" the indices of the columns used to calculate the
26 | centroid are saved.
27 | }
28 | \description{
29 | Calculates the Association Plot coordinates for either the rows,
30 | columns or both (default).
31 | }
32 | \details{
33 | Coordinates (x,y) of row vector \eqn{\vec{r}} are defined as
34 | \deqn{x(\vec{r}) := \left|\vec{r}\right|\cos(\phi(\vec{r}))}
35 | \deqn{y(\vec{r}) := \left|\vec{r}\right|\sin(\phi(\vec{r}))}
36 | The x-direction is determined by calculating the centroid of the columns
37 | selected with the indices in "group".
38 | }
39 | \examples{
40 | set.seed(1234)
41 | # Simulate scRNAseq data
42 | cnts <- data.frame(cell_1 = rpois(10, 5),
43 |                    cell_2 = rpois(10, 10),
44 |                    cell_3 = rpois(10, 20),
45 |                    cell_4 = rpois(10, 20))
46 | rownames(cnts) <- paste0("gene_", 1:10)
47 | cnts <- as.matrix(cnts)
48 | 
49 | # Run correspondence analysis
50 | ca <- cacomp(obj = cnts, princ_coords = 3, dims = 3)
51 | # Calculate APL coordinates
52 | ca <- apl_coords(ca, group = 3:4)
53 | }
54 | \references{
55 | Association Plots: Visualizing associations in high-dimensional
56 | correspondence analysis biplots
57 | Elzbieta Gralinska, Martin Vingron
58 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/testthat/test-convert.R:
--------------------------------------------------------------------------------
 1 | # context("test conversion to and from cacomp")
 2 | 
 3 | load("./testdata/smoke.rda")
 4 | load("./testdata/smoke_scRNAseq.rda")
 5 | set.seed(2358)
 6 | 
 7 | d <- min(nrow(smoke), ncol(smoke)) - 1
 8 | ca <- cacomp(smoke, top = nrow(smoke), dims = d, princ_coords = 3)
 9 | 
10 | test_that("check recompute function", {
11 | 
12 |   calist <- APL::as.list(ca)
13 | 
14 |   calist_sub <- calist[c("D",
15 |                          "std_coords_cols",
16 |                          "std_coords_rows",
17 |                          "params")]
18 |   expect_equal(recompute(calist_sub, smoke), ca)
19 | 
20 |   calist_sub <- calist[c("std_coords_cols",
21 |                          "std_coords_rows",
22 |                          "prin_coords_rows",
23 |                          "params")]
24 |   expect_equal(recompute(calist_sub, smoke), ca)
25 | 
26 |   calist_sub <- calist[c("V",
27 |                          "U",
28 |                          "D",
29 |                          "params")]
30 |   expect_equal(recompute(calist_sub, smoke), ca)
31 | 
32 |   calist_sub <- calist[c("std_coords_rows",
33 |                          "V",
34 |                          "params")]
35 |   expect_equal(recompute(calist_sub, smoke), ca)
36 | 
37 |   calist_sub <- calist[c("std_coords_cols",
38 |                          "std_coords_rows",
39 |                          "prin_coords_rows",
40 |                          "params")]
41 |   expect_error(recompute(calist_sub, smoke[1:3, ]), "mat does not have have the correct number of rows.")
42 |   expect_error(recompute(calist_sub, smoke[, 1:3]), "mat does not have have the correct number of columns.")
43 | 
44 | })
45 | 
46 | # d <- min(nrow(smoke), ncol(smoke)) - 1
47 | # seu <- SeuratObject::CreateSeuratObject(smoke)
48 | # seu <- cacomp(seu,
49 | #               princ_coords = 3,
50 | #               return_input = TRUE,
51 | #               dims = d,
52 | #               assay = "RNA",
53 | #               slot = "counts")
54 | #
55 | # sce <- SingleCellExperiment::SingleCellExperiment(list(counts = smoke))
56 | # sce <- cacomp(
57 | #     sce,
58 | #     dims = 3,
59 | #     princ_coords = 3,
60 | #     return_input = TRUE,
61 | #     assay = "counts"
62 | # )
63 | # save(seu, sce, file = "./tests/testthat/testdata/smoke_scRNAseq.rda")
64 | 
65 | test_that("check Seurat integration", {
66 |   expect_equal(as.cacomp(seu, assay = "RNA", slot = "counts"), ca)
67 | })
68 | 
69 | test_that("check SingleCellExperiment integration", {
70 |   expect_equal(as.cacomp(sce, assay = "counts"), ca)
71 | })
72 | 


--------------------------------------------------------------------------------
/tests/testthat/test-CA.R:
--------------------------------------------------------------------------------
 1 | # context("Correspondence Analysis")
 2 | 
 3 | 
 4 | # library(ca)
 5 | # data(smoke)
 6 | #
 7 | # smoke_ca <- ca(smoke)
 8 | #
 9 | # smoke_prin <- cacoord(smoke_ca,
10 | #           type = c("principal"),
11 | #           dim = NA,
12 | #           rows = TRUE,
13 | #           cols = TRUE)
14 | #
15 | # smoke <- as.matrix(smoke)
16 | # save(smoke, smoke_ca, smoke_prin, file = "./tests/testthat/testdata/smoke.rda")
17 | 
18 | load("./testdata/smoke.rda")
19 | d <- min(nrow(smoke), ncol(smoke)) - 1
20 | # suppressWarnings(ca_python <- cacomp(obj = smoke, top = nrow(smoke), dims = d, princ_coords = 3, coords = TRUE, python = TRUE))
21 | ca_svd <- cacomp(obj = smoke, top = nrow(smoke), dims = d, princ_coords = 3, coords = TRUE, python = FALSE)
22 | cac <- ca_coords(ca_svd, princ_coords = 3)
23 | 
24 | 
25 | # test_that("CA with torch svd results", {
26 | #
27 | #   expect_equal(ca_python@dims, length(smoke_ca$sv))
28 | #
29 | #   expect_equal(as.numeric(ca_python@D), smoke_ca$sv)
30 | #   expect_equal(ca_python@std_coords_cols, smoke_ca$colcoord)
31 | #   expect_equal(ca_python@std_coords_rows, smoke_ca$rowcoord)
32 | #
33 | #   expect_equal(ca_python@prin_coords_cols, smoke_prin$columns)
34 | #   expect_equal(ca_python@prin_coords_rows, smoke_prin$rows)
35 | #
36 | #   expect_equal(as.numeric(ca_python@row_masses), smoke_ca$rowmass)
37 | #   expect_equal(as.numeric(ca_python@row_inertia), smoke_ca$rowinertia)
38 | #
39 | #   expect_equal(as.numeric(ca_python@col_masses), smoke_ca$colmass)
40 | #   expect_equal(as.numeric(ca_python@col_inertia), smoke_ca$colinertia)
41 | #
42 | # })
43 | 
44 | 
45 | test_that("CA with R svd results", {
46 |   expect_equal(ca_svd@dims, length(smoke_ca$sv))
47 | 
48 |   expect_equal(abs(as.numeric(ca_svd@D)), abs(smoke_ca$sv))
49 |   expect_equal(abs(ca_svd@std_coords_cols), abs(smoke_ca$colcoord))
50 |   expect_equal(abs(ca_svd@std_coords_rows), abs(smoke_ca$rowcoord))
51 | 
52 |   expect_equal(abs(ca_svd@prin_coords_cols), abs(smoke_prin$columns))
53 |   expect_equal(abs(ca_svd@prin_coords_rows), abs(smoke_prin$rows))
54 | 
55 |   expect_equal(as.numeric(ca_svd@row_masses), smoke_ca$rowmass)
56 |   expect_equal(as.numeric(ca_svd@row_inertia), smoke_ca$rowinertia)
57 | 
58 |   expect_equal(as.numeric(ca_svd@col_masses), smoke_ca$colmass)
59 |   expect_equal(as.numeric(ca_svd@col_inertia), smoke_ca$colinertia)
60 | 
61 | })
62 | 
63 | test_that("CA coord function", {
64 | 
65 |   expect_equal(abs(cac@std_coords_cols), abs(smoke_ca$colcoord))
66 |   expect_equal(abs(cac@std_coords_rows), abs(smoke_ca$rowcoord))
67 | 
68 |   expect_equal(abs(cac@prin_coords_cols), abs(smoke_prin$columns))
69 |   expect_equal(abs(cac@prin_coords_rows), abs(smoke_prin$rows))
70 | 
71 | })
72 | 
73 | # cacomp test for 2x2 matrix (--> only 1 dim --> error). Error handling!
74 | 


--------------------------------------------------------------------------------
/man/apl_topGO.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{apl_topGO}
 4 | \alias{apl_topGO}
 5 | \title{Run Gene overrepresentation analysis with topGO}
 6 | \usage{
 7 | apl_topGO(
 8 |   caobj,
 9 |   ontology,
10 |   organism = "hs",
11 |   ngenes = 1000,
12 |   score_cutoff = 0,
13 |   use_coords = FALSE,
14 |   return_plot = FALSE,
15 |   top_res = 15
16 | )
17 | }
18 | \arguments{
19 | \item{caobj}{A "cacomp" object with principal row coordinates and
20 | standardized column coordinates calculated.}
21 | 
22 | \item{ontology}{Character string. Chooses GO sets for 'BP'
23 | (biological processes), 'CC' (cellular component) or 'MF' (molecular function).}
24 | 
25 | \item{organism}{Character string. Either 'hs' (homo sapiens), 'mm'
26 | (mus musculus) or the name of the organism package such as 'org.*.eg.db'.}
27 | 
28 | \item{ngenes}{Numeric. Number of top ranked genes to test for
29 | overrepresentation.}
30 | 
31 | \item{score_cutoff}{numeric. S-alpha score cutoff. Only genes with a score
32 | larger will be tested.}
33 | 
34 | \item{use_coords}{Logical. Whether the x-coordinates of the row APL
35 | coordinates should be used for ranking.
36 | Only recommended when no S-alpha score (see apl_score()) can be calculated.}
37 | 
38 | \item{return_plot}{Logical. Whether a plot of significant gene sets should
39 | be additionally returned.}
40 | 
41 | \item{top_res}{Numeric. Number of top scoring genes to plot.}
42 | }
43 | \value{
44 | A data.frame containing the gene sets with the highest overrepresentation.
45 | }
46 | \description{
47 | This function uses the Kolmogorov-Smirnov test as implemented by the package
48 | topGO to test for overrepresentation in Gene Ontology gene sets.
49 | }
50 | \details{
51 | For a chosen group of cells/samples,
52 | the top 'ngenes' group specific genes are used for gene overrepresentation
53 | analysis.
54 | The genes are ranked either by the precomputed APL score or, if that is
55 | not available, by their APL x-coordinates.
56 | }
57 | \examples{
58 | library(SeuratObject)
59 | set.seed(1234)
60 | cnts <- SeuratObject::LayerData(pbmc_small, assay = "RNA", layer = "counts")
61 | cnts <- as.matrix(cnts)
62 | 
63 | # Run CA on example from Seurat
64 | 
65 | ca <- cacomp(pbmc_small,
66 |              princ_coords = 3,
67 |              return_input = FALSE,
68 |              assay = "RNA",
69 |              slot = "counts")
70 | 
71 | grp <- which(Idents(pbmc_small) == 2)
72 | ca <- apl_coords(ca, group = grp)
73 | ca <- apl_score(ca,
74 |                 mat = cnts)
75 | 
76 | enr <- apl_topGO(ca,
77 |                  ontology = "BP",
78 |                  organism = "hs")
79 | 
80 | plot_enrichment(enr)
81 | }
82 | \references{
83 | Adrian Alexa and Jorg Rahnenfuhrer \cr
84 | topGO: Enrichment Analysis for Gene Ontology. \cr
85 | R package version 2.42.0.
86 | }
87 | 


--------------------------------------------------------------------------------
/man/apl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{apl}
 4 | \alias{apl}
 5 | \title{Association Plot}
 6 | \usage{
 7 | apl(
 8 |   caobj,
 9 |   type = "ggplot",
10 |   rows_idx = NULL,
11 |   cols_idx = caobj@group,
12 |   row_labs = FALSE,
13 |   col_labs = FALSE,
14 |   show_score = FALSE,
15 |   show_cols = FALSE,
16 |   show_rows = TRUE,
17 |   score_cutoff = 0,
18 |   score_color = "rainbow"
19 | )
20 | }
21 | \arguments{
22 | \item{caobj}{An object of class "cacomp" and "APL" with apl
23 | coordinates calculated.}
24 | 
25 | \item{type}{"ggplot"/"plotly". For a static plot a string "ggplot",
26 | for an interactive plot "plotly". Default "ggplot".}
27 | 
28 | \item{rows_idx}{numeric/character vector.
29 | Indices or names of the rows that should be labelled. Default NULL.}
30 | 
31 | \item{cols_idx}{numeric/character vector.
32 | Indices or names of the columns that should be labelled.
33 | Default is only to label columns making up the centroid: caobj@group.}
34 | 
35 | \item{row_labs}{Logical. Whether labels for rows indicated by rows_idx
36 | should be labeled with text. Default FALSE.}
37 | 
38 | \item{col_labs}{Logical. Whether labels for columns indicated by cols_idx
39 | should be labeled with text. Default FALSE.}
40 | 
41 | \item{show_score}{Logical. Whether the S-alpha score should be shown in
42 | the plot.}
43 | 
44 | \item{show_cols}{Logical. Whether column points should be plotted.}
45 | 
46 | \item{show_rows}{Logical. Whether row points should be plotted.}
47 | 
48 | \item{score_cutoff}{Numeric. Rows (genes) with a score >= score_cutoff will
49 | be colored according to their score if show_score = TRUE.}
50 | 
51 | \item{score_color}{Either "rainbow" or "viridis".}
52 | }
53 | \value{
54 | Either a ggplot or plotly object.
55 | }
56 | \description{
57 | Plot an Association Plot for the chosen columns.
58 | }
59 | \details{
60 | For an interactive plot type="plotly" can be chosen, otherwise a static plot
61 | will be returned.
62 | The row and column coordinates have to be already calculated by
63 | `apl_coords()`.
64 | }
65 | \examples{
66 | set.seed(1234)
67 | 
68 | # Simulate counts
69 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
70 |                x = sample(1:100, 50, replace = TRUE))
71 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
72 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
73 | 
74 | # Run correspondence analysis
75 | ca <- cacomp(obj = cnts, princ_coords = 3)
76 | 
77 | # Calculate APL coordinates for arbitrary group
78 | ca <- apl_coords(ca, group = 1:10)
79 | 
80 | # plot results
81 | # Note:
82 | # Due to random gene expression & group, no highly
83 | # associated genes are visible.
84 | apl(ca, type = "ggplot")
85 | }
86 | \references{
87 | Association Plots: Visualizing associations in high-dimensional
88 | correspondence analysis biplots \cr
89 | Elzbieta Gralinska, Martin Vingron \cr
90 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096 \cr
91 | }
92 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example2/gene_ranking.txt:
--------------------------------------------------------------------------------
 1 | "Gene_name"	"x-coordinate"	"y-coordinate"	"Gene_score"
 2 | "14"	"Burkina Faso"	0.659334062031513	0.209459617449081	0.0932180194135293
 3 | "13"	"Benin"	0.46660339350845	0.166135088099633	0.0175825284826763
 4 | "35"	"Cote d'Ivoire"	0.375609622422642	0.230118794618802	-0.246342918731435
 5 | "34"	"China"	0.379581633138169	0.232809016489976	-0.249641893125213
 6 | "36"	"Cameroon"	0.51122370357559	0.308047942336173	-0.321351492425428
 7 | "22"	"Bolivia"	0.115239525292288	0.182489665038921	-0.37798359928269
 8 | "38"	"Congo, Rep."	0.213539414949807	0.265348050842096	-0.50362884062404
 9 | "15"	"Bangladesh"	0.515545901239115	0.382994747014415	-0.519591435321639
10 | "10"	"Azerbaijan"	0.118643553602509	0.236851843036183	-0.521506703995547
11 | "4"	"Arab World"	-0.0826348047718997	0.182134958546392	-0.574899247963387
12 | "1"	"Afghanistan"	0.566050278113453	0.467427881465475	-0.69728833429889
13 | "26"	"Bhutan"	0.591111717263373	0.500936560797782	-0.762792328085333
14 | "39"	"Colombia"	-0.108851566434673	0.268540654620658	-0.834648617567197
15 | "23"	"Brazil"	-0.255731102123635	0.226333867336864	-0.867453948024074
16 | "3"	"Albania"	0.192689599496444	0.399734524035285	-0.887691094408197
17 | "37"	"Congo, Dem. Rep."	0.707352964776721	0.607676905606093	-0.935043069273576
18 | "33"	"Chile"	-0.29032497061507	0.239732963941796	-0.938262164832479
19 | "28"	"Central African Republic"	0.918251524726067	0.696805849208181	-0.965037362320114
20 | "7"	"Armenia"	0.0583393083734729	0.382411643439824	-0.975222047613535
21 | "21"	"Belize"	-0.129368951687118	0.31724996724156	-0.986814879273313
22 | "30"	"Central Europe and the Baltics"	-0.175778317867741	0.308285912900536	-1.00899668774541
23 | "27"	"Botswana"	-0.149448409679685	0.329957129605282	-1.04123856341249
24 | "2"	"Angola"	0.179903144134825	0.45844562830569	-1.05915872379313
25 | "24"	"Barbados"	-0.495322587859203	0.2109356616869	-1.06542800242783
26 | "32"	"Channel Islands"	-0.415806421614723	0.277531027121789	-1.16590216174509
27 | "18"	"Bahamas, The"	-0.538012266428231	0.234151041985703	-1.17086294608751
28 | "12"	"Belgium"	-0.523093044060456	0.25318026478414	-1.20737487038153
29 | "6"	"Argentina"	-0.528375373721855	0.251646208394254	-1.2085110360082
30 | "29"	"Canada"	-0.516913310908317	0.25644196137184	-1.21001067306841
31 | "20"	"Belarus"	-0.176260871721386	0.386214534970047	-1.2201004757536
32 | "11"	"Burundi"	1.17484147858886	0.892727782400543	-1.23797453560338
33 | "8"	"Australia"	-0.486265584873309	0.281257079798341	-1.24643189718138
34 | "25"	"Brunei Darussalam"	-0.541832206823089	0.263238202574069	-1.25329807944329
35 | "9"	"Austria"	-0.419212344064118	0.314917636729549	-1.27035457580852
36 | "31"	"Switzerland"	-0.46359904965256	0.315119718788519	-1.31528745777646
37 | "16"	"Bulgaria"	-0.252645734099141	0.394133471715775	-1.31788820894164
38 | "5"	"United Arab Emirates"	-0.47617120932336	0.380529288289148	-1.5046450383604
39 | "17"	"Bahrain"	-0.515622126246806	0.426290375045695	-1.66777652749158
40 | "19"	"Bosnia and Herzegovina"	-0.202596679230447	0.689328293173901	-2.06567563520669
41 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/example1/gene_ranking.txt:
--------------------------------------------------------------------------------
 1 | "Gene_name"	"x-coordinate"	"y-coordinate"	"Gene_score"
 2 | "14"	"Burkina Faso"	0.493483081784703	0.51544431851946	0.143971887195826
 3 | "13"	"Benin"	0.371710481815176	0.349564824608867	0.13467843966127
 4 | "11"	"Burundi"	0.898371653273147	1.17850669951975	0.0992527774200379
 5 | "28"	"Central African Republic"	0.700255713010355	0.922917979522332	0.0744457972915453
 6 | "35"	"Cote d'Ivoire"	0.295935094579746	0.338276212911539	0.0665576060174207
 7 | "26"	"Bhutan"	0.479794305974388	0.61849271694774	0.0604083110606367
 8 | "34"	"China"	0.293464674324616	0.349093065452325	0.0567525214399028
 9 | "1"	"Afghanistan"	0.457564161368761	0.598566346331496	0.0516897900148708
10 | "36"	"Cameroon"	0.366533471653081	0.485011904246648	0.0376578124354463
11 | "15"	"Bangladesh"	0.387472726288257	0.542474410165313	0.0196330347472275
12 | "37"	"Congo, Dem. Rep."	0.525371758267359	0.775111250568603	-0.000213740285928798
13 | "38"	"Congo, Rep."	0.189674099277819	0.32272720918723	-0.0291599597207467
14 | "22"	"Bolivia"	0.0763269801211523	0.217444942061676	-0.0711175424522714
15 | "10"	"Azerbaijan"	0.079954993980841	0.281767557818549	-0.111105247040777
16 | "3"	"Albania"	0.147679647753861	0.426469719370387	-0.141499873132641
17 | "2"	"Angola"	0.160505779017826	0.476151034497615	-0.16236152379775
18 | "4"	"Arab World"	-0.084231483403504	0.244146587721156	-0.249781791095949
19 | "7"	"Armenia"	0.0297001312294838	0.412599888090165	-0.25007456914523
20 | "39"	"Colombia"	-0.0808501374404857	0.313739024288438	-0.293589510498607
21 | "21"	"Belize"	-0.101016809262888	0.333203336269937	-0.32695449431724
22 | "27"	"Botswana"	-0.109912034733154	0.354448484192017	-0.350255576565193
23 | "30"	"Central Europe and the Baltics"	-0.136815218930296	0.34092997551302	-0.367992164201472
24 | "23"	"Brazil"	-0.199897338107552	0.27950479406108	-0.38942324993874
25 | "20"	"Belarus"	-0.132301031889015	0.423946651564449	-0.419769717266815
26 | "33"	"Chile"	-0.220920626787251	0.310567596541093	-0.43150952604888
27 | "16"	"Bulgaria"	-0.180573442611186	0.445145288122006	-0.482416446456921
28 | "32"	"Channel Islands"	-0.323128564526515	0.390771868446349	-0.588102175923988
29 | "9"	"Austria"	-0.31603451736632	0.427284462274932	-0.605766497084201
30 | "24"	"Barbados"	-0.366573604244055	0.404918219307577	-0.641139537859345
31 | "19"	"Bosnia and Herzegovina"	-0.168193760838336	0.708710350044667	-0.64875430280551
32 | "31"	"Switzerland"	-0.347327954758956	0.444724590999482	-0.648885693351841
33 | "8"	"Australia"	-0.376470789433401	0.420725523089288	-0.661755300069471
34 | "29"	"Canada"	-0.391598006862012	0.428641657689617	-0.682250270252899
35 | "12"	"Belgium"	-0.403162104224234	0.422269123623575	-0.689493295650689
36 | "6"	"Argentina"	-0.412819211563902	0.42956789512184	-0.704099535716694
37 | "18"	"Bahamas, The"	-0.41782033093511	0.423291048139206	-0.704844466381713
38 | "5"	"United Arab Emirates"	-0.388969467656746	0.490699703280908	-0.721701895420701
39 | "25"	"Brunei Darussalam"	-0.397712533701071	0.479440965862673	-0.722810664949909
40 | "17"	"Bahrain"	-0.397474395358172	0.554450026348923	-0.773434482774928
41 | 


--------------------------------------------------------------------------------
/tests/testthat/test-apl.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # tab <- read.delim(file = "/home/kohl/PhD/gits/APL/tests/testthat/testdata/input_data.tsv")
 3 | # mat <- as.matrix(tab[,-1])
 4 | # rownames(mat) <- tab$Country.Name
 5 | # save(mat, file = "/home/kohl/PhD/gits/APL/tests/testthat/testdata/countries.rda")
 6 | 
 7 | load("./testdata/countries.rda")
 8 | 
 9 | grp <- c(6, 7, 8, 10, 12)
10 | 
11 | 
12 | ca <- cacomp(mat, princ_coords = 3, dims = 19, top = 39)
13 | ca <- apl_coords(ca, group = grp)
14 | 
15 | 
16 | 
17 | test_that("Example 1, 39 genes and 19 dimensions", {
18 | 
19 |   samples1 <- read.delim(file = "./testdata/AP_coordinates/example1/AP_coordinates_samples.txt")
20 |   samples1 <- t(samples1)
21 |   rownames(samples1) <- colnames(mat)
22 |   colnames(samples1) <- c("x", "y")
23 | 
24 |   genes1 <- read.delim(file = "./testdata/AP_coordinates/example1/gene_ranking.txt")
25 |   ord <- order(as.numeric(rownames(genes1)))
26 |   rwnms <- rownames(mat)[as.numeric(rownames(genes1))[ord]]
27 | 
28 |   genes1_sort <- as.matrix(genes1[ord, c("x.coordinate","y.coordinate")])
29 |   dimnames(genes1_sort) <- list(rwnms, c("x", "y"))
30 | 
31 |   ca <- cacomp(mat, princ_coords = 3, dims = 19, top = 39)
32 |   ca <- apl_coords(ca, group = grp)
33 | 
34 |   expect_equal(ca@apl_cols, samples1, tolerance = 1e-6)
35 |   expect_equal(ca@apl_rows, genes1_sort, tolerance = 1e-6)
36 | 
37 |  })
38 | 
39 | test_that("Example 2, 39 genes and 4 dimensions",{
40 |   samples2 <- read.delim(file = "./testdata/AP_coordinates/example2/AP_coordinates_samples.txt")
41 |   samples2 <- t(samples2)
42 |   rownames(samples2) <- colnames(mat)
43 |   colnames(samples2) <- c("x", "y")
44 | 
45 |   genes2 <- read.delim(file = "./testdata/AP_coordinates/example2/gene_ranking.txt")
46 |   ord <- order(as.numeric(rownames(genes2)))
47 |   rwnms <- rownames(mat)[as.numeric(rownames(genes2))[ord]]
48 | 
49 |   genes2_sort <- as.matrix(genes2[ord, c("x.coordinate","y.coordinate")])
50 |   dimnames(genes2_sort) <- list(rwnms, c("x", "y"))
51 | 
52 |   ca <- cacomp(mat, princ_coords = 3, dims = 4, top = 39)
53 |   ca <- apl_coords(ca, group = grp)
54 | 
55 |   expect_equal(ca@apl_cols, samples2, tolerance = 1e-8)
56 |   expect_equal(ca@apl_rows, genes2_sort, tolerance = 1e-8)
57 | })
58 | 
59 | 
60 | test_that("Example 3, 20 genes and 4 dimensions",{
61 |   samples3 <- read.delim(file = "./testdata/AP_coordinates/example3/AP_coordinates_samples.txt")
62 |   samples3 <- t(samples3)
63 |   rownames(samples3) <- colnames(mat)
64 |   colnames(samples3) <- c("x", "y")
65 | 
66 |   genes3 <- read.delim(file = "./testdata/AP_coordinates/example3/gene_ranking.txt")
67 |   ord <- order(as.numeric(rownames(genes3)))
68 |   rwnms <- rownames(mat)[as.numeric(rownames(genes3))[ord]]
69 | 
70 |   genes3_sort <- as.matrix(genes3[ord, c("x.coordinate","y.coordinate")])
71 |   dimnames(genes3_sort) <- list(rwnms, c("x", "y"))
72 | 
73 |   ca <- cacomp(mat, princ_coords = 3, dims = 4, top = 20)
74 |   ca <- apl_coords(ca, group = grp)
75 | 
76 |   expect_equal(ca@apl_cols, samples3, tolerance = 1e-8)
77 |   expect_equal(ca@apl_rows[order(rownames(ca@apl_rows)),], genes3_sort[order(rownames(genes3_sort)),], tolerance = 1e-8)
78 | })
79 | 
80 | 


--------------------------------------------------------------------------------
/man/run_cacomp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CA.R
 3 | \name{run_cacomp}
 4 | \alias{run_cacomp}
 5 | \title{Internal function for `cacomp`}
 6 | \usage{
 7 | run_cacomp(
 8 |   obj,
 9 |   coords = TRUE,
10 |   princ_coords = 3,
11 |   python = FALSE,
12 |   dims = 100,
13 |   top = 5000,
14 |   inertia = TRUE,
15 |   rm_zeros = TRUE,
16 |   residuals = "pearson",
17 |   cutoff = NULL,
18 |   clip = FALSE,
19 |   ...
20 | )
21 | }
22 | \arguments{
23 | \item{obj}{A numeric matrix or Seurat/SingleCellExperiment object. For
24 | sequencing data, a count matrix of gene expression values with genes in rows and
25 | samples/cells in columns.
26 | Should contain row and column names.}
27 | 
28 | \item{coords}{Logical. Indicates whether CA standard coordinates should be
29 | calculated.}
30 | 
31 | \item{princ_coords}{Integer. Number indicating whether principal
32 | coordinates should be calculated for the rows (=1), columns (=2),
33 | both (=3) or none (=0).}
34 | 
35 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
36 | decomposition from the python package torch.
37 | This implementation dramatically speeds up computation compared to `svd()`
38 | in R when calculating the full SVD. This parameter only works when dims==NULL
39 | or dims==rank(mat), where calculating a full SVD is required.}
40 | 
41 | \item{dims}{Integer. Number of CA dimensions to retain. If NULL:
42 | (0.2 * min(nrow(A), ncol(A)) - 1 ).}
43 | 
44 | \item{top}{Integer. Number of most variable rows to retain.
45 | Set NULL to keep all.}
46 | 
47 | \item{inertia}{Logical. Whether total, row and column inertias should be
48 | calculated and returned.}
49 | 
50 | \item{rm_zeros}{Logical. Whether rows & cols containing only 0s should be
51 | removed. Keeping zero only rows/cols might lead to unexpected results.}
52 | 
53 | \item{residuals}{character string. Specifies which kind of residuals should
54 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
55 | negative-binomial.}
56 | 
57 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
58 | -cutoff are clipped to cutoff.}
59 | 
60 | \item{clip}{logical. Whether residuals should be clipped if they are
61 | higher/lower than a specified cutoff}
62 | 
63 | \item{...}{Arguments forwarded to methods.}
64 | }
65 | \value{
66 | Returns a named list of class "cacomp" with components
67 | U, V and D: The results from the SVD.
68 | row_masses and col_masses: Row and columns masses.
69 | top_rows: How many of the most variable rows/genes were retained for the
70 | analysis.
71 | tot_inertia, row_inertia and col_inertia: Only if inertia = TRUE. Total,
72 | row and column inertia respectively.
73 | }
74 | \description{
75 | `run_cacomp` performs correspondence analysis on a matrix and returns the
76 | transformed data.
77 | }
78 | \details{
79 | The calculation is performed according to the work of Michael Greenacre.
80 | When working with large matrices,
81 | CA coordinates and
82 | principal coordinates should only be computed when needed to save
83 | computational time.
84 | }
85 | \references{
86 | Greenacre, M. Correspondence Analysis in Practice, Third Edition, 2017.
87 | }
88 | 


--------------------------------------------------------------------------------
/man/ca_3Dplot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/plot.R
  3 | \name{ca_3Dplot}
  4 | \alias{ca_3Dplot}
  5 | \alias{ca_3Dplot,cacomp-method}
  6 | \alias{ca_3Dplot,Seurat-method}
  7 | \alias{ca_3Dplot,SingleCellExperiment-method}
  8 | \title{Plot of the first 3D CA projection of the data.}
  9 | \usage{
 10 | ca_3Dplot(
 11 |   obj,
 12 |   xdim = 1,
 13 |   ydim = 2,
 14 |   zdim = 3,
 15 |   princ_coords = 1,
 16 |   row_labels = NULL,
 17 |   col_labels = NULL,
 18 |   ...
 19 | )
 20 | 
 21 | \S4method{ca_3Dplot}{cacomp}(
 22 |   obj,
 23 |   xdim = 1,
 24 |   ydim = 2,
 25 |   zdim = 3,
 26 |   princ_coords = 1,
 27 |   row_labels = NULL,
 28 |   col_labels = NULL,
 29 |   ...
 30 | )
 31 | 
 32 | \S4method{ca_3Dplot}{Seurat}(
 33 |   obj,
 34 |   xdim = 1,
 35 |   ydim = 2,
 36 |   zdim = 3,
 37 |   princ_coords = 1,
 38 |   row_labels = NULL,
 39 |   col_labels = NULL,
 40 |   ...,
 41 |   assay = SeuratObject::DefaultAssay(obj),
 42 |   slot = "counts"
 43 | )
 44 | 
 45 | \S4method{ca_3Dplot}{SingleCellExperiment}(
 46 |   obj,
 47 |   xdim = 1,
 48 |   ydim = 2,
 49 |   zdim = 3,
 50 |   princ_coords = 1,
 51 |   row_labels = NULL,
 52 |   col_labels = NULL,
 53 |   ...,
 54 |   assay = "counts"
 55 | )
 56 | }
 57 | \arguments{
 58 | \item{obj}{An object of class "cacomp", or alternatively an object of
 59 | class "Seurat" or "SingleCellExperiment" with a dim. reduction named "CA"
 60 | saved.}
 61 | 
 62 | \item{xdim}{Integer. The dimension for the x-axis. Default 1.}
 63 | 
 64 | \item{ydim}{Integer. The dimension for the y-axis. Default 2.}
 65 | 
 66 | \item{zdim}{Integer. The dimension for the z-axis. Default 3.}
 67 | 
 68 | \item{princ_coords}{Integer. If 1 then principal coordinates are used for
 69 | the rows, if 2 for the columns. Default 1 (rows).}
 70 | 
 71 | \item{row_labels}{Numeric vector. Indices for the rows for which a label
 72 | should be added (label should be stored in rownames). Default NULL.}
 73 | 
 74 | \item{col_labels}{Numeric vector. Indices for the columns for which
 75 | a label should be added (label should be stored in colnames).
 76 | Default NULL (no columns).}
 77 | 
 78 | \item{...}{Further arguments.}
 79 | 
 80 | \item{assay}{SingleCellExperiment assay to obtain counts from.}
 81 | 
 82 | \item{slot}{Seurat slot from assay to get count matrix from.}
 83 | }
 84 | \value{
 85 | Plot of class "plotly".
 86 | }
 87 | \description{
 88 | Plots the first 3 dimensions of the rows and columns in the same plot.
 89 | }
 90 | \details{
 91 | Depending on whether `princ_coords` is set to 1 or 2 either the principal
 92 | coordinates of either the rows (1) or the columns (2)
 93 | are chosen. For the other the standardized coordinates are plotted
 94 | (asymmetric biplot).
 95 | Labels for rows and columns should be stored in the row- and column
 96 | names respectively.
 97 | }
 98 | \examples{
 99 | # Simulate counts
100 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
101 |                x = sample(1:100, 50, replace = TRUE))
102 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
103 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
104 | 
105 | # Run correspondence analysis
106 | ca <- cacomp(obj = cnts, princ_coords = 3)
107 | 
108 | ca_3Dplot(ca)
109 | }
110 | 


--------------------------------------------------------------------------------
/man/cacomp-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/constructor.R
 3 | \docType{class}
 4 | \name{cacomp-class}
 5 | \alias{cacomp-class}
 6 | \alias{new_cacomp}
 7 | \title{An S4 class that contains all elements needed for CA.}
 8 | \usage{
 9 | new_cacomp(...)
10 | }
11 | \arguments{
12 | \item{...}{slot names and objects for new cacomp object.}
13 | }
14 | \value{
15 | cacomp object
16 | }
17 | \description{
18 | This class contains elements necessary to compute CA coordinates or
19 | Association Plot coordinates,
20 | as well as other informative data such as row/column inertia, 
21 | gene-wise APL-scores, etc. ...
22 | 
23 | Creates new cacomp object.
24 | }
25 | \section{Slots}{
26 | 
27 | \describe{
28 | \item{\code{U}}{class "matrix". Left singular vectors of the original input matrix.}
29 | 
30 | \item{\code{V}}{class "matrix". Right singular vectors of the original input matrix.}
31 | 
32 | \item{\code{D}}{class "numeric". Singular values of the original input matrix.}
33 | 
34 | \item{\code{std_coords_rows}}{class "matrix". Standardized CA coordinates of the 
35 | rows.}
36 | 
37 | \item{\code{std_coords_cols}}{class "matrix". Standardized CA coordinates of the 
38 | columns.}
39 | 
40 | \item{\code{prin_coords_rows}}{class "matrix". Principal CA coordinates of the rows.}
41 | 
42 | \item{\code{prin_coords_cols}}{class "matrix". Principal CA coordinates of the 
43 | columns.}
44 | 
45 | \item{\code{apl_rows}}{class "matrix". Association Plot coordinates of the rows 
46 | for the direction defined in slot "group"}
47 | 
48 | \item{\code{apl_cols}}{class "matrix". Association Plot coordinates of the columns 
49 | for the direction defined in slot "group"}
50 | 
51 | \item{\code{APL_score}}{class "data.frame". Contains rows sorted by the APL score.
52 | Columns: Rowname (gene name in the case of gene expression data),
53 | APL score calculated for the direction defined in slot "group",
54 | the original row number and the rank of the row as determined by the score.}
55 | 
56 | \item{\code{dims}}{class "numeric". Number of dimensions in CA space.}
57 | 
58 | \item{\code{group}}{class "numeric". Indices of the chosen columns for APL 
59 | calculations.}
60 | 
61 | \item{\code{row_masses}}{class "numeric". Row masses of the frequency table.}
62 | 
63 | \item{\code{col_masses}}{class "numeric". Column masses of the frequency table.}
64 | 
65 | \item{\code{top_rows}}{class "numeric". Number of most variable rows chosen.}
66 | 
67 | \item{\code{tot_inertia}}{class "numeric". Total inertia in CA space.}
68 | 
69 | \item{\code{row_inertia}}{class "numeric". Row-wise inertia in CA space.}
70 | 
71 | \item{\code{col_inertia}}{class "numeric". Column-wise inertia in CA space.}
72 | 
73 | \item{\code{permuted_data}}{class "list". Storage slot for permuted data.}
74 | 
75 | \item{\code{params}}{class "list". List of parameters.}
76 | }}
77 | 
78 | \examples{
79 | set.seed(1234)
80 | 
81 | # Simulate counts
82 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)}, 
83 |                x = sample(1:20, 50, replace = TRUE))
84 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
85 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
86 | 
87 | res <-  APL:::comp_std_residuals(mat=cnts)
88 | SVD <- svd(res$S)
89 | names(SVD) <- c("D", "U", "V")
90 | SVD <- SVD[c(2, 1, 3)]
91 | 
92 | ca <- new_cacomp(U = SVD$U,
93 |                  V = SVD$V,
94 |                  D = SVD$D,
95 |                  row_masses = res$rowm,
96 |                  col_masses = res$colm)
97 | }
98 | 


--------------------------------------------------------------------------------
/man/apl_score.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/apl.R
 3 | \name{apl_score}
 4 | \alias{apl_score}
 5 | \title{Find rows most highly associated with a condition}
 6 | \usage{
 7 | apl_score(
 8 |   caobj,
 9 |   mat = NULL,
10 |   dims = caobj@dims,
11 |   group = caobj@group,
12 |   reps = 10,
13 |   quant = 0.99,
14 |   python = FALSE,
15 |   store_perm = TRUE,
16 |   method = "permutation"
17 | )
18 | }
19 | \arguments{
20 | \item{caobj}{A "cacomp" object with principal row coordinates and
21 | standardized column coordinates calculated.}
22 | 
23 | \item{mat}{A numeric matrix. For sequencing a count matrix, gene expression
24 | values with genes in rows and samples/cells in columns.
25 | Should contain row and column names.}
26 | 
27 | \item{dims}{Integer. Number of CA dimensions to retain. Needs to be the same
28 | as in caobj!}
29 | 
30 | \item{group}{Vector of indices of the columns to calculate centroid/x-axis
31 | direction.}
32 | 
33 | \item{reps}{Integer. Number of permutations to perform.}
34 | 
35 | \item{quant}{Numeric. Single number between 0 and 1 indicating the quantile
36 | used to calculate the cutoff. Default 0.99.}
37 | 
38 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
39 | decomposition from the python package torch.
40 | This implementation dramatically speeds up computation compared to `svd()`
41 | in R.}
42 | 
43 | \item{store_perm}{Logical. Whether permuted data should be stored in the CA
44 | object.}
45 | 
46 | \item{method}{Method to calculate the cutoff. Either "random" for random
47 | direction method or "permutation" for the permutation method.}
48 | }
49 | \value{
50 | Returns the input "cacomp" object with "APL_score" component added.
51 | APL_score contains a data frame with ranked rows, their score and their
52 | original row number.
53 | }
54 | \description{
55 | Ranks rows by a calculated score which balances the association of the row
56 | with the condition and how associated it is with other conditions.
57 | }
58 | \details{
59 | The score is calculated by permuting the values of each row to determine the
60 | cutoff angle of the 99\% quantile.
61 | \deqn{S_{\alpha}(x,y)=x-\frac{y}{\tan\alpha}}
62 | By default the permutation is repeated 10 times (for the random direction
63 | method at least 300 repetitions are recommended!), but for very large matrices
64 | this can be reduced.
65 | The method "permutation" permutes the columns in each row and calculates
66 | AP-coordinates for each such permutation. The cutoff is then taken by the
67 | quantile specified by "quant". The "random" method in contrast calculates
68 | AP-coordinates for the original data, but by looking into random directions.
69 | 
70 | If store_perm is TRUE the permuted data is stored in the cacomp object and
71 | can be used for future scoring.
72 | }
73 | \examples{
74 | set.seed(1234)
75 | 
76 | # Simulate counts
77 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
78 |                x = sample(1:20, 50, replace = TRUE))
79 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
80 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
81 | 
82 | # Run correspondence analysis.
83 | ca <- cacomp(obj = cnts, princ_coords = 3)
84 | 
85 | # Calculate APL coordinates:
86 | ca <- apl_coords(ca, group = 1:10)
87 | 
88 | # Rank genes by S-alpha score
89 | ca <- apl_score(ca, mat = cnts)
90 | }
91 | \references{
92 | Association Plots: Visualizing associations in high-dimensional
93 | correspondence analysis biplots \cr
94 | Elzbieta Gralinska, Martin Vingron \cr
95 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096
96 | }
97 | 


--------------------------------------------------------------------------------
/R/generic_methods.R:
--------------------------------------------------------------------------------
  1 | #' @include constructor.R
  2 | NULL
  3 | 
  4 | #' Prints cacomp object
  5 | #'
  6 | #' @description Provides more user friendly printing of cacomp objects.
  7 | #'
  8 | #' @details Reports the number of rows, columns and dimensions, which
  9 | #' standard, principal and APL coordinates are stored, and the percentage of
 10 | #' the summed squared singular values held by the first (up to) two dimensions.
 11 | #'
 12 | #' @param object cacomp object to print
 13 | #' @returns \code{NULL}, invisibly. Called for its side effect of printing
 14 | #' summary information about the cacomp object.
 15 | #' @export
 16 | #' @examples
 17 | #' # Simulate scRNAseq data.
 18 | #' cnts <- data.frame(cell_1 = rpois(10, 5),
 19 | #'                    cell_2 = rpois(10, 10),
 20 | #'                    cell_3 = rpois(10, 20))
 21 | #' rownames(cnts) <- paste0("gene_", 1:10)
 22 | #' cnts <- as.matrix(cnts)
 23 | #'
 24 | #' # Run correspondence analysis.
 25 | #' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
 26 | #'
 27 | #' ca
 28 | show.cacomp <- function(object){
 29 | 
 30 |   # Comma-separated names of those slots that are filled ("" if none).
 31 |   calculated <- function(slots){
 32 |     filled <- vapply(slots,
 33 |                      function(s) !is.empty(slot(object, s)),
 34 |                      logical(1))
 35 |     paste(slots[filled], collapse = ", ")
 36 |   }
 37 | 
 38 |   if (!is.empty(object@V) && !is.empty(object@U) && !is.empty(object@D)){
 39 |     cat("cacomp object with",
 40 |         nrow(object@V),
 41 |         "columns,",
 42 |         nrow(object@U),
 43 |         "rows and",
 44 |         length(object@D),
 45 |         "dimensions.")
 46 |   } else {
 47 |     cat("Uncomplete cacomp object.",
 48 |         "Consider running as.cacomp(object, recompute=TRUE).")
 49 |   }
 50 | 
 51 |   cat("\nCalc. standard coord.: ",
 52 |       calculated(c("std_coords_rows", "std_coords_cols")))
 53 |   cat("\nCalc. principal coord.:",
 54 |       calculated(c("prin_coords_rows", "prin_coords_cols")))
 55 |   cat("\nCalc. APL coord.:      ",
 56 |       calculated(c("apl_rows", "apl_cols")))
 57 | 
 58 |   if (!is.empty(object@D)){
 59 |     prinInertia <- object@D^2
 60 |     percentInertia <- prinInertia / sum(prinInertia) * 100
 61 |     # Report at most two dimensions; an object with a single dimension
 62 |     # would otherwise print "NA% Dim2".
 63 |     shown <- seq_len(min(2, length(percentInertia)))
 64 |     cat("\nExplained inertia:     ",
 65 |         paste0(round(percentInertia[shown], 1), "% Dim", shown,
 66 |                collapse = ", "))
 67 |   }
 68 | 
 69 |   # Always terminate the output with a newline, also for objects without D.
 70 |   cat("\n")
 71 |   invisible(NULL)
 72 | }
 73 | 
 74 | #' @rdname show.cacomp
 75 | #' @export
 76 | setMethod("show",
 77 |           signature = signature(object = "cacomp"),
 78 |           definition = function(object) show.cacomp(object))
 79 | 
 80 | #' Convert cacomp object to list.
 81 | #' @param x A cacomp object.
 82 | #' @return A named list with one element per slot of the cacomp object.
 83 | #' @export
 84 | #' @examples
 85 | #'
 86 | #' # Simulate counts
 87 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
 88 | #'                x = sample(1:100, 50, replace = TRUE))
 89 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
 90 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
 91 | #'
 92 | #' # Run correspondence analysis
 93 | #' ca <- cacomp(obj = cnts, princ_coords = 3)
 94 | #' ca_list <- as.list(ca)
 95 | setMethod("as.list", signature(x = "cacomp"), function(x) {
 96 |   slot_names <- slotNames(class(x))
 97 | 
 98 |   # Extract every slot; nested cacomp objects are converted recursively.
 99 |   slot_values <- lapply(slot_names, function(nm) {
100 |     value <- slot(x, nm)
101 |     if (inherits(value, "cacomp")) as.list(value) else value
102 |   })
103 | 
104 |   names(slot_values) <- slot_names
105 |   slot_values
106 | })
107 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- badges: start -->
 3 | [![Bioc release status](http://www.bioconductor.org/shields/build/release/bioc/APL.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/APL)
 4 | [![Bioc devel status](http://www.bioconductor.org/shields/build/devel/bioc/APL.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/APL)
 5 | [![Bioc downloads rank](https://bioconductor.org/shields/downloads/release/APL.svg)](http://bioconductor.org/packages/stats/bioc/APL/)
 6 | [![Bioc support](https://bioconductor.org/shields/posts/APL.svg)](https://support.bioconductor.org/tag/APL)
 7 | [![Bioc history](https://bioconductor.org/shields/years-in-bioc/APL.svg)](https://bioconductor.org/packages/release/bioc/html/APL.html#since)
 8 | [![Bioc last commit](https://bioconductor.org/shields/lastcommit/devel/bioc/APL.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/APL/)
 9 | [![Bioc dependencies](https://bioconductor.org/shields/dependencies/release/APL.svg)](https://bioconductor.org/packages/release/bioc/html/APL.html#since)
10 | <!-- badges: end -->
11 | 
12 | <img src="man/figures/fig_AP.png" width="700">
13 | 
14 | # APL
15 | 
16 | `APL` is a package developed for computation of Association Plots, a method for visualization and analysis of single cell transcriptomics data. The main focus of `APL` is the identification of genes characteristic for individual clusters of cells from input data. 
17 | 
18 | When working with the `APL` package, please cite:
19 | ```
20 | Gralinska, E., Kohl, C., Fadakar, B. S., & Vingron, M. (2022). 
21 | Visualizing Cluster-specific Genes from Single-cell Transcriptomics Data Using Association Plots. 
22 | Journal of Molecular Biology, 434(11), 167525.
23 | ```
24 | 
25 | ## Installation
26 | 
27 | `APL` can be installed from GitHub:
28 |     
29 |     library(devtools)
30 |     install_github("VingronLab/APL")
31 |     
32 | 
33 | To additionally build the package vignette, run instead:
34 | 
35 |     install_github("VingronLab/APL", build_vignettes = TRUE, dependencies = TRUE)
36 | 
37 | 
38 | Building the vignette will however take considerable time.
39 | 
40 | **The vignette can also be found under the link: https://vingronlab.github.io/APL/ (hyperlink in the GitHub repository description).**
41 | 
42 | To install `APL` from Bioconductor, run:
43 | 
44 |     if (!requireNamespace("BiocManager", quietly = TRUE))
45 |         install.packages("BiocManager")
46 |     
47 |     BiocManager::install("APL")
48 | 
49 |  
50 | ## Pytorch installation
51 | 
52 | In order to speed up the singular value decomposition, we highly recommend the installation of `pytorch`.
53 | Users can instead also opt to use the slower R native SVD. For this, please set the argument `python = FALSE` wherever applicable in the package vignette.
54 | 
55 | ### Install pytorch with reticulate
56 | 
57 |     library(reticulate)
58 |     install_miniconda() 
59 |     conda_install(envname = "r-reticulate", packages = "numpy")
60 |     conda_install(envname = "r-reticulate", packages = "pytorch")
61 | 
62 | ### Manually install pytorch with conda
63 | 
64 | Download the appropriate Miniconda installer for your system from [the conda website](https://docs.conda.io/en/latest/miniconda.html). 
65 | Follow the installation instructions on their website and make sure the R package `reticulate` is also installed before proceeding.
66 | Once installed, list all available conda environments via <br>
67 | `conda info --envs` <br>
68 | One of the environments should have `r-reticulate` in its name. Depending on where
69 | you installed it and your system, the exact path might be different.
70 | Activate the environment and install pytorch into it.
71 | 
72 |     conda activate ~/.local/share/r-miniconda/envs/r-reticulate # change path accordingly.
73 |     conda install numpy
74 |     conda install pytorch
75 | 
76 | 
77 | ## Feature overview
78 | 
79 | Please run 
80 |     
81 |     vignette("APL")
82 | 
83 | after installation with `build_vignettes = TRUE` for an introduction into the package.
84 | 


--------------------------------------------------------------------------------
/man/as.cacomp.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/convert.R
  3 | \name{as.cacomp}
  4 | \alias{as.cacomp}
  5 | \alias{as.cacomp,cacomp-method}
  6 | \alias{as.cacomp,list-method}
  7 | \alias{as.cacomp,Seurat-method}
  8 | \alias{as.cacomp,SingleCellExperiment-method}
  9 | \title{Create cacomp object from Seurat/SingleCellExperiment container}
 10 | \usage{
 11 | as.cacomp(obj, ...)
 12 | 
 13 | \S4method{as.cacomp}{cacomp}(obj, ...)
 14 | 
 15 | \S4method{as.cacomp}{list}(obj, ..., mat = NULL)
 16 | 
 17 | \S4method{as.cacomp}{Seurat}(obj, ..., assay = "RNA", slot = "counts")
 18 | 
 19 | \S4method{as.cacomp}{SingleCellExperiment}(obj, ..., assay = "counts")
 20 | }
 21 | \arguments{
 22 | \item{obj}{An object of class "Seurat" or "SingleCellExperiment"
 23 | with a dim. reduction named "CA" saved. For obj "cacomp" input is returned.}
 24 | 
 25 | \item{...}{Further arguments.}
 26 | 
 27 | \item{mat}{Original input matrix.}
 28 | 
 29 | \item{assay}{Character. The assay from which to extract the count matrix,
 30 | e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
 31 | SingleCellExperiments.}
 32 | 
 33 | \item{slot}{character. Slot of the Seurat assay to use. Default "counts".}
 34 | }
 35 | \value{
 36 | A cacomp object.
 37 | }
 38 | \description{
 39 | Converts the values stored in the Seurat/SingleCellExperiment dimensional
 40 | reduction slot "CA" to a cacomp object.
 41 | If recompute = TRUE additional parameters are recomputed from the saved
 42 | values without rerunning SVD (need to specify assay to work).
 43 | 
 44 | as.cacomp.cacomp returns input without any calculations.
 45 | 
 46 | Recomputes missing values and returns cacomp object from a list.
 47 | If you have a *complete* cacomp object in list form,
 48 | use do.call(new_cacomp, obj).
 49 | 
 50 | as.cacomp.Seurat: Converts the values stored in the Seurat DimReduc slot
 51 | "CA" to a cacomp object.
 52 | 
 53 | as.cacomp.SingleCellExperiment: Converts the values stored in the
 54 | SingleCellExperiment reducedDim slot "CA" to a cacomp object.
 55 | }
 56 | \details{
 57 | By default extracts std_coords_cols, D, prin_coords_rows, top_rows and dims
 58 | from obj and outputs a cacomp object.
 59 | If recompute = TRUE the following are additionally recalculated
 60 | (doesn't run SVD):
 61 | U, V, std_coords_rows, row_masses, col_masses.
 62 | }
 63 | \examples{
 64 | #########
 65 | # lists #
 66 | #########
 67 | 
 68 | # Simulate counts
 69 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
 70 |                x = sample(1:100, 50, replace = TRUE))
 71 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
 72 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
 73 | 
 74 | # Run correspondence analysis
 75 | ca <- cacomp(obj = cnts, princ_coords = 3)
 76 | ca_list <- as.list(ca)
 77 | 
 78 | # Only keep subset of elements for demonstration
 79 | ca_list <- ca_list[c("U", "std_coords_rows", "std_coords_cols", "params")]
 80 | 
 81 | # convert (incomplete) list to cacomp object.
 82 | ca <- as.cacomp(ca_list, mat = cnts)
 83 | 
 84 | ##########
 85 | # Seurat #
 86 | ##########
 87 | library(SeuratObject)
 88 | set.seed(1234)
 89 | 
 90 | # Simulate counts
 91 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
 92 |                x = sample(1:100, 50, replace = TRUE))
 93 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
 94 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
 95 | 
 96 | seu <- CreateSeuratObject(counts = cnts)
 97 | seu <- cacomp(seu, return_input = TRUE)
 98 | 
 99 | ca <- as.cacomp(seu, assay = "RNA", slot = "counts")
100 | 
101 | ########################
102 | # SingleCellExperiment #
103 | ########################
104 | library(SingleCellExperiment)
105 | set.seed(1234)
106 | 
107 | # Simulate counts
108 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
109 |                x = sample(1:100, 50, replace = TRUE))
110 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
111 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
112 | 
113 | sce <- SingleCellExperiment(assays=list(counts=cnts))
114 | sce <- cacomp(sce, return_input = TRUE)
115 | 
116 | ca <- as.cacomp(sce, assay = "counts")
117 | }
118 | 


--------------------------------------------------------------------------------
/man/ca_biplot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/plot.R
  3 | \name{ca_biplot}
  4 | \alias{ca_biplot}
  5 | \alias{ca_biplot,cacomp-method}
  6 | \alias{ca_biplot,Seurat-method}
  7 | \alias{ca_biplot,SingleCellExperiment-method}
  8 | \title{Plot of 2D CA projection of the data.}
  9 | \usage{
 10 | ca_biplot(
 11 |   obj,
 12 |   xdim = 1,
 13 |   ydim = 2,
 14 |   princ_coords = 1,
 15 |   row_labels = NULL,
 16 |   col_labels = NULL,
 17 |   type = "ggplot",
 18 |   col_metadata = NULL,
 19 |   row_metadata = NULL,
 20 |   show_all = TRUE,
 21 |   ...
 22 | )
 23 | 
 24 | \S4method{ca_biplot}{cacomp}(
 25 |   obj,
 26 |   xdim = 1,
 27 |   ydim = 2,
 28 |   princ_coords = 1,
 29 |   row_labels = NULL,
 30 |   col_labels = NULL,
 31 |   type = "ggplot",
 32 |   col_metadata = NULL,
 33 |   row_metadata = NULL,
 34 |   show_all = TRUE,
 35 |   ...
 36 | )
 37 | 
 38 | \S4method{ca_biplot}{Seurat}(
 39 |   obj,
 40 |   xdim = 1,
 41 |   ydim = 2,
 42 |   princ_coords = 1,
 43 |   row_labels = NULL,
 44 |   col_labels = NULL,
 45 |   type = "ggplot",
 46 |   col_metadata = NULL,
 47 |   row_metadata = NULL,
 48 |   show_all = TRUE,
 49 |   ...,
 50 |   assay = SeuratObject::DefaultAssay(obj),
 51 |   slot = "counts"
 52 | )
 53 | 
 54 | \S4method{ca_biplot}{SingleCellExperiment}(
 55 |   obj,
 56 |   xdim = 1,
 57 |   ydim = 2,
 58 |   princ_coords = 1,
 59 |   row_labels = NULL,
 60 |   col_labels = NULL,
 61 |   type = "ggplot",
 62 |   col_metadata = NULL,
 63 |   row_metadata = NULL,
 64 |   show_all = TRUE,
 65 |   ...,
 66 |   assay = "counts"
 67 | )
 68 | }
 69 | \arguments{
 70 | \item{obj}{An object of class "cacomp" with the relevant standardized and
 71 | principal coordinates calculated,
 72 |  or alternatively an object of class "Seurat" or "SingleCellExperiment"
 73 |  with a dim. reduction named "CA" saved.}
 74 | 
 75 | \item{xdim}{Integer. The dimension for the x-axis. Default 1.}
 76 | 
 77 | \item{ydim}{Integer. The dimension for the y-axis. Default 2.}
 78 | 
 79 | \item{princ_coords}{Integer. If 1 then principal coordinates are used for
 80 | the rows,
 81 | if 2 for the columns. Default 1 (rows).}
 82 | 
 83 | \item{row_labels}{Numeric vector. Indices for the rows for which a label
 84 | should be added
 85 | (label should be stored in rownames). Default NULL.}
 86 | 
 87 | \item{col_labels}{Numeric vector. Indices for the columns for which a label
 88 | should be added
 89 | (label should be stored in colnames).
 90 | Default NULL (no columns).}
 91 | 
 92 | \item{type}{String. Type of plot to draw. Either "ggplot" or "plotly".
 93 | Default "ggplot".}
 94 | 
 95 | \item{col_metadata}{named vector of additional metadata to color points.
 96 | The names of the elements in col_metadata should correspond to the column
 97 | names in 'obj'. If NULL columns will be in a single color. Can also specify
 98 | a metadata column for Seurat/SingleCellExperiment objects.}
 99 | 
100 | \item{row_metadata}{named vector of additional metadata to color points.
101 | The names of the elements in row_metadata should correspond to the row
102 | names in 'obj'. If NULL rows will be in a single color. Can also specify
103 | a metadata column for Seurat/SingleCellExperiment objects.}
104 | 
105 | \item{show_all}{logical. If FALSE cells/genes that are not in col_metadata/
106 | row_metadata are not plotted. If *_metadata is NULL, the cell or genes
107 | respectively will still be plotted.}
108 | 
109 | \item{...}{Further arguments.}
110 | 
111 | \item{assay}{SingleCellExperiment assay for recomputation}
112 | 
113 | \item{slot}{Seurat assay slot from which to get matrix.}
114 | }
115 | \value{
116 | Plot of class "plotly" or "ggplot".
117 | }
118 | \description{
119 | Plots the first 2 dimensions of the rows and columns in the same plot.
120 | }
121 | \details{
122 | Choosing type "plotly" will generate an interactive html plot with the
123 | package plotly.
124 | Type "ggplot" generates a static plot.
125 | Depending on whether `princ_coords` is set to 1 or 2,
126 | the principal coordinates of either the rows (1) or the columns (2)
127 | are chosen. For the other the standard coordinates are plotted
128 | (asymmetric biplot).
129 | Labels for rows and columns should be stored in the row and column names
130 | respectively.
131 | }
132 | \examples{
133 | # Simulate counts
134 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
135 |                x = sample(1:100, 50, replace = TRUE))
136 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
137 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
138 | 
139 | # Run correspondence analysis
140 | ca <- cacomp(obj = cnts, princ_coords = 3)
141 | 
142 | ca_biplot(ca)
143 | }
144 | 


--------------------------------------------------------------------------------
/man/pick_dims.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/CA.R
  3 | \name{pick_dims}
  4 | \alias{pick_dims}
  5 | \alias{pick_dims,cacomp-method}
  6 | \alias{pick_dims,Seurat-method}
  7 | \alias{pick_dims,SingleCellExperiment-method}
  8 | \title{Compute statistics to help choose the number of dimensions}
  9 | \usage{
 10 | pick_dims(
 11 |   obj,
 12 |   mat = NULL,
 13 |   method = "scree_plot",
 14 |   reps = 3,
 15 |   python = FALSE,
 16 |   return_plot = FALSE,
 17 |   ...
 18 | )
 19 | 
 20 | \S4method{pick_dims}{cacomp}(
 21 |   obj,
 22 |   mat = NULL,
 23 |   method = "scree_plot",
 24 |   reps = 3,
 25 |   python = FALSE,
 26 |   return_plot = FALSE,
 27 |   ...
 28 | )
 29 | 
 30 | \S4method{pick_dims}{Seurat}(
 31 |   obj,
 32 |   mat = NULL,
 33 |   method = "scree_plot",
 34 |   reps = 3,
 35 |   python = FALSE,
 36 |   return_plot = FALSE,
 37 |   ...,
 38 |   assay = SeuratObject::DefaultAssay(obj),
 39 |   slot = "counts"
 40 | )
 41 | 
 42 | \S4method{pick_dims}{SingleCellExperiment}(
 43 |   obj,
 44 |   mat = NULL,
 45 |   method = "scree_plot",
 46 |   reps = 3,
 47 |   python = FALSE,
 48 |   return_plot = FALSE,
 49 |   ...,
 50 |   assay = "counts"
 51 | )
 52 | }
 53 | \arguments{
 54 | \item{obj}{A "cacomp" object as outputted from \code{cacomp()},
 55 | a "Seurat" object with a "CA" DimReduc object stored,
 56 | or a "SingleCellExperiment" object with a "CA" dim. reduction stored.}
 57 | 
 58 | \item{mat}{A numeric matrix. For sequencing a count matrix, gene expression
 59 | values with genes in rows and samples/cells in columns.
 60 | Should contain row and column names.}
 61 | 
 62 | \item{method}{String. Either "scree_plot", "avg_inertia", "maj_inertia" or
 63 | "elbow_rule" (see Details section). Default "scree_plot".}
 64 | 
 65 | \item{reps}{Integer. Number of permutations to perform when choosing
 66 | "elbow_rule". Default 3.}
 67 | 
 68 | \item{python}{DEPRECATED. A logical value indicating whether to use singular value
 69 | decomposition from the python package torch.
 70 | This implementation dramatically speeds up computation compared to \code{svd()}
 71 | in R.}
 72 | 
 73 | \item{return_plot}{TRUE/FALSE. Whether a plot should be returned when
 74 | choosing "elbow_rule". Default FALSE.}
 75 | 
 76 | \item{...}{Arguments forwarded to methods.}
 77 | 
 78 | \item{assay}{Character. The assay from which to extract the count matrix
 79 | for SVD, e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
 80 | SingleCellExperiments.}
 81 | 
 82 | \item{slot}{Character. Data slot of the Seurat assay.
 83 | E.g. "data" or "counts". Default "counts".}
 84 | }
 85 | \value{
 86 | For \code{avg_inertia}, \code{maj_inertia} and \code{elbow_rule} (when \code{return_plot=FALSE})
 87 | returns an integer, indicating the suggested number of dimensions to use.
 88 | \itemize{
 89 | \item \code{scree_plot} returns a ggplot object.
 90 | \item \code{elbow_rule} (for \code{return_plot=TRUE}) returns a list with two elements:
 91 | "dims" contains the number of dimensions and "plot" a ggplot.
 92 | }
 93 | }
 94 | \description{
 95 | Allow the user to choose from 4 different methods ("avg_inertia",
 96 | "maj_inertia", "scree_plot" and "elbow_rule")
 97 | to estimate the number of dimensions that best represent the data.
 98 | }
 99 | \details{
100 | \itemize{
101 | \item "avg_inertia" calculates the number of dimensions in which the inertia is
102 | above the average inertia.
103 | \item "maj_inertia" calculates the number of dimensions that cumulatively
104 | explain up to 80\% of the total inertia.
105 | \item "scree_plot" plots a scree plot.
106 | \item "elbow_rule" formalization of the commonly used elbow rule. Permutes the
107 | rows for each column and reruns \code{cacomp()} for a total of \code{reps} times.
108 | The number of relevant dimensions is obtained from the point where the
109 | line for the explained inertia of the permuted data intersects with the
110 | actual data.
111 | }
112 | }
113 | \examples{
114 | # Simulate counts
115 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
116 |                x = sample(1:20, 50, replace = TRUE))
117 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
118 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
119 | 
120 | # Run correspondence analysis.
121 | ca <- cacomp(obj = cnts)
122 | 
123 | # pick dimensions with the elbow rule. Returns list.
124 | 
125 | set.seed(2358)
126 | pd <- pick_dims(obj = ca,
127 |                 mat = cnts,
128 |                 method = "elbow_rule",
129 |                 return_plot = TRUE,
130 |                 reps = 10)
131 | pd$plot
132 | ca_sub <- subset_dims(ca, dims = pd$dims)
133 | 
134 | # pick dimensions which explain cumulatively >80\% of total inertia.
135 | # Returns vector.
136 | pd <- pick_dims(obj = ca,
137 |                 method = "maj_inertia")
138 | ca_sub <- subset_dims(ca, dims = pd)
139 | 
140 | ################################
141 | # pick_dims for Seurat objects #
142 | ################################
143 | library(SeuratObject)
144 | set.seed(1234)
145 | 
146 | # Simulate counts
147 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
148 |                x = sample(1:20, 50, replace = TRUE))
149 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
150 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
151 | 
152 | # Create Seurat object
153 | seu <- CreateSeuratObject(counts = cnts)
154 | 
155 | # run CA and save in dim. reduction slot.
156 | seu <- cacomp(seu, return_input = TRUE, assay = "RNA", slot = "counts")
157 | 
158 | # pick dimensions
159 | pd <- pick_dims(obj = seu,
160 |                 method = "maj_inertia",
161 |                 assay = "RNA",
162 |                 slot = "counts")
163 | 
164 | ##############################################
165 | # pick_dims for SingleCellExperiment objects #
166 | ##############################################
167 | library(SingleCellExperiment)
168 | set.seed(1234)
169 | 
170 | # Simulate counts
171 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
172 |                x = sample(1:20, 50, replace = TRUE))
173 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
174 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
175 | 
176 | # Create SingleCellExperiment object
177 | sce <- SingleCellExperiment(assays=list(counts=cnts))
178 | 
179 | # run CA and save in dim. reduction slot.
180 | sce <- cacomp(sce, return_input = TRUE, assay = "counts")
181 | 
182 | # pick dimensions
183 | pd <- pick_dims(obj = sce,
184 |                 method = "maj_inertia",
185 |                 assay = "counts")
186 | }
187 | 


--------------------------------------------------------------------------------
/man/cacomp.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/CA.R
  3 | \name{cacomp}
  4 | \alias{cacomp}
  5 | \alias{cacomp,matrix-method}
  6 | \alias{cacomp,dgCMatrix-method}
  7 | \alias{cacomp,Seurat-method}
  8 | \alias{cacomp,SingleCellExperiment-method}
  9 | \title{Correspondence Analysis}
 10 | \usage{
 11 | cacomp(
 12 |   obj,
 13 |   coords = TRUE,
 14 |   princ_coords = 3,
 15 |   python = FALSE,
 16 |   dims = NULL,
 17 |   top = 5000,
 18 |   inertia = TRUE,
 19 |   rm_zeros = TRUE,
 20 |   residuals = "pearson",
 21 |   cutoff = NULL,
 22 |   clip = FALSE,
 23 |   ...
 24 | )
 25 | 
 26 | \S4method{cacomp}{matrix}(
 27 |   obj,
 28 |   coords = TRUE,
 29 |   princ_coords = 3,
 30 |   python = FALSE,
 31 |   dims = NULL,
 32 |   top = 5000,
 33 |   inertia = TRUE,
 34 |   rm_zeros = TRUE,
 35 |   residuals = "pearson",
 36 |   cutoff = NULL,
 37 |   clip = FALSE,
 38 |   ...
 39 | )
 40 | 
 41 | \S4method{cacomp}{dgCMatrix}(
 42 |   obj,
 43 |   coords = TRUE,
 44 |   princ_coords = 3,
 45 |   python = FALSE,
 46 |   dims = NULL,
 47 |   top = 5000,
 48 |   inertia = TRUE,
 49 |   rm_zeros = TRUE,
 50 |   residuals = "pearson",
 51 |   cutoff = NULL,
 52 |   clip = FALSE,
 53 |   ...
 54 | )
 55 | 
 56 | \S4method{cacomp}{Seurat}(
 57 |   obj,
 58 |   coords = TRUE,
 59 |   princ_coords = 3,
 60 |   python = FALSE,
 61 |   dims = NULL,
 62 |   top = 5000,
 63 |   inertia = TRUE,
 64 |   rm_zeros = TRUE,
 65 |   residuals = "pearson",
 66 |   cutoff = NULL,
 67 |   clip = FALSE,
 68 |   ...,
 69 |   assay = SeuratObject::DefaultAssay(obj),
 70 |   slot = "counts",
 71 |   return_input = FALSE
 72 | )
 73 | 
 74 | \S4method{cacomp}{SingleCellExperiment}(
 75 |   obj,
 76 |   coords = TRUE,
 77 |   princ_coords = 3,
 78 |   python = FALSE,
 79 |   dims = NULL,
 80 |   top = 5000,
 81 |   inertia = TRUE,
 82 |   rm_zeros = TRUE,
 83 |   residuals = "pearson",
 84 |   cutoff = NULL,
 85 |   clip = FALSE,
 86 |   ...,
 87 |   assay = "counts",
 88 |   return_input = FALSE
 89 | )
 90 | }
 91 | \arguments{
 92 | \item{obj}{A numeric matrix or Seurat/SingleCellExperiment object.
 93 | For sequencing a count matrix, gene expression values with genes in rows
 94 | and samples/cells in columns.
 95 | Should contain row and column names.}
 96 | 
 97 | \item{coords}{Logical. Indicates whether CA standard coordinates should be
 98 | calculated.}
 99 | 
100 | \item{princ_coords}{Integer. Number indicating whether principal
101 | coordinates should be calculated for the rows (=1), columns (=2),
102 | both (=3) or none (=0).}
103 | 
104 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
105 | decomposition from the python package torch.
106 | This implementation dramatically speeds up computation compared to `svd()`
107 | in R when calculating the full SVD. This parameter only works when dims==NULL
108 | or dims==rank(mat), where calculating a full SVD is demanded.}
109 | 
110 | \item{dims}{Integer. Number of CA dimensions to retain. If NULL:
111 | (0.2 * min(nrow(A), ncol(A)) - 1 ).}
112 | 
113 | \item{top}{Integer. Number of most variable rows to retain.
114 | Set NULL to keep all.}
115 | 
116 | \item{inertia}{Logical. Whether total, row and column inertias should be
117 | calculated and returned.}
118 | 
119 | \item{rm_zeros}{Logical. Whether rows & cols containing only 0s should be
120 | removed. Keeping zero only rows/cols might lead to unexpected results.}
121 | 
122 | \item{residuals}{character string. Specifies which kind of residuals should
123 | be calculated. Can be "pearson" (default), "freemantukey" or "NB" for
124 | negative-binomial.}
125 | 
126 | \item{cutoff}{numeric. Residuals that are larger than cutoff or lower than
127 | -cutoff are clipped to cutoff.}
128 | 
129 | \item{clip}{logical. Whether residuals should be clipped if they are
130 | higher/lower than a specified cutoff}
131 | 
132 | \item{...}{Other parameters}
133 | 
134 | \item{assay}{Character. The assay from which to extract the count matrix for
135 | SVD, e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
136 | SingleCellExperiments.}
137 | 
138 | \item{slot}{character. The slot of the Seurat assay. Default "counts".}
139 | 
140 | \item{return_input}{Logical. If TRUE returns the input
141 | (SingleCellExperiment/Seurat object) with the CA results saved in the
142 | reducedDim/DimReduc slot "CA".
143 | Otherwise returns a "cacomp" object. Default FALSE.}
144 | }
145 | \value{
146 | Returns a named list of class "cacomp" with components
147 | U, V and D: The results from the SVD.
148 | row_masses and col_masses: Row and columns masses.
149 | top_rows: How many of the most variable rows were retained for the analysis.
150 | tot_inertia, row_inertia and col_inertia: Only if inertia = TRUE.
151 | Total, row and column inertia respectively.
152 | 
153 | If return_input = TRUE with Seurat container: Returns input obj of class
154 | "Seurat" with a new Dimensional Reduction Object named "CA".
155 | Standard coordinates of the cells are saved as embeddings,
156 | the principal coordinates of the genes as loadings and
157 | the singular values (= square root of principal inertias/eigenvalues)
158 | are stored as stdev.
159 | To recompute a regular "cacomp" object without rerunning cacomp use
160 | `as.cacomp()`.
161 | 
162 | If return_input = TRUE for SingleCellExperiment input: Returns a
163 | SingleCellExperiment object with a matrix of standardized coordinates of
164 | the columns in
165 | reducedDim(obj, "CA"). Additionally, the matrix contains the following
166 | attributes:
167 | "prin_coords_rows": Principal coordinates of the rows.
168 | "singval": Singular values. For the explained inertia of each principal
169 | axis calculate singval^2.
170 | "percInertia": Percent explained inertia of each principal axis.
171 | To recompute a regular "cacomp" object from a SingleCellExperiment without
172 | rerunning cacomp use `as.cacomp()`.
173 | }
174 | \description{
175 | `cacomp` performs correspondence analysis on a matrix or
176 | Seurat/SingleCellExperiment object and returns the transformed data.
177 | 
178 | `cacomp.seurat` performs correspondence analysis on an assay from a Seurat
179 | container and stores the standardized coordinates of the columns (= cells)
180 | and the principal coordinates of the rows (= genes) as a DimReduc Object in
181 | the Seurat container.
182 | 
183 | `cacomp.SingleCellExperiment` performs correspondence analysis on an assay
184 | from a SingleCellExperiment and stores the standardized coordinates
185 |  of the columns (= cells) and the principal coordinates of the rows
186 |  (= genes) as a matrix in the SingleCellExperiment container.
187 | }
188 | \details{
189 | The calculation is performed according to the work of Michael Greenacre.
190 | Singular value decomposition can be performed either with the base R
191 | function `svd` or by the faster pytorch implementation
192 | (python = TRUE; note that the `python` argument is marked as deprecated).
193 | When working with large matrices, CA coordinates and principal coordinates
194 | should only be computed when needed to save computational time.
195 | }
196 | \examples{
197 | # Simulate scRNAseq data.
198 | cnts <- data.frame(cell_1 = rpois(10, 5),
199 |                    cell_2 = rpois(10, 10),
200 |                    cell_3 = rpois(10, 20))
201 | rownames(cnts) <- paste0("gene_", 1:10)
202 | cnts <- as.matrix(cnts)
203 | 
204 | # Run correspondence analysis.
205 | ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
206 | 
207 | ###########
208 | # Seurat  #
209 | ###########
210 | library(SeuratObject)
211 | set.seed(1234)
212 | 
213 | # Simulate counts
214 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
215 |                      x = sample(1:20, 50, replace = TRUE))
216 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
217 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
218 | 
219 | # Create Seurat object
220 | seu <- CreateSeuratObject(counts = cnts)
221 | 
222 | # Run CA and save in dim. reduction slot
223 | seu <- cacomp(seu, return_input = TRUE, assay = "RNA", slot = "counts")
224 | 
225 | # Run CA and return cacomp object
226 | ca <- cacomp(seu, return_input = FALSE, assay = "RNA", slot = "counts")
227 | 
228 | ########################
229 | # SingleCellExperiment #
230 | ########################
231 | library(SingleCellExperiment)
232 | set.seed(1234)
233 | 
234 | # Simulate counts
235 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
236 |                x = sample(1:20, 50, replace = TRUE))
237 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
238 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
239 | logcnts <- log2(cnts + 1)
240 | 
241 | # Create SingleCellExperiment object
242 | sce <- SingleCellExperiment(assays=list(counts=cnts, logcounts=logcnts))
243 | 
244 | # run CA and save in dim. reduction slot.
245 | sce <- cacomp(sce, return_input = TRUE, assay = "counts") # on counts
246 | sce <- cacomp(sce, return_input = TRUE, assay = "logcounts") # on logcounts
247 | 
248 | # run CA and return cacomp object.
249 | ca <- cacomp(sce, return_input = FALSE, assay = "counts")
250 | }
251 | \references{
252 | Greenacre, M. Correspondence Analysis in Practice, Third Edition, 2017.
253 | }
254 | 


--------------------------------------------------------------------------------
/man/runAPL.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/apl.R
  3 | \name{run_APL}
  4 | \alias{run_APL}
  5 | \alias{runAPL}
  6 | \alias{runAPL,matrix-method}
  7 | \alias{runAPL,SingleCellExperiment-method}
  8 | \alias{runAPL,Seurat-method}
  9 | \alias{runAPL,dgCMatrix-method}
 10 | \title{Compute and plot Association Plot}
 11 | \usage{
 12 | run_APL(
 13 |   obj,
 14 |   group,
 15 |   caobj = NULL,
 16 |   dims = NULL,
 17 |   nrow = 10,
 18 |   top = 5000,
 19 |   clip = FALSE,
 20 |   score = TRUE,
 21 |   score_method = "permutation",
 22 |   mark_rows = NULL,
 23 |   mark_cols = NULL,
 24 |   reps = 3,
 25 |   python = FALSE,
 26 |   row_labs = TRUE,
 27 |   col_labs = TRUE,
 28 |   type = "plotly",
 29 |   show_cols = FALSE,
 30 |   show_rows = TRUE,
 31 |   score_cutoff = 0,
 32 |   score_color = "rainbow",
 33 |   pd_method = "elbow_rule",
 34 |   pd_reps = 1,
 35 |   pd_use = TRUE
 36 | )
 37 | 
 38 | runAPL(
 39 |   obj,
 40 |   group,
 41 |   caobj = NULL,
 42 |   dims = NULL,
 43 |   nrow = 10,
 44 |   top = 5000,
 45 |   clip = FALSE,
 46 |   score = TRUE,
 47 |   score_method = "permutation",
 48 |   mark_rows = NULL,
 49 |   mark_cols = caobj@group,
 50 |   reps = 3,
 51 |   python = FALSE,
 52 |   row_labs = TRUE,
 53 |   col_labs = TRUE,
 54 |   type = "plotly",
 55 |   show_cols = FALSE,
 56 |   show_rows = TRUE,
 57 |   score_cutoff = 0,
 58 |   score_color = "rainbow",
 59 |   pd_method = "elbow_rule",
 60 |   pd_reps = 1,
 61 |   pd_use = TRUE,
 62 |   ...
 63 | )
 64 | 
 65 | \S4method{runAPL}{matrix}(
 66 |   obj,
 67 |   group,
 68 |   caobj = NULL,
 69 |   dims = NULL,
 70 |   nrow = 10,
 71 |   top = 5000,
 72 |   clip = FALSE,
 73 |   score = TRUE,
 74 |   score_method = "permutation",
 75 |   mark_rows = NULL,
 76 |   mark_cols = NULL,
 77 |   reps = 3,
 78 |   python = FALSE,
 79 |   row_labs = TRUE,
 80 |   col_labs = TRUE,
 81 |   type = "plotly",
 82 |   show_cols = FALSE,
 83 |   show_rows = TRUE,
 84 |   score_cutoff = 0,
 85 |   score_color = "rainbow",
 86 |   pd_method = "elbow_rule",
 87 |   pd_reps = 1,
 88 |   pd_use = TRUE,
 89 |   ...
 90 | )
 91 | 
 92 | \S4method{runAPL}{SingleCellExperiment}(
 93 |   obj,
 94 |   group,
 95 |   caobj = NULL,
 96 |   dims = NULL,
 97 |   nrow = 10,
 98 |   top = 5000,
 99 |   clip = FALSE,
100 |   score = TRUE,
101 |   score_method = "permutation",
102 |   mark_rows = NULL,
103 |   mark_cols = NULL,
104 |   reps = 3,
105 |   python = FALSE,
106 |   row_labs = TRUE,
107 |   col_labs = TRUE,
108 |   type = "plotly",
109 |   show_cols = FALSE,
110 |   show_rows = TRUE,
111 |   score_cutoff = 0,
112 |   score_color = "rainbow",
113 |   pd_method = "elbow_rule",
114 |   pd_reps = 1,
115 |   pd_use = TRUE,
116 |   ...,
117 |   assay = "counts"
118 | )
119 | 
120 | \S4method{runAPL}{Seurat}(
121 |   obj,
122 |   group,
123 |   caobj = NULL,
124 |   dims = NULL,
125 |   nrow = 10,
126 |   top = 5000,
127 |   clip = FALSE,
128 |   score = TRUE,
129 |   score_method = "permutation",
130 |   mark_rows = NULL,
131 |   mark_cols = NULL,
132 |   reps = 3,
133 |   python = FALSE,
134 |   row_labs = TRUE,
135 |   col_labs = TRUE,
136 |   type = "plotly",
137 |   show_cols = FALSE,
138 |   show_rows = TRUE,
139 |   score_cutoff = 0,
140 |   score_color = "rainbow",
141 |   pd_method = "elbow_rule",
142 |   pd_reps = 1,
143 |   pd_use = TRUE,
144 |   ...,
145 |   assay = SeuratObject::DefaultAssay(obj),
146 |   slot = "counts"
147 | )
148 | 
149 | \S4method{runAPL}{dgCMatrix}(
150 |   obj,
151 |   group,
152 |   caobj = NULL,
153 |   dims = NULL,
154 |   nrow = 10,
155 |   top = 5000,
156 |   clip = FALSE,
157 |   score = TRUE,
158 |   score_method = "permutation",
159 |   mark_rows = NULL,
160 |   mark_cols = NULL,
161 |   reps = 3,
162 |   python = FALSE,
163 |   row_labs = TRUE,
164 |   col_labs = TRUE,
165 |   type = "plotly",
166 |   show_cols = FALSE,
167 |   show_rows = TRUE,
168 |   score_cutoff = 0,
169 |   score_color = "rainbow",
170 |   pd_method = "elbow_rule",
171 |   pd_reps = 1,
172 |   pd_use = TRUE,
173 |   ...
174 | )
175 | }
176 | \arguments{
177 | \item{obj}{A numeric matrix. For sequencing usually a count matrix,
178 | gene expression values with genes in rows and samples/cells in columns.
179 | Should contain row and column names.}
180 | 
181 | \item{group}{Numeric/Character. Vector of indices or column names of
182 | the columns to calculate centroid/x-axis direction.}
183 | 
184 | \item{caobj}{A "cacomp" object as outputted from `cacomp()`. If not supplied
185 | will be calculated. Default NULL.}
186 | 
187 | \item{dims}{Integer. Number of CA dimensions to retain. If NULL:
188 | (0.2 * min(nrow(A), ncol(A)) - 1 ).}
189 | 
190 | \item{nrow}{Integer. The top nrow scored row labels will be added to the
191 | plot if score = TRUE. Default 10.}
192 | 
193 | \item{top}{Integer. Number of most variable rows to retain.
194 | Set NULL to keep all.}
195 | 
196 | \item{clip}{logical. Whether residuals should be clipped if they are
197 | higher/lower than a specified cutoff}
198 | 
199 | \item{score}{Logical. Whether rows should be scored and ranked. Ignored when
200 | a vector is supplied to mark_rows. Default TRUE.}
201 | 
202 | \item{score_method}{Method to calculate the cutoff. Either "random" for random
203 | direction method or "permutation" for the permutation method.}
204 | 
205 | \item{mark_rows}{Character vector. Names of rows that should be highlighted
206 | in the plot. If not NULL, score is ignored. Default NULL.}
207 | 
208 | \item{mark_cols}{Character vector. Names of cols that should be highlighted
209 | in the plot.}
210 | 
211 | \item{reps}{Integer. Number of permutations during scoring. Default 3.}
212 | 
213 | \item{python}{DEPRECATED. A logical value indicating whether to use singular-value
214 | decomposition from the python package torch.
215 | This implementation dramatically speeds up computation compared to `svd()`
216 | in R when calculating the full SVD. This parameter only works when dims==NULL
217 | or dims==rank(mat), where calculating a full SVD is demanded.}
218 | 
219 | \item{row_labs}{Logical. Whether labels for rows indicated by rows_idx
220 | should be labeled with text. Default TRUE.}
221 | 
222 | \item{col_labs}{Logical. Whether labels for columns indicated by cols_idx
223 | should be labeled with text. Default TRUE.}
224 | 
225 | \item{type}{"ggplot"/"plotly". For a static plot a string "ggplot",
226 | for an interactive plot "plotly". Default "plotly".}
227 | 
228 | \item{show_cols}{Logical. Whether column points should be plotted.}
229 | 
230 | \item{show_rows}{Logical. Whether row points should be plotted.}
231 | 
232 | \item{score_cutoff}{Numeric. Rows (genes) with a score >= score_cutoff will
233 | be colored according to their score if show_score = TRUE.}
234 | 
235 | \item{score_color}{Either "rainbow" or "viridis".}
236 | 
237 | \item{pd_method}{Which method to use for pick_dims (\link[APL]{pick_dims}).}
238 | 
239 | \item{pd_reps}{Number of repetitions performed when using "elbow_rule" in
240 | `pick_dims`.
241 | (\link[APL]{pick_dims})}
242 | 
243 | \item{pd_use}{Whether to use `pick_dims` (\link[APL]{pick_dims}) to determine
244 | the number of dimensions. Ignored when `dims` is set by the user.}
245 | 
246 | \item{...}{Arguments forwarded to methods.}
247 | 
248 | \item{assay}{Character. The assay from which to extract the count matrix for
249 | SVD, e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
250 | SingleCellExperiments.}
251 | 
252 | \item{slot}{character. The Seurat assay slot from which to extract the
253 | count matrix.}
254 | }
255 | \value{
256 | Association Plot (plotly or ggplot object, depending on `type`).
257 | }
258 | \description{
259 | Computes singular value decomposition and coordinates for
260 | the Association Plot.
261 | 
262 | runAPL.SingleCellExperiment: Computes singular value decomposition and
263 | coordinates for the Association Plot from SingleCellExperiment objects with
264 | reducedDim(obj, "CA") slot (optional).
265 | 
266 | runAPL.Seurat: Computes singular value decomposition and coordinates for
267 | the Association Plot from Seurat objects, optionally with a DimReduc Object
268 | in the "CA" slot.
269 | }
270 | \details{
271 | The function is a wrapper that calls `cacomp()`, `apl_coords()`,
272 | `apl_score()` and finally `apl()` for ease of use.
273 | The chosen defaults are most useful for genomics experiments, but for more
274 | fine-grained control the functions
275 | can also be run individually for the same results.
276 | If score = FALSE, nrow and reps are ignored. If mark_rows is not NULL score
277 | is treated as if FALSE.
278 | }
279 | \examples{
280 | set.seed(1234)
281 | 
282 | # Simulate counts
283 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
284 |                x = sample(1:100, 50, replace = TRUE))
285 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
286 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
287 | 
288 | # (nonsensical) APL
289 | APL:::run_APL(obj = cnts,
290 |        group = 1:10,
291 |        dims = 10,
292 |        top = 500,
293 |        score = TRUE,
294 |        show_cols = TRUE,
295 |        type = "ggplot")
296 | set.seed(1234)
297 | 
298 | # Simulate counts
299 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
300 |                x = sample(1:100, 50, replace = TRUE))
301 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
302 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
303 | 
304 | # (nonsensical) APL
305 | runAPL(obj = cnts,
306 |        group = 1:10,
307 |        dims = 10,
308 |        top = 500,
309 |        score = TRUE,
310 |        show_cols = TRUE,
311 |        type = "ggplot")
312 | 
313 | ########################
314 | # SingleCellExperiment #
315 | ########################
316 | library(SingleCellExperiment)
317 | set.seed(1234)
318 | 
319 | # Simulate counts
320 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
321 |                x = sample(1:100, 50, replace = TRUE))
322 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
323 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
324 | 
325 | sce <- SingleCellExperiment(assays=list(counts=cnts))
326 | 
327 | # (nonsensical) APL
328 | runAPL(obj = sce,
329 |        group = 1:10,
330 |        dims = 10,
331 |        top = 500,
332 |        score = TRUE,
333 |        show_cols = TRUE,
334 |        type = "ggplot",
335 |        assay = "counts")
336 | 
337 | ###########
338 | # Seurat  #
339 | ###########
340 | library(SeuratObject)
341 | set.seed(1234)
342 | 
343 | # Simulate counts
344 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
345 |                x = sample(1:100, 50, replace = TRUE))
346 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
347 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
348 | 
349 | seu <- CreateSeuratObject(counts = cnts)
350 | 
351 | # (nonsensical) APL
352 | runAPL(obj = seu,
353 |        group = 1:10,
354 |        dims = 10,
355 |        top = 500,
356 |        score = TRUE,
357 |        show_cols = TRUE,
358 |        type = "ggplot",
359 |        assay = "RNA",
360 |        slot = "counts")
361 | set.seed(1234)
362 | 
363 | # Simulate counts
364 | cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
365 |                x = sample(seq(0.01,0.1,by=0.01), 50, replace = TRUE))
366 | rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
367 | colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
368 | cnts <- Matrix::Matrix(cnts)
369 | 
370 | # (nonsensical) APL
371 | runAPL(obj = cnts,
372 |        group = 1:10,
373 |        dims = 10,
374 |        top = 500,
375 |        score = TRUE,
376 |        show_cols = TRUE,
377 |        type = "ggplot")
378 | }
379 | \references{
380 | Association Plots: Visualizing associations in high-dimensional
381 | correspondence analysis biplots \cr
382 | Elzbieta Gralinska, Martin Vingron \cr
383 | bioRxiv 2020.10.23.352096; doi: https://doi.org/10.1101/2020.10.23.352096 \cr
384 | }
385 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/input_data.tsv:
--------------------------------------------------------------------------------
 1 | Country.Name	UEM.M.2015	UEM.F.2015	UEM.M.2010	UEM.F.2010	UEM.M.2000	AGR.M.2015	AGR.F.2015	AGR.M.2010	AGR.F.2010	AGR.M.2000	IND.M.2015	IND.F.2015	IND.M.2010	IND.F.2010	IND.M.2000	SRV.M.2015	SRV.F.2015	SRV.M.2010	SRV.F.2010	SRV.M.2000
 2 | Afghanistan	10.6840000152588	14.4270000457764	10.878999710083	14.8149995803833	10.9519996643066	37.8530144169035	58.5918341132492	45.9080105940595	62.1560903177743	56.5846601919507	14.2539401104799	18.7687265630838	12.5339775963636	13.8681186532326	8.56641753038635	37.2090454573578	8.21329616999908	30.6799036698646	9.16164689261626	23.8960300748246
 3 | Angola	6.88299989700317	7.68100023269653	7.94799995422363	10.9219999313354	4.83599996566772	40.7833840176982	53.0400341192767	39.1699653458642	49.4373997474282	28.2151745299088	14.9266557558941	1.08105549079682	13.1395026410404	1.03508630812801	14.7837272601248	37.4069621054705	38.1979088365961	39.7416102887755	38.6055150750009	52.1651000594079
 4 | Albania	17.068000793457	17.0979995727539	12.7620000839233	15.8809995651245	16.3409996032715	30.5040491882837	39.6420792220923	29.8886125356392	44.6554113386461	36.4117418774221	18.2201596562287	11.516746063801	24.1370091778299	8.9527849210664	14.9247657984299	34.207791943833	31.743176722583	33.2123782026077	30.5108025707205	32.3233288513279
 5 | Arab World	8.19390895838925	20.0161802509865	6.9908812598603	17.9412175476643	11.1299650216614	17.8642307423098	24.6449124655064	19.672914073688	28.4233914901091	24.9142486487184	25.777529922088	7.68758538616177	26.2659536398542	8.43280133428351	20.4083797382762	48.164425070158	47.6515339471448	47.0699117483611	45.2025348058708	43.5474153392269
 6 | United Arab Emirates	1.37399995326996	4.70300006866455	2.1010000705719	5.88299989700317	2.18600010871887	2.31771090704107	0.01238861024668	3.72016195063638	0.019764569755163	8.76217828936375	40.5599425192177	5.87315420755638	40.0132808644816	7.06065721129623	35.2687931646872	55.7483480313275	89.4114548308246	54.1655578145382	87.0365757151367	53.7830284372302
 7 | Argentina	6.96500015258789	8.85099983215332	6.66699981689453	9.19600009918213	14.0200004577637	0.365627555611086	0.06562727729574	1.75839367093423	0.347779307279997	0.844323589764976	31.7854085496292	7.89076856900818	30.6925555436508	8.5755299723227	27.1559227405518	60.8839658493927	83.1926073368043	60.8811191525665	81.879780130292	57.980610592186
 8 | Armenia	17.3859996795654	19.2520008087158	17.0849990844727	21.2849998474121	8.03800010681152	25.7987005495129	32.3646065908367	26.2351360596215	37.2101543676712	35.7070045046545	19.1168791699127	6.08355419438523	22.1424509312325	4.58436151580181	20.8854902026784	37.6984206010089	42.2998380210259	34.53658523013	36.9212698606949	35.3695034318197
 9 | Australia	6.03900003433228	6.07299995422363	5.07600021362305	5.3769998550415	6.45499992370605	3.10729019538592	1.73858872516368	3.80265547127396	2.16024308609024	5.57902401508923	27.5719157008897	7.19386885826237	29.5175676108789	8.14987892307449	28.4984844159813	63.2817929492897	84.9945403349258	61.6037798726544	84.3128745262128	59.4674911991661
10 | Austria	6.08500003814697	5.30800008773804	4.98400020599365	4.62900018692017	4.75899982452393	4.52200730190182	3.98558638106129	5.13371435288384	4.761874049772	5.52207305574734	35.4895407112927	11.494661811554	34.6456851937484	10.9390539332283	40.2555119165146	53.9034532921234	79.2117521711732	55.2375498854336	79.6700745586658	49.4643666358969
11 | Azerbaijan	4.08799982070923	5.86700010299683	4.40000009536743	6.92500019073486	10.956000328064	29.713537216494	39.5932818623105	30.9801342769566	41.4435055627308	32.5553774537565	20.8925119064255	5.53219626749309	19.8570758926669	5.67385216944981	12.2551252604439	45.3069096495127	49.0075226649218	44.7627897350092	45.9576416332684	44.2334961085458
12 | Burundi	2.01500010490417	1.12600004673004	2.26600003242493	1.2940000295639	2.41199994087219	85.6976800111714	95.0287901554352	85.4850005552875	95.0992833942153	85.1367445108122	2.88761797307329	0.438011808713489	3.12944258389122	0.494517047143879	3.56293790449487	9.39872119741325	3.40719813645502	9.11955985760446	3.11318728236891	8.88733950787196
13 | Belgium	9.09200000762939	7.76800012588501	8.10799980163574	8.51599979400635	5.30800008773804	1.42180112941731	0.672371253574105	1.58973162095871	0.814207588746452	2.30101566108169	29.7769163685032	7.71059487282257	31.493225719076	9.21518329887166	34.2321058130084	59.7083746724182	83.8490362765479	58.8090432965048	81.4536972711161	58.1588782124085
14 | Benin	2.39499998092651	2.77600002288818	0.935000002384186	1.14600002765656	1.11300003528595	47.5951266358878	32.3668421884252	50.655895980188	36.3584999831141	52.9193774275583	19.9416773156035	16.3141878637436	19.7050198902156	17.3538199968234	19.6221478104001	30.0691715814223	48.5429717793437	28.7040841272121	45.1426679893845	26.3454747267556
15 | Burkina Faso	4.01800012588501	9.38799953460693	3.55900001525879	6.20900011062622	2.43199992179871	34.1551937764624	18.7358433777082	53.6433784810451	35.8900629300036	82.7396173319413	30.0481246462594	29.7533551454923	18.0942595783406	22.1506209877055	4.65301797583635	31.7796407441471	42.1228002139059	24.7033619253556	35.7503159716647	10.1753666313856
16 | Bangladesh	3.16100001335144	7.46700000762939	3.0090000629425	4.44000005722046	3.25200009346008	34.6034815201925	59.2229696385192	39.7973485445393	62.675890312812	58.7560268489109	20.9695164069447	14.2167704153356	18.629062068294	12.4447785665381	10.9305893542175	41.2650342014641	19.0941847602169	38.5645930241373	20.4383732543633	27.0613837034115
17 | Bulgaria	9.77600002288818	8.41600036621094	10.8570003509521	9.60599994659424	16.5620002746582	8.19955738637818	3.96467134065307	7.32755480918923	4.70139213588005	12.8653049039204	32.6728182406129	20.8271166440149	36.3132923914448	21.7587389804456	31.3760254931073	49.3516269314502	66.7922112124146	45.50215159828	63.9329620453712	39.1966701240408
18 | Bahrain	0.404000014066696	3.74499988555908	0.442999988794327	3.7039999961853	0.649999976158142	1.34454602355564	0.049090049140406	1.34501502172946	0.048148000719368	2.00388946068692	41.9538195811717	8.85738478740921	42.4152643759212	8.70804749340221	32.3414069024451	56.2976359246658	87.3485265400852	55.7977176542525	87.5398024900401	65.0047053187919
19 | Bahamas, The	11.0880002975464	13.003999710083	14.6370000839233	15.0539999008179	5.99100017547607	3.73963857520798	0.334934592819595	4.11961855830491	0.411138638252244	6.36628931391194	22.2626752120574	2.65859776388191	23.4440948961179	3.04276573813465	25.5290841214494	62.9096829474301	84.0024662738985	57.7984333005678	81.4920941278947	62.1145646179444
20 | Bosnia and Herzegovina	25.7479991912842	30.6539993286133	25.6830005645752	29.9309997558594	23.1609992980957	12.9220756101012	12.2645342501349	13.3049723182051	15.9484049210435	17.859688956593	28.9226391057519	11.8345884639975	29.2905576493788	11.420546200109	28.3451389220266	32.4080282049924	45.2461862008465	31.7214666328724	42.700047786528	30.6334048553668
21 | Belarus	7.54400014877319	4.26200008392334	7.49100017547607	4.74100017547607	13.7880001068115	11.8639536963483	6.34934412521728	13.0927986322389	7.57785336336403	16.543219949139	38.8537077692056	19.4702369406733	39.4782149693279	20.8055177737547	33.0459227840703	41.7383375039438	69.9184170241287	39.9379862229572	66.8756305043265	36.6228571599792
22 | Belize	5.43100023269653	11.0649995803833	5.87599992752075	12.956000328064	7.23000001907349	24.4943157494942	4.06966580918732	25.4520712042915	3.3929751088723	32.2097451258239	18.6499530790336	7.1112424673151	19.2295335020512	8.06201532266494	18.4751463455362	51.4247291350152	77.7540934153397	49.4423953661366	75.5890061274627	42.085112048461
23 | Bolivia	2.56399989128113	3.79699993133545	2.15499997138977	3.07699990272522	2.32699990272522	26.8367967963018	27.2244867839196	28.8975426966287	29.6516538190697	39.243059350225	29.5728002294542	10.5688616462612	27.7214461247441	10.2670537996148	24.4514592318484	41.0264012245186	58.4096516384838	41.2260112072374	57.0042924785903	33.9784815152013
24 | Brazil	7.21299982070923	10.0550003051758	5.60599994659424	10.6359996795654	7.90399980545044	12.940075137032	4.51883693563766	15.6609081743726	6.23760723941117	19.1080773755702	27.6254742288144	10.4875868271713	27.7962016710379	11.2670129831341	25.0206406583239	52.2214499285586	74.9376786916657	50.9359449860937	71.8593822284931	47.9663599499601
25 | Barbados	12.3529996871948	10.3219995498657	10.9020004272461	10.4720001220703	7.43800020217896	3.70133286667484	1.39897675570793	3.35364864953735	1.69476504366585	4.60403384875786	25.4220117051662	8.68979786731415	25.4713355332621	7.82922348686829	27.5890294337571	58.5236578306314	79.5892253994942	60.273017089364	80.0040108137672	60.3689373980461
26 | Brunei Darussalam	7.26999998092651	8.79800033569336	5.87599992752075	7.96700000762939	4.51399993896484	0.88278962052145	0.416793126941428	0.739814653136596	0.517225438453593	1.32439078715342	21.4187750082203	9.38103791508986	23.8933776338316	9.63493464281319	28.2056091354989	70.4284321845913	81.4041722916177	69.4908102540112	81.8808413922143	65.9550429571979
27 | Bhutan	1.89600002765656	3.24499988555908	2.75699996948242	4.05399990081787	1.22200000286102	51.2946567369433	64.1505020656227	53.3552892864206	63.1852415861599	57.4966984192026	9.74761331001144	8.93629186962433	6.38011310587837	6.44853085332855	8.03460234643553	37.0617289897961	23.6682080246491	37.5066248150085	26.312230862237	33.2457129361251
28 | Botswana	14.8100004196167	20.8439998626709	14.6619997024536	21.5359992980957	14.7600002288818	22.4254149825805	13.954411034723	24.444216036958	15.4111149645212	18.8320735006111	21.7737120057491	7.88631252920792	20.934264970185	8.10533121264347	22.6900350968499	40.9908725920536	57.3144831835424	39.9586663791761	54.9475545247397	43.7178895478333
29 | Central African Republic	3.94400000572205	3.76900005340576	4.01599979400635	3.82100009918213	4.08400011062622	71.9738010565036	79.6860033622247	72.024476007845	79.9747624084491	72.8395712908062	7.24358285330538	2.58572700343874	8.32085266296868	3.06811015187016	8.81372135423733	16.8386174585611	13.9592684337679	15.6386724505547	13.1361277991158	14.2627090737827
30 | Canada	7.45900011062622	6.28499984741211	8.78899955749512	7.23299980163574	6.94199991226196	2.04423078993088	0.951207238142733	2.22463620162913	1.03064131823743	3.01880148379753	27.9075889553813	8.0135693270031	27.9798870385329	7.92601219261315	30.9808692188154	62.589182350411	84.7502214648194	61.0064795944466	83.8103474616218	59.0583298288602
31 | Central Europe and the Baltics	7.74020473088915	7.90567223682592	9.96677631874455	9.81944761452723	11.9682940640283	11.5688662629283	9.20035554902194	12.5609959883593	11.5927911055313	18.8309308218994	37.5896041916351	17.2087211486188	36.9529718789418	16.9165246743873	34.4635746890898	43.1009681723622	65.6852633119561	40.519477755266	61.6714565500074	34.7367773326694
32 | Switzerland	4.7039999961853	4.91200017929077	4.48400020599365	5.18800020217896	2.29699993133545	3.74513286376686	2.73092733490052	3.94958681010094	2.53811727798916	5.38441248659413	28.5783177465053	9.60103545797122	30.8402042936878	10.1572098204503	34.1344974023386	62.9715985474426	82.7560354408853	60.7252522330816	82.1166736035794	58.1850690024549
33 | Channel Islands	7.89499998092651	8.32600021362305	8.26599979400635	8.56599998474121	8.1899995803833	4.83551250100136	2.26984829940678	5.48477595630134	2.82531052199306	6.66540624060059	32.3868794983221	10.8028644985235	34.0048767603197	12.3161600462105	38.8741895091103	54.8816839621708	78.6012854584717	52.2443474893725	76.2934456827455	46.2704029187691
34 | Chile	6.06400012969971	7.13700008392334	7.44299983978271	9.90900039672852	9.43200016021729	11.9759002436269	4.20947977911087	13.0718252667967	4.63157810402245	17.678873983313	29.7561067219155	10.4248002147113	28.8065144113413	9.25324648677127	26.3833645031928	52.2039938006014	78.2287172654227	50.6786613647715	76.2061724349536	46.5057613532769
35 | China	5.11499977111816	4.01200008392334	5.0019998550415	3.92300009727478	3.61199998855591	29.4561000646645	24.4548620502283	37.2563142762865	32.2261471314498	51.6157732707462	29.2663293402916	26.0108278071535	27.8448646052482	26.8381491674875	20.0246066346865	36.1616224951424	45.522308227869	29.8968212634238	37.0136656785436	24.7476182675561
36 | Cote d'Ivoire	2.5220000743866	3.94799995422363	6.76200008392334	6.61499977111816	4.58099985122681	47.171552058119	34.9715730777324	47.0068703791369	39.4560959337257	49.0663591009697	12.8105589435857	10.092183615497	10.7410180171417	10.3769416600407	12.4483623080556	37.4968613092001	50.988243352547	35.4891796519704	43.5519635257042	33.9042805597209
37 | Cameroon	3.06299996376038	4.00600004196167	3.41300010681152	4.89900016784668	8.4680004119873	40.5565032880577	49.8170461303385	48.0365796469669	58.1961060694681	57.8253400413932	16.9232616438237	10.3491132673038	13.7481937637596	9.33986928783898	10.8117600427362	39.4572351043582	35.8278423913364	34.8022292458383	27.5640730802995	22.8958143210788
38 | Congo, Dem. Rep.	5.19600009918213	3.56200003623962	4.72800016403198	3.19199991226196	3.53399991989136	56.5562725027826	72.0112166573572	59.4430603404656	75.3756770074007	62.6701049092212	14.5125959359357	3.36857936816469	13.1199070101871	3.16562158440437	12.3968455083798	23.7360780772033	21.0582020988296	22.7090343024846	18.2667010343166	21.3990533423936
39 | Congo, Rep.	9.0930004119873	10.7670001983643	12.4320001602173	15.5559997558594	18.6159992218018	31.500185315006	31.4938938062558	32.0253676721182	32.3673868397949	32.7342728923526	21.7085907386955	19.7463702674023	20.7273456288856	19.4052308695495	19.0975692997385	37.6982252682246	37.9927340259931	34.8152848685518	32.6713825347961	29.5521585861072
40 | Colombia	6.36199998855591	10.8400001525879	8.63500022888184	14.1890001296997	17.3519992828369	20.9589929996166	6.43735180191803	23.9476804943024	6.00934413433163	26.6986108578931	22.5227476712479	12.4815080181137	20.8759896481155	13.6113411987855	15.8882524271435	50.1553216889453	70.2411379016403	46.5413296287003	66.1903165830769	40.061139008512
41 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/AP_coordinates/input_data.txt:
--------------------------------------------------------------------------------
 1 | Country.Name	UEM.M.2015	UEM.F.2015	UEM.M.2010	UEM.F.2010	UEM.M.2000	AGR.M.2015	AGR.F.2015	AGR.M.2010	AGR.F.2010	AGR.M.2000	IND.M.2015	IND.F.2015	IND.M.2010	IND.F.2010	IND.M.2000	SRV.M.2015	SRV.F.2015	SRV.M.2010	SRV.F.2010	SRV.M.2000
 2 | Afghanistan	10.6840000152588	14.4270000457764	10.878999710083	14.8149995803833	10.9519996643066	37.8530144169035	58.5918341132492	45.9080105940595	62.1560903177743	56.5846601919507	14.2539401104799	18.7687265630838	12.5339775963636	13.8681186532326	8.56641753038635	37.2090454573578	8.21329616999908	30.6799036698646	9.16164689261626	23.8960300748246
 3 | Angola	6.88299989700317	7.68100023269653	7.94799995422363	10.9219999313354	4.83599996566772	40.7833840176982	53.0400341192767	39.1699653458642	49.4373997474282	28.2151745299088	14.9266557558941	1.08105549079682	13.1395026410404	1.03508630812801	14.7837272601248	37.4069621054705	38.1979088365961	39.7416102887755	38.6055150750009	52.1651000594079
 4 | Albania	17.068000793457	17.0979995727539	12.7620000839233	15.8809995651245	16.3409996032715	30.5040491882837	39.6420792220923	29.8886125356392	44.6554113386461	36.4117418774221	18.2201596562287	11.516746063801	24.1370091778299	8.9527849210664	14.9247657984299	34.207791943833	31.743176722583	33.2123782026077	30.5108025707205	32.3233288513279
 5 | Arab World	8.19390895838925	20.0161802509865	6.9908812598603	17.9412175476643	11.1299650216614	17.8642307423098	24.6449124655064	19.672914073688	28.4233914901091	24.9142486487184	25.777529922088	7.68758538616177	26.2659536398542	8.43280133428351	20.4083797382762	48.164425070158	47.6515339471448	47.0699117483611	45.2025348058708	43.5474153392269
 6 | United Arab Emirates	1.37399995326996	4.70300006866455	2.1010000705719	5.88299989700317	2.18600010871887	2.31771090704107	0.01238861024668	3.72016195063638	0.019764569755163	8.76217828936375	40.5599425192177	5.87315420755638	40.0132808644816	7.06065721129623	35.2687931646872	55.7483480313275	89.4114548308246	54.1655578145382	87.0365757151367	53.7830284372302
 7 | Argentina	6.96500015258789	8.85099983215332	6.66699981689453	9.19600009918213	14.0200004577637	0.365627555611086	0.06562727729574	1.75839367093423	0.347779307279997	0.844323589764976	31.7854085496292	7.89076856900818	30.6925555436508	8.5755299723227	27.1559227405518	60.8839658493927	83.1926073368043	60.8811191525665	81.879780130292	57.980610592186
 8 | Armenia	17.3859996795654	19.2520008087158	17.0849990844727	21.2849998474121	8.03800010681152	25.7987005495129	32.3646065908367	26.2351360596215	37.2101543676712	35.7070045046545	19.1168791699127	6.08355419438523	22.1424509312325	4.58436151580181	20.8854902026784	37.6984206010089	42.2998380210259	34.53658523013	36.9212698606949	35.3695034318197
 9 | Australia	6.03900003433228	6.07299995422363	5.07600021362305	5.3769998550415	6.45499992370605	3.10729019538592	1.73858872516368	3.80265547127396	2.16024308609024	5.57902401508923	27.5719157008897	7.19386885826237	29.5175676108789	8.14987892307449	28.4984844159813	63.2817929492897	84.9945403349258	61.6037798726544	84.3128745262128	59.4674911991661
10 | Austria	6.08500003814697	5.30800008773804	4.98400020599365	4.62900018692017	4.75899982452393	4.52200730190182	3.98558638106129	5.13371435288384	4.761874049772	5.52207305574734	35.4895407112927	11.494661811554	34.6456851937484	10.9390539332283	40.2555119165146	53.9034532921234	79.2117521711732	55.2375498854336	79.6700745586658	49.4643666358969
11 | Azerbaijan	4.08799982070923	5.86700010299683	4.40000009536743	6.92500019073486	10.956000328064	29.713537216494	39.5932818623105	30.9801342769566	41.4435055627308	32.5553774537565	20.8925119064255	5.53219626749309	19.8570758926669	5.67385216944981	12.2551252604439	45.3069096495127	49.0075226649218	44.7627897350092	45.9576416332684	44.2334961085458
12 | Burundi	2.01500010490417	1.12600004673004	2.26600003242493	1.2940000295639	2.41199994087219	85.6976800111714	95.0287901554352	85.4850005552875	95.0992833942153	85.1367445108122	2.88761797307329	0.438011808713489	3.12944258389122	0.494517047143879	3.56293790449487	9.39872119741325	3.40719813645502	9.11955985760446	3.11318728236891	8.88733950787196
13 | Belgium	9.09200000762939	7.76800012588501	8.10799980163574	8.51599979400635	5.30800008773804	1.42180112941731	0.672371253574105	1.58973162095871	0.814207588746452	2.30101566108169	29.7769163685032	7.71059487282257	31.493225719076	9.21518329887166	34.2321058130084	59.7083746724182	83.8490362765479	58.8090432965048	81.4536972711161	58.1588782124085
14 | Benin	2.39499998092651	2.77600002288818	0.935000002384186	1.14600002765656	1.11300003528595	47.5951266358878	32.3668421884252	50.655895980188	36.3584999831141	52.9193774275583	19.9416773156035	16.3141878637436	19.7050198902156	17.3538199968234	19.6221478104001	30.0691715814223	48.5429717793437	28.7040841272121	45.1426679893845	26.3454747267556
15 | Burkina Faso	4.01800012588501	9.38799953460693	3.55900001525879	6.20900011062622	2.43199992179871	34.1551937764624	18.7358433777082	53.6433784810451	35.8900629300036	82.7396173319413	30.0481246462594	29.7533551454923	18.0942595783406	22.1506209877055	4.65301797583635	31.7796407441471	42.1228002139059	24.7033619253556	35.7503159716647	10.1753666313856
16 | Bangladesh	3.16100001335144	7.46700000762939	3.0090000629425	4.44000005722046	3.25200009346008	34.6034815201925	59.2229696385192	39.7973485445393	62.675890312812	58.7560268489109	20.9695164069447	14.2167704153356	18.629062068294	12.4447785665381	10.9305893542175	41.2650342014641	19.0941847602169	38.5645930241373	20.4383732543633	27.0613837034115
17 | Bulgaria	9.77600002288818	8.41600036621094	10.8570003509521	9.60599994659424	16.5620002746582	8.19955738637818	3.96467134065307	7.32755480918923	4.70139213588005	12.8653049039204	32.6728182406129	20.8271166440149	36.3132923914448	21.7587389804456	31.3760254931073	49.3516269314502	66.7922112124146	45.50215159828	63.9329620453712	39.1966701240408
18 | Bahrain	0.404000014066696	3.74499988555908	0.442999988794327	3.7039999961853	0.649999976158142	1.34454602355564	0.049090049140406	1.34501502172946	0.048148000719368	2.00388946068692	41.9538195811717	8.85738478740921	42.4152643759212	8.70804749340221	32.3414069024451	56.2976359246658	87.3485265400852	55.7977176542525	87.5398024900401	65.0047053187919
19 | Bahamas, The	11.0880002975464	13.003999710083	14.6370000839233	15.0539999008179	5.99100017547607	3.73963857520798	0.334934592819595	4.11961855830491	0.411138638252244	6.36628931391194	22.2626752120574	2.65859776388191	23.4440948961179	3.04276573813465	25.5290841214494	62.9096829474301	84.0024662738985	57.7984333005678	81.4920941278947	62.1145646179444
20 | Bosnia and Herzegovina	25.7479991912842	30.6539993286133	25.6830005645752	29.9309997558594	23.1609992980957	12.9220756101012	12.2645342501349	13.3049723182051	15.9484049210435	17.859688956593	28.9226391057519	11.8345884639975	29.2905576493788	11.420546200109	28.3451389220266	32.4080282049924	45.2461862008465	31.7214666328724	42.700047786528	30.6334048553668
21 | Belarus	7.54400014877319	4.26200008392334	7.49100017547607	4.74100017547607	13.7880001068115	11.8639536963483	6.34934412521728	13.0927986322389	7.57785336336403	16.543219949139	38.8537077692056	19.4702369406733	39.4782149693279	20.8055177737547	33.0459227840703	41.7383375039438	69.9184170241287	39.9379862229572	66.8756305043265	36.6228571599792
22 | Belize	5.43100023269653	11.0649995803833	5.87599992752075	12.956000328064	7.23000001907349	24.4943157494942	4.06966580918732	25.4520712042915	3.3929751088723	32.2097451258239	18.6499530790336	7.1112424673151	19.2295335020512	8.06201532266494	18.4751463455362	51.4247291350152	77.7540934153397	49.4423953661366	75.5890061274627	42.085112048461
23 | Bolivia	2.56399989128113	3.79699993133545	2.15499997138977	3.07699990272522	2.32699990272522	26.8367967963018	27.2244867839196	28.8975426966287	29.6516538190697	39.243059350225	29.5728002294542	10.5688616462612	27.7214461247441	10.2670537996148	24.4514592318484	41.0264012245186	58.4096516384838	41.2260112072374	57.0042924785903	33.9784815152013
24 | Brazil	7.21299982070923	10.0550003051758	5.60599994659424	10.6359996795654	7.90399980545044	12.940075137032	4.51883693563766	15.6609081743726	6.23760723941117	19.1080773755702	27.6254742288144	10.4875868271713	27.7962016710379	11.2670129831341	25.0206406583239	52.2214499285586	74.9376786916657	50.9359449860937	71.8593822284931	47.9663599499601
25 | Barbados	12.3529996871948	10.3219995498657	10.9020004272461	10.4720001220703	7.43800020217896	3.70133286667484	1.39897675570793	3.35364864953735	1.69476504366585	4.60403384875786	25.4220117051662	8.68979786731415	25.4713355332621	7.82922348686829	27.5890294337571	58.5236578306314	79.5892253994942	60.273017089364	80.0040108137672	60.3689373980461
26 | Brunei Darussalam	7.26999998092651	8.79800033569336	5.87599992752075	7.96700000762939	4.51399993896484	0.88278962052145	0.416793126941428	0.739814653136596	0.517225438453593	1.32439078715342	21.4187750082203	9.38103791508986	23.8933776338316	9.63493464281319	28.2056091354989	70.4284321845913	81.4041722916177	69.4908102540112	81.8808413922143	65.9550429571979
27 | Bhutan	1.89600002765656	3.24499988555908	2.75699996948242	4.05399990081787	1.22200000286102	51.2946567369433	64.1505020656227	53.3552892864206	63.1852415861599	57.4966984192026	9.74761331001144	8.93629186962433	6.38011310587837	6.44853085332855	8.03460234643553	37.0617289897961	23.6682080246491	37.5066248150085	26.312230862237	33.2457129361251
28 | Botswana	14.8100004196167	20.8439998626709	14.6619997024536	21.5359992980957	14.7600002288818	22.4254149825805	13.954411034723	24.444216036958	15.4111149645212	18.8320735006111	21.7737120057491	7.88631252920792	20.934264970185	8.10533121264347	22.6900350968499	40.9908725920536	57.3144831835424	39.9586663791761	54.9475545247397	43.7178895478333
29 | Central African Republic	3.94400000572205	3.76900005340576	4.01599979400635	3.82100009918213	4.08400011062622	71.9738010565036	79.6860033622247	72.024476007845	79.9747624084491	72.8395712908062	7.24358285330538	2.58572700343874	8.32085266296868	3.06811015187016	8.81372135423733	16.8386174585611	13.9592684337679	15.6386724505547	13.1361277991158	14.2627090737827
30 | Canada	7.45900011062622	6.28499984741211	8.78899955749512	7.23299980163574	6.94199991226196	2.04423078993088	0.951207238142733	2.22463620162913	1.03064131823743	3.01880148379753	27.9075889553813	8.0135693270031	27.9798870385329	7.92601219261315	30.9808692188154	62.589182350411	84.7502214648194	61.0064795944466	83.8103474616218	59.0583298288602
31 | Central Europe and the Baltics	7.74020473088915	7.90567223682592	9.96677631874455	9.81944761452723	11.9682940640283	11.5688662629283	9.20035554902194	12.5609959883593	11.5927911055313	18.8309308218994	37.5896041916351	17.2087211486188	36.9529718789418	16.9165246743873	34.4635746890898	43.1009681723622	65.6852633119561	40.519477755266	61.6714565500074	34.7367773326694
32 | Switzerland	4.7039999961853	4.91200017929077	4.48400020599365	5.18800020217896	2.29699993133545	3.74513286376686	2.73092733490052	3.94958681010094	2.53811727798916	5.38441248659413	28.5783177465053	9.60103545797122	30.8402042936878	10.1572098204503	34.1344974023386	62.9715985474426	82.7560354408853	60.7252522330816	82.1166736035794	58.1850690024549
33 | Channel Islands	7.89499998092651	8.32600021362305	8.26599979400635	8.56599998474121	8.1899995803833	4.83551250100136	2.26984829940678	5.48477595630134	2.82531052199306	6.66540624060059	32.3868794983221	10.8028644985235	34.0048767603197	12.3161600462105	38.8741895091103	54.8816839621708	78.6012854584717	52.2443474893725	76.2934456827455	46.2704029187691
34 | Chile	6.06400012969971	7.13700008392334	7.44299983978271	9.90900039672852	9.43200016021729	11.9759002436269	4.20947977911087	13.0718252667967	4.63157810402245	17.678873983313	29.7561067219155	10.4248002147113	28.8065144113413	9.25324648677127	26.3833645031928	52.2039938006014	78.2287172654227	50.6786613647715	76.2061724349536	46.5057613532769
35 | China	5.11499977111816	4.01200008392334	5.0019998550415	3.92300009727478	3.61199998855591	29.4561000646645	24.4548620502283	37.2563142762865	32.2261471314498	51.6157732707462	29.2663293402916	26.0108278071535	27.8448646052482	26.8381491674875	20.0246066346865	36.1616224951424	45.522308227869	29.8968212634238	37.0136656785436	24.7476182675561
36 | Cote d'Ivoire	2.5220000743866	3.94799995422363	6.76200008392334	6.61499977111816	4.58099985122681	47.171552058119	34.9715730777324	47.0068703791369	39.4560959337257	49.0663591009697	12.8105589435857	10.092183615497	10.7410180171417	10.3769416600407	12.4483623080556	37.4968613092001	50.988243352547	35.4891796519704	43.5519635257042	33.9042805597209
37 | Cameroon	3.06299996376038	4.00600004196167	3.41300010681152	4.89900016784668	8.4680004119873	40.5565032880577	49.8170461303385	48.0365796469669	58.1961060694681	57.8253400413932	16.9232616438237	10.3491132673038	13.7481937637596	9.33986928783898	10.8117600427362	39.4572351043582	35.8278423913364	34.8022292458383	27.5640730802995	22.8958143210788
38 | Congo, Dem. Rep.	5.19600009918213	3.56200003623962	4.72800016403198	3.19199991226196	3.53399991989136	56.5562725027826	72.0112166573572	59.4430603404656	75.3756770074007	62.6701049092212	14.5125959359357	3.36857936816469	13.1199070101871	3.16562158440437	12.3968455083798	23.7360780772033	21.0582020988296	22.7090343024846	18.2667010343166	21.3990533423936
39 | Congo, Rep.	9.0930004119873	10.7670001983643	12.4320001602173	15.5559997558594	18.6159992218018	31.500185315006	31.4938938062558	32.0253676721182	32.3673868397949	32.7342728923526	21.7085907386955	19.7463702674023	20.7273456288856	19.4052308695495	19.0975692997385	37.6982252682246	37.9927340259931	34.8152848685518	32.6713825347961	29.5521585861072
40 | Colombia	6.36199998855591	10.8400001525879	8.63500022888184	14.1890001296997	17.3519992828369	20.9589929996166	6.43735180191803	23.9476804943024	6.00934413433163	26.6986108578931	22.5227476712479	12.4815080181137	20.8759896481155	13.6113411987855	15.8882524271435	50.1553216889453	70.2411379016403	46.5413296287003	66.1903165830769	40.061139008512
41 | 


--------------------------------------------------------------------------------
/R/convert.R:
--------------------------------------------------------------------------------
  1 | #' @include constructor.R
  2 | NULL
  3 | 
  4 | 
  5 | #' Recompute missing values of cacomp object.
  6 | #'
  7 | #' @description
  8 | #' Completes a partial list of correspondence analysis results. Missing
  9 | #' standardized/principal coordinates and D are derived from the elements
 10 | #' that are present; if these do not suffice, cacomp is rerun on `mat`.
 11 | #'
 12 | #' @return
 13 | #' A cacomp object. Unless cacomp had to be rerun, masses, inertias,
 14 | #' top_rows and dims are set from `mat` and missing U/V are added.
 15 | #'
 16 | #' @param calist A list with (a subset of) the elements of a cacomp object.
 17 | #' @param mat The matrix (or Matrix) the cacomp object was derived from.
 18 | #' @param ... Further arguments forwarded to cacomp.
 19 | recompute <- function(calist, mat, ...){
 20 | 
 21 |   stopifnot(is(calist, "list"))
 22 |   stopifnot(is(mat, "matrix") || is(mat, "Matrix"))
 23 | 
 24 |   if(is.null(calist$params)){
 25 |       warning("No parameters provided for recalculation!")
 26 |       calist$params <- list()
 27 | 
 28 |   }
 29 | 
 30 |   # Apply rm_zeros() to mat if the stored parameters ask for it.
 31 |   # NOTE: do not guard this with exists("rm_zeros"). That lookup always
 32 |   # finds the function rm_zeros() itself, and isTRUE() of a function is
 33 |   # FALSE, so the setting in calist$params would never be honoured and
 34 |   # mat would never be filtered.
 35 |   if (isTRUE(calist$params$rm_zeros)){
 36 |     mat <- rm_zeros(mat)
 37 |   }
 38 | 
 39 |   # Take stock of what we have. A flag is TRUE while the element is
 40 |   # missing and is set to FALSE once the element has been computed.
 41 |   std_rows <- is.null(calist$std_coords_rows)
 42 |   std_cols <- is.null(calist$std_coords_cols)
 43 |   prin_rows <- is.null(calist$prin_coords_rows)
 44 |   prin_cols <- is.null(calist$prin_coords_cols)
 45 | 
 46 |   d <- is.null(calist$D)
 47 |   v <- is.null(calist$V)
 48 |   u <- is.null(calist$U)
 49 | 
 50 |   # Standardized residuals and masses are always computed from mat. They
 51 |   # provide row_masses, col_masses and the row/column inertias further
 52 |   # down, and the masses are needed to convert between U/V and coordinates.
 53 |   res <- comp_std_residuals(mat=mat)
 54 | 
 55 |   S <- res$S
 56 |   rowm <- res$rowm
 57 |   colm <- res$colm
 58 | 
 59 |   # If the singular vectors are available, the standardized coordinates
 60 |   # follow directly: U (V) with each row divided by the square root of the
 61 |   # corresponding row (column) mass.
 62 |   if(std_rows && !u) {
 63 |     calist$std_coords_rows <- sweep(x = calist$U,
 64 |                                     MARGIN = 1,
 65 |                                     STATS = sqrt(rowm),
 66 |                                     FUN = "/")
 67 |     std_rows <- FALSE
 68 |   }
 69 |   if(std_cols && !v){
 70 |     calist$std_coords_cols <- sweep(x = calist$V,
 71 |                                     MARGIN = 1,
 72 |                                     STATS = sqrt(colm),
 73 |                                     FUN = "/")
 74 |     std_cols <- FALSE
 75 |   }
 76 | 
 77 |   call_svd <- FALSE
 78 |   done <- FALSE
 79 |   # Column side: derive std_coords_cols, prin_coords_cols and D if possible.
 80 |   while (isFALSE(done)){
 81 |     if (std_cols){
 82 |       if (d){
 83 |         if(prin_cols){
 84 |           call_svd <- TRUE
 85 |           done <- TRUE
 86 |         } else {
 87 |           # check if we can get D with row coords, otherwise call cacomp
 88 |           if(std_rows || prin_rows){
 89 |             call_svd <- TRUE
 90 |             done <- TRUE
 91 | 
 92 |           } else {
 93 |             calist$D <- calist$prin_coords_rows[1,]/calist$std_coords_rows[1,]
 94 |             d <- FALSE
 95 |           }
 96 |         }
 97 |       } else if (prin_cols){
 98 |         call_svd <- TRUE
 99 |         done <- TRUE
100 | 
101 |       } else {
102 |         # calculate std_coords
103 |         calist$std_coords_cols <- sweep(calist$prin_coords_cols,
104 |                                         2,
105 |                                         calist$D,
106 |                                         "/")
107 |         std_cols <- FALSE
108 |       }
109 |     } else if (d) {
110 |       if (prin_cols) {
111 |         # check if we can get D through rows, otherwise cacomp
112 |         if(std_rows || prin_rows){
113 |           call_svd <- TRUE
114 |           done <- TRUE
115 | 
116 |         } else {
117 |           calist$D <- calist$prin_coords_rows[1,]/calist$std_coords_rows[1,]
118 |           d <- FALSE
119 |         }
120 |       } else {
121 |         # calculate D from col coordinates. Principal coordinates are the
122 |         # standardized coordinates scaled column-wise by D (see the sweep
123 |         # calls in this function), so the ratio within a single row
124 |         # yields D; the first row is used.
125 |         # NOTE(review): gives NaN/Inf where that row's standardized
126 |         # coordinate is 0 -- confirm this cannot occur in practice.
127 |         calist$D <- calist$prin_coords_cols[1,]/calist$std_coords_cols[1,]
128 |         d <- FALSE
129 | 
130 |       }
131 |     } else if (prin_cols){
132 |       # calculate prin_cols with D and std
133 |       calist$prin_coords_cols <- sweep(calist$std_coords_cols,
134 |                                        2,
135 |                                        calist$D,
136 |                                        "*")
137 |       prin_cols <- FALSE
138 | 
139 |     } else {
140 |       # all calculated
141 |       done <- TRUE
142 |     }
143 |   }
144 | 
145 |   # Row side: the same procedure for std_coords_rows, prin_coords_rows and D.
146 |   done <- FALSE
147 |   while (isFALSE(done)){
148 |     if (std_rows){
149 |       if (d){
150 |         if(prin_rows){
151 |           call_svd <- TRUE
152 |           done <- TRUE
153 | 
154 |         } else {
155 |           # check if we can get D with col coords, otherwise call cacomp
156 |           if(std_cols || prin_cols){
157 |             call_svd <- TRUE
158 |             done <- TRUE
159 | 
160 |           } else {
161 |             calist$D <- calist$prin_coords_cols[1,]/calist$std_coords_cols[1,]
162 |             d <- FALSE
163 |           }
164 |         }
165 |       } else if (prin_rows){
166 |         call_svd <- TRUE
167 |         done <- TRUE
168 |       } else {
169 |         # calculate std_coords
170 |         calist$std_coords_rows <- sweep(calist$prin_coords_rows,
171 |                                         2,
172 |                                         calist$D,
173 |                                         "/")
174 |         std_rows <- FALSE
175 |       }
176 |     } else if (d) {
177 |       if (prin_rows) {
178 |         # check if we can get D through cols, otherwise cacomp
179 |         if(std_cols || prin_cols){
180 |           call_svd <- TRUE
181 |           done <- TRUE
182 | 
183 |         } else {
184 |           calist$D <- calist$prin_coords_cols[1,]/calist$std_coords_cols[1,]
185 |           d <- FALSE
186 |         }
187 |       } else {
188 |         # calculate D from row coordinates: as for the columns above,
189 |         # principal coordinates are standardized coordinates scaled by D,
190 |         # so the ratio within the first row is used.
191 |         calist$D <- calist$prin_coords_rows[1,]/calist$std_coords_rows[1,]
192 |         d <- FALSE
193 | 
194 |       }
195 |     } else if (prin_rows){
196 |       # calculate prin_rows with D and std
197 |       calist$prin_coords_rows <- sweep(calist$std_coords_rows, 2, calist$D, "*")
198 |       prin_rows <- FALSE
199 | 
200 |     } else {
201 |       # all calculated
202 |       done <- TRUE
203 |     }
204 |   }
205 | 
206 |   # Number of rows to keep should cacomp be rerun. Prefer the row
207 |   # coordinates at hand, then the stored value, and finally all rows of
208 |   # mat, so that `top` is always defined when cacomp is called.
209 |   top <- nrow(mat)
210 |   if (!is.null(calist$std_coords_rows)) {
211 |     top <- nrow(calist$std_coords_rows)
212 |   } else if (!is.null(calist$prin_coords_rows)) {
213 |     top <- nrow(calist$prin_coords_rows)
214 |   } else if (!is.null(calist$top_rows)) {
215 |     top <- calist$top_rows
216 |   }
217 | 
218 |   # Rerun the full analysis if the available elements did not suffice.
219 |   if(isTRUE(call_svd)){
220 |     message("Calling cacomp to recompute from matrix.")
221 |     ca <- cacomp(mat,
222 |                  princ_coords = 3,
223 |                  top = top,
224 |                  residuals = calist$params$residuals,
225 |                  clip = calist$params$clip,
226 |                  cutoff = calist$params$cutoff,
227 |                  rm_zeros = calist$params$rm_zeros,
228 |                  dims = min(nrow(mat), ncol(mat)) - 1,
229 |                  ...)
230 |     return(ca)
231 |   } else {
232 |     # All elements could be derived; check that mat matches them.
233 |     if (nrow(mat) != nrow(calist$std_coords_rows)){
234 |       stop("mat does not have have the correct number of rows.")
235 |     }
236 | 
237 |     if (ncol(mat) != nrow(calist$std_coords_cols)){
238 |       stop("mat does not have have the correct number of columns.")
239 |     }
240 |     # Non-finite standardized coordinates (NA/NaN/Inf) are set to 0.
241 |     calist$std_coords_rows[is.na(calist$std_coords_rows)] <- 0
242 |     calist$std_coords_cols[is.na(calist$std_coords_cols)] <- 0
243 |     calist$std_coords_rows[is.infinite(calist$std_coords_rows)] <- 0
244 |     calist$std_coords_cols[is.infinite(calist$std_coords_cols)] <- 0
245 |     # Masses from mat, ordered like the coordinate matrices (by name).
246 |     ordidx <- match(rownames(calist$prin_coords_rows), names(rowm))
247 |     calist$row_masses <- rowm[ordidx]
248 | 
249 |     ordidx <- match(rownames(calist$std_coords_cols), names(colm))
250 |     calist$col_masses <- colm[ordidx]
251 |     # Missing U/V: standardized coordinates times sqrt of the masses.
252 |     if (u) calist$U <- sweep(calist$std_coords_rows,
253 |                              1,
254 |                              sqrt(calist$row_masses),
255 |                              "*")
256 |     if (v) calist$V <- sweep(calist$std_coords_cols,
257 |                              1,
258 |                              sqrt(calist$col_masses),
259 |                              "*")
260 | 
261 |     calist$tot_inertia <- sum(calist$D^2)
262 |     calist$row_inertia <- Matrix::rowSums(S^2)
263 |     calist$col_inertia <- Matrix::colSums(S^2)
264 | 
265 |     calist$top_rows <- nrow(mat)
266 |     calist$dims <- length(calist$D)
267 |   }
268 | 
269 |   ca <- do.call(new_cacomp, calist)
270 |   return(ca)
271 | }
272 | 
273 | 
274 | #' Create cacomp object from Seurat/SingleCellExperiment container
275 | #'
276 | #' @description
277 | #' Converts the values stored in the Seurat/SingleCellExperiment dimensional
278 | #' reduction slot "CA" to a cacomp object.
279 | #' Elements that are not stored in the container are recomputed from the
280 | #' saved values and the matrix of the chosen assay.
281 | #'
282 | #' @details
283 | #' Extracts std_coords_cols, D, prin_coords_rows, top_rows and dims
284 | #' from obj and hands them, together with the assay matrix, to the
285 | #' internal function recompute. It adds the remaining elements
286 | #' (without rerunning SVD if the stored values suffice), e.g.
287 | #' U, V, std_coords_rows, row_masses, col_masses.
288 | #'
289 | #' @return
290 | #' A cacomp object.
291 | #'
292 | #' @param obj An object of class "Seurat" or "SingleCellExperiment"
293 | #' with a dim. reduction named "CA" saved. For obj "cacomp" input is returned.
294 | #' @param assay Character. The assay from which to extract the count matrix,
295 | #' e.g. "RNA" for Seurat objects or "counts"/"logcounts" for
296 | #' SingleCellExperiments.
297 | #' @param ... Further arguments.
298 | #' @export
299 | setGeneric(
300 |   name = "as.cacomp",
301 |   def = function(obj, ...) standardGeneric("as.cacomp"))
302 | 
303 | #' @description as.cacomp.cacomp returns input without any calculations.
304 | #' @rdname as.cacomp
305 | #' @export
306 | setMethod("as.cacomp", signature(obj = "cacomp"), function(obj, ...) {
307 |   stopifnot(is(obj, "cacomp")) # always TRUE after dispatch; defensive
308 |   obj
309 | })
310 | 
311 | 
312 | #' @description Recomputes missing values and returns cacomp object from a list.
313 | #' If you have a *complete* cacomp object in list form,
314 | #' use do.call(new_cacomp, obj).
315 | #' @param mat Original input matrix.
316 | #' @rdname as.cacomp
317 | #' @export
318 | #' @examples
319 | #' #########
320 | #' # lists #
321 | #' #########
322 | #'
323 | #' # Simulate counts
324 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
325 | #'                x = sample(1:100, 50, replace = TRUE))
326 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
327 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
328 | #'
329 | #' # Run correspondence analysis
330 | #' ca <- cacomp(obj = cnts, princ_coords = 3)
331 | #' ca_list <- as.list(ca)
332 | #'
333 | #' # Only keep subset of elements for demonstration
334 | #' ca_list <- ca_list[c("U", "std_coords_rows", "std_coords_cols", "params")]
335 | #'
336 | #' # convert (incomplete) list to cacomp object.
337 | #' ca <- as.cacomp(ca_list, mat = cnts)
338 | setMethod(f = "as.cacomp",
339 |           signature = signature(obj = "list"),
340 |           function(obj, ..., mat = NULL) {
341 |   # A complete list converts directly; otherwise recompute it from mat.
342 |   try_obj <- try(do.call(new_cacomp, obj), silent = TRUE)
343 |   if (is(try_obj, "cacomp")){
344 |     return(try_obj)
345 |   } else if (is(try_obj, "try-error")){
346 |     stopifnot("mat is needed to recompute an incomplete list." = !is.null(mat))
347 |     return(recompute(calist = obj, mat = mat))
348 |   } else {
349 |     stop("Unexpected output from try().")
350 |   }
351 | })
352 | 
353 | #' @description
354 | #' as.cacomp.Seurat: Converts the values stored in the Seurat DimReduc slot
355 | #' "CA" to an cacomp object.
356 | #' @param slot character. Slot of the Seurat assay to use. Default "counts".
357 | #' @rdname as.cacomp
358 | #' @export
359 | #' @examples
360 | #'
361 | #' ##########
362 | #' # Seurat #
363 | #' ##########
364 | #' library(SeuratObject)
365 | #' set.seed(1234)
366 | #'
367 | #' # Simulate counts
368 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
369 | #'                x = sample(1:100, 50, replace = TRUE))
370 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
371 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
372 | #'
373 | #' seu <- CreateSeuratObject(counts = cnts)
374 | #' seu <- cacomp(seu, return_input = TRUE)
375 | #'
376 | #' ca <- as.cacomp(seu, assay = "RNA", slot = "counts")
377 | setMethod(f = "as.cacomp",
378 |           signature = signature(obj = "Seurat"),
379 |           function(obj, ..., assay="RNA", slot = "counts") {
380 | 
381 |   stopifnot("obj doesn't belong to class 'Seurat'" = is(obj, "Seurat"))
382 |   stopifnot("obj doesn't contain a DimReduc object named 'CA'. Try running cacomp()." =
383 |               "CA" %in% names(obj@reductions))
384 | 
385 |   if (is.null(assay)) assay <- SeuratObject::DefaultAssay(obj)
386 |   # Elements that are stored in the "CA" DimReduc object.
387 |   ca_list <- list("std_coords_cols" = SeuratObject::Embeddings(obj, reduction = "CA"),
388 |                   "D" = SeuratObject::Stdev(obj, reduction = "CA"),
389 |                   "prin_coords_rows" = SeuratObject::Loadings(obj, reduction = "CA"),
390 |                   "params" = obj@reductions$CA@misc)
391 | 
392 |   ca_list$top_rows <- nrow(ca_list$prin_coords_rows)
393 |   ca_list$dims <- length(ca_list$D)
394 |   # Label the dimensions Dim1, Dim2, ... in coordinates and D.
395 |   colnames(ca_list$std_coords_cols) <- paste0("Dim", seq_len(ncol(ca_list$std_coords_cols)))
396 |   colnames(ca_list$prin_coords_rows) <- paste0("Dim", seq_len(ncol(ca_list$prin_coords_rows)))
397 |   names(ca_list$D) <-  paste0("Dim", seq_along(ca_list$D))
398 | 
399 |   stopifnot("Assay is needed to recompute cacomp." = !is.null(assay))
400 |   # Rows with coordinates only; drop = FALSE keeps a one-row result a matrix.
401 |   seu <- SeuratObject::LayerData(object = obj, assay = assay, layer = slot)
402 |   seu <- as.matrix(seu)
403 |   seu <- seu[rownames(ca_list$prin_coords_rows), , drop = FALSE]
404 |   # recompute() derives the elements that are not stored in the object.
405 |   ca_obj <- recompute(calist = ca_list, mat = seu)
406 | 
407 |   # validObject() returns TRUE invisibly or signals an error if ca_obj is
408 |   # not a valid object.
409 |   stopifnot(validObject(ca_obj))
410 |   return(ca_obj)
411 | })
412 | 
413 | 
414 | #' @description
415 | #' as.cacomp.SingleCellExperiment: Converts the values stored in the
416 | #' SingleCellExperiment reducedDim slot "CA" to a cacomp object.
417 | #'
418 | #' @rdname as.cacomp
419 | #' @export
420 | #' @examples
421 | #'
422 | #' ########################
423 | #' # SingleCellExperiment #
424 | #' ########################
425 | #' library(SingleCellExperiment)
426 | #' set.seed(1234)
427 | #'
428 | #' # Simulate counts
429 | #' cnts <- mapply(function(x){rpois(n = 500, lambda = x)},
430 | #'                x = sample(1:100, 50, replace = TRUE))
431 | #' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
432 | #' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
433 | #'
434 | #' sce <- SingleCellExperiment(assays=list(counts=cnts))
435 | #' sce <- cacomp(sce, return_input = TRUE)
436 | #'
437 | #' ca <- as.cacomp(sce, assay = "counts")
438 | setMethod(f = "as.cacomp",
439 |           signature = signature(obj = "SingleCellExperiment"),
440 |           function(obj, ..., assay="counts") {
441 |   # D, prin_coords_rows and params are attributes of the reducedDim "CA".
442 | # TODO: Change to principal coordinates or columns.
443 |   sce_ca <- SingleCellExperiment::reducedDim(obj, "CA")
444 |   stopifnot("Attribute singval of dimension reduction slot CA is empty.\nThis can happen after subsetting the sce obj." =
445 |               !is.null(attr(sce_ca, "singval")))
446 |   stopifnot("Attribute prin_coords_rows of dimension reduction slot CA is empty.\nThis can happen after subsetting the sce obj." =
447 |               !is.null(attr(sce_ca, "prin_coords_rows")))
448 | 
449 |   ca_list <- list("std_coords_cols" = sce_ca,
450 |                   "D" = attr(sce_ca, "singval"),
451 |                   "prin_coords_rows" = attr(sce_ca, "prin_coords_rows"),
452 |                   "params" = attr(sce_ca, "params"))
453 | 
454 |   if(is.null(assay)) assay <- "counts"
455 |   # Keep only the plain coordinate matrix in std_coords_cols.
456 |   attr(ca_list$std_coords_cols, "prin_coords_rows") <- NULL
457 |   attr(ca_list$std_coords_cols, "singval") <- NULL
458 |   attr(ca_list$std_coords_cols, "percInertia") <- NULL
459 |   attr(ca_list$std_coords_cols, "params") <- NULL
460 | 
461 |   ca_list$top_rows <- nrow(ca_list$prin_coords_rows)
462 |   ca_list$dims <- length(ca_list$D)
463 | 
464 |   # Rows with coordinates only; drop = FALSE keeps a one-row result a matrix.
465 |   stopifnot("Assay is needed to recompute cacomp." = !is.null(assay))
466 |   scemat <- SummarizedExperiment::assay(obj, assay)
467 |   scemat <- scemat[rownames(ca_list$prin_coords_rows), , drop = FALSE]
468 |   # recompute() derives the elements that are not stored in the object.
469 |   ca_obj <- recompute(calist = ca_list, mat = scemat)
470 | 
471 | 
472 |   # validObject() returns TRUE invisibly or signals an error if ca_obj is
473 |   # not a valid object.
474 |   stopifnot(validObject(ca_obj))
475 |   return(ca_obj)
476 | })
477 | 
478 | 


--------------------------------------------------------------------------------
/R/constructor.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Helper function to check if object is empty.
  3 | #' @param x object
  4 | #' @return TRUE if x has length 0 and is not NULL. FALSE otherwise
  5 | is.empty <- function(x) return(isTRUE(length(x) == 0 & !is.null(x)))
  6 | 
  7 | 
  8 | #' Check if cacomp object was correctly created.
  9 | #'
 10 | #' @description Checks if the slots in a cacomp object are of the correct size
 11 | #' and whether they are coherent.
 12 | #' @param object A cacomp object.
 13 | #' @return TRUE if it is a valid cacomp object. FALSE otherwise.
 14 | #' @export
 15 | #' @examples
 16 | #' # Simulate scRNAseq data.
 17 | #' cnts <- data.frame(cell_1 = rpois(10, 5),
 18 | #'                    cell_2 = rpois(10, 10),
 19 | #'                    cell_3 = rpois(10, 20))
 20 | #' rownames(cnts) <- paste0("gene_", 1:10)
 21 | #' cnts <- as.matrix(cnts)
 22 | #'
 23 | #' # Run correspondence analysis.
 24 | #' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
 25 | #'
 26 | #' check_cacomp(ca)
 27 | check_cacomp <- function(object) {
 28 |   errors <- character()
 29 | 
 30 |   dim_rows <- object@top_rows
 31 |   dims <- object@dims
 32 | 
 33 |   # SVD results
 34 |   if (isTRUE(!is.empty(object@U) & 
 35 |              nrow(object@U) != dim_rows)) {
 36 |     msg <- paste0("Nr. of rows in U is ",
 37 |                   nrow(object@U),
 38 |                   ".  Should be ",
 39 |                   dim_rows,
 40 |                   ".")
 41 |     errors <- c(errors, msg)
 42 |   }
 43 | 
 44 |   if (isTRUE(!is.empty(object@U) & 
 45 |              ncol(object@U) != dims)) {
 46 |     msg <- paste0("Nr. of columns in U is ",
 47 |                   ncol(object@U),
 48 |                   ".  Should be ",
 49 |                   dims,
 50 |                   ".")
 51 |     errors <- c(errors, msg)
 52 |   }
 53 | 
 54 |   if (isTRUE(!is.empty(object@V) & 
 55 |              ncol(object@V) != dims)) {
 56 |     msg <- paste0("Nr. of columns in V is ",
 57 |                   ncol(object@V),
 58 |                   ".  Should be ",
 59 |                   dims,
 60 |                   ".")
 61 |     errors <- c(errors, msg)
 62 |   }
 63 | 
 64 |   if (isTRUE(!is.empty(object@D) & 
 65 |              length(object@D) != dims)) {
 66 |     msg <- paste0("Length of D is ", ncol(object@D), ".  Should be ", dims, ".")
 67 |     errors <- c(errors, msg)
 68 |   }
 69 | 
 70 |   # CA results
 71 | 
 72 |   if (isTRUE(!is.empty(object@row_masses) & 
 73 |              length(object@row_masses) != dim_rows)) {
 74 |     
 75 |     msg <- paste0("Length of row_masses is ",
 76 |                   length(object@row_masses),
 77 |                   ".  Should be ",
 78 |                   dim_rows,
 79 |                   ".")
 80 |     errors <- c(errors, msg)
 81 |   }
 82 | 
 83 |   if (isTRUE(!is.empty(object@col_masses) & 
 84 |              length(object@col_masses) != nrow(object@V))) {
 85 |     
 86 |     msg <- paste0("Length of col_masses is ",
 87 |                   length(object@col_masses),
 88 |                   ".  Should be ",
 89 |                   nrow(object@V),
 90 |                   ".")
 91 |     errors <- c(errors, msg)
 92 |   }
 93 | 
 94 |   if (isTRUE(!is.empty(object@row_inertia) & 
 95 |              length(object@row_inertia) != dim_rows)){
 96 |     
 97 |     msg <- paste0("Length of row_inertia is ",
 98 |                   length(object@row_inertia),
 99 |                   ".  Should be ",
100 |                   dim_rows,
101 |                   ".")
102 |     errors <- c(errors, msg)
103 |   }
104 | 
105 |   if (isTRUE(!is.empty(object@col_inertia) & 
106 |              length(object@col_inertia) != nrow(object@V))) {
107 |     
108 |     msg <- paste0("Length of col_inertia is ",
109 |                   length(object@col_inertia),
110 |                   ".  Should be ",
111 |                   nrow(object@V),
112 |                   ".")
113 |     errors <- c(errors, msg)
114 |   }
115 | 
116 |   if (isTRUE(!is.empty(object@tot_inertia) & 
117 |              length(object@tot_inertia) != 1)) {
118 |     
119 |     msg <- paste0("Length of tot_inertia is ",
120 |                   length(object@tot_inertia),
121 |                   ".  Should be 1.")
122 |     errors <- c(errors, msg)
123 |   }
124 | 
125 |   # standardized coordinates
126 | 
127 |   if (isTRUE(!is.empty(object@std_coords_rows) & 
128 |              nrow(object@std_coords_rows) != dim_rows)) {
129 |     
130 |     msg <- paste0("Nr. of rows in std_coords_rows is ",
131 |                   nrow(object@std_coords_rows),
132 |                   ".  Should be ",
133 |                   dim_rows,
134 |                   ".")
135 |     errors <- c(errors, msg)
136 |   }
137 | 
138 |   if (isTRUE(!is.empty(object@std_coords_rows) & 
139 |              ncol(object@std_coords_rows) != dims)) {
140 |     
141 |     msg <- paste0("Nr. of columns in std_coords_rows is ",
142 |                   ncol(object@std_coords_rows),
143 |                   ".  Should be ",
144 |                   dims,
145 |                   ".")
146 |     errors <- c(errors, msg)
147 |   }
148 | 
149 |   if (isTRUE(!is.empty(object@std_coords_cols) & 
150 |              nrow(object@std_coords_cols) != nrow(object@V))) {
151 |     
152 |     msg <- paste0("Nr. of rows in std_coords_cols is ",
153 |                   nrow(object@std_coords_cols),
154 |                   ".  Should be ",
155 |                   nrow(object@V),
156 |                   ".")
157 |     errors <- c(errors, msg)
158 |   }
159 | 
160 |   if (isTRUE(!is.empty(object@std_coords_cols) & 
161 |              ncol(object@std_coords_cols) != dims)) {
162 |     
163 |     msg <- paste0("Nr. of columns in std_coords_cols is ",
164 |                   ncol(object@std_coords_cols),
165 |                   ".  Should be ",
166 |                   dims,
167 |                   ".")
168 |     errors <- c(errors, msg)
169 |   }
170 | 
171 | 
172 |   # principal coordinates
173 | 
174 |   if (isTRUE(!is.empty(object@prin_coords_rows) & 
175 |              nrow(object@prin_coords_rows) != dim_rows)) {
176 |     
177 |     msg <- paste0("Nr. of rows in prin_coords_rows is ",
178 |                   nrow(object@prin_coords_rows),
179 |                   ".  Should be ",
180 |                   dim_rows,
181 |                   ".")
182 |     errors <- c(errors, msg)
183 |   }
184 | 
185 |   if (isTRUE(!is.empty(object@prin_coords_rows) & 
186 |              ncol(object@prin_coords_rows) != dims)) {
187 |     
188 |     msg <- paste0("Nr. of columns in prin_coords_rows is ",
189 |                   ncol(object@prin_coords_rows),
190 |                   ".  Should be ",
191 |                   dims,
192 |                   ".")
193 |     errors <- c(errors, msg)
194 |   }
195 | 
196 |   if (isTRUE(!is.empty(object@prin_coords_cols) & 
197 |              nrow(object@prin_coords_cols) != nrow(object@V))) {
198 |     
199 |     msg <- paste0("Nr. of rows in prin_coords_cols is ",
200 |                   nrow(object@prin_coords_cols),
201 |                   ".  Should be ",
202 |                   nrow(object@V),
203 |                   ".")
204 |     errors <- c(errors, msg)
205 |   }
206 | 
207 |   if (isTRUE(!is.empty(object@prin_coords_cols) & 
208 |              ncol(object@prin_coords_cols) != dims)) {
209 |     
210 |     msg <- paste0("Nr. of columns in prin_coords_cols is ",
211 |                   ncol(object@prin_coords_cols),
212 |                   ".  Should be ",
213 |                   dims,
214 |                   ".")
215 |     errors <- c(errors, msg)
216 |   }
217 | 
218 |   # AP coordinates
219 | 
220 |   if (isTRUE(!is.empty(object@apl_rows) & 
221 |              nrow(object@apl_rows) != dim_rows)) {
222 |     
223 |     msg <- paste0("Nr. of rows in apl_rows is ",
224 |                   ncol(object@apl_rows),
225 |                   ".  Should be ",
226 |                   dim_rows,
227 |                   ".")
228 |     errors <- c(errors, msg)
229 |   }
230 | 
231 |   if (isTRUE(!is.empty(object@apl_rows) & 
232 |              ncol(object@apl_rows) != 2)) {
233 |     
234 |     msg <- paste0("Nr. of columns in apl_rows is ",
235 |                   ncol(object@apl_rows),
236 |                   ".  Should be 2.")
237 |     errors <- c(errors, msg)
238 |   }
239 | 
240 |   if (isTRUE(!is.empty(object@apl_cols) & 
241 |              nrow(object@apl_cols) != nrow(object@V))) {
242 |     
243 |     msg <- paste0("Nr. of rows in apl_cols is ",
244 |                   ncol(object@apl_cols),
245 |                   ".  Should be ",
246 |                   nrow(object@V),
247 |                   ".")
248 |     errors <- c(errors, msg)
249 |   }
250 | 
251 |   if (isTRUE(!is.empty(object@apl_cols) & 
252 |              ncol(object@apl_cols) != 2)) {
253 |     
254 |     msg <- paste0("Nr. of columns in apl_cols is ",
255 |                   ncol(object@apl_cols),
256 |                   ".  Should be 2.")
257 |     errors <- c(errors, msg)
258 |   }
259 | 
260 |   # Salpha score
261 |   if (isTRUE(!is.empty(object@APL_score) & 
262 |              ncol(object@APL_score) != 4)) {
263 |     
264 |     msg <- paste0("Nr. of columns in APL_score is ",
265 |                   ncol(object@APL_score),
266 |                   ".  Should be 4.")
267 |     errors <- c(errors, msg)
268 |   }
269 |   if (isTRUE(!is.empty(object@APL_score) & 
270 |              nrow(object@APL_score) != dim_rows)) {
271 |     
272 |     msg <- paste0("Nr. of rows in APL_score is ",
273 |                   nrow(object@APL_score),
274 |                   ".  Should be ",
275 |                   dim_rows,
276 |                   ".")
277 |     errors <- c(errors, msg)
278 |   }
279 | 
280 |   if (length(errors) == 0) TRUE else errors
281 | }
282 | 
#' An S4 class that contains all elements needed for CA.
#' @name cacomp-class
#' @rdname cacomp-class
#' @description
#' This class contains the elements necessary to compute CA coordinates or
#' Association Plot coordinates,
#' as well as other informative data such as row/column inertia,
#' gene-wise APL-scores, etc.
#' All slots default to empty objects; the sizes of non-empty slots are
#' validated by `check_cacomp`.
#'
#' @slot U class "matrix". Left singular vectors of the original input matrix.
#' @slot V class "matrix". Right singular vectors of the original input matrix.
#' @slot D class "numeric". Singular values of the original input matrix.
#' @slot std_coords_rows class "matrix". Standardized CA coordinates of the
#' rows.
#' @slot std_coords_cols class "matrix". Standardized CA coordinates of the
#' columns.
#' @slot prin_coords_rows class "matrix". Principal CA coordinates of the rows.
#' @slot prin_coords_cols class "matrix". Principal CA coordinates of the
#' columns.
#' @slot apl_rows class "matrix". Association Plot coordinates of the rows
#' for the direction defined in slot "group".
#' @slot apl_cols class "matrix". Association Plot coordinates of the columns
#' for the direction defined in slot "group".
#' @slot APL_score class "data.frame". Contains rows sorted by the APL score.
#' Columns: Rowname (gene name in the case of gene expression data),
#' APL score calculated for the direction defined in slot "group",
#' the original row number and the rank of the row as determined by the score.
#' @slot dims class "numeric". Number of dimensions in CA space.
#' @slot group class "numeric". Indices of the chosen columns for APL
#' calculations.
#' @slot row_masses class "numeric". Row masses of the frequency table.
#' @slot col_masses class "numeric". Column masses of the frequency table.
#' @slot top_rows class "numeric". Number of most variable rows chosen.
#' @slot tot_inertia class "numeric". Total inertia in CA space.
#' @slot row_inertia class "numeric". Row-wise inertia in CA space.
#' @slot col_inertia class "numeric". Column-wise inertia in CA space.
#' @slot permuted_data class "list". Storage slot for permuted data.
#' @slot params class "list". List of parameters.
#' @export
setClass(
  Class = "cacomp",
  # Slot order is kept as is, since it determines the order of slotNames().
  slots = c(
    U = "matrix",
    V = "matrix",
    D = "numeric",
    std_coords_rows = "matrix",
    std_coords_cols = "matrix",
    prin_coords_rows = "matrix",
    prin_coords_cols = "matrix",
    apl_rows = "matrix",
    apl_cols = "matrix",
    APL_score = "data.frame",
    params = "list",
    dims = "numeric",
    group = "numeric",
    row_masses = "numeric",
    col_masses = "numeric",
    top_rows = "numeric",
    tot_inertia = "numeric",
    row_inertia = "numeric",
    col_inertia = "numeric",
    permuted_data = "list"
  ),
  # Every slot starts out empty (0 x 0 matrices, zero-length vectors/lists).
  prototype = prototype(
    U = matrix(0, nrow = 0, ncol = 0),
    V = matrix(0, nrow = 0, ncol = 0),
    D = numeric(),
    std_coords_rows = matrix(0, nrow = 0, ncol = 0),
    std_coords_cols = matrix(0, nrow = 0, ncol = 0),
    prin_coords_rows = matrix(0, nrow = 0, ncol = 0),
    prin_coords_cols = matrix(0, nrow = 0, ncol = 0),
    apl_rows = matrix(0, nrow = 0, ncol = 0),
    apl_cols = matrix(0, nrow = 0, ncol = 0),
    APL_score = data.frame(),
    params = list(),
    dims = numeric(),
    group = numeric(),
    row_masses = numeric(),
    col_masses = numeric(),
    top_rows = numeric(),
    tot_inertia = numeric(),
    row_inertia = numeric(),
    col_inertia = numeric(),
    permuted_data = list()
  ),
  validity = check_cacomp
)
368 | 
#' Create new "cacomp" object.
#' @description Constructor for cacomp objects. All arguments are passed on
#' to `new()` as slots of the new object.
#'
#' @param ... slot names and objects for new cacomp object.
#' @return cacomp object
#' @rdname cacomp-class
#' @export
#' @examples
#' set.seed(1234)
#'
#' # Simulate counts
#' cnts <- mapply(function(x){rpois(n = 500, lambda = x)}, 
#'                x = sample(1:20, 50, replace = TRUE))
#' rownames(cnts) <- paste0("gene_", 1:nrow(cnts))
#' colnames(cnts) <- paste0("cell_", 1:ncol(cnts))
#'
#' res <-  APL:::comp_std_residuals(mat=cnts)
#' SVD <- svd(res$S)
#' names(SVD) <- c("D", "U", "V")
#' SVD <- SVD[c(2, 1, 3)]
#'
#' ca <- new_cacomp(U = SVD$U,
#'                  V = SVD$V,
#'                  D = SVD$D,
#'                  row_masses = res$rowm,
#'                  col_masses = res$colm)
new_cacomp <- function(...) {
  new(Class = "cacomp", ...)
}
396 | 
397 | 
#' Access slots in a cacomp object
#'
#' Accessor that returns the content of a single slot. Fails if the requested
#' name is not one of the slots of `caobj`.
#'
#' @param caobj a cacomp object
#' @param slot slot to return
#' @returns Chosen slot of the cacomp object
#' @examples 
#' # Simulate scRNAseq data.
#' cnts <- data.frame(cell_1 = rpois(10, 5),
#'                    cell_2 = rpois(10, 10),
#'                    cell_3 = rpois(10, 20))
#' rownames(cnts) <- paste0("gene_", 1:10)
#' cnts <- as.matrix(cnts)
#'
#' # Run correspondence analysis.
#' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
#' 
#' # access left singular vectors
#' cacomp_slot(ca, "U")
#' 
#' @export
cacomp_slot <- function(caobj, slot) {
  stopifnot(slot %in% slotNames(caobj))

  # The argument `slot` shadows the function of the same name, but R skips
  # non-function bindings when looking up a function to call.
  slot(object = caobj, name = slot)
}
423 | 
#' Slot names of a cacomp object
#'
#' Lists the names of all slots of the object, in the order in which they are
#' defined in the class.
#'
#' @param caobj a cacomp object
#' @returns Character vector with the slot names of the cacomp object
#' @examples 
#' # Simulate scRNAseq data.
#' cnts <- data.frame(cell_1 = rpois(10, 5),
#'                    cell_2 = rpois(10, 10),
#'                    cell_3 = rpois(10, 20))
#' rownames(cnts) <- paste0("gene_", 1:10)
#' cnts <- as.matrix(cnts)
#'
#' # Run correspondence analysis.
#' ca <- cacomp(obj = cnts, princ_coords = 3, top = 5)
#' 
#' # show slot names:
#' cacomp_names(ca)
#' 
#' @export
cacomp_names <- function(caobj) {
  available_slots <- slotNames(x = caobj)
  available_slots
}
446 | 
447 | 
448 | 
449 | # Left here for potential future inclusion:
450 | #
451 | #' #' Subset rows and columns of a cacomp object.
452 | #' #' 
453 | #' #' @param x cacomp object
454 | #' #' @param i rows to subset to.
455 | #' #' @param j columns to subset to.
456 | #' #' @param drop Whether or not to coerce to the lowest possible dimension. Should
457 | #' #' be FALSE!
458 | #' #' @param ... Further arguments
459 | #' #' 
460 | #' #' @returns 
461 | #' #' Returns a cacomp object with rows and columns subsetted.
462 | #' #' @export
463 | #' setMethod(
464 | #'   f = "[",
465 | #'   signature="cacomp",
466 | #'   definition=function(x, i=NULL, j=NULL,...,drop=FALSE){ 
467 | #'     if (is.null(i)) i <- seq_len(nrow(x@U))
468 | #'     if (is.null(j)) j <- seq_len(nrow(x@V))
469 | #' 
470 | #'     initialize(x,            
471 | #'            U = x@U[i,],
472 | #'            V = x@V[j,],
473 | #'            D = x@D,
474 | #'            std_coords_rows = x@std_coords_rows[i,],
475 | #'            std_coords_cols = x@std_coords_cols[j,],
476 | #'            prin_coords_rows = if(!is.empty(x@prin_coords_rows)) x@prin_coords_rows[i,] else matrix(0, 0, 0),
477 | #'            prin_coords_cols = if(!is.empty(x@prin_coords_cols)) x@prin_coords_cols[j,] else matrix(0, 0, 0),
478 | #'            apl_rows = if(!is.empty(x@apl_rows)) x@apl_rows[i,] else matrix(0, 0, 0),
479 | #'            apl_cols = if(!is.empty(x@apl_cols)) x@apl_cols[j,] else matrix(0, 0, 0),
480 | #'            APL_score = if(!is.empty(x@APL_score)) x@APL_score[which(x@APL_score$Row_num %in% i),] else data.frame(),
481 | #'            dims = x@dims,
482 | #'            group = intersect(x@group, j),
483 | #'            row_masses = x@row_masses[i],
484 | #'            col_masses = x@col_masses[j],
485 | #'            top_rows = min(length(i), x@top_rows),
486 | #'            tot_inertia = sum(x@row_inertia[i]),
487 | #'            row_inertia = x@row_inertia[i],
488 | #'            col_inertia = x@col_inertia[j],
489 | #'            permuted_data = list()
490 | #'          )
491 | #'   }
492 | #' )


--------------------------------------------------------------------------------
/vignettes/APL.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Analyzing data with APL"
  3 | author:
  4 | - name: Elzbieta Gralinska
  5 |   affiliation: Max Planck Institute for Molecular Genetics, Berlin, Germany
  6 |   email: gralinska@molgen.mpg.de
  7 | - name: Clemens Kohl
  8 |   affiliation: Max Planck Institute for Molecular Genetics, Berlin, Germany
  9 |   email: kohl@molgen.mpg.de
 10 | - name: Martin Vingron
 11 |   affiliation: Max Planck Institute for Molecular Genetics, Berlin, Germany
 12 |   email: vingron@molgen.mpg.de
 13 | package: APL
 14 | output:
 15 |   BiocStyle::html_document
 16 | abstract: |
 17 |   This package performs correspondence analysis (CA) and allows the identification of cluster-specific genes using Association Plots (AP).
 18 |   Additionally, APL computes cluster-specificity scores for all genes, which makes it possible to rank the genes by their specificity for a selected cell cluster of interest.
 19 | vignette: |
 20 |   %\VignetteIndexEntry{Analyzing data with APL}
 21 |   %\VignetteEncoding{UTF-8}
 22 |   %\VignetteEngine{knitr::rmarkdown}
 23 | editor_options:
 24 |   markdown:
 25 |     wrap: sentence
 26 | ---
 27 | 
 28 | ```{r, echo = FALSE}
 29 | knitr::opts_chunk$set(collapse = TRUE, comment = "#>", results = "hold")
 30 | ```
 31 | 
 32 | # Introduction
 33 | 
 34 | "APL" is a package developed for computation of Association Plots, a method for visualization and analysis of single cell transcriptomics data.
 35 | The main focus of "APL" is the identification of genes characteristic for individual clusters of cells from input data.
 36 | 
 37 | When working with `r BiocStyle::Rpackage("APL")` package please cite:
 38 | 
 39 | 	Gralinska, E., Kohl, C., Fadakar, B. S., & Vingron, M. (2022).
 40 | 	Visualizing Cluster-specific Genes from Single-cell Transcriptomics Data Using Association Plots.
 41 | 	Journal of Molecular Biology, 434(11), 167525.
 42 | 
 43 | A citation can also be obtained in R by running `citation("APL")`.
 44 | For a mathematical description of the method, please refer to the manuscript.
 45 | 
 46 | # Installation
 47 | 
 48 | To install the `r BiocStyle::Rpackage("APL")` from Bioconductor, run:
 49 | 
 50 | ```{r bioc_install, eval=FALSE}
 51 | if (!requireNamespace("BiocManager", quietly = TRUE)) {
 52 |   install.packages("BiocManager")
 53 | }
 54 | 
 55 | BiocManager::install("APL")
 56 | ```
 57 | 
 58 | Alternatively the package can also be installed from GitHub:
 59 | 
 60 | ```{r git_install, eval=FALSE}
 61 | library(devtools)
 62 | install_github("VingronLab/APL")
 63 | ```
 64 | 
 65 | To additionally build the package vignette, run instead
 66 | 
 67 | ```{r git_vignette, eval=FALSE}
 68 | install_github("VingronLab/APL", build_vignettes = TRUE, dependencies = TRUE)
 69 | ```
 70 | 
 71 | Building the vignette will however take considerable time.
 72 | 
 73 | ## Changes regarding python dependencies
 74 | 
 75 | Previous versions of `r BiocStyle::Rpackage("APL")` used pytorch SVD to speed up the computation of the full SVD. This has been deprecated in favor of fast truncated SVD implementations starting with Version 1.10.1.
 76 | Calling `runAPL` or `cacomp` with `python = TRUE` will not lead to an error, but only issue a warning.
 77 | If you still want to perform a full SVD, set the number of dimensions to the rank of the matrix. Until a faster replacement is identified, this computation will be performed by the rather slow base R `svd` and should therefore not be done on very large matrices.
 78 | The number of dimensions now defaults to half of the rank of the matrix.
 79 | 
 80 | # Preprocessing
 81 | 
 82 | ## Setup
 83 | 
 84 | In this vignette we will use a small data set published by [Darmanis et al. (2015)](https://doi.org/10.1073/pnas.1507125112) consisting of 466 human adult cortical single cells sequenced on the Fluidigm platform as an example.
 85 | To obtain the data necessary to follow the vignette we use the Bioconductor package `r BiocStyle::Biocpkg("scRNAseq")`.
 86 | 
 87 | Besides the package `r BiocStyle::Rpackage("APL")` we will use Bioconductor packages to preprocess the data.
 88 | Namely we will use `r BiocStyle::Biocpkg("SingleCellExperiment")`, `r BiocStyle::Biocpkg("scater")` and `r BiocStyle::Biocpkg("scran")`.
 89 | However, the preprocessing could equally be performed with the single-cell RNA-seq analysis suite `r BiocStyle::CRANpkg("Seurat")`.
 90 | 
 91 | The preprocessing steps are performed according to the recommendations published in [Orchestrating Single-Cell Analysis with Bioconductor](https://bioconductor.org/books/release/OSCA/) by Amezquita *et al.* (2022).
 92 | For more information about the rationale behind them, please refer to the book.
 93 | 
 94 | ```{r setup, message=FALSE, warning=FALSE}
 95 | library(APL)
 96 | library(scRNAseq)
 97 | library(SingleCellExperiment)
 98 | library(scran)
 99 | library(scater)
100 | set.seed(1234)
101 | ```
102 | 
103 | ## Loading the data
104 | 
105 | We start with the loading and preprocessing of the Darmanis data.
106 | 
107 | ```{r load_data}
108 | darmanis <- DarmanisBrainData()
109 | darmanis
110 | ```
111 | 
112 | 
113 | ## Normalization, PCA & Clustering
114 | 
115 | Association Plots from `r BiocStyle::Rpackage("APL")` should be computed based on the normalized expression data.
116 | Therefore, we first normalize the counts from the Darmanis data and calculate both PCA and UMAP for visualizations later.
117 | 
118 | For now, `r BiocStyle::Rpackage("APL")` requires the data to be clustered beforehand. The darmanis data comes already annotated, so we will use the cell types stored in the `cell.type` metadata column instead of performing a clustering.
119 | 
120 | ```{r preprocess}
121 | set.seed(100)
122 | clust <- quickCluster(darmanis)
123 | darmanis <- computeSumFactors(darmanis, cluster = clust, min.mean = 0.1)
124 | darmanis <- logNormCounts(darmanis)
125 | 
126 | dec <- modelGeneVar(darmanis)
127 | top_darmanis <- getTopHVGs(dec, n = 5000)
128 | darmanis <- fixedPCA(darmanis, subset.row = top_darmanis)
129 | darmanis <- runUMAP(darmanis, dimred = "PCA")
130 | 
131 | plotReducedDim(darmanis, dimred = "UMAP", colour_by = "cell.type")
132 | ```
133 | 
134 | # Quick start
135 | 
136 | The fastest way to compute the Association Plot for a selected cluster of cells from the input data is by using a wrapper function `runAPL()`.
137 | `runAPL()` automates most of the analysis steps for ease of use.
138 | 
139 | For example, to generate an Association Plot for the oligodendrocytes we can use the following command:
140 | 
141 | ```{r runAPL}
142 | runAPL(
143 |   darmanis,
144 |   assay = "logcounts",
145 |   top = 5000,
146 |   group = which(darmanis$cell.type == "oligodendrocytes"),
147 |   type = "ggplot"
148 | )
149 | ```
150 | 
151 | The generated Association Plot is computed based on the log-normalized count matrix.
152 | By default `runAPL` uses the top 5,000 most variable genes in the data, but the data can be subset to any number of genes by changing the value for the argument `top`.
153 | The dimensionality of the CA is determined automatically by the elbow rule described below (see [here](#dim_reduc)).
154 | This default behavior can be overridden by setting the dimensions manually (parameter `dims`).
155 | The cluster-specificity score ($S_\alpha$) for each gene is also calculated (`score = TRUE`).
156 | In order to better explore the data, `type` can be set to `"plotly"` to obtain an interactive plot.
157 | `runAPL` has many arguments to further customize the output and fine tune the calculations.
158 | Please refer to the documentation (`?runAPL`) for more information.
159 | The following sections in this vignette will discuss the choice of dimensionality and the $S_\alpha$-score.
160 | 
161 | # Step-by-step way of computing Association Plots
162 | 
163 | Alternatively, Association Plots can be computed step-by-step.
164 | This makes it possible to adjust the Association Plots to the user's needs.
165 | Below we explain each step of the process of generating Association Plots.
166 | 
167 | ## Correspondence Analysis
168 | 
169 | The first step of Association Plot computations is correspondence analysis (CA).
170 | CA is a data dimensionality reduction method similar to PCA, however it allows for a simultaneous embedding of both cells and genes from the input data in the same space.
171 | In this example we perform CA on the log-normalized count matrix of the darmanis brain data.
172 | 
173 | ```{r cacomp}
174 | # Computing CA on logcounts
175 | logcounts <- logcounts(darmanis)
176 | ca <- cacomp(
177 |   obj = logcounts,
178 |   top = 5000
179 | )
180 | 
181 | # The above is equivalent to:
182 | # ca <- cacomp(obj = darmanis,
183 | #              assay = "logcounts",
184 | #              top = 5000)
185 | ```
186 | 
187 | The function `cacomp` accepts as an input any matrix with non-negative entries, be it a single-cell RNA-seq, bulk RNA-seq or other data.
188 | For ease of use, `cacomp` accepts also `r BiocStyle::Biocpkg("SingleCellExperiment")` and `r BiocStyle::CRANpkg("Seurat")` objects, however for these we additionally have to specify via the `assay` and/or `slot` (for Seurat) parameter from where to extract the data.
189 | Importantly, in order to ensure the interpretability of the results `cacomp` (and related functions such as `runAPL`) requires that the input matrix contains both row and column names.
190 | 
191 | When performing a feature selection before CA, we can set the argument `top` to the desired number of genes with the highest variance across cells from the input data to retain for further analysis.
192 | By default, only the top 5,000 most variable genes are kept as a good compromise between computational time and keeping the most relevant genes. If we want to ensure however that even marker genes of smaller clusters are kept, we can increase the number of genes.
193 | <!-- For this vignette we keep all genes in the data to ensure that also genes from smaller clusters such as the platelets are kept. -->
194 | <!-- In many cases however 5,000 genes are a good compromise between computational time and keeping most relevant genes. -->
195 | 
196 | The output of `cacomp` is an object of class `cacomp`:
197 | 
198 | ```{r print_cacomp}
199 | ca
200 | ```
201 | 
202 | As can be seen in the summarized output, by default both types of coordinates in the CA space (principal and standardized) are calculated.
203 | Once the coordinates for the Association Plot are calculated, they will also be shown in the output of `cacomp`.
204 | Slots are accessed through an accessor function:
205 | 
206 | ```{r std_coords}
207 | cacomp_slot(ca, "std_coords_cols")[1:5, 1:5]
208 | ```
209 | 
210 | In the case of `r BiocStyle::Biocpkg("SingleCellExperiment")` and `r BiocStyle::CRANpkg("Seurat")` objects, we can alternatively set `return_input = TRUE` to get the input object back, with the CA results computed by "APL" and stored in the appropriate slot for dimension reduction.
211 | This also allows for using the plotting functions that come with these packages:
212 | 
213 | ```{r ca_pbmc}
214 | darmanis <- cacomp(
215 |   obj = darmanis,
216 |   assay = "logcounts",
217 |   top = 5000,
218 |   return_input = TRUE
219 | )
220 | 
221 | plotReducedDim(darmanis,
222 |   dimred = "CA",
223 |   ncomponents = c(1, 2),
224 |   colour_by = "cell.type"
225 | )
226 | plotReducedDim(darmanis,
227 |   dimred = "CA",
228 |   ncomponents = c(3, 4),
229 |   colour_by = "cell.type"
230 | )
231 | ```
232 | 
233 | However, some functions such as `apl_coords()` require information that cannot be stored in the single-cell container objects.
234 | It is therefore often easier to work with a `cacomp` object instead.
235 | We can convert `r BiocStyle::CRANpkg("Seurat")` or `r BiocStyle::Biocpkg("SingleCellExperiment")` objects which have CA results stored to a `cacomp` object using the function `as.cacomp()`:
236 | 
237 | ```{r convert}
238 | # Converting the object darmanis to cacomp
239 | ca <- as.cacomp(darmanis)
240 | ```
241 | 
242 | ## Reducing the number of CA dimensions {#dim_reduc}
243 | 
244 | When working with high-dimensional data, after singular value decomposition there will often be many dimensions which are representing the noise in the data.
245 | In order to minimize the noise, it is generally recommended to reduce the dimensionality of the data before generating Association Plots.
246 | 
247 | The number of dimensions to retain can be computed using the function `pick_dims`.
248 | This function offers three standard methods which we implemented:
249 | 
250 | -   elbow rule (`method = "elbow_rule"`) - the number of dimensions to retain is calculated based on scree plots generated for randomized data, and corresponds to a point in the plot where the band of randomized singular values enters the band of the original singular values,
251 | 
252 | -   80% rule (`method = "maj_inertia"`) - only those first dimensions are retained which in total account for >= 80% of total inertia,
253 | 
254 | -   average rule (`method = "avg_inertia"`) - only those dimensions are retained which account for more inertia than a single dimension on average.
255 | 
256 | Additionally, the user can compute a scree plot to choose the number of dimensions by themselves:
257 | 
258 | ```{r scree_plot}
259 | pick_dims(ca, method = "scree_plot") +
260 |   xlim(c(0, 20))
261 | ```
262 | 
263 | In the scree plot above we can see that the first dimension explains only \~1% of the total inertia and we observe the "jump" in the scree plot at roughly 5 dimensions.
264 | The first dimensions however explain only a small amount of the total inertia.
265 | 
266 | Here we compute the number of dimensions using the elbow rule.
267 | For demonstration, only three data permutations are computed:
268 | 
269 | ```{r pick_dims, results = "hide"}
270 | pd <- pick_dims(
271 |   ca,
272 |   mat = logcounts(darmanis),
273 |   method = "elbow_rule",
274 |   reps = 3
275 | )
276 | ```
277 | 
278 | ```{r show_dims, message=FALSE}
279 | pd
280 | ```
281 | 
282 | In this case the elbow rule leads to a higher number of dimensions than the roughly 5 dimensions suggested by the "jump" in the scree plot.
283 | 
284 | ```{r expl_inert}
285 | # Compute the percentage of the total inertia explained by each dimension
286 | D <- cacomp_slot(ca, "D")
287 | expl_inertia <- (D^2 / sum(D^2)) * 100
288 | 
289 | # Sum the percentage of inertia explained by the first `pd` dimensions,
290 | # i.e. by the number of dimensions selected with the elbow rule
291 | sum(expl_inertia[seq_len(pd)])
292 | ```
293 | 
294 | In this example the elbow rule suggests keeping `r pd` dimensions, which together explain `r round(sum(expl_inertia[seq_len(pd)]),2)`% of the total inertia in the data.
295 | 
296 | Finally, we can reduce the dimensionality of the data to the desired number of dimensions:
297 | 
298 | ```{r subset_dims}
299 | ca <- subset_dims(ca, dims = pd)
300 | ```
301 | 
302 | ## Association Plots
303 | 
304 | When working with single-cell transcriptomics data we are often interested in which genes are associated with a cluster of cells.
305 | To reveal such genes we can compute an Association Plot for a selected cluster of cells.
306 | In the following example we want to generate an Association Plot for the cluster of endothelial cells:
307 | 
308 | ```{r apl_platelets}
309 | # Specifying a cell cluster of interest
310 | endo <- which(darmanis$cell.type == "endothelial")
311 | 
312 | # Calculate Association Plot coordinates for endothelial cells
313 | ca <- apl_coords(ca, group = endo)
314 | ```
315 | 
316 | After computing the coordinates of genes and cells in the Association Plot we are able to plot the results using the `apl` function.
317 | 
318 | ```{r apl_platelets_plot, fig.wide = TRUE}
319 | # endothelial marker genes
320 | marker_genes <- c("APOLD1", "TM4SF1", "SULT1B1", "ESM1", "SELE")
321 | 
322 | # Plot APL
323 | apl(ca,
324 |   row_labs = TRUE,
325 |   rows_idx = marker_genes,
326 |   type = "ggplot"
327 | ) # type = "plotly" for an interactive plot
328 | ```
329 | 
330 | In the Association Plot all genes are represented by blue circles.
331 | The further to the right a gene is located, the more associated it is with the chosen cluster of cells, and the lower its y-axis value, the more specific it is for the selected cluster.
332 | Additionally, it is possible to highlight any set of genes in the Association Plot.
333 | In the example above we highlighted five genes (APOLD1, TM4SF1, SULT1B1, ESM1, SELE) which are known to be marker genes for endothelial cells.
334 | As we can see in the plot, they are located in the right part of the plot, which confirms their specificity for endothelial cells.
335 | 
336 | By default we plot only the genes in the Association Plot.
337 | To also display the cells in the Association Plot, use the argument `show_cols = TRUE`.
338 | This way we can identify other cells which show similar expression profiles to the cells of interest.
339 | Cells that belong to the cluster of interest will be colored in red, and all remaining cells will be colored in violet.
340 | Furthermore, an interactive plot in which you can hover over genes to see their name can be created by setting `type = "plotly"`.
341 | 
342 | ## Association Plots with the $S_\alpha$-scores
343 | 
344 | The $S_\alpha$-score allows us to rank genes by their specificity for a selected cell cluster, and is computed for each gene from the Association Plot separately.
345 | The higher the $S_\alpha$-score of a gene, the more characteristic its expression is for the investigated cell cluster.
346 | The $S_\alpha$-scores can be computed using the `apl_score` function.
347 | To display the $S_\alpha$-scores in the Association Plot, we can use the argument `show_score = TRUE` in the `apl` function:
348 | 
349 | ```{r apl_score, results = "hide"}
350 | # Compute S-alpha score
351 | # For the calculation the input matrix is also required.
352 | ca <- apl_score(ca,
353 |   mat = logcounts(darmanis),
354 |   reps = 5
355 | )
356 | ```
357 | 
358 | ```{r apl_plot_platelets, fig.wide = TRUE}
359 | apl(ca,
360 |   show_score = TRUE,
361 |   type = "ggplot"
362 | )
363 | ```
364 | 
365 | By default, only genes that have an $S_\alpha$-score larger than 0 are colored, as these tend to be genes of interest and we consider them cluster-specific genes.
366 | This cutoff can be easily changed through the `score_cutoff` argument to `apl()`.
367 | 
368 | The $S_\alpha$-scores are stored in the `"APL_score"` slot and can be accessed as follows:
369 | 
370 | ```{r print_score}
371 | head(cacomp_slot(ca, "APL_score"))
372 | ```
373 | 
374 | To see the expression of genes with the highest $S_\alpha$-scores (or any selected genes) across all cell types from the data we can use plotting functions provided by `r BiocStyle::Biocpkg("scater")`:
375 | 
376 | ```{r seurat_apl, fig.wide = TRUE}
377 | scores <- cacomp_slot(ca, "APL_score")
378 | 
379 | plotExpression(darmanis,
380 |   features = head(scores$Rowname, 3),
381 |   x = "cell.type",
382 |   colour_by = "cell.type"
383 | )
384 | 
385 | plotReducedDim(darmanis,
386 |   dimred = "UMAP",
387 |   colour_by = scores$Rowname[1]
388 | )
389 | ```
390 | 
391 | As expected, the 3 most highly scored genes are over-expressed in the endothelial cells. Due to the small size of the data set and the small number of cells in the cluster (only 20 out of 466 cells are endothelial cells), some cluster-specific genes are only expressed in a few cells. Most data sets nowadays are significantly larger, so this should not be a major concern, and it can be further mitigated by performing a more stringent feature selection before CA.
392 | 
393 | ## Visualization of CA
394 | 
395 | In addition to Association Plots, "APL" can also produce other forms of output.
396 | For instance, we can use "APL" to generate two- and three-dimensional correspondence analysis projections of the data.
397 | The so-called biplot visualizes both cells and genes from the input data and can be created using the function `ca_biplot`.
398 | Alternatively, a three-dimensional data projection plot can be generated using the function `ca_3Dplot`.
399 | To generate such biplots a `cacomp` object is required.
400 | 
401 | ```{r biplot, fig.wide = TRUE}
402 | # Specifying a cell cluster of interest
403 | endo <- which(darmanis$cell.type == "endothelial")
404 | 
405 | # Creating a static plot
406 | ca_biplot(ca, col_labels = endo, type = "ggplot")
407 | 
408 | # Creating an interactive plot
409 | # ca_biplot(ca, type = "plotly", col_labels = endo)
410 | 
411 | # 3D plot
412 | # ca_3Dplot(ca, col_labels = endo)
413 | ```
414 | 
415 | The plots described above give us a quick overview of the first 2 dimensions of the data (more dimensions can be plotted).
416 | As shown in the commented-out code, to interactively explore the projection of the data `type = "plotly"` can be set.
417 | 
418 | # APL and GO enrichment analysis
419 | 
420 | After computing an Association Plot and identifying a set of genes specific for a selected cluster of cells we might be interested in conducting a Gene Ontology (GO) enrichment analysis of the identified gene set.
421 | To conduct a GO enrichment analysis of microglia-specific genes as identified using an Association Plot, we first need to compute the coordinates of the genes in the Association Plot for microglia cells, as well as the $S_\alpha$-score for each gene:
422 | 
423 | ```{r cluster_three, results="hide"}
424 | # Get indices of microglia cells
425 | microglia <- which(darmanis$cell.type == "microglia")
426 | 
427 | # Calculate Association Plot coordinates of the genes and the S-alpha scores
428 | ca <- apl_coords(ca, group = microglia)
429 | 
430 | ca <- apl_score(ca,
431 |   mat = logcounts(darmanis),
432 |   reps = 5
433 | )
434 | ```
435 | 
436 | Now we can conduct GO enrichment analysis as implemented in the package `r BiocStyle::Biocpkg("topGO")` using the most cluster-specific genes from the Association Plot.
437 | By default we use all genes with an $S_\alpha$-score higher than 0, but the cutoff may have to be adjusted depending on the dataset.
438 | In the example below we restrict the analysis to genes with an $S_\alpha$-score higher than 1 in order to focus on truly significant genes.
439 | In case no $S_\alpha$-scores were calculated, one can also choose to use the `ngenes` (by default 1000) genes with the highest x-coordinates by setting `use_coords = TRUE`.
440 | 
441 | ```{r topGO, message=FALSE}
442 | enr <- apl_topGO(ca,
443 |   ontology = "BP",
444 |   organism = "hs",
445 |   score_cutoff = 1
446 | )
447 | head(enr)
448 | ```
449 | 
450 | The function `plot_enrichment()` was implemented to visualize the `topGO` results in the form of a dot plot.
451 | 
452 | ```{r topGO_plot, message=FALSE}
453 | plot_enrichment(enr)
454 | ```
455 | Microglia cells are innate immune cells of the brain and, as such, the most highly scored genes are enriched in gene sets related to the immune response and in microglia-specific gene sets, as one would expect.
456 | 
457 | 
458 | # Session info {.unnumbered}
459 | 
460 | ```{r sessionInfo, echo=FALSE}
461 | sessionInfo()
462 | ```
463 | 


--------------------------------------------------------------------------------