├── .Rbuildignore ├── .bumpversion.cfg ├── .github ├── ISSUE_TEMPLATE │ └── issue_template.md └── workflows │ └── check-bioc.yml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── decoupleR-consensus.R ├── decoupleR-decouple.R ├── decoupleR-package.R ├── decoupleR-pre.R ├── statistic-aucell.R ├── statistic-fgsea.R ├── statistic-gsva.R ├── statistic-mdt.R ├── statistic-mlm.R ├── statistic-ora.R ├── statistic-udt.R ├── statistic-ulm.R ├── statistic-viper.R ├── statistic-wmean.R ├── statistic-wsum.R ├── statistic-zscore.R ├── utils-dataset-converters.R ├── utils-decoupler-formats.R ├── utils-omnipath.R ├── utils-pipe.R ├── utils-profiles.R ├── utils-randomize-matrix.R └── utils-tidy-eval.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── inst ├── CITATION ├── extdata │ ├── bk_data.rds │ └── sc_data.rds ├── figures │ ├── graphical_abstract.png │ ├── logo.svg │ └── net_plot.png └── testdata │ ├── README.md │ ├── generate_expected_output.R │ ├── inputs │ ├── mat.rds │ └── net.rds │ └── outputs │ ├── aucell │ └── output-aucell.rds │ ├── decouple │ └── output-decouple.rds │ ├── fgsea │ └── output-fgsea.rds │ ├── gsva │ └── output-gsva.rds │ ├── mdt │ └── output-mdt.rds │ ├── mlm │ └── output-mlm.rds │ ├── ora │ └── output-ora.rds │ ├── udt │ └── output-udt.rds │ ├── ulm │ └── output-ulm.rds │ ├── viper │ └── output-viper.rds │ ├── wmean │ └── output-wmean.rds │ └── wsum │ └── output-wsum.rds ├── man ├── check_corr.Rd ├── convert_f_defaults.Rd ├── decouple.Rd ├── decoupleR-package.Rd ├── dot-decoupler_mat_format.Rd ├── dot-decoupler_network_format.Rd ├── dot-fit_preprocessing.Rd ├── extract_sets.Rd ├── figures │ └── logo.svg ├── filt_minsize.Rd ├── get_collectri.Rd ├── get_dorothea.Rd ├── get_ksn_omnipath.Rd ├── get_profile_of.Rd ├── get_progeny.Rd ├── get_resource.Rd ├── get_toy_data.Rd ├── intersect_regulons.Rd ├── pipe.Rd ├── pivot_wider_profile.Rd ├── randomize_matrix.Rd ├── rename_net.Rd ├── run_aucell.Rd ├── 
run_consensus.Rd ├── run_fgsea.Rd ├── run_gsva.Rd ├── run_mdt.Rd ├── run_mlm.Rd ├── run_ora.Rd ├── run_udt.Rd ├── run_ulm.Rd ├── run_viper.Rd ├── run_wmean.Rd ├── run_wsum.Rd ├── run_zscore.Rd ├── show_methods.Rd ├── show_resources.Rd └── tidyeval.Rd ├── pkgdown ├── extra.css └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── tests ├── testthat.R └── testthat │ ├── test-decoupleR-decouple.R │ ├── test-decoupleR-pre.R │ ├── test-omnipath.R │ ├── test-statistic-aucell.R │ ├── test-statistic-fgsea.R │ ├── test-statistic-gsva.R │ ├── test-statistic-mdt.R │ ├── test-statistic-mlm.R │ ├── test-statistic-ora.R │ ├── test-statistic-udt.R │ ├── test-statistic-ulm.R │ ├── test-statistic-viper.R │ ├── test-statistic-wmean.R │ ├── test-statistic-wsum.R │ ├── test-statistic-zscore.R │ ├── test-utils-dataset-converters.R │ └── test-utils-profiles.R └── vignettes ├── .gitignore ├── decoupleR.Rmd ├── pw_bk.Rmd ├── pw_sc.Rmd ├── tf_bk.Rmd └── tf_sc.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^renv 2 | ^renv\.lock$ 3 | ^packrat/ 4 | ^\.Rprofile$ 5 | ^.*\.Rproj$ 6 | ^\.Rproj\.user$ 7 | ^\.travis\.yml$ 8 | ^README\.Rmd$ 9 | ^_pkgdown\.yml$ 10 | ^docs 11 | ^doc 12 | ^omnipathr-log 13 | ^pkgdown 14 | ^codecov\.yml$ 15 | ^\.github 16 | ^\.git 17 | ^\.bumpversion\.cfg$ 18 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 2.9.7 3 | commit = True 4 | tag = True 5 | files = DESCRIPTION 6 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+) 7 | serialize = {major}.{minor}.{patch} 8 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report or feature request 3 | about: Describe a bug you've seen or make a case for a new feature 4 | --- 5 | 6 | Please briefly describe your problem and what output you expect. 7 | 8 | Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading <https://www.tidyverse.org/help/#reprex>. 9 | 10 | Brief description of the problem 11 | 12 | ```r 13 | # insert reprex here 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/workflows/check-bioc.yml: -------------------------------------------------------------------------------- 1 | ## Read more about the features of this GitHub Actions workflow 2 | ## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action 3 | ## 4 | ## For more details, check the biocthis developer notes vignette at 5 | ## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html 6 | ## 7 | ## You can add this workflow to other packages using: 8 | ## > biocthis::use_bioc_github_action() 9 | ## 10 | ## Using GitHub Actions exposes you to many details about how R packages are 11 | ## compiled and installed in several operating systems. 12 | ### If you need help, please follow the steps listed at 13 | ## https://github.com/r-lib/actions#where-to-find-help 14 | ## 15 | ## If you found an issue specific to biocthis's GHA workflow, please report it 16 | ## with the information that will make it easier for others to help you. 17 | ## Thank you! 
18 | 19 | ## Acronyms: 20 | ## * GHA: GitHub Action 21 | ## * OS: operating system 22 | 23 | on: 24 | push: 25 | pull_request: 26 | 27 | name: R-CMD-check-bioc 28 | 29 | ## These environment variables control whether to run GHA code later on that is 30 | ## specific to testthat, covr, and pkgdown. 31 | ## 32 | ## If you need to clear the cache of packages, update the number inside 33 | ## cache-version as discussed at https://github.com/r-lib/actions/issues/86. 34 | ## Note that you can always run a GHA test without the cache by using the word 35 | ## "/nocache" in the commit message. 36 | env: 37 | has_testthat: 'true' 38 | run_covr: 'true' 39 | run_pkgdown: 'true' 40 | has_RUnit: 'false' 41 | cache-version: 'cache-v1' 42 | 43 | jobs: 44 | build-check: 45 | runs-on: ${{ matrix.config.os }} 46 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 47 | container: ${{ matrix.config.cont }} 48 | ## Environment variables unique to this job. 49 | 50 | strategy: 51 | fail-fast: false 52 | matrix: 53 | config: 54 | - { 55 | os: ubuntu-latest, 56 | r: 'next', 57 | bioc: '3.19', 58 | cont: "bioconductor/bioconductor_docker:devel", 59 | rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" 60 | } 61 | - { os: macOS-latest, r: 'next', bioc: '3.19'} 62 | - { os: windows-latest, r: 'next', bioc: '3.19'} 63 | env: 64 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 65 | RSPM: ${{ matrix.config.rspm }} 66 | NOT_CRAN: true 67 | TZ: UTC 68 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 69 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 70 | 71 | steps: 72 | 73 | ## Most of these steps are the same as the ones in 74 | ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml 75 | ## If they update their steps, we will also need to update ours. 
76 | - name: Checkout Repository 77 | uses: actions/checkout@v4 78 | 79 | ## R is already included in the Bioconductor docker images 80 | - name: Setup R from r-lib 81 | if: runner.os != 'Linux' 82 | uses: r-lib/actions/setup-r@v2 83 | with: 84 | r-version: ${{ matrix.config.r }} 85 | 86 | ## pandoc is already included in the Bioconductor docker images 87 | - name: Setup pandoc from r-lib 88 | if: runner.os != 'Linux' 89 | uses: r-lib/actions/setup-pandoc@v2 90 | 91 | - name: Query dependencies 92 | run: | 93 | install.packages('remotes') 94 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 95 | shell: Rscript {0} 96 | 97 | - name: Install Linux system dependencies 98 | if: runner.os == 'Linux' 99 | run: | 100 | sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", " qpdf ", remotes::system_requirements("ubuntu", "22.04")), collapse = " "))') 101 | echo $sysreqs 102 | sudo -s eval "$sysreqs" 103 | 104 | - name: Install macOS system dependencies 105 | if: matrix.config.os == 'macOS-latest' 106 | run: | 107 | ## Enable installing XML from source if needed 108 | brew install libxml2 109 | echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV 110 | 111 | ## Required to install magick as noted at 112 | ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 113 | brew install imagemagick 114 | 115 | ## For textshaping, required by ragg, and required by pkgdown 116 | brew install harfbuzz fribidi 117 | 118 | ## Helps compile RCurl from source 119 | brew uninstall --ignore-dependencies curl 120 | 121 | - name: Install Windows system dependencies 122 | if: runner.os == 'Windows' 123 | run: | 124 | ## Edit below if you have any Windows system dependencies 125 | shell: Rscript {0} 126 | 127 | - name: Install dependencies 128 | uses: r-lib/actions/setup-r-dependencies@v2 129 | with: 130 | 
extra-packages: | 131 | any::BiocManager 132 | any::BiocGenerics 133 | any::covr 134 | any::pkgdown 135 | any::rcmdcheck 136 | any::BiocCheck 137 | upgrade: "TRUE" 138 | 139 | - name: Set BiocVersion 140 | run: | 141 | BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE) 142 | shell: Rscript {0} 143 | 144 | - name: Run CMD check 145 | uses: r-lib/actions/check-r-package@v2 146 | with: 147 | error-on: '"error"' 148 | build_args: 'c("--no-manual", "--no-build-vignettes")' 149 | 150 | ## Might need an to add this to the if: && runner.os == 'Linux' 151 | - name: Reveal testthat details 152 | if: env.has_testthat == 'true' 153 | run: find . -name testthat.Rout -exec cat '{}' ';' 154 | 155 | - name: Run RUnit tests 156 | if: env.has_RUnit == 'true' 157 | run: | 158 | BiocGenerics:::testPackage() 159 | shell: Rscript {0} 160 | 161 | - name: Run BiocCheck 162 | run: | 163 | BiocCheck::BiocCheck( 164 | dir('check', 'tar.gz$', full.names = TRUE), 165 | `quit-with-status` = TRUE, 166 | `no-check-R-ver` = TRUE, 167 | `no-check-bioc-help` = TRUE 168 | ) 169 | shell: Rscript {0} 170 | 171 | - name: Test coverage 172 | if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' 173 | run: | 174 | covr::codecov() 175 | shell: Rscript {0} 176 | 177 | - name: Install package 178 | if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' 179 | run: R CMD INSTALL --no-multiarch . 
180 | 181 | - name: Deploy package 182 | if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' 183 | run: | 184 | git config --global --add safe.directory /__w/decoupleR/decoupleR 185 | git config --local user.email "actions@github.com" 186 | git config --local user.name "GitHub Actions" 187 | Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)" 188 | shell: bash {0} 189 | ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) 190 | ## at least one locally before this will work. This creates the gh-pages 191 | ## branch (erasing anything you haven't version controlled!) and 192 | ## makes the git history recognizable by pkgdown. 193 | 194 | - name: Upload check results 195 | if: failure() 196 | uses: actions/upload-artifact@master 197 | with: 198 | name: ${{ runner.os }}-biocversion-devel-r-4.3-results 199 | path: check 200 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 36 | rsconnect/ 37 | 38 | # Files for unit testting 39 | #inst/testdata/outputs/viper/*.rds 40 | #inst/testdata/inputs/*.rds 41 | 42 | 43 | # Ignore previewed README files 44 | */README.html 45 | 
#docs 46 | Icon[ 47 | ]Icon 48 | 49 | Icon 50 | 51 | docs 52 | docs/ 53 | inst/doc 54 | .Rproj.user 55 | *.Rproj 56 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: decoupleR 3 | Title: decoupleR: Ensemble of computational methods to infer biological activities from omics data 4 | Version: 2.9.7 5 | Authors@R: 6 | c(person(given = "Pau", 7 | family = "Badia-i-Mompel", 8 | role = c("aut","cre"), 9 | email = "pau.badia@uni-heidelberg.de", 10 | comment = c(ORCID = "0000-0002-1004-3923")), 11 | person(given = "Jesús", 12 | family = "Vélez-Santiago", 13 | role = c("aut"), 14 | email = "jvelezmagic@gmail.com", 15 | comment = c(ORCID = "0000-0001-5128-3838")), 16 | person(given = "Jana", 17 | family = "Braunger", 18 | role = c("aut"), 19 | email = "jana.bc@gmx.de", 20 | comment = c(ORCID = "0000-0003-0820-9987")), 21 | person(given = "Celina", 22 | family = "Geiss", 23 | role = c("aut"), 24 | email = "celina.geiss@stud.uni-heidelberg.de", 25 | comment = c(ORCID = "0000-0002-8740-706X")), 26 | person(given = "Daniel", 27 | family = "Dimitrov", 28 | role = c("aut"), 29 | email = "daniel.dimitrov@uni-heidelberg.de", 30 | comment = c(ORCID = "0000-0002-5197-2112")), 31 | person(given = "Sophia", 32 | family = "Müller-Dott", 33 | role = c("aut"), 34 | email = "sophia.mueller-dott@uni-heidelberg.de", 35 | comment = c(ORCID = "0000-0002-9710-1865")), 36 | person(given = "Petr", 37 | family = "Taus", 38 | role = c("aut"), 39 | email = "petr.taus@ceitec.muni.cz", 40 | comment = c(ORCID = "0000-0003-3764-9033")), 41 | person(given = "Aurélien", 42 | family = "Dugourd", 43 | role = c("aut"), 44 | email = "aurelien.dugourd@bioquant.uni-heidelberg.de", 45 | comment = c(ORCID = "0000-0002-0714-028X")), 46 | person(given = "Christian H.", 47 | family = "Holland", 48 | role = "aut", 49 | email = "cholland2408@gmail.com", 50 | 
comment = c(ORCID = "0000-0002-3060-5786")), 51 | person(given = "Ricardo O.", 52 | family = "Ramirez Flores", 53 | role = c("aut"), 54 | email = "roramirezf@uni-heidelberg.de ", 55 | comment = c(ORCID = "0000-0003-0087-371X")), 56 | person(given = "Julio", 57 | family = "Saez-Rodriguez", 58 | role = c("aut"), 59 | email = "pub.saez@uni-heidelberg.de", 60 | comment = c(ORCID = "0000-0002-8552-8976")) 61 | ) 62 | Description: Many methods allow us to extract biological activities from omics 63 | data using information from prior knowledge resources, reducing the 64 | dimensionality for increased statistical power and better interpretability. 65 | Here, we present decoupleR, a Bioconductor package containing different 66 | statistical methods to extract these signatures within a unified framework. 67 | decoupleR allows the user to flexibly test any method with any resource. 68 | It incorporates methods that take into account the sign and weight of 69 | network interactions. decoupleR can be used with any omic, as long as its 70 | features can be linked to a biological process based on prior knowledge. 71 | For example, in transcriptomics gene sets regulated by a transcription 72 | factor, or in phospho-proteomics phosphosites that are targeted by a kinase. 
73 | License: GPL-3 + file LICENSE 74 | URL: https://saezlab.github.io/decoupleR/ 75 | BugReports: https://github.com/saezlab/decoupleR/issues 76 | Depends: 77 | R (>= 4.0) 78 | Imports: 79 | BiocParallel, 80 | broom, 81 | dplyr, 82 | magrittr, 83 | Matrix, 84 | parallelly, 85 | purrr, 86 | rlang, 87 | stats, 88 | stringr, 89 | tibble, 90 | tidyr, 91 | tidyselect, 92 | withr 93 | Suggests: 94 | glmnet (>= 4.1-7), 95 | GSVA, 96 | viper, 97 | fgsea (>= 1.15.4), 98 | AUCell, 99 | SummarizedExperiment, 100 | rpart, 101 | ranger, 102 | BiocStyle, 103 | covr, 104 | knitr, 105 | pkgdown, 106 | RefManageR, 107 | rmarkdown, 108 | roxygen2, 109 | sessioninfo, 110 | pheatmap, 111 | testthat, 112 | OmnipathR, 113 | Seurat, 114 | ggplot2, 115 | ggrepel, 116 | patchwork, 117 | magick 118 | VignetteBuilder: 119 | knitr 120 | biocViews: DifferentialExpression, FunctionalGenomics, GeneExpression, 121 | GeneRegulation, Network, Software, StatisticalMethod, Transcription, 122 | Config/testthat/edition: 3 123 | Encoding: UTF-8 124 | LazyData: false 125 | Roxygen: list(markdown = TRUE) 126 | RoxygenNote: 7.3.2 127 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(":=") 5 | export(.data) 6 | export(.fit_preprocessing) 7 | export(abort) 8 | export(as_label) 9 | export(as_name) 10 | export(check_corr) 11 | export(convert_f_defaults) 12 | export(decouple) 13 | export(enquo) 14 | export(enquos) 15 | export(exec) 16 | export(expr) 17 | export(extract_sets) 18 | export(filt_minsize) 19 | export(get_collectri) 20 | export(get_dorothea) 21 | export(get_ksn_omnipath) 22 | export(get_progeny) 23 | export(get_resource) 24 | export(get_toy_data) 25 | export(intersect_regulons) 26 | export(pivot_wider_profile) 27 | export(quo_is_missing) 28 | export(quo_is_null) 29 | export(rename_net) 30 | 
export(run_aucell) 31 | export(run_consensus) 32 | export(run_fgsea) 33 | export(run_gsva) 34 | export(run_mdt) 35 | export(run_mlm) 36 | export(run_ora) 37 | export(run_udt) 38 | export(run_ulm) 39 | export(run_viper) 40 | export(run_wmean) 41 | export(run_wsum) 42 | export(run_zscore) 43 | export(show_methods) 44 | export(show_resources) 45 | export(sym) 46 | export(syms) 47 | import(dplyr) 48 | import(purrr) 49 | import(tibble) 50 | import(tidyr) 51 | importFrom(BiocParallel,MulticoreParam) 52 | importFrom(Matrix,Matrix) 53 | importFrom(dplyr,arrange) 54 | importFrom(dplyr,distinct) 55 | importFrom(dplyr,filter) 56 | importFrom(dplyr,first) 57 | importFrom(dplyr,group_by) 58 | importFrom(dplyr,inner_join) 59 | importFrom(dplyr,mutate) 60 | importFrom(dplyr,reframe) 61 | importFrom(dplyr,relocate) 62 | importFrom(dplyr,rename) 63 | importFrom(dplyr,rowwise) 64 | importFrom(dplyr,select) 65 | importFrom(dplyr,summarise) 66 | importFrom(dplyr,summarize_all) 67 | importFrom(dplyr,ungroup) 68 | importFrom(magrittr,"%<>%") 69 | importFrom(magrittr,"%>%") 70 | importFrom(magrittr,"%T>%") 71 | importFrom(magrittr,extract2) 72 | importFrom(parallelly,availableCores) 73 | importFrom(purrr,map_dfr) 74 | importFrom(purrr,partial) 75 | importFrom(purrr,pluck) 76 | importFrom(rlang,"!!!") 77 | importFrom(rlang,"%||%") 78 | importFrom(rlang,":=") 79 | importFrom(rlang,.data) 80 | importFrom(rlang,abort) 81 | importFrom(rlang,as_label) 82 | importFrom(rlang,as_name) 83 | importFrom(rlang,enquo) 84 | importFrom(rlang,enquos) 85 | importFrom(rlang,exec) 86 | importFrom(rlang,expr) 87 | importFrom(rlang,quo_is_missing) 88 | importFrom(rlang,quo_is_null) 89 | importFrom(rlang,sym) 90 | importFrom(rlang,syms) 91 | importFrom(stats,coef) 92 | importFrom(stats,cor) 93 | importFrom(stats,lm) 94 | importFrom(stats,pnorm) 95 | importFrom(stats,pt) 96 | importFrom(stats,sd) 97 | importFrom(stats,summary.lm) 98 | importFrom(stringr,str_to_lower) 99 | importFrom(tibble,as_tibble) 100 | 
importFrom(tibble,rownames_to_column) 101 | importFrom(tibble,tibble) 102 | importFrom(tidyr,drop_na) 103 | importFrom(tidyr,expand_grid) 104 | importFrom(tidyr,pivot_longer) 105 | importFrom(tidyselect,eval_rename) 106 | importFrom(utils,head) 107 | importFrom(withr,with_seed) 108 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # decoupleR 2.3.x 2 | 3 | ## Changes 4 | * `ulm` and `mlm` are now faster but consume more memory. 5 | * `mat` is now transformed to `matrix` automatically. 6 | 7 | ## New features 8 | * Added `get_collectri` wrapper to easily access the CollecTRI Gene Regulatory 9 | Network Network from `Omnipath`. 10 | * Added `get_ksn_omnipath` wrapper to easily access the Kinase-Substrate network 11 | from `Omnipath`. 12 | 13 | # decoupleR 2.2.x 14 | 15 | ## Changes 16 | * Changed example `mat` and `net` to toy examples. 17 | * Changed test data to toy data. 18 | 19 | ## Bugfixes 20 | * `ora` now selects correctly the top and bottom genes for p-value estimation. 21 | * `wmean` and `wsum` now handle named matrices with only one sample. 22 | 23 | # decoupleR 2.1.x 24 | 25 | ## Changes 26 | * `likelihood` param is deprecated, from now on, weights (positive or negative) 27 | should go to the `mor` column of `network`. Methods will still run if 28 | `likelihood` is specified, however they will be set to 1. 29 | 30 | * Added `minsize` argument to all methods, set to 5 by default. Sources 31 | containing less than this value of targets in the input mat will be removed 32 | from the calculations. 33 | 34 | * Changed default behavior of the `decouple` function. Now if no methods are 35 | specified in the `statistics` argument, the function will only run the top 36 | performers in our benchmark (`mlm`, `ulm` and `wsum`). To run all methods like 37 | before, set `statistics` to 'all'. 
Moreover, the argument `consensus_stats` has 38 | been added to filter statistics for the calculation of the `consensus` score. 39 | By default it only uses `mlm`, `ulm` and `norm_wsum`, or if `statistics`=='all' 40 | all methods returned after running `decouple`. 41 | 42 | * `viper` method: 43 | * Now properly handles weights in `mor` by normalizing them to -1 and +1. 44 | 45 | * `ulm`/`mlm`/`udt`/`mdt` methods: 46 | * Changed how they processed the input network. Before the model 47 | matrix only contained the intersection of features between mat and 48 | network's targets, now it incorporates all features coming from mat 49 | ensuring a more robust prediction. Prediction values may change slightly 50 | from older versions. 51 | * Deprecated `sparse` argument. 52 | 53 | * `ora` method: 54 | * Now takes top 5% features as default input instead of 300 up and bottom 55 | features. 56 | * Added seed to randomly break ties 57 | 58 | * `consensus` method: 59 | * No longer based on `RobustRankAggreg`. Now the consensus score is the mean of the 60 | activities obtained after a double tailed z-score transformation. 61 | 62 | * Discarded `filter_regulons` function. 63 | 64 | * Moved major dependencies to Suggest to reduce the number of dependencies 65 | needed. 66 | 67 | * Updated README by adding: 68 | * Kinase inference example 69 | * Graphical abstract 70 | * Manuscript and citation 71 | * New vignette style 72 | 73 | * Updated documentation for all methods. 74 | 75 | ## New features 76 | * Added wrappers to easily query `Omnipath`, one of the largest data-bases 77 | collecting prior-knowledge resources. Added these functions: 78 | * `show_resources`: shows available resources inside `Omnipath`. 79 | * `get_resource`: gets any resource from `Omnipath`. 80 | * `get_dorothea`: gets the DoRothEA gene regulatory network for 81 | transcription factor (TF) activity estimation. 
Note: this version is 82 | slightly different from the one in the package `dorothea` since it contains 83 | new edges and TFs and also weights the interactions by confidence levels. 84 | * `get_progeny`: gets the PROGENy model for pathway activity estimation. 85 | 86 | * Added `show_methods` function, it shows how many statistics are currently 87 | available. 88 | 89 | * Added `check_corr` function, it shows how correlated regulators in a network 90 | are. It can be used to check for co-linearity for `mlm` and `mdt`. 91 | 92 | * Added new error for `mlm` when co-variables are co-linear (regulators are too 93 | correlated to fit a model). 94 | 95 | ## Bugfixes 96 | * `wmean` and `wsum` now return the correct empirical p-values. 97 | 98 | * `ulm`, `mlm`, `mdt` and `udt` now accept matrices with one column as input. 99 | 100 | * Results from `ulm` and `mlm` now correctly return un-grouped. 101 | 102 | * Methods correctly run when `mat` has no column names. 103 | 104 | # decoupleR 2.0.x 105 | 106 | ## Changes 107 | * Some method's names have been changed to make them easier to identify: 108 | * `pscira` now is called Weighted Sum (`wsum`). 109 | * `mean` now is called Weighted Mean (`wmean`). 110 | * `scira` now is called Univariate Linear Model (`ulm`). 111 | 112 | * The column name for `tf` in the output tibbles has been changed to `source`. 113 | 114 | * Updated documentation for all methods. 115 | 116 | * Updated vignette and README. 117 | 118 | * `decouple` function now accepts order mismatch between the list of methods and 119 | the list of methods's arguments. 120 | 121 | * Moved benchmark branch to a separate repository as its own package: 122 | https://github.com/saezlab/decoupleRBench 123 | 124 | ## New features 125 | 126 | * New methods added: 127 | * Fast Gene Set Enrichment Analysis (`fgsea`). 128 | * `AUCell`. 129 | * Univariate Decision Tree (`udt`). 130 | * Multivariate Decision Tree (`mdt`). 131 | * Multivariate Linear Model (`mlm`). 
132 | 133 | * New `decoupleR` manuscript repository: https://github.com/saezlab/decoupleR_manuscript 134 | 135 | * New `consensus` score based on `RobustRankAggreg::aggregateRanks()` added when 136 | running `decouple` with multiple methods. 137 | 138 | * New statistic `corr_wmean` inside `wmean`. 139 | 140 | * Methods based on permutations or statistical tests now also return a p-value 141 | for the obtained score (`fgsea`, `mlm`, `ora`, `ulm`, `viper`, `wmean` and 142 | `wsum`). 143 | 144 | * New error added when network edges are duplicated. 145 | 146 | * New error added when the input matrix contains NAs or Infs. 147 | 148 | # decoupleR 1.1.x 149 | 150 | ## New features 151 | 152 | All new features allow for **tidy selection**, making it easier to evaluate 153 | different types of data for the same method. For instance, you can specify the 154 | columns to use as strings, integer position, symbol or expression. 155 | 156 | ### Methods 157 | 158 | * New `decouple()` integrates the various member functions of the 159 | `decoupleR statistics` for centralized evaluation. 160 | 161 | * New family `decoupleR statistics` for shared documentation is made up of: 162 | * New `run_gsva()` incorporates a convenient wrapper for [GSVA::gsva()](https://rdrr.io/bioc/GSVA/man/gsva.html). 163 | * New `run_mean()` calculates both the unnormalized regulatory activity 164 | and the normalized (i.e. z-score) one based on an empirical distribution. 165 | * New `run_ora()` fisher exact test to calculate the regulatory activity. 166 | * New `run_pscira()` uses a logic equivalent to `run_mean()` with the 167 | difference that it does not accept a column of likelihood. 168 | * New `run_scira()` calculates the regulatory activity through the coefficient 169 | $\beta_1$ of an adjusted linear model. 170 | * New `run_viper()` incorporates a convenient wrapper for [viper::viper()](https://rdrr.io/bioc/viper/man/viper.html). 
171 | 172 | ### Converters 173 | 174 | * New functions family `convert_to_ variants` that allows the conversion 175 | of data to a standard format. 176 | * New `convert_to_()` return the entry without modification. 177 | * New `convert_to_gsva()` return a list of regulons suitable for [GSVA::gsva()](https://rdrr.io/bioc/GSVA/man/gsva.html). 178 | * New `convert_to_mean()` return a tibble with four columns: 179 | `tf`, `target`, `mor` and `likelihood`. 180 | * New `convert_to_ora()` returns a named list of regulons; tf with 181 | associated targets. 182 | * New `convert_to_pscira()` returns a tibble with three columns: 183 | `tf`, `target` and `mor`. 184 | * New `convert_to_scira()` returns a tibble with three columns: 185 | `tf`, `target` and `mor`. 186 | * New `convert_to_viper()` return a list of regulons suitable for 187 | [viper::viper()](https://rdrr.io/bioc/viper/man/viper.html) 188 | -------------------------------------------------------------------------------- /R/decoupleR-consensus.R: -------------------------------------------------------------------------------- 1 | #' Consensus score between methods 2 | #' @description 3 | #' Function to generate a consensus score between methods from the 4 | #' result of the `decouple` function. 5 | #' 6 | #' @param df `decouple` data frame result 7 | #' @param include_time Should the time per statistic evaluated be informed? 8 | #' @param seed Deprecated parameter. 
#'
#' @details
#' For every statistic and condition, positive and negative scores are scaled
#' separately: each half is mirrored around zero and the scores are divided by
#' the standard deviation of the mirrored values. Scores that are exactly zero
#' stay at zero. The consensus score of a source in a condition is the mean of
#' its scaled scores across statistics, and `p_value` is the two-sided normal
#' tail probability of that mean. Rows whose score is not finite are removed
#' (with a warning) before any computation.
#'
#' @return A tibble holding only the consensus rows (one per condition and
#'  source) with the columns `run_id`, `statistic` (always `"consensus"`),
#'  `condition`, `source`, `score`, `p_value` and, when `include_time = TRUE`,
#'  `statistic_time`.
#'
#' @import purrr
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' results <- decouple(
#'     mat = mat,
#'     network = net,
#'     .source = "source",
#'     .target = "target",
#'     statistics = c("wmean", "ulm"),
#'     args = list(
#'         wmean = list(.mor = "mor", .likelihood = "likelihood"),
#'         ulm = list(.mor = "mor", .likelihood = "likelihood")
#'     ),
#'     consensus_score = FALSE,
#'     minsize = 0
#' )
#' run_consensus(results)
run_consensus <- function(df,
                          include_time = FALSE,
                          seed = NULL) {

    # NSE vs. R CMD check workaround
    condition <- score <- source <- statistic <- NULL

    start_time <- Sys.time()

    # Remove rows whose score is Inf, -Inf, NA or NaN. Base subsetting is used
    # so that a column of `df` can never shadow the logical mask.
    is_finite <- is.finite(df$score)
    if (!all(is_finite)) {
        warning(
            "Non-finite values (Inf, NA or NaN) detected in score, the ",
            "affected rows will be removed. This might affect the final ",
            "consensus score since they will be ignored."
        )
        df <- df[is_finite, , drop = FALSE]
    }

    # The consensus rows get the run id following the last one present.
    # `[[` returns NULL (no warning) when the column does not exist.
    run_ids <- df[["run_id"]]
    last_run_id <- if (length(run_ids) > 0) max(run_ids) else 0

    # Double tailed z-score: every half (positive / non-positive) is mirrored
    # around zero and scaled by the standard deviation of the mirrored values.
    # Only the original observations are returned, so a score of exactly zero
    # contributes once (as 0) instead of being duplicated by its own mirror
    # image or turned into NaN when the whole half is zero.
    scale_by_half <- function(x) {
        scaled <- numeric(length(x))
        is_pos <- x > 0
        is_neg <- x < 0
        scaled[is_pos] <- x[is_pos] / stats::sd(c(x[is_pos], -x[is_pos]))
        # Zeros belong to the non-positive half when its spread is estimated.
        scaled[is_neg] <- x[is_neg] / stats::sd(c(x[!is_pos], -x[!is_pos]))
        scaled
    }

    consensus <- df %>%
        dplyr::group_by(statistic, condition) %>%
        dplyr::mutate(score = scale_by_half(score)) %>%
        dplyr::ungroup() %>%
        dplyr::group_by(condition, source) %>%
        dplyr::summarize(score = mean(score), .groups = "drop") %>%
        dplyr::mutate(p_value = 2 * stats::pnorm(-abs(score))) %>%
        tibble::add_column(
            statistic = "consensus",
            .before = "source"
        ) %>%
        tibble::add_column(
            run_id = last_run_id + 1,
            .before = "statistic"
        )

    if (include_time) {
        consensus <- consensus %>%
            tibble::add_column(
                statistic_time = difftime(Sys.time(), start_time),
                .after = "score"
            )
    }
    consensus
}

# --------------------------------------------------------------------------------
# /R/decoupleR-package.R:
# --------------------------------------------------------------------------------
#' @keywords internal
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
## usethis namespace: end
NULL

# Quiets concerns of R CMD check re: the .'s that appear in pipelines.
if (getRversion() >= "2.15.1") utils::globalVariables(c("."))

# --------------------------------------------------------------------------------
# /R/statistic-aucell.R:
# --------------------------------------------------------------------------------

# NSE vs. R CMD check workaround
c_score <- condition <- corr_wmean <- likelihood <- mor <- norm_wmean <-
  null_distribution <- null_mean <- null_sd <- p_value <- score <- source <-
  statistic <- target <- value <- weight <- wmean <- z_score <- NULL

#' AUCell
#'
#' @description
#' Calculates regulatory activities using AUCell.
#'
#' @details
#' AUCell (Aibar et al., 2017) uses the Area Under the Curve (AUC) to calculate
#' whether a set of targets is enriched within the molecular readouts of each
#' sample. To do so, AUCell first ranks the molecular features of each sample
#' from highest to lowest value, resolving ties randomly. Then, an AUC can be
#' calculated using by default the top 5% molecular features in the ranking.
#' Therefore, this metric, `aucell`, represents the proportion of
#' abundant molecular features in the target set, and their relative abundance
#' value compared to the other features within the sample.
#'
#' Aibar S. et al. (2017) Scenic: single-cell regulatory network inference and
#' clustering. Nat. Methods, 14, 1083–1086.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param aucMaxRank Threshold to calculate the AUC.
#' @param nproc Number of cores to use for computation.
#' @param seed A single value, interpreted as an integer, or NULL for random
#'  number generation.
#' @param minsize Integer indicating the minimum number of targets per source.
#'
#' @return A long format tibble with the following columns:
#'  1. `statistic`: Always `"aucell"`.
#'  2. `source`: Row labels of the AUC matrix computed by AUCell.
#'  3. `condition`: Column labels of the AUC matrix computed by AUCell.
#'  4. `score`: AUC value.
#' @family decoupleR statistics
#' @export
#' @import dplyr
#' @import tibble
#' @import tidyr
#' @importFrom parallelly availableCores
#' @importFrom BiocParallel MulticoreParam
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_aucell(mat, net, minsize=0, nproc=1, aucMaxRank=3)
run_aucell <- function(mat,
                       network,
                       .source = source,
                       .target = target,
                       aucMaxRank = ceiling(0.05 * nrow(rankings)),
                       nproc = availableCores(),
                       seed = 42,
                       minsize = 5) {

  # NSE vs. R CMD check workaround
  source <- target <- NULL

  # Input preparation -------------------------------------------------------
  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Standardise the network columns, apply the minimum-size filter and turn
  # the network into gene sets.
  gene_sets <- network %>%
    rename_net({{ .source }}, {{ .target }}) %>%
    filt_minsize(rownames(mat), ., minsize) %>%
    extract_sets()

  # AUCell ranks on magnitude only, so work on absolute values.
  mat <- abs(mat)

  # Ranking -----------------------------------------------------------------
  # The local name `rankings` must not change: the default value of
  # `aucMaxRank` is a lazily evaluated expression that refers to it, and it
  # is only forced further down, after this assignment.
  rankings <- withr::with_seed(
    seed,
    exec(
      .fn = AUCell::AUCell_buildRankings,
      exprMat = mat,
      plotStats = FALSE,
      verbose = FALSE,
      BPPARAM = MulticoreParam(workers = nproc)
    )
  )

  # AUC ---------------------------------------------------------------------
  auc <- withr::with_seed(
    seed,
    exec(
      .fn = AUCell::AUCell_calcAUC,
      geneSets = gene_sets,
      rankings = rankings,
      verbose = FALSE,
      aucMaxRank = aucMaxRank,
      nCores = nproc
    )
  )

  # Reshape the AUC matrix into the long decoupleR layout.
  auc_long <- auc %>%
    .extract_assay_auc() %>%
    as.data.frame() %>%
    rownames_to_column("source") %>%
    pivot_longer(-source, names_to = "condition", values_to = "score")

  add_column(auc_long, statistic = "aucell", .before = 1)
}

# Pull the assay matrix out of the object returned by AUCell_calcAUC().
.extract_assay_auc <- function(.a) {
  SummarizedExperiment::assay(.a)
}

# --------------------------------------------------------------------------------
# /R/statistic-fgsea.R:
# --------------------------------------------------------------------------------
#' Fast Gene Set Enrichment Analysis (FGSEA)
#'
#' @description
#' Calculates regulatory activities using FGSEA.
#'
#' @details
#'
#' GSEA (Aravind et al., 2005) starts by transforming the input molecular
#' readouts in mat to ranks for each sample. Then, an enrichment score
#' `fgsea` is calculated by walking down the list of features, increasing
#' a running-sum statistic when a feature in the target feature set is
#' encountered and decreasing it when it is not. The final score is the maximum
#' deviation from zero encountered in the random walk. Finally, a normalized
#' score `norm_fgsea`, can be obtained by computing the z-score of the estimate
#' compared to a null distribution obtained from N random permutations. The used
#' implementation is taken from the package `fgsea` (Korotkevich et al., 2021).
#'
#' Aravind S. et al. (2005) Gene set enrichment analysis: A knowledge-based
#' approach for interpreting genome-wide expression profiles. PNAS. 102, 43.
#'
#' Korotkevich G. et al. (2021) Fast gene set enrichment analysis. bioRxiv.
#' DOI: https://doi.org/10.1101/060012.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param times How many permutations to do?
#' @param nproc Number of cores to use for computation.
#' @param seed A single value, interpreted as an integer, or NULL.
#' @param minsize Integer indicating the minimum number of targets per source.
#' @inheritDotParams fgsea::fgseaMultilevel -pathways -stats -nPermSimple -nproc
#'
#' @return A long format tibble of the enrichment scores for each source
#'  across the samples.
#'  Resulting tibble contains the following columns:
#'  1. `statistic`: Indicates which method is associated with which score
#'  (`fgsea` for the enrichment score, `norm_fgsea` for the normalized one).
#'  2. `source`: Source nodes of `network`.
#'  3. `condition`: Condition representing each column of `mat`.
#'  4. `score`: Regulatory activity (enrichment score).
#'  5. `p_value`: p-value reported by `fgsea` for the source and condition.
#' @family decoupleR statistics
#' @importFrom parallelly availableCores
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_fgsea(mat, net, minsize=0, nproc=1)
run_fgsea <- function(mat,
                      network,
                      .source = source,
                      .target = target,
                      times = 100,
                      nproc = availableCores(),
                      seed = 42,
                      minsize = 5,
                      ...) {

  # NSE vs. R CMD check workaround
  ES <- NES <- condition <- p_value <- pathway <- pval <- score <- source <-
    statistic <- target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  network <- network %>%
    rename_net({{ .source }}, {{ .target }})
  network <- filt_minsize(rownames(mat), network, minsize)
  regulons <- extract_sets(network)

  # Unnamed columns get positional labels; seq_len() is safe for zero columns.
  if (is.null(colnames(mat))) {
    colnames(mat) <- seq_len(ncol(mat))
  }

  conditions <- colnames(mat) %>%
    set_names()

  # Capture the caller's extra arguments here: inside the purrr lambda below
  # `...` would refer to the lambda's own dots, not to the ones passed to
  # run_fgsea(). These were previously dropped although they are documented
  # as being forwarded.
  extra_args <- list(...)

  map_dfr(.x = conditions, .f = ~ {
    options <- c(
      list(
        pathways = regulons,
        stats = mat[, .x],
        nPermSimple = times,
        nproc = nproc
      ),
      extra_args
    )
    # Same seed for every condition so each run is reproducible on its own.
    withr::with_seed(seed, {
      suppressWarnings(do.call(what = fgsea::fgsea, args = options))
    })
  }, .id = "condition") %>%
    select(pathway, condition, ES, NES, pval) %>%
    tidyr::pivot_longer(
      cols = c("ES", "NES"),
      names_to = "statistic",
      values_to = "score"
    ) %>%
    mutate(statistic = if_else(statistic == "ES", "fgsea", "norm_fgsea")) %>%
    rename("source" = pathway, "p_value" = pval) %>%
    select(statistic, source, condition, score, p_value) %>%
    # Missing scores become Inf and missing p-values 1 / times.
    mutate(score = replace_na(score, Inf)) %>%
    mutate(p_value = replace_na(p_value, 1 / times))
}

# --------------------------------------------------------------------------------
# /R/statistic-gsva.R:
# --------------------------------------------------------------------------------
#' Gene Set Variation Analysis (GSVA)
#'
#' @description
#' Calculates regulatory activities using GSVA.
#'
#' @details
#' GSVA (Hänzelmann et al., 2013) starts by transforming the input molecular
#' readouts in mat to a readout-level statistic using Gaussian kernel estimation
#' of the cumulative density function. Then, readout-level statistics are
#' ranked per sample and normalized to up-weight the two tails of the rank
#' distribution. Afterwards, an enrichment score `gsva` is calculated
#' using a running sum statistic that is normalized by subtracting the largest
#' negative estimate from the largest positive one.
#'
#' Hänzelmann S. et al. (2013) GSVA: gene set variation analysis for microarray
#' and RNA-seq data. BMC Bioinformatics, 14, 7.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param verbose Gives information about each calculation step. Default: FALSE.
#' @param method Method to employ in the estimation of gene-set enrichment
#' scores per sample. By default this is set to gsva (Hänzelmann et al, 2013).
#' Further available methods are "plage", "ssgsea" and "zscore". Read more in
#' the manual of \code{\link[GSVA]{gsva}}.
#' @param minsize Integer indicating the minimum number of targets per source.
#' Must be greater than 0.
#' @param maxsize Integer indicating the maximum number of targets per source.
#' @inheritDotParams GSVA::gsvaParam -exprData -geneSets -minSize -maxSize
#' @inheritDotParams GSVA::ssgseaParam -exprData -geneSets -minSize -maxSize
#'
#' @return A long format tibble of the enrichment scores for each source
#'  across the samples. Resulting tibble contains the following columns:
#'  1. `statistic`: Indicates which method is associated with which score.
#'  2. `source`: Source nodes of `network`.
#'  3. `condition`: Condition representing each column of `mat`.
#'  4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @importFrom rlang !!! exec
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr mutate
#' @importFrom tibble rownames_to_column
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_gsva(mat, net, minsize=1, verbose = FALSE)
run_gsva <- function(mat,
                     network,
                     .source = source,
                     .target = target,
                     verbose = FALSE,
                     method = c("gsva", "plage", "ssgsea", "zscore"),
                     minsize = 5L,
                     maxsize = Inf,
                     ...) {

  # NSE vs. R CMD check workaround
  condition <- score <- source <- target <- NULL

  if (minsize < 1L) {
    paste(
      "decoupleR::run_gsva: `minsize` must be greater than 0.",
      "Using 1 as minimum number of targets per source."
    ) %>%
      warning(call. = FALSE)
    minsize <- 1L
  }

  # Only the first element selects the method. Reduce `method` once so that
  # the lookup and the error message talk about the same single value
  # (formatting the whole default vector used to yield four messages).
  method <- method[1L]

  # Look up the parameter constructor `<method>Param` inside the GSVA
  # namespace; a failed lookup is reported as an unknown method.
  param <- tryCatch(
    get(
      sprintf("%sParam", method),
      envir = asNamespace("GSVA"),
      inherits = FALSE
    ),
    error = function(e) {
      stop(sprintf(
        "No such method in GSVA: `%s`. To learn more check ?gsva.",
        method
      ))
    }
  )

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Before to start ---------------------------------------------------------
  network <- network %>%
    rename_net({{ .source }}, {{ .target }})
  network <- filt_minsize(rownames(mat), network, minsize)
  regulons <- extract_sets(network)

  # Analysis ----------------------------------------------------------------
  param %>%
    # Build the parameter object; extra arguments from `...` are spliced in.
    exec(
      exprData = mat,
      geneSets = regulons,
      minSize = minsize,
      maxSize = maxsize,
      !!!list(...)
    ) %>%
    GSVA::gsva(param = ., verbose = verbose) %>%
    as.data.frame() %>%
    rownames_to_column(var = "source") %>%
    pivot_longer(
      cols = -source,
      names_to = "condition",
      values_to = "score"
    ) %>%
    # The statistic label is the literal "gsva" whichever `method` was chosen.
    mutate(
      statistic = "gsva",
      source, condition, score,
      .keep = "none",
      .before = 1L
    )

}

# --------------------------------------------------------------------------------
# /R/statistic-mdt.R:
# --------------------------------------------------------------------------------
#' Multivariate Decision Trees (MDT)
#'
#' @description
#' Calculates regulatory activities using MDT.
#'
#' @details
#'
#' MDT fits a multivariate regression random forest for each sample, where the
#' observed molecular readouts in mat are the response variable and the
#' regulator weights in net are the covariates. Target features with no
#' associated weight are set to zero. The obtained feature importances from the
#' fitted model are the activities `mdt` of the regulators in net.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#' @param na.rm Should missing values (including NaN) be omitted from the
#'  calculations of [base::rowMeans()]?
#' @param trees An integer for the number of trees contained in the ensemble.
#' @param min_n An integer for the minimum number of data points in a node that
#' are required for the node to be split further.
#' @param nproc Number of cores to use for computation.
#' @param seed A single value, interpreted as an integer, or NULL for random
#'  number generation.
#' @param minsize Integer indicating the minimum number of targets per source.
#'
#' @return A long format tibble of the enrichment scores for each source
#'  across the samples. Resulting tibble contains the following columns:
#'  1. `statistic`: Indicates which method is associated with which score.
#'  2. `source`: Source nodes of `network`.
#'  3. `condition`: Condition representing each column of `mat`.
#'  4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @export
#'
#' @import dplyr
#' @import purrr
#' @import tibble
#' @importFrom magrittr %<>% %>%
#' @importFrom withr with_seed
#' @importFrom parallelly availableCores
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_mdt(mat, net, minsize=0)
run_mdt <- function(mat,
                    network,
                    .source = source,
                    .target = target,
                    .mor = mor,
                    .likelihood = likelihood,
                    sparse = FALSE,
                    center = FALSE,
                    na.rm = FALSE,
                    trees = 10,
                    min_n = 20,
                    nproc = availableCores(),
                    seed = 42,
                    minsize = 5) {

  # NSE vs. R CMD check workaround
  condition <- likelihood <- mor <- score <- source <- target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Network preparation -----------------------------------------------------
  # Standard column names first, then the minimum-size filter.
  network <- rename_net(
    network,
    {{ .source }}, {{ .target }}, {{ .mor }}, {{ .likelihood }}
  )
  network <- filt_minsize(rownames(mat), network, minsize)

  # Preprocessing -----------------------------------------------------------
  # The result carries the `mat` and `mor_mat` elements used by the model.
  prepared <- .fit_preprocessing(network, mat, center, na.rm, sparse)

  # Model evaluation --------------------------------------------------------
  with_seed(
    seed,
    .mdt_analysis(prepared$mat, prepared$mor_mat, trees, min_n, nproc)
  )
}


#' Run the MDT model for every condition once preprocessing is done
#'
#' Fits one model per column of `mat` and stacks the resulting variable
#' importances into the long decoupleR layout.
#'
#' @inheritParams run_mdt
#' @param mor_mat Matrix whose column names are used as the `source` labels of
#'  the output; `run_mdt()` passes the `mor_mat` element returned by
#'  `.fit_preprocessing()`.
#'
#' @inherit run_mdt return
#' @importFrom purrr partial
#' @importFrom tidyr expand_grid
#' @importFrom dplyr rowwise reframe mutate arrange relocate
#' @keywords internal
#' @noRd
.mdt_analysis <- function(mat, mor_mat, trees, min_n, nproc) {

  # NSE vs R CMD check workaround
  condition <- NULL

  # Everything except the condition is fixed across the fits.
  fit_condition <- function(cond) {
    .mdt_evaluate_model(
      cond,
      mat = mat,
      mor_mat = mor_mat,
      trees = trees,
      min_n = min_n,
      nproc = nproc
    )
  }

  # One row per condition, expanded to one row per (condition, source) by
  # reframe(): each fit returns one importance per column of `mor_mat`.
  per_condition <- expand_grid(condition = colnames(mat)) %>%
    rowwise(condition) %>%
    reframe(
      score = fit_condition(condition),
      source = colnames(mor_mat)
    )

  per_condition %>%
    mutate(statistic = "mdt", .before = 1L) %>%
    arrange(source) %>%
    relocate(source, .after = 1L)
}


#' Fit the MDT model for a single sample (condition)
#'
#' Returns the `variable.importance` element of a `ranger::ranger()` fit of
#' the selected column of `mat` against all columns of `mor_mat`.
#'
#' @importFrom purrr pluck
#' @importFrom magrittr %>%
#' @keywords internal
#' @noRd
.mdt_evaluate_model <- function(condition, mat, mor_mat, trees, min_n, nproc) {

  # Response in the first column, one predictor column per source.
  training <- data.frame(condition = mat[, condition], mor_mat)

  forest <- ranger::ranger(
    condition ~ .,
    data = training,
    num.trees = trees,
    importance = "impurity",
    min.node.size = min_n,
    num.threads = nproc
  )

  pluck(forest, "variable.importance")
}

# --------------------------------------------------------------------------------
# /R/statistic-mlm.R:
# --------------------------------------------------------------------------------
#' Multivariate Linear Model (MLM)
#'
#' @description
#' Calculates regulatory activities using MLM.
#'
#' @details
#'
#' MLM fits a multivariate linear model for each sample, where the observed
#' molecular readouts in mat are the response variable and the regulator weights
#' in net are the covariates. Target features with no associated weight are set
#' to zero. The obtained t-values from the fitted model are the activities
#' (`mlm`) of the regulators in net.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#' @param na.rm Should missing values (including NaN) be omitted from the
#'  calculations of [base::rowMeans()]?
#' @param minsize Integer indicating the minimum number of targets per source.
#'
#' @return A long format tibble of the enrichment scores for each source
#'  across the samples. Resulting tibble contains the following columns:
#'  1. `statistic`: Indicates which method is associated with which score.
#'  2. `source`: Source nodes of `network`.
#'  3. `condition`: Condition representing each column of `mat`.
#'  4. `score`: Regulatory activity (enrichment score).
#'  5. `p_value`: p-value of the fitted coefficient of the source.
#' @family decoupleR statistics
#' @export
#'
#' @import dplyr
#' @import purrr
#' @import tibble
#' @import tidyr
#' @importFrom stats coef lm summary.lm
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_mlm(mat, net, minsize=0)
run_mlm <- function(mat,
                    network,
                    .source = source,
                    .target = target,
                    .mor = mor,
                    .likelihood = likelihood,
                    sparse = FALSE,
                    center = FALSE,
                    na.rm = FALSE,
                    minsize = 5) {

  # NSE vs. R CMD check workaround
  condition <- likelihood <- mor <- p_value <- score <- source <- statistic <-
    target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Before to start ---------------------------------------------------------
  # Convert to standard tibble: source-target-mor.
  network <- network %>%
    rename_net({{ .source }}, {{ .target }}, {{ .mor }}, {{ .likelihood }})
  network <- filt_minsize(rownames(mat), network, minsize)

  # Preprocessing -----------------------------------------------------------
  .fit_preprocessing(network, mat, center, na.rm, sparse) %>%
    # Model evaluation ------------------------------------------------------
    {
      .mlm_analysis(.$mat, .$mor_mat)
    } %>%
    ungroup()
}


#' Wrapper to execute run_mlm() logic once finished preprocessing of data
#'
#' Fit a linear regression between the value of expression and the profile of
#' its targets.
#'
#' @inheritParams run_mlm
#' @param mor_mat Matrix used as the covariates of the model; its column names
#'  become the `source` labels of the output.
#'
#' @inherit run_mlm return
#' @keywords internal
#' @noRd
.mlm_analysis <- function(mat, mor_mat) {

  # Run all linear models at the same time: a matrix response fits one model
  # per column of `mat`.
  res_all <- lm(mat ~ mor_mat) %>% summary()

  if (ncol(mat) == 1) {
    # In case of a single condition, the summary of lm returns the table
    # instead of a list of tables.
    res_all <- list(res_all)
  }
  names(res_all) <- colnames(mat)

  # Identical for every condition, so computed once.
  sources <- colnames(mor_mat)

  # summary is a list for each condition. Get the info we need:
  res_all %>%
    lapply(function(fit) {
      # Columns 3 and 4 of the coefficient table hold the t-values and the
      # p-values; the first row is the intercept and is dropped.
      scores <- as.vector(fit$coefficients[, 3][-1])
      pvals <- as.vector(fit$coefficients[, 4][-1])

      # Fewer coefficients than sources means some could not be estimated.
      diff_n <- length(sources) - length(scores)
      if (diff_n > 0) {
        stop(stringr::str_glue(
          "After intersecting mat and network, at least {diff_n} sources in ",
          "the network are colinear with other sources. ",
          "Cannot fit a linear model with colinear covariables, ",
          "please remove them. ",
          "Please run decoupleR::check_corr to see what regulators are ",
          "correlated."
        ))
      }
      tibble(score = scores, p_value = pvals, source = sources)
    }) %>%
    bind_rows(.id = "condition") %>%
    mutate(statistic = "mlm", .before = 1) %>%
    select(statistic, source, condition, score, p_value)
}

# --------------------------------------------------------------------------------
# /R/statistic-ora.R:
# --------------------------------------------------------------------------------
#' Over Representation Analysis (ORA)
#'
#' @description
#' Calculates regulatory activities using ORA.
#'
#' @details
#' ORA measures the overlap between the target feature set and a list of most
#' altered molecular features in mat. The most altered molecular features can
#' be selected from the top and or bottom of the molecular readout distribution,
#' by default it is the top 5% positive values. With these, a contingency table
#' is built and a one-tailed Fisher’s exact test is computed to determine if a
#' regulator’s set of features are over-represented in the selected features
#' from the data. The resulting score, `ora`, is the minus log10 of the
#' obtained p-value.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param n_up Integer indicating the number of top targets to slice from mat.
#' @param n_bottom Integer indicating the number of bottom targets to slice from
#'  mat.
#' @param n_background Integer indicating the background size of the sliced
#'  targets. If `NULL`, the number of background targets is determined by
#'  the total number of unique targets in the union of `mat` and `network`.
#' @param with_ties Should ties be kept together? The default, `TRUE`,
#'  may return more rows than you request.
#'  Use `FALSE` to ignore ties,
#'  and return the first `n` rows.
#'  NOTE: the current slicing helper accepts this argument but never reads it;
#'  tied values are ordered by a random draw (see `seed`).
#' @param seed A single value, interpreted as an integer, or NULL for random
#'  number generation.
#' @param minsize Integer indicating the minimum number of targets per source.
#' @inheritDotParams stats::fisher.test -x -y
#'
#' @return A long format tibble of the enrichment scores for each source
#'  across the samples. Resulting tibble contains the following columns:
#'  1. `statistic`: Indicates which method is associated with which score.
#'  2. `source`: Source nodes of `network`.
#'  3. `condition`: Condition representing each column of `mat`.
#'  4. `score`: Regulatory activity (enrichment score).
#'  5. `p_value`: p-value of the Fisher exact test (`score` is its
#'  `-log10`).
#' @family decoupleR statistics
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_ora(mat, net, minsize=0)
run_ora <- function(mat,
                    network,
                    .source = source,
                    .target = target,
                    n_up = ceiling(0.05 * nrow(mat)),
                    n_bottom = 0,
                    n_background = 20000,
                    with_ties = TRUE,
                    seed = 42,
                    minsize = 5,
                    ...) {

  # NSE vs. R CMD check workaround
  condition <- p <- p_value <- rand <- score <- source <- statistic <-
    target <- targets <- value <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Before to start ---------------------------------------------------------
  network <- network %>%
    rename_net({{ .source }}, {{ .target }})
  network <- filt_minsize(rownames(mat), network, minsize)
  regulons <- extract_sets(network)

  # Normalise the three sizes (order: n_up, n_bottom, n_background).
  ns <- .ora_check_ns(n_up, n_bottom, n_background, network, mat)
  n_up <- ns[1]
  n_bottom <- ns[2]
  n_background <- ns[3]

  # Slicing draws random numbers to order ties, hence the seed.
  targets <- withr::with_seed(
    seed,
    .ora_slice_targets(mat, n_up, n_bottom, with_ties)
  )

  # Run analysis ------------------------------------------------------------
  .ora_analysis(regulons, targets, n_background, ...)
}

# Helper functions --------------------------------------------------------
#' Wrapper to execute `run_ora()` logic once finished preprocessing of data
#'
#' Runs one Fisher exact test per (source, condition) pair.
#'
#' @inheritParams run_ora
#' @param regulons Named list; names from `source` and values
#'  from `target`.
#' @param targets Named list; names from columns of `mat` and
#'  values from sliced data of `mat`.
#'
#' @inherit run_ora return
#' @keywords internal
#' @noRd
.ora_analysis <- function(regulons, targets, n_background, ...) {

  # NSE vs. R CMD check workaround
  p.value <- NULL

  expand_grid(source = names(regulons), condition = names(targets)) %>%
    rowwise(source, condition) %>%
    # Each test yields a one-row data frame whose columns are spliced in.
    summarise(
      .ora_fisher_exact_test(
        expected = regulons[[source]],
        observed = targets[[condition]],
        n_background = n_background,
        ...
      ),
      .groups = "drop"
    ) %>%
    select(source, condition,
      p_value = p.value, everything()
    ) %>%
    mutate(score = -log10(p_value)) %>%
    add_column(statistic = "ora", .before = 1) %>%
    select(statistic, source, condition, score, p_value)
}

#' Fisher Exact Test
#'
#' One-sided (`alternative = "greater"`) test on the 2 x 2 table built from
#' `expected` and `observed`. Extra arguments in `...` are spliced into the
#' `stats::fisher.test()` call.
#'
#' @inheritParams run_ora
#' @inheritParams .ora_contingency_table
#'
#' @return Single row summary "glance" of a object of class `htest`.
#' @keywords internal
#' @noRd
.ora_fisher_exact_test <- function(expected, observed, n_background, ...) {
  exec(
    .fn = stats::fisher.test,
    x = .ora_contingency_table(expected, observed, n_background),
    y = NULL,
    alternative = "greater",
    !!!list(...)
  ) %>%
    broom::glance()
}

#' Create contingency table
#'
#' Cells, filled column-wise: features in both sets, features only in
#' `expected`, features only in `observed`, and the remainder of the
#' background.
#'
#' @inheritParams run_ora
#' @param expected Vector with expected targets
#' @param observed Vector with observed targets
#'
#' @return 2 x 2 matrix
#' @keywords internal
#' @noRd
.ora_contingency_table <- function(expected, observed, n_background) {
  true_positive <- length(intersect(observed, expected))
  false_positive <- length(setdiff(expected, observed))
  false_negative <- length(setdiff(observed, expected))
  true_negative <- n_background -
    true_positive - false_positive - false_negative

  # A background smaller than the union of both sets gives a negative cell,
  # which stats::fisher.test() rejects with a generic message; fail here
  # with the actual cause instead.
  if (isTRUE(true_negative < 0)) {
    stop(
      sprintf(
        paste0(
          "`n_background` (%s) is smaller than the number of distinct ",
          "features in `expected` and `observed` (%s)."
        ),
        format(n_background),
        format(true_positive + false_positive + false_negative)
      ),
      call. = FALSE
    )
  }

  matrix(
    c(true_positive, false_positive, false_negative, true_negative),
    nrow = 2, ncol = 2, byrow = FALSE
  )
}

#' Slice targets per condition
#'
#' For every condition, rows are ordered by value (ties ordered by a random
#' normal draw) and the first `n_bottom` plus the last `n_up` targets are
#' kept. `with_ties` is part of the signature but is not read here.
#'
#' @inheritParams run_ora
#' @return Named list with sliced targets per condition.
#'
#' @keywords internal
#' @noRd
.ora_slice_targets <- function(mat, n_up, n_bottom, with_ties) {

  # NSE vs. R CMD check workaround
  rand <- targets <- target <- condition <- value <- NULL

  mat %>%
    as_tibble(rownames = "target") %>%
    tidyr::pivot_longer(
      cols = -target,
      names_to = "condition",
      values_to = "value"
    ) %>%
    # Random tie-breaker, reproducible through the caller's seed.
    mutate(rand = stats::rnorm(n())) %>%
    arrange(condition, value, rand) %>%
    group_by(condition) %>%
    dplyr::do(
      bind_rows(utils::head(., n = n_bottom), utils::tail(., n = n_up))
    ) %>%
    arrange(condition) %>%
    summarise(
      targets = rlang::set_names(list(target), condition[1]),
      .groups = "drop"
    ) %>%
    pull(targets)
}

#' Check values of variables with n_prefix
#'
#' Set convenient default values for the ns so that downstream
#' functions work fine: a `NULL` background is replaced by the number of
#' unique features in the union of the network targets and `rownames(mat)`,
#' and a slice at least as large as `mat` is turned into "take every row".
#'
#' @inheritParams run_ora
#'
#' @return ns modified if necessary, in the order `n_up`, `n_bottom`,
#'  `n_background`.
#'
#' @keywords internal
#' @noRd
.ora_check_ns <- function(n_up, n_bottom, n_background, network, mat) {
  if (is.null(n_background)) {
    n_background <- network %>%
      pull(target) %>%
      unique() %>%
      union(rownames(mat)) %>%
      length()
  } else if (n_background < 0) {
    abort("`n_background` must be a non-missing positive number.")
  }

  if (n_up + n_bottom >= nrow(mat)) {
    n_up <- nrow(mat)
    n_bottom <- 0
  }

  c(n_up, n_bottom, n_background)
}

# --------------------------------------------------------------------------------
# /R/statistic-udt.R:
# --------------------------------------------------------------------------------
#' Univariate Decision Tree (UDT)
#'
#' @description
#' Calculates regulatory activities by using UDT.
#'
#' @details
#' UDT fits a single regression decision tree for each sample and regulator,
#' where the observed molecular readouts in mat are the response variable and
#' the regulator weights in net are the explanatory one.
#' Target features with no associated weight are set to zero. The obtained
#' feature importance from the fitted model is the activity `udt` of a given
#' regulator.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#' @param na.rm Should missing values (including NaN) be omitted from the
#' calculations of [base::rowMeans()]?
#' @param min_n An integer for the minimum number of data points in a node that
#' are required for the node to be split further.
#' @param seed A single value, interpreted as an integer, or NULL for random
#' number generation.
#' @param minsize Integer indicating the minimum number of targets per source.
#'
#' @return A long format tibble of the enrichment scores for each source
#' across the samples. Resulting tibble contains the following columns:
#' 1. `statistic`: Indicates which method is associated with which score.
#' 2. `source`: Source nodes of `network`.
#' 3. `condition`: Condition representing each column of `mat`.
#' 4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @export
#'
#' @import dplyr
#' @import purrr
#' @import tibble
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_udt(mat, net, minsize=0)
run_udt <- function(mat,
                    network,
                    .source = source,
                    .target = target,
                    .mor = mor,
                    .likelihood = likelihood,
                    sparse = FALSE,
                    center = FALSE,
                    na.rm = FALSE,
                    min_n = 20,
                    seed = 42,
                    minsize = 5
) {

  # NSE vs. R CMD check workaround
  condition <- likelihood <- mor <- score <- source <- target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Bring the network to the standard source-target-mor layout, then apply
  # the `minsize` filter against the features present in `mat`.
  std_net <- rename_net(
    network,
    {{ .source }},
    {{ .target }},
    {{ .mor }},
    {{ .likelihood }}
  )
  std_net <- filt_minsize(rownames(mat), std_net, minsize)

  # Preprocessing -----------------------------------------------------------
  prepared <- .fit_preprocessing(std_net, mat, center, na.rm, sparse)

  # Model evaluation --------------------------------------------------------
  # The whole evaluation runs under `seed` so repeated calls give the same
  # result.
  withr::with_seed(
    seed,
    .udt_analysis(prepared$mat, prepared$mor_mat, min_n)
  )
}

#' Wrapper to execute run_udt() logic once finished preprocessing of data
#'
#'
#' @inheritParams run_udt
#' @param mor_mat Matrix of regulator weights; its column names are reported
#' as `source` and each column is paired row-wise with a column of `mat`.
#'
#' @inherit run_udt return
#' @keywords internal
#' @noRd
.udt_analysis <- function(mat, mor_mat, min_n) {
  # Every (source, condition) pair gets one row; the model is fitted once
  # per row.
  grid <- expand_grid(
    source = colnames(mor_mat),
    condition = colnames(mat)
  )

  importance <- map2_dbl(
    grid$source,
    grid$condition,
    function(src, cond) {
      .udt_evaluate_model(
        src,
        cond,
        mat = mat,
        mor_mat = mor_mat,
        min_n = min_n
      )
    }
  )

  grid %>%
    mutate(score = importance) %>%
    transmute(statistic = "udt", source, condition, score)
}

#' Wrapper to run udt per a sample (condition) at time
#'
#' @keywords internal
#' @noRd
.udt_evaluate_model <- function(source, condition, mat, mor_mat, min_n) {
  # NOTE(review): the formula regresses the regulator weights (`y`) on the
  # readouts (`x`), whereas the roxygen details of run_udt() describe the
  # opposite roles - confirm which one is intended.
  frame <- tibble(x = mat[, condition, drop=FALSE] , y = mor_mat[, source])
  fit <- rpart::rpart(y~x, frame, minsplit=min_n)
  score <- fit[["variable.importance"]]

  # The fitted object carries no importance entry in some fits; report a
  # zero named after the source in that case.
  if (is.null(score)) {
    score <- 0
    names(score) <- source
  }
  score
}

# --------------------------------------------------------------------------------
# /R/statistic-ulm.R:
# --------------------------------------------------------------------------------

#' Univariate Linear Model (ULM)
#'
#' @description
#' Calculates regulatory activities using ULM.
#'
#' @details
#' ULM fits a linear model for each sample and regulator, where the observed
#' molecular readouts in mat are the response variable and the regulator weights
#' in net are the explanatory one. Target features with no associated weight
#' are set to zero. The obtained t-value from the fitted model is the activity
#' `ulm` of a given regulator.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#' @param na.rm Should missing values (including NaN) be omitted from the
#' calculations of [base::rowMeans()]?
#' @param minsize Integer indicating the minimum number of targets per source.
#'
#' @return A long format tibble of the enrichment scores for each source
#' across the samples. Resulting tibble contains the following columns:
#' 1. `statistic`: Indicates which method is associated with which score.
#' 2. `source`: Source nodes of `network`.
#' 3. `condition`: Condition representing each column of `mat`.
#' 4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @export
#'
#' @importFrom stats coef lm summary.lm
#' @importFrom magrittr %<>% %>%
#' @importFrom dplyr ungroup
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_ulm(mat, net, minsize=0)
run_ulm <- function(mat,
                    network,
                    .source = source,
                    .target = target,
                    .mor = mor,
                    .likelihood = likelihood,
                    sparse = FALSE,
                    center = FALSE,
                    na.rm = FALSE,
                    minsize = 5L
) {

  # NSE vs. R CMD check workaround
  condition <- likelihood <- mor <- p_value <- score <-
    source <- statistic <- target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Convert to standard tibble: source-target-mor, then apply `minsize`.
  std_net <- rename_net(
    network,
    {{ .source }},
    {{ .target }},
    {{ .mor }},
    {{ .likelihood }}
  )
  std_net <- filt_minsize(rownames(mat), std_net, minsize)

  # Preprocessing -------------------------------------------------------
  prepared <- .fit_preprocessing(std_net, mat, center, na.rm, sparse)

  # Model evaluation ----------------------------------------------------
  ungroup(.ulm_analysis(prepared$mat, prepared$mor_mat))

}

#' Wrapper to execute run_ulm() logic on preprocessed data
#'
#' Fit a linear regression between the value of expression and
#' the profile of its targets.
#'
#' @inheritParams run_ulm
#' @param mor_mat Numeric matrix of regulator weights. Its columns are
#' reported as `source`, and they are correlated against the columns of `mat`.
#'
#' @inherit run_ulm return
#' @keywords internal
#' @importFrom stats cor pt
#' @importFrom dplyr inner_join mutate select arrange
#' @importFrom tibble as_tibble
#' @importFrom tidyr pivot_longer
#' @importFrom magrittr %<>% %>%
#' @noRd
.ulm_analysis <- function(mat, mor_mat) {

  # Residual degrees of freedom of a one-predictor linear model
  dof <- nrow(mor_mat) - 2L

  # Correlation of every source column with every condition column
  rho <- cor(mor_mat, mat)

  # t-value derived from the correlation; the 1e-20 terms keep the
  # denominator non-zero when |rho| is exactly 1
  t_stat <- rho * sqrt(dof / ((1.0 - rho + 1.0e-20) * (1.0 + rho + 1.0e-20)))

  # Two-sided p-values
  p_two_sided <- pt(q = abs(t_stat), df = dof, lower.tail = FALSE) * 2L

  # source x condition matrix -> long tibble (condition, source, <value_col>)
  to_long <- function(m, value_col) {
    long <- as_tibble(t(m), rownames = 'condition')
    pivot_longer(long, -condition, names_to = 'source', values_to = value_col)
  }

  long_scores <- to_long(t_stat, 'score')
  long_pvals <- to_long(p_two_sided, 'p_value')

  inner_join(long_scores, long_pvals, by = c('condition', 'source')) %>%
    transmute(statistic = "ulm", source, condition, score, p_value) %>%
    arrange(source, condition)

}

# --------------------------------------------------------------------------------
# /R/statistic-viper.R:
# --------------------------------------------------------------------------------

# NSE vs. R CMD check workaround
ES <- NES <- condition <- p_value <- pathway <- pval <- score <- source <- statistic <- target <- NULL

#' Virtual Inference of Protein-activity by Enriched Regulon analysis (VIPER)
#'
#' @description
#' Calculates regulatory activities using VIPER.
#'
#' @details
#' VIPER (Alvarez et al., 2016) estimates biological activities by performing a
#' three-tailed enrichment score calculation.
For further information check the 13 | #' supplementary information of the decoupleR manuscript or the original 14 | #' publication. 15 | #' 16 | #' Alvarez M.J. et al. (2016) Functional characterization of somatic mutations 17 | #' in cancer using network-based inference of protein activity. Nat. Genet., 18 | #' 48, 838–847. 19 | #' 20 | #' @inheritParams .decoupler_mat_format 21 | #' @inheritParams .decoupler_network_format 22 | #' @param verbose Logical, whether progression messages should be printed in 23 | #' the terminal. 24 | #' @param pleiotropy Logical, whether correction for pleiotropic regulation 25 | #' should be performed. 26 | #' @param eset.filter Logical, whether the dataset should be limited only to 27 | #' the genes represented in the interactome. 28 | #' @param minsize Integer indicating the minimum number of targets per source. 29 | #' @inheritDotParams viper::viper -eset -regulon -verbose -minsize -pleiotropy -eset.filter 30 | #' 31 | #' @return A long format tibble of the enrichment scores for each source 32 | #' across the samples. Resulting tibble contains the following columns: 33 | #' 1. `statistic`: Indicates which method is associated with which score. 34 | #' 2. `source`: Source nodes of `network`. 35 | #' 3. `condition`: Condition representing each column of `mat`. 36 | #' 4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @export
#'
#' @import dplyr
#' @import tibble
#' @import purrr
#' @import tidyr
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_viper(mat, net, minsize=0, verbose = FALSE)
run_viper <- function(mat,
                      network,
                      .source = source,
                      .target = target,
                      .mor = mor,
                      .likelihood = likelihood,
                      verbose = FALSE,
                      minsize = 5,
                      pleiotropy = TRUE,
                      eset.filter = FALSE,
                      ...) {

  # NSE vs. R CMD check workaround
  likelihood <- mor <- score <- source <- target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Standardise the network columns and apply the `minsize` filter.
  std_net <- rename_net(
    network,
    {{ .source }},
    {{ .target }},
    {{ .mor }},
    {{ .likelihood }}
  )
  std_net <- filt_minsize(rownames(mat), std_net, minsize)

  # Scale mor into [-1, 1] by dividing by the largest absolute weight of
  # each source.
  std_net <- std_net %>%
    dplyr::group_by(source) %>%
    dplyr::group_modify(function(edges, key) {
      edges$mor <- edges$mor / max(abs(edges$mor))
      edges
    })

  # viper expects one list per source: a named `tfmode` vector (names are
  # the targets) and a `likelihood` vector.
  regulon <- std_net %>%
    split(.$source) %>%
    purrr::map(function(edges) {
      list(
        tfmode = purrr::set_names(edges$mor, edges$target),
        likelihood = edges$likelihood
      )
    })

  # Analysis ----------------------------------------------------------------
  activities <- exec(
    .fn = viper::viper,
    eset = mat,
    regulon = regulon,
    verbose = verbose,
    minsize = minsize,
    pleiotropy = pleiotropy,
    eset.filter = eset.filter,
    !!!list(...)
  )

  # source x condition result -> long tibble with a two-sided normal p-value
  activities %>%
    as.data.frame() %>%
    rownames_to_column("source") %>%
    pivot_longer(-source, names_to = "condition", values_to = "score") %>%
    add_column(statistic = "viper", .before = 1) %>%
    mutate(p_value = 2*stats::pnorm(-abs(score)))
}

# --------------------------------------------------------------------------------
# /R/statistic-zscore.R:
# --------------------------------------------------------------------------------

#' z-score
#'
#' @description
#' Calculates regulatory activities using a z-score as described in KSEA or RoKAI.
#'
#' @details
#' The z-score calculates the mean of the molecular features of the known targets
#' for each regulator and adjusts it for the number of identified targets for the
#' regulator, the standard deviation of all molecular features (RoKAI), as well as
#' the mean of all molecular features (KSEA).
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#' @param na.rm Should missing values (including NaN) be omitted from the
#' calculations of [base::rowMeans()]?
#' @param minsize Integer indicating the minimum number of targets per source.
#' @param flavor Whether the calculation should be based on RoKAI (default) or
#' KSEA.
#'
#' @return A long format tibble of the enrichment scores for each source
#' across the samples. Resulting tibble contains the following columns:
#' 1. `statistic`: Indicates which method is associated with which score.
#' 2. `source`: Source nodes of `network`.
#' 3. `condition`: Condition representing each column of `mat`.
#' 4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @export
#'
#' @importFrom stats coef lm summary.lm
#' @importFrom magrittr %<>% %>%
#' @importFrom dplyr ungroup
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_zscore(mat, net, minsize=0)
run_zscore <- function(mat,
                       network,
                       .source = source,
                       .target = target,
                       .mor = mor,
                       .likelihood = likelihood,
                       sparse = FALSE,
                       center = FALSE,
                       na.rm = FALSE,
                       minsize = 5L,
                       flavor = "RoKAI"
) {

  # NSE vs. R CMD check workaround
  condition <- likelihood <- mor <- p_value <- score <-
    source <- statistic <- target <- NULL

  # Check for NAs/Infs in mat
  mat <- check_nas_infs(mat)

  # Convert to standard tibble: source-target-mor, then apply `minsize`.
  std_net <- rename_net(
    network,
    {{ .source }},
    {{ .target }},
    {{ .mor }},
    {{ .likelihood }}
  )
  std_net <- filt_minsize(rownames(mat), std_net, minsize)

  # Preprocessing -------------------------------------------------------
  prepared <- .fit_preprocessing(std_net, mat, center, na.rm, sparse)

  # Model evaluation ----------------------------------------------------
  ungroup(.zscore_analysis(prepared$mat, prepared$mor_mat, flavor))

}

#' Wrapper to execute run_zscore() logic on preprocessed data
#'
#' Calculate a z-score from the molecular features of its targets and
#' normalise it by the background molecular features.
#'
#' @inheritParams run_zscore
#' @param mor_mat Matrix of regulator weights with targets in rows and sources
#' in columns; zero entries are ignored.
#'
#' @inherit run_zscore return
#' @keywords internal
#' @importFrom stats pnorm sd
#' @importFrom dplyr filter inner_join mutate group_by summarise arrange
#' @importFrom tibble rownames_to_column tibble
#' @importFrom tidyr pivot_longer drop_na
#' @importFrom purrr map_dfr
#' @importFrom magrittr %<>% %>%
#' @noRd
.zscore_analysis <- function(mat, mor_mat, flavor) {
  # Long-format network (target, source, mor) holding only non-zero weights.
  net <- mor_mat %>%
    as.data.frame() %>%
    rownames_to_column("target") %>%
    pivot_longer(cols = -target, values_to = "mor", names_to = "source") %>%
    filter(mor != 0)

  conditions <- colnames(mat)

  scores <- purrr::map_dfr(seq_len(ncol(mat)), function(idx) {
    # One condition at a time: keep the features that have a value.
    mat_c <- mat[, idx, drop = FALSE] %>%
      data.frame() %>%
      drop_na() %>%
      rownames_to_column("target")

    # data.frame() rewrites non-syntactic column names (check.names = TRUE),
    # so the reported condition is taken from `mat` itself. The rewritten
    # name is only used when `mat` has no column names at all.
    condition_label <- if (is.null(conditions)) {
      colnames(mat_c)[2]
    } else {
      conditions[idx]
    }
    colnames(mat_c)[2] <- "stat"

    # Background statistics over every measured feature of this condition.
    background_sd <- sd(mat_c$stat, na.rm = TRUE)
    # RoKAI does not subtract the background mean; any other flavor does.
    background_mean <- if (flavor == "RoKAI") {
      0
    } else {
      mean(mat_c$stat, na.rm = TRUE)
    }

    # Keep the edges whose target was measured, weight the readout by mor and
    # average per source. `m` is the number of contributing targets.
    # NOTE(review): p.value is a single normal tail, while run_ulm() and
    # run_viper() report two-sided p-values - confirm this is intended.
    per_source <- inner_join(net, mat_c, by = "target") %>%
      drop_na() %>%
      mutate(value = mor * stat) %>%
      group_by(source) %>%
      summarise(mS = mean(value), m = n()) %>%
      arrange(source) %>%
      mutate(
        z.score = ((mS - background_mean) * sqrt(m)) / background_sd,
        p.value = pnorm(-abs(z.score))
      )

    tibble(
      statistic = "z_score",
      source = per_source$source,
      condition = condition_label,
      score = per_source$z.score,
      p_value = per_source$p.value
    )
  })

  # Arrange scores according to the order of sources in mor_mat
  scores %>%
    arrange(match(source, colnames(mor_mat)))
}

# --------------------------------------------------------------------------------
# /R/utils-decoupler-formats.R:
# --------------------------------------------------------------------------------

# File description --------------------------------------------------------
# The purpose of this file is to generate a documentation skeleton from
# which the methods in the package can inherit the corresponding attributes.
# Thus avoiding duplication of documentation while maintaining uniformity
# through the definition of functions in the package.

# mat --------------------------------------------------------------------

#' DecoupleR mat format
#'
#' @param mat Matrix to evaluate (e.g. expression matrix).
#'   Target nodes in rows and conditions in columns.
#'   `rownames(mat)` must have at least one intersection with the elements
#'   in `network` `.target` column.
#'
#' @keywords internal
#' @family decoupleR formats
#' @name .decoupler_mat_format
#' @aliases mat_format
NULL

# network -----------------------------------------------------------------

#' DecoupleR network format
#'
#' @description
#' A network passed to any `run_` method in the package must contain at
#' least two attributes: `.source` and `.target`. In addition,
#' the methods must map their corresponding metadata associated with their edges.
#'
#' @param network Tibble or dataframe with edges and its associated metadata.
#' @param .source Column with source nodes.
#' @param .target Column with target nodes.
#' @param .mor Column with edge mode of regulation (i.e. mor).
#' @param .likelihood Deprecated argument. Now it will always be set to 1.
36 | #' 37 | #' @details 38 | #' * All the attributes to be mapped are prefixed by `.` 39 | #' * The idea of using this type of mapping is to provide flexibility to 40 | #' different types of networks, be they regulatory, metabolic, or of any 41 | #' other type. This way, you should only consider having your network or 42 | #' networks in a long format and these can easily be manipulated by functions 43 | #' within the [tidyverse ecosystem](https://www.tidyverse.org/). 44 | #' 45 | #' @keywords internal 46 | #' @family decoupleR formats 47 | #' @name .decoupler_network_format 48 | #' @aliases network_format 49 | NULL 50 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | #' @return Pipe an object forward into a function or call expression. 12 | #' @examples 13 | #' c(1, 2, 3) %>% sum() 14 | #' @noMd 15 | NULL 16 | -------------------------------------------------------------------------------- /R/utils-profiles.R: -------------------------------------------------------------------------------- 1 | 2 | # NSE vs. R CMD check workaround 3 | condition <- likelihood <- mor <- score <- source <- target <- NULL 4 | 5 | #' @inherit tidyr::complete title 6 | #' 7 | #' @inherit tidyr::complete description 8 | #' 9 | #' @inheritParams tidyr::complete 10 | #' @inheritParams tidyr::pivot_wider 11 | #' @param sources A named vector or list with the values to expand and get 12 | #' profile. 13 | #' 14 | #' @return A data frame with the expanded grid of the values passed in 15 | #' `sources` and filled as specified in the `fill` argument. 
16 | #' @examples 17 | #' \dontrun{ 18 | #' library(dplyr, warn.conflicts = FALSE) 19 | #' df <- tibble( 20 | #' group = c(1:2, 1), 21 | #' item_id = c(1:2, 2), 22 | #' item_name = c("a", "b", "b"), 23 | #' value1 = 1:3, 24 | #' value2 = 4:6 25 | #' ) 26 | #' 27 | #' to_get_profile <- list(group = c(1, 2, 3), item_id = c(1, 2)) 28 | #' 29 | #' # This will add the combinations of group 3 with the id of the items 30 | #' df %>% get_profile_of(sources = to_get_profile) 31 | #' 32 | #' # You can also choose to fill in missing values 33 | #' 34 | #' # This only fills with "Unknown" the NA values of the column item_name 35 | #' df %>% get_profile_of( 36 | #' sources = to_get_profile, 37 | #' values_fill = list(item_name = "Unknown") 38 | #' ) 39 | #' 40 | #' # Replace all NAs with "Unknown" 41 | #' df %>% get_profile_of(sources = to_get_profile, values_fill = "Unknown") 42 | #' } 43 | #' @keywords internal 44 | #' @seealso [complete][tidyr::complete] [expand][tidyr::expand] 45 | #' 46 | #' @import dplyr 47 | #' @import tidyr 48 | #' @importFrom rlang exec !!!
get_profile_of <- function(data, sources, values_fill = NA) {
  # The function only allows to reduce or extend the length of the profile,
  # not to add metadata
  stopifnot(all(names(sources) %in% colnames(data)))

  # Drop duplicated entries
  sources <- map(sources, unique)

  # Build every combination of the requested values and attach the rows of
  # `data` that match them; combinations absent from `data` come back as NA.
  new_data <-
    exec(expand_grid, !!!sources) %>%
    left_join(data, by = names(sources))

  if (is_list(values_fill)) {
    # Per-column replacements
    replace_na(new_data, replace = values_fill)
  } else if (length(values_fill) == 1 && !is.na(values_fill)) {
    # The length test comes first: `&&` needs a single TRUE/FALSE on its left
    # and raises an error for longer vectors on R >= 4.3.
    new_data %>%
      mutate(across(
        .cols = everything(),
        .fns = ~ replace_na(.x, replace = values_fill)
      ))
  } else {
    # NA (the default) or anything that is not a single value: no filling
    new_data
  }
}

#' Pivot a data frame to wider and convert it to matrix
#'
#' @description Generates a kind of table where the rows come from `id_cols`,
#' the columns from `names_from` and the values from `values_from`.
#'
#' @details
#' In the current state of the function, to ensure its operation,
#' the `id_cols` parameter is a single selector.
#'
#' @inheritParams tidyr::pivot_wider
#' @inheritParams tidyr::spread
#' @param to_matrix Logical value indicating if the result should be a matrix.
#' Parameter is ignored in case `to_sparse` is `TRUE`.
#' @param to_sparse Logical value indicating whether the resulting matrix
#' should be sparse or not.
#'
#' @return "widened" data; it is increasing the number of columns and
#' decreasing the number of rows.
#'
#' @import dplyr
#' @import tibble
#' @import tidyr
#' @importFrom Matrix Matrix
#' @export
#' @examples
#' \dontrun{
#' df <- tibble::tibble(
#'     tf = c("tf_1", "tf_1", "tf_2", "tf_2"),
#'     gene = c("gene_1", "gene_2", "gene_1", "gene_2"),
#'     mor = c(1, -1, 1, -1)
#' )
#'
#' # Return a tibble
#' pivot_wider_profile(
#'     data = df,
#'     id_cols = tf,
#'     names_from = gene,
#'     values_from = mor
#' )
#'
#' # Return a matrix
#' pivot_wider_profile(
#'     data = df,
#'     id_cols = tf,
#'     names_from = gene,
#'     values_from = mor,
#'     to_matrix = TRUE
#' )
#' # Return a sparse Matrix of class "dgCMatrix"
#' pivot_wider_profile(
#'     data = df,
#'     id_cols = tf,
#'     names_from = gene,
#'     values_from = mor,
#'     to_sparse = TRUE
#' )
#' }
#' @keywords internal
pivot_wider_profile <- function(data,
                                id_cols,
                                names_from,
                                values_from,
                                values_fill = NA,
                                to_matrix = FALSE,
                                to_sparse = FALSE,
                                ...) {
  # Name of the identifier column; it becomes the row names of the result.
  id_label <- as_label(enquo(id_cols))

  wide <- data %>%
    select({{ id_cols }}, {{ names_from }}, {{ values_from }}) %>%
    pivot_wider(
      id_cols = {{ id_cols }},
      names_from = {{ names_from }},
      values_from = {{ values_from }},
      values_fill = values_fill,
      ...
    )
  wider_profile <- column_to_rownames(wide, var = id_label)

  # `to_sparse` takes precedence over `to_matrix`.
  if (to_sparse == TRUE) {
    return(Matrix(data = as.matrix(wider_profile), sparse = TRUE))
  }
  if (to_matrix == TRUE) {
    return(as.matrix(wider_profile))
  }
  wider_profile
}

# --------------------------------------------------------------------------------
# /R/utils-randomize-matrix.R:
# --------------------------------------------------------------------------------

#' Randomize matrix
#'
#' Utility function used in functions that require permutations of the
#' expression matrix
#'
#' @param mat Matrix to randomize.
#' @param randomize_type How to randomize: `"rows"` permutes whole rows,
#' `"cols_independently"` permutes the values of each column separately.
#' In both cases the row names stay in their original order.
#'
#' @return Randomized matrix
#' @examples
#' \dontrun{
#' mat <- matrix(seq_len(9), ncol = 3)
#' mat
#'
#' set.seed(42)
#' randomize_matrix(mat, randomize_type = "rows")
#'
#' set.seed(42)
#' randomize_matrix(mat, randomize_type = "cols_independently")
#' }
#' @keywords internal
randomize_matrix <- function(mat,
                             randomize_type = c("rows", "cols_independently")) {
  randomize_type <- match.arg(randomize_type)
  n_rows <- nrow(mat)

  # Permutations are drawn as row indices with sample.int(). Calling sample()
  # on the values themselves treats a single number `x` as `1:x`, which breaks
  # the shape of one-row matrices.
  randomized <- switch(randomize_type,
    rows = mat[sample.int(n_rows), , drop = FALSE],
    cols_independently = {
      shuffled <- mat
      for (j in seq_len(ncol(mat))) {
        shuffled[, j] <- mat[sample.int(n_rows), j]
      }
      shuffled
    }
  )

  # Values move, labels do not.
  rownames(randomized) <- rownames(mat)
  randomized
}

# --------------------------------------------------------------------------------
# /R/utils-tidy-eval.R:
# --------------------------------------------------------------------------------

#' Tidy eval helpers
#'
#' @description
#'
#' * [rlang::sym()] creates a symbol from a string and
#'   [`syms()`][rlang::sym] creates a list of symbols from a
#'   character vector.
#'
#' * [`enquo()`][rlang::nse-defuse] and
#'   [`enquos()`][rlang::nse-defuse] delay the execution of one or
#'   several function arguments.
`enquo()` returns a single quoted 12 | #' expression, which is like a blueprint for the delayed computation. 13 | #' `enquos()` returns a list of such quoted expressions. 14 | #' 15 | #' * [`expr()`][rlang::nse-defuse] quotes a new expression _locally_. It 16 | #' is mostly useful to build new expressions around arguments 17 | #' captured with [enquo()] or [enquos()]: 18 | #' `expr(mean(!!enquo(arg), na.rm = TRUE))`. 19 | #' 20 | #' * [rlang::as_name()] transforms a quoted variable name 21 | #' into a string. Supplying something else than a quoted variable 22 | #' name is an error. 23 | #' 24 | #' That's unlike [rlang::as_label()] which also returns 25 | #' a single string but supports any kind of R object as input, 26 | #' including quoted function calls and vectors. Its purpose is to 27 | #' summarise that object into a single label. That label is often 28 | #' suitable as a default name. 29 | #' 30 | #' If you don't know what a quoted expression contains (for instance 31 | #' expressions captured with `enquo()` could be a variable 32 | #' name, a call to a function, or an unquoted constant), then use 33 | #' `as_label()`. If you know you have quoted a simple variable 34 | #' name, or would like to enforce this, use `as_name()`. 35 | #' 36 | #' To learn more about tidy eval and how to use these tools, visit 37 | #' and the 38 | #' [Metaprogramming section](https://adv-r.hadley.nz/metaprogramming.html) of 39 | #' [Advanced R](https://adv-r.hadley.nz). 
40 | #' 41 | #' @md 42 | #' @name tidyeval 43 | #' @keywords internal 44 | #' @importFrom rlang expr enquo enquos sym syms .data := as_name as_label quo_is_null quo_is_missing abort exec 45 | #' @aliases expr enquo enquos sym syms .data := as_name as_label quo_is_null quo_is_missing abort exec 46 | #' @export expr enquo enquos sym syms .data := as_name as_label quo_is_null quo_is_missing abort exec 47 | #' @examples 48 | #' if (FALSE) { 49 | #' help("nse-defuse", package = "rlang") 50 | #' } 51 | NULL 52 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # decoupleR 17 | 18 | 19 | [![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) 20 | [![BioC status](http://www.bioconductor.org/shields/build/release/bioc/decoupleR.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/decoupleR) 21 | [![BioC dev status](http://www.bioconductor.org/shields/build/devel/bioc/decoupleR.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/decoupleR) 22 | [![R build status](https://github.com/saezlab/decoupleR/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/saezlab/decoupleR/actions) 23 | [![Codecov test coverage](https://codecov.io/gh/saezlab/decoupleR/branch/master/graph/badge.svg)](https://codecov.io/gh/saezlab/decoupleR?branch=master) 24 | [![GitHub issues](https://img.shields.io/github/issues/saezlab/decoupleR)](https://github.com/saezlab/decoupleR/issues) 25 | 26 | 27 | ## Overview 28 | 29 | There are many methods that allow us to extract biological activities from omics data. 
30 | `decoupleR` is a Bioconductor package containing different statistical methods to 31 | extract biological signatures from prior knowledge within a unified framework. 32 | Additionally, it incorporates methods that take into account the sign and weight of 33 | network interactions. `decoupleR` can be used with any omic, as long as its 34 | features can be linked to a biological process based on prior knowledge. 35 | For example, in transcriptomics gene sets regulated by a transcription 36 | factor, or in phospho-proteomics phosphosites that are targeted by a kinase. 37 | This is the R version, for its faster and memory efficient Python implementation go [here](https://decoupler-py.readthedocs.io/en/latest/). 38 | 39 |

40 | 41 |

42 | 43 | For more information about how this package has been used with real data, 44 | please check the following links: 45 | 46 | - [decoupleR's general usage](https://saezlab.github.io/decoupleR/articles/decoupleR.html) 47 | - [Pathway activity inference in bulk RNA-seq](https://saezlab.github.io/decoupleR/articles/pw_bk.html) 48 | - [Pathway activity inference from scRNA-seq](https://saezlab.github.io/decoupleR/articles/pw_sc.html) 49 | - [Transcription factor activity inference in bulk RNA-seq](https://saezlab.github.io/decoupleR/articles/tf_bk.html) 50 | - [Transcription factor activity inference from scRNA-seq](https://saezlab.github.io/decoupleR/articles/tf_sc.html) 51 | - [Example of Kinase and TF activity estimation](https://saezlab.github.io/kinase_tf_mini_tuto/) 52 | - [decoupleR's manuscript repository](https://github.com/saezlab/decoupleR_manuscript) 53 | - [Python implementation](https://decoupler-py.readthedocs.io/en/latest/) 54 | 55 | # Installation 56 | `decoupleR` is an R package distributed as part of the Bioconductor 57 | project. To install the package, start R and enter: 58 | 59 | ```{r bioconductor_install, eval=FALSE} 60 | install.packages("BiocManager") 61 | BiocManager::install("decoupleR") 62 | ``` 63 | 64 | Alternatively, you can instead install the latest development version from [GitHub](https://github.com/) with: 65 | 66 | ```{r github_install, eval=FALSE} 67 | BiocManager::install("saezlab/decoupleR") 68 | ``` 69 | 70 | ## License 71 | Footprint methods inside `decoupleR` can be used for academic or commercial purposes, except `viper` which holds a non-commercial license. 72 | 73 | The data redistributed by `OmniPath` does not have a license, each original resource carries their own. 74 | [Here](https://omnipathdb.org/info) one can find the license information of all the resources in `OmniPath`. 
75 | 76 | ## Citation 77 | Badia-i-Mompel P., Vélez Santiago J., Braunger J., Geiss C., Dimitrov D., Müller-Dott S., Taus P., Dugourd A., Holland 78 | C.H., Ramirez Flores R.O. and Saez-Rodriguez J. 2022. decoupleR: ensemble of computational methods to infer 79 | biological activities from omics data. Bioinformatics Advances. https://doi.org/10.1093/bioadv/vbac016 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # decoupleR 5 | 6 | 7 | 8 | [![Lifecycle: 9 | maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) 10 | [![BioC 11 | status](http://www.bioconductor.org/shields/build/release/bioc/decoupleR.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/decoupleR) 12 | [![BioC dev 13 | status](http://www.bioconductor.org/shields/build/devel/bioc/decoupleR.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/decoupleR) 14 | [![R build 15 | status](https://github.com/saezlab/decoupleR/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/saezlab/decoupleR/actions) 16 | [![Codecov test 17 | coverage](https://codecov.io/gh/saezlab/decoupleR/branch/master/graph/badge.svg)](https://codecov.io/gh/saezlab/decoupleR?branch=master) 18 | [![GitHub 19 | issues](https://img.shields.io/github/issues/saezlab/decoupleR)](https://github.com/saezlab/decoupleR/issues) 20 | 21 | 22 | ## Overview 23 | 24 | There are many methods that allow us to extract biological activities 25 | from omics data. `decoupleR` is a Bioconductor package containing 26 | different statistical methods to extract biological signatures from 27 | prior knowledge within a unified framework. Additionally, it 28 | incorporates methods that take into account the sign and weight of 29 | network interactions. 
`decoupleR` can be used with any omic, as long as 30 | its features can be linked to a biological process based on prior 31 | knowledge. For example, in transcriptomics gene sets regulated by a 32 | transcription factor, or in phospho-proteomics phosphosites that are 33 | targeted by a kinase. This is the R version, for its faster and memory 34 | efficient Python implementation go 35 | [here](https://decoupler-py.readthedocs.io/en/latest/). 36 | 37 |

38 | 39 |

40 | 41 | For more information about how this package has been used with real 42 | data, please check the following links: 43 | 44 | - [decoupleR’s general 45 | usage](https://saezlab.github.io/decoupleR/articles/decoupleR.html) 46 | - [Pathway activity inference in bulk 47 | RNA-seq](https://saezlab.github.io/decoupleR/articles/pw_bk.html) 48 | - [Pathway activity inference from 49 | scRNA-seq](https://saezlab.github.io/decoupleR/articles/pw_sc.html) 50 | - [Transcription factor activity inference in bulk 51 | RNA-seq](https://saezlab.github.io/decoupleR/articles/tf_bk.html) 52 | - [Transcription factor activity inference from 53 | scRNA-seq](https://saezlab.github.io/decoupleR/articles/tf_sc.html) 54 | - [Example of Kinase and TF activity 55 | estimation](https://saezlab.github.io/kinase_tf_mini_tuto/) 56 | - [decoupleR’s manuscript 57 | repository](https://github.com/saezlab/decoupleR_manuscript) 58 | - [Python 59 | implementation](https://decoupler-py.readthedocs.io/en/latest/) 60 | 61 | # Installation 62 | 63 | `decoupleR` is an R package distributed as part of the Bioconductor 64 | project. To install the package, start R and enter: 65 | 66 | ``` r 67 | install.packages('BiocManager') 68 | BiocManager::install('decoupleR') 69 | ``` 70 | 71 | Alternatively, if you encounter any errors, try installing the latest development version from GitHub: 72 | ```r 73 | install.packages('remotes') 74 | remotes::install_github('saezlab/decoupleR') 75 | ``` 76 | 77 | ## License 78 | 79 | Footprint methods inside `decoupleR` can be used for academic or 80 | commercial purposes, except `viper` which holds a non-commercial 81 | license. 82 | 83 | The data redistributed by `OmniPath` does not have a license, each 84 | original resource carries their own. [Here](https://omnipathdb.org/info) 85 | one can find the license information of all the resources in `OmniPath`.
86 | 87 | ## Citation 88 | 89 | Badia-i-Mompel P., Vélez Santiago J., Braunger J., Geiss C., Dimitrov 90 | D., Müller-Dott S., Taus P., Dugourd A., Holland C.H., Ramirez Flores 91 | R.O. and Saez-Rodriguez J. 2022. decoupleR: ensemble of computational 92 | methods to infer biological activities from omics data. Bioinformatics 93 | Advances. 94 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://saezlab.github.io/decoupleR/ 2 | 3 | destination: docs 4 | 5 | template: 6 | bootstrap: 5 7 | bootswatch: cosmo 8 | bslib: 9 | primary: "#A6A6A6" 10 | params: 11 | ganalytics: UA-119440867-11 12 | highlightcss: false 13 | 14 | development: 15 | mode: auto 16 | 17 | navbar: 18 | structure: 19 | left: [home, intro, reference, articles, tutorials, news] 20 | right: [github, twitter, homepage] 21 | components: 22 | twitter: 23 | icon: "fab fa-twitter fa-lg" 24 | href: https://twitter.com/saezlab 25 | homepage: 26 | icon: "fas fa-university" 27 | href: https://www.saezlab.org 28 | type: dark 29 | bg: primary 30 | 31 | reference: 32 | - title: decoupleR 33 | desc: > 34 | contents: 35 | - decouple 36 | - title: Statistics 37 | desc: > 38 | contents: 39 | - starts_with("run_") 40 | - title: Network converters 41 | desc: > 42 | contents: 43 | - convert_f_defaults 44 | - rename_net 45 | - extract_sets 46 | - title: Utils 47 | desc: > 48 | contents: 49 | - check_corr 50 | - get_toy_data 51 | - show_methods 52 | - .fit_preprocessing 53 | - filt_minsize 54 | - intersect_regulons 55 | - title: Omnipath wrappers 56 | desc: > 57 | contents: 58 | - show_resources 59 | - get_resource 60 | - get_dorothea 61 | - get_collectri 62 | - get_progeny 63 | - get_ksn_omnipath 64 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | 
comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite decoupleR in publications, please use:") 2 | 3 | citEntry(entry = "article", 4 | author = personList(as.person("Pau Badia-i-Mompel"), 5 | as.person("Jesús Vélez Santiago"), 6 | as.person("Jana Braunger"), 7 | as.person("Celina Geiss"), 8 | as.person("Daniel Dimitrov"), 9 | as.person("Sophia Müller-Dott"), 10 | as.person("Petr Taus"), 11 | as.person("Aurelien Dugourd"), 12 | as.person("Christian H. Holland"), 13 | as.person("Ricardo O. Ramirez Flores"), 14 | as.person("Julio Saez-Rodriguez")), 15 | title = "decoupleR: ensemble of computational methods to infer biological activities from omics data", 16 | journal = "Bioinformatics Advances", 17 | year = "2022", 18 | volume = "", 19 | pages = "", 20 | doi = "https://doi.org/10.1093/bioadv/vbac016", 21 | url = "", 22 | textVersion = "Badia-i-Mompel P., Vélez Santiago J., Braunger J., Geiss C., Dimitrov 23 | D., Müller-Dott S., Taus P., Dugourd A., Holland C.H., Ramirez Flores R.O. 24 | and Saez-Rodriguez J. 2022. decoupleR: Ensemble of computational methods to 25 | infer biological activities from omics data. Bioinformatics Advances. 
26 | https://doi.org/10.1093/bioadv/vbac016" 27 | ) 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /inst/extdata/bk_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/extdata/bk_data.rds -------------------------------------------------------------------------------- /inst/extdata/sc_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/extdata/sc_data.rds -------------------------------------------------------------------------------- /inst/figures/graphical_abstract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/figures/graphical_abstract.png -------------------------------------------------------------------------------- /inst/figures/net_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/figures/net_plot.png -------------------------------------------------------------------------------- /inst/testdata/README.md: -------------------------------------------------------------------------------- 1 | # Files for unit testing 2 | 3 | Here we will store input and output files for our unit tests via [`testthat`](https://testthat.r-lib.org). 
4 | -------------------------------------------------------------------------------- /inst/testdata/generate_expected_output.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | library(purrr) 3 | 4 | # Base directories definition -------------------------------------------------- 5 | input_dir <- file.path("inst", "testdata", "inputs") 6 | output_dir <- file.path("inst", "testdata", "outputs") 7 | 8 | # Specific directories creation ------------------------------------------------ 9 | # Here you need to extend the vector with the name of your new statistic. 10 | available_statistics = c( 11 | 'udt', 12 | 'mdt', 13 | 'aucell', 14 | 'wmean', 15 | 'wsum', 16 | 'ulm', 17 | 'mlm', 18 | 'viper', 19 | 'gsva', 20 | 'ora', 21 | 'fgsea' 22 | ) 23 | 24 | out <- available_statistics %>% 25 | file.path(output_dir, .) %>% 26 | setNames(object = ., nm = basename(.)) %>% 27 | as.list() 28 | 29 | sapply(out, dir.create, showWarnings = TRUE) 30 | 31 | # Collect individual default outputs files to decouple() test. 
32 | 33 | out_default <- stringr::str_glue( 34 | "output-{available_statistics}.rds" 35 | ) %>% 36 | map2(.x = out, .y = ., file.path) 37 | 38 | decouple_dir <- file.path(output_dir, "decouple") 39 | dir.create(decouple_dir, showWarnings = TRUE) 40 | 41 | # Load data to generated outputs ----------------------------------------------- 42 | mat <- readRDS(file.path(input_dir, 'mat.rds')) 43 | net <- readRDS(file.path(input_dir, 'net.rds')) 44 | 45 | #----- run_udt() --------------------------------------------------------------- 46 | run_udt(mat, net, minsize = 0) %>% 47 | saveRDS(out_default$udt) 48 | 49 | #----- run_mdt() --------------------------------------------------------------- 50 | run_mdt(mat, net, minsize=0, trees=1000) %>% 51 | saveRDS(out_default$mdt) 52 | 53 | #----- run_aucell() ------------------------------------------------------------ 54 | run_aucell(mat, net, minsize=0, nproc=1, aucMaxRank=3) %>% 55 | saveRDS(out_default$aucell) 56 | 57 | #----- run_wmean() ------------------------------------------------------------- 58 | run_wmean(mat, net, minsize=0) %>% 59 | saveRDS(out_default$wmean) 60 | 61 | #----- run_wsum() -------------------------------------------------------------- 62 | run_wsum(mat, net, minsize=0) %>% 63 | saveRDS(out_default$wsum) 64 | 65 | #----- run_ulm() --------------------------------------------------------------- 66 | run_ulm(mat, net, minsize=0) %>% 67 | saveRDS(out_default$ulm) 68 | 69 | #----- run_mlm() --------------------------------------------------------------- 70 | run_mlm(mat, net, minsize=0) %>% 71 | saveRDS(out_default$mlm) 72 | 73 | #----- run_viper() ------------------------------------------------------------- 74 | run_viper(mat, net, minsize=0) %>% 75 | saveRDS(out_default$viper) 76 | 77 | #----- run_gsva() -------------------------------------------------------------- 78 | run_gsva(mat, net, minsize=0) %>% 79 | saveRDS(out_default$gsva) 80 | 81 | #----- run_ora() 
--------------------------------------------------------------- 82 | run_ora(mat, net, minsize=0, n_up=3, n_bottom=3) %>% 83 | saveRDS(out_default$ora) 84 | 85 | #----- run_fgsea() ------------------------------------------------------------- 86 | run_fgsea(mat, net, minsize=0) %>% 87 | saveRDS(out_default$fgsea) 88 | 89 | # decouple() -------------------------------------------------------------- 90 | # This section should be kept at the end of the file 91 | # and should be executed every time a new statistic 92 | # is added or any entry of the default models is modified. 93 | 94 | map_dfr(out_default, readRDS) %>% 95 | dplyr::arrange(.data$statistic, .data$source, .data$condition) %>% 96 | saveRDS(file.path(decouple_dir, "output-decouple.rds")) 97 | -------------------------------------------------------------------------------- /inst/testdata/inputs/mat.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/inputs/mat.rds -------------------------------------------------------------------------------- /inst/testdata/inputs/net.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/inputs/net.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/aucell/output-aucell.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/aucell/output-aucell.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/decouple/output-decouple.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/decouple/output-decouple.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/fgsea/output-fgsea.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/fgsea/output-fgsea.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/gsva/output-gsva.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/gsva/output-gsva.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/mdt/output-mdt.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/mdt/output-mdt.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/mlm/output-mlm.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/mlm/output-mlm.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/ora/output-ora.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/ora/output-ora.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/udt/output-udt.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/udt/output-udt.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/ulm/output-ulm.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/ulm/output-ulm.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/viper/output-viper.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/viper/output-viper.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/wmean/output-wmean.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/wmean/output-wmean.rds -------------------------------------------------------------------------------- /inst/testdata/outputs/wsum/output-wsum.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/inst/testdata/outputs/wsum/output-wsum.rds -------------------------------------------------------------------------------- /man/check_corr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-pre.R 3 | \name{check_corr} 4 | \alias{check_corr} 5 | \title{Check correlation (colinearity)} 6 | \usage{ 7 | check_corr( 8 | 
network, 9 | .source = "source", 10 | .target = "target", 11 | .mor = "mor", 12 | .likelihood = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 17 | 18 | \item{.source}{Column with source nodes.} 19 | 20 | \item{.target}{Column with target nodes.} 21 | 22 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 23 | 24 | \item{.likelihood}{Deprecated argument. Now it will always be set to 1.} 25 | } 26 | \value{ 27 | Correlation pairs tibble. 28 | } 29 | \description{ 30 | Checks the correlation across the regulators in a network. 31 | } 32 | \examples{ 33 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 34 | net <- readRDS(file.path(inputs_dir, "net.rds")) 35 | check_corr(net, .source='source') 36 | } 37 | -------------------------------------------------------------------------------- /man/convert_f_defaults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-dataset-converters.R 3 | \name{convert_f_defaults} 4 | \alias{convert_f_defaults} 5 | \title{Rename columns and add defaults values if column not present} 6 | \usage{ 7 | convert_f_defaults(.data, ..., .def_col_val = c(), .use_dots = TRUE) 8 | } 9 | \arguments{ 10 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 11 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 12 | more details.} 13 | 14 | \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use 15 | \code{new_name = old_name} to rename selected variables. 16 | 17 | For \code{rename_with()}: additional arguments passed onto \code{.fn}.} 18 | 19 | \item{.def_col_val}{Named vector with columns with default values 20 | if none exist after rename.} 21 | 22 | \item{.use_dots}{Should a dot prefix be added to renamed variables? 
23 | This will allow swapping of columns.} 24 | } 25 | \value{ 26 | An object of the same type as .data. The output has the following properties: 27 | \itemize{ 28 | \item Rows are not affected. 29 | \item Column names are changed. 30 | \item Column order is the same as that of the function call. 31 | } 32 | } 33 | \description{ 34 | \code{convert_f_defaults()} combines the \code{\link[dplyr:rename]{dplyr::rename()}} way of 35 | working with \code{\link[tibble:add_column]{tibble::add_column()}} to add columns 36 | with default values in case they don't exist after renaming the data. 37 | } 38 | \details{ 39 | The objective of using .use_dots is to be able to swap columns which, 40 | by default, is not allowed by the \code{\link[dplyr:rename]{dplyr::rename()}} function. 41 | The same behavior can be replicated by simply using \code{\link[dplyr:select]{dplyr::select()}}; 42 | however, the select evaluation allows much more flexibility so that 43 | unexpected results could be obtained. Despite this, a future implementation 44 | will consider this form of execution to allow renaming the same 45 | column to multiple ones (i.e. extend dataframe extension).
46 | } 47 | \examples{ 48 | 49 | df <- tibble::tibble(x = 1, y = 2, z = 3) 50 | 51 | # Rename columns 52 | df <- tibble::tibble(x = 1, y = 2) 53 | convert_f_defaults( 54 | .data = df, 55 | new_x = x, 56 | new_y = y, 57 | new_z = NULL, 58 | .def_col_val = c(new_z = 3) 59 | ) 60 | } 61 | -------------------------------------------------------------------------------- /man/decouple.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-decouple.R 3 | \name{decouple} 4 | \alias{decouple} 5 | \title{Evaluate multiple statistics with same input data} 6 | \usage{ 7 | decouple( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | statistics = NULL, 13 | args = list(NULL), 14 | consensus_score = TRUE, 15 | consensus_stats = NULL, 16 | include_time = FALSE, 17 | show_toy_call = FALSE, 18 | minsize = 5 19 | ) 20 | } 21 | \arguments{ 22 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 23 | Target nodes in rows and conditions in columns. 24 | \code{rownames(mat)} must have at least one intersection with the elements 25 | in \code{network} \code{.target} column.} 26 | 27 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 28 | 29 | \item{.source}{Column with source nodes.} 30 | 31 | \item{.target}{Column with target nodes.} 32 | 33 | \item{statistics}{Statistical methods to be run sequentially. If none are 34 | provided, only top performer methods are run (mlm, ulm and wsum).} 35 | 36 | \item{args}{A list of argument-lists the same length as \code{statistics} 37 | (or length 1). 
The default argument, list(NULL), will be recycled to the 38 | same length as \code{statistics}, and will call each function with no arguments 39 | (apart from \code{mat}, \code{network}, \code{.source}, and \code{.target}).} 40 | 41 | \item{consensus_score}{Boolean whether to run a consensus score between 42 | methods.} 43 | 44 | \item{consensus_stats}{List of estimate names to use for the calculation 45 | of the consensus score. This is used to filter out extra estimations 46 | from some methods, for example wsum returns wsum, corr_wsum and norm_wsum. If 47 | none are provided, and also no statistics were provided, only top performer 48 | methods are used (mlm, ulm and norm_wsum). Else, it will use all available 49 | estimates after running all methods in the statistics argument.} 50 | 51 | \item{include_time}{Should the time per statistic evaluated be reported?} 52 | 53 | \item{show_toy_call}{Should the call of each statistic be reported?} 54 | 55 | \item{minsize}{Integer indicating the minimum number of targets per source.} 56 | } 57 | \value{ 58 | A long format tibble of the enrichment scores for each source 59 | across the samples. Resulting tibble contains the following columns: 60 | \enumerate{ 61 | \item \code{run_id}: Indicates the order in which the methods have been executed. 62 | \item \code{statistic}: Indicates which method is associated with which score. 63 | \item \code{source}: Source nodes of \code{network}. 64 | \item \code{condition}: Condition representing each column of \code{mat}. 65 | \item \code{score}: Regulatory activity (enrichment score). 66 | \item \code{statistic_time}: If requested, internal execution time indicator. 67 | \item \code{p_value}: p-value (if available) of the obtained score. 68 | } 69 | } 70 | \description{ 71 | Calculate the source activity per sample out of a gene expression matrix by 72 | coupling a regulatory network with a variety of statistics.
73 | } 74 | \examples{ 75 | if (FALSE) { 76 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 77 | 78 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 79 | net <- readRDS(file.path(inputs_dir, "net.rds")) 80 | 81 | decouple( 82 | mat = mat, 83 | network = net, 84 | .source = "source", 85 | .target = "target", 86 | statistics = c("gsva", "wmean", "wsum", "ulm", "aucell"), 87 | args = list( 88 | gsva = list(verbose = FALSE), 89 | wmean = list(.mor = "mor", .likelihood = "likelihood"), 90 | wsum = list(.mor = "mor"), 91 | ulm = list(.mor = "mor") 92 | ), 93 | minsize = 0 94 | ) 95 | } 96 | } 97 | \seealso{ 98 | Other decoupleR statistics: 99 | \code{\link{run_aucell}()}, 100 | \code{\link{run_fgsea}()}, 101 | \code{\link{run_gsva}()}, 102 | \code{\link{run_mdt}()}, 103 | \code{\link{run_mlm}()}, 104 | \code{\link{run_ora}()}, 105 | \code{\link{run_udt}()}, 106 | \code{\link{run_ulm}()}, 107 | \code{\link{run_viper}()}, 108 | \code{\link{run_wmean}()}, 109 | \code{\link{run_wsum}()}, 110 | \code{\link{run_zscore}()} 111 | } 112 | \concept{decoupleR statistics} 113 | -------------------------------------------------------------------------------- /man/decoupleR-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-package.R 3 | \docType{package} 4 | \name{decoupleR-package} 5 | \alias{decoupleR} 6 | \alias{decoupleR-package} 7 | \title{decoupleR: decoupleR: Ensemble of computational methods to infer biological activities from omics data} 8 | \description{ 9 | Many methods allow us to extract biological activities from omics data using information from prior knowledge resources, reducing the dimensionality for increased statistical power and better interpretability. Here, we present decoupleR, a Bioconductor package containing different statistical methods to extract these signatures within a unified framework. 
decoupleR allows the user to flexibly test any method with any resource. It incorporates methods that take into account the sign and weight of network interactions. decoupleR can be used with any omic, as long as its features can be linked to a biological process based on prior knowledge. For example, in transcriptomics gene sets regulated by a transcription factor, or in phospho-proteomics phosphosites that are targeted by a kinase. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://saezlab.github.io/decoupleR/} 15 | \item Report bugs at \url{https://github.com/saezlab/decoupleR/issues} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Pau Badia-i-Mompel \email{pau.badia@uni-heidelberg.de} (\href{https://orcid.org/0000-0002-1004-3923}{ORCID}) 21 | 22 | Authors: 23 | \itemize{ 24 | \item Jesús Vélez-Santiago \email{jvelezmagic@gmail.com} (\href{https://orcid.org/0000-0001-5128-3838}{ORCID}) 25 | \item Jana Braunger \email{jana.bc@gmx.de} (\href{https://orcid.org/0000-0003-0820-9987}{ORCID}) 26 | \item Celina Geiss \email{celina.geiss@stud.uni-heidelberg.de} (\href{https://orcid.org/0000-0002-8740-706X}{ORCID}) 27 | \item Daniel Dimitrov \email{daniel.dimitrov@uni-heidelberg.de} (\href{https://orcid.org/0000-0002-5197-2112}{ORCID}) 28 | \item Sophia Müller-Dott \email{sophia.mueller-dott@uni-heidelberg.de} (\href{https://orcid.org/0000-0002-9710-1865}{ORCID}) 29 | \item Petr Taus \email{petr.taus@ceitec.muni.cz} (\href{https://orcid.org/0000-0003-3764-9033}{ORCID}) 30 | \item Aurélien Dugourd \email{aurelien.dugourd@bioquant.uni-heidelberg.de} (\href{https://orcid.org/0000-0002-0714-028X}{ORCID}) 31 | \item Christian H. Holland \email{cholland2408@gmail.com} (\href{https://orcid.org/0000-0002-3060-5786}{ORCID}) 32 | \item Ricardo O. 
Ramirez Flores \email{roramirezf@uni-heidelberg.de} (\href{https://orcid.org/0000-0003-0087-371X}{ORCID}) 33 | \item Julio Saez-Rodriguez \email{pub.saez@uni-heidelberg.de} (\href{https://orcid.org/0000-0002-8552-8976}{ORCID}) 34 | } 35 | 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/dot-decoupler_mat_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-decoupler-formats.R 3 | \name{.decoupler_mat_format} 4 | \alias{.decoupler_mat_format} 5 | \alias{mat_format} 6 | \title{DecoupleR mat format} 7 | \arguments{ 8 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 9 | Target nodes in rows and conditions in columns. 10 | \code{rownames(mat)} must have at least one intersection with the elements 11 | in \code{network} \code{.target} column.} 12 | } 13 | \description{ 14 | DecoupleR mat format 15 | } 16 | \seealso{ 17 | Other decoupleR formats: 18 | \code{\link{.decoupler_network_format}} 19 | } 20 | \concept{decoupleR formats} 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/dot-decoupler_network_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-decoupler-formats.R 3 | \name{.decoupler_network_format} 4 | \alias{.decoupler_network_format} 5 | \alias{network_format} 6 | \title{DecoupleR network format} 7 | \arguments{ 8 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 9 | 10 | \item{.source}{Column with source nodes.} 11 | 12 | \item{.target}{Column with target nodes.} 13 | 14 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 15 | 16 | \item{.likelihood}{Deprecated argument. 
Now it will always be set to 1.} 17 | } 18 | \description{ 19 | A network passed to any \code{run_} method in the package must contain at 20 | least two attributes: \code{.source} and \code{.target}. In addition, 21 | the methods must map their corresponding metadata associated with their edges. 22 | } 23 | \details{ 24 | \itemize{ 25 | \item All the attributes to be mapped are prefixed by \code{.} 26 | \item The idea of using this type of mapping is to provide flexibility to 27 | different types of networks, be they regulatory, metabolic, or of any 28 | other type. This way, you should only consider having your network or 29 | networks in a long format and these can easily be manipulated by functions 30 | within the \href{https://www.tidyverse.org/}{tidyverse ecosystem}. 31 | } 32 | } 33 | \seealso{ 34 | Other decoupleR formats: 35 | \code{\link{.decoupler_mat_format}} 36 | } 37 | \concept{decoupleR formats} 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /man/dot-fit_preprocessing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-pre.R 3 | \name{.fit_preprocessing} 4 | \alias{.fit_preprocessing} 5 | \title{Pre-processing for methods that fit networks.} 6 | \usage{ 7 | .fit_preprocessing(network, mat, center, na.rm, sparse) 8 | } 9 | \arguments{ 10 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 11 | 12 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 13 | Target nodes in rows and conditions in columns. 
14 | \code{rownames(mat)} must have at least one intersection with the elements 15 | in \code{network} \code{.target} column.} 16 | 17 | \item{center}{Logical value indicating if \code{mat} must be centered by 18 | \code{\link[base:colSums]{base::rowMeans()}}.} 19 | 20 | \item{na.rm}{Should missing values (including NaN) be omitted from the 21 | calculations of \code{\link[base:colSums]{base::rowMeans()}}?} 22 | 23 | \item{sparse}{Deprecated parameter.} 24 | } 25 | \value{ 26 | A named list of matrices to evaluate in methods that fit models, like 27 | \code{.mlm_analysis()}. 28 | \itemize{ 29 | \item mat: Features as rows and samples as columns. 30 | \item mor_mat: Features as rows and columns as source. 31 | } 32 | } 33 | \description{ 34 | \itemize{ 35 | \item If \code{center} is true, then the expression values are centered by the 36 | mean of expression across the samples. 37 | } 38 | } 39 | \examples{ 40 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 41 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 42 | net <- readRDS(file.path(inputs_dir, "net.rds")) 43 | net <- rename_net(net, source, target, mor) 44 | .fit_preprocessing(net, mat, center = FALSE, na.rm = FALSE, sparse = FALSE) 45 | } 46 | -------------------------------------------------------------------------------- /man/extract_sets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-dataset-converters.R 3 | \name{extract_sets} 4 | \alias{extract_sets} 5 | \title{Extract sets} 6 | \usage{ 7 | extract_sets(network) 8 | } 9 | \arguments{ 10 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 11 | } 12 | \description{ 13 | Extracts feature sets from a renamed network (see \link{rename_net}). 
14 | } 15 | \examples{ 16 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 17 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 18 | net <- readRDS(file.path(inputs_dir, "net.rds")) 19 | net <- rename_net(net, source, target, mor) 20 | extract_sets(net) 21 | } 22 | -------------------------------------------------------------------------------- /man/filt_minsize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-pre.R 3 | \name{filt_minsize} 4 | \alias{filt_minsize} 5 | \title{Filter sources with minsize targets} 6 | \usage{ 7 | filt_minsize(mat_f_names, network, minsize = 5) 8 | } 9 | \arguments{ 10 | \item{mat_f_names}{Feature names of mat.} 11 | 12 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 13 | 14 | \item{minsize}{Integer indicating the minimum number of targets per source.} 15 | } 16 | \value{ 17 | Filtered network. 18 | } 19 | \description{ 20 | Filter sources of a net with less than minsize targets 21 | } 22 | \examples{ 23 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 24 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 25 | net <- readRDS(file.path(inputs_dir, "net.rds")) 26 | net <- rename_net(net, source, target, mor) 27 | filt_minsize(rownames(mat), net, minsize = 4) 28 | } 29 | -------------------------------------------------------------------------------- /man/get_collectri.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-omnipath.R 3 | \name{get_collectri} 4 | \alias{get_collectri} 5 | \title{CollecTRI gene regulatory network. 6 | Wrapper to access CollecTRI gene regulatory network. 
CollecTRI is a 7 | comprehensive resource containing a curated collection of transcription 8 | factors (TFs) and their target genes. It is an expansion of DoRothEA. 9 | Each interaction is weighted by its mode of regulation (either positive or negative).} 10 | \usage{ 11 | get_collectri( 12 | organism = "human", 13 | split_complexes = FALSE, 14 | load_meta = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{organism}{Which organism to use. Only human, mouse and rat are available.} 20 | 21 | \item{split_complexes}{Whether to split complexes into subunits. By default 22 | complexes are kept as they are.} 23 | 24 | \item{load_meta}{Whether to load meta data for the TF-gene interactions. This is set 25 | to false by default.} 26 | 27 | \item{...}{Optional additional arguments, passed to OmniPath import_transcriptional_interactions.} 28 | } 29 | \description{ 30 | CollecTRI gene regulatory network. 31 | Wrapper to access CollecTRI gene regulatory network. CollecTRI is a 32 | comprehensive resource containing a curated collection of transcription 33 | factors (TFs) and their target genes. It is an expansion of DoRothEA. 34 | Each interaction is weighted by its mode of regulation (either positive or negative). 35 | } 36 | \examples{ 37 | collectri <- get_collectri(organism='human', split_complexes=FALSE) 38 | } 39 | -------------------------------------------------------------------------------- /man/get_dorothea.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-omnipath.R 3 | \name{get_dorothea} 4 | \alias{get_dorothea} 5 | \title{DoRothEA gene regulatory network.} 6 | \usage{ 7 | get_dorothea( 8 | organism = "human", 9 | levels = c("A", "B", "C"), 10 | weight_dict = list(A = 1, B = 2, C = 3, D = 4) 11 | ) 12 | } 13 | \arguments{ 14 | \item{organism}{Which organism to use. 
Only human, mouse and rat are available.} 15 | 16 | \item{levels}{List of confidence levels to return. Goes from A to D, A 17 | being the most confident and D being the least.} 18 | 19 | \item{weight_dict}{Dictionary of values to divide the mode of regulation 20 | (-1 or 1), one for each confidence level. Bigger values will generate 21 | weights close to zero.} 22 | } 23 | \description{ 24 | Wrapper to access DoRothEA gene regulatory network. DoRothEA is a 25 | comprehensive resource containing a curated collection of transcription 26 | factors (TFs) and their target genes. Each interaction is weighted by its 27 | mode of regulation (either positive or negative) and by its confidence level. 28 | } 29 | \examples{ 30 | dorothea <- get_dorothea(organism='human', levels=c('A', 'B')) 31 | } 32 | -------------------------------------------------------------------------------- /man/get_ksn_omnipath.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-omnipath.R 3 | \name{get_ksn_omnipath} 4 | \alias{get_ksn_omnipath} 5 | \title{OmniPath kinase-substrate network} 6 | \usage{ 7 | get_ksn_omnipath(...) 8 | } 9 | \arguments{ 10 | \item{...}{Passed to \code{OmnipathR::import_omnipath_enzsub}.} 11 | } 12 | \description{ 13 | Retrieve a ready to use, curated kinase-substrate Network from the OmniPath 14 | database. 15 | } 16 | \details{ 17 | Import enzyme-PTM network from OmniPath, then filter out anything that is not 18 | phosphorylation or dephosphorylation. Then format the columns for use with decoupleR 19 | functions.
20 | } 21 | -------------------------------------------------------------------------------- /man/get_profile_of.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-profiles.R 3 | \name{get_profile_of} 4 | \alias{get_profile_of} 5 | \title{Complete a data frame with missing combinations of data} 6 | \usage{ 7 | get_profile_of(data, sources, values_fill = NA) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame.} 11 | 12 | \item{sources}{A named vector or list with the values to expand and get 13 | profile.} 14 | 15 | \item{values_fill}{Optionally, a (scalar) value that specifies what each 16 | \code{value} should be filled in with when missing. 17 | 18 | This can be a named list if you want to apply different fill values to 19 | different value columns.} 20 | } 21 | \value{ 22 | A data frame with the expanded grid of the values passed in 23 | \code{sources} and filled as specified in the \code{values_fill} argument. 24 | } 25 | \description{ 26 | Turns implicit missing values into explicit missing values. This is a wrapper 27 | around \code{\link[tidyr:expand]{expand()}}, \code{\link[dplyr:mutate-joins]{dplyr::full_join()}} and \code{\link[tidyr:replace_na]{replace_na()}} that's useful for 28 | completing missing combinations of data.
29 | } 30 | \examples{ 31 | \dontrun{ 32 | library(dplyr, warn.conflicts = FALSE) 33 | df <- tibble( 34 | group = c(1:2, 1), 35 | item_id = c(1:2, 2), 36 | item_name = c("a", "b", "b"), 37 | value1 = 1:3, 38 | value2 = 4:6 39 | ) 40 | 41 | to_get_profile <- list(group = c(1, 2, 3), item_id = c(1, 2)) 42 | 43 | # This will add the combinations of group 3 with the id of the items 44 | df \%>\% get_profile_of(sources = to_get_profile) 45 | 46 | # You can also choose to fill in missing values 47 | 48 | # This fills only the NA values of the column item_name with "Unknown" 49 | df \%>\% get_profile_of( 50 | sources = to_get_profile, 51 | values_fill = list(item_name = "Unknown") 52 | ) 53 | 54 | # Replace all NAs with "Unknown" 55 | df \%>\% get_profile_of(sources = to_get_profile, values_fill = "Unknown") 56 | } 57 | } 58 | \seealso{ 59 | \link[tidyr:complete]{complete} \link[tidyr:expand]{expand} 60 | } 61 | \keyword{internal} 62 | -------------------------------------------------------------------------------- /man/get_progeny.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-omnipath.R 3 | \name{get_progeny} 4 | \alias{get_progeny} 5 | \title{Pathway RespOnsive GENes for activity inference (PROGENy).} 6 | \usage{ 7 | get_progeny(organism = "human", top = 500) 8 | } 9 | \arguments{ 10 | \item{organism}{Which organism to use. Only human and mouse are available.} 11 | 12 | \item{top}{Number of genes per pathway to return.} 13 | } 14 | \description{ 15 | Wrapper to access PROGENy model gene weights. Each pathway is defined with a 16 | collection of target genes, each interaction has an associated p-value and 17 | weight. The top significant interactions per pathway are returned.
18 | } 19 | \examples{ 20 | progeny <- get_progeny(organism='human', top=500) 21 | } 22 | -------------------------------------------------------------------------------- /man/get_resource.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-omnipath.R 3 | \name{get_resource} 4 | \alias{get_resource} 5 | \title{Wrapper to access resources inside Omnipath. 6 | This wrapper allows to easily query different prior knowledge resources. 7 | To check available resources run \code{decoupleR::show_resources()}. For more 8 | information visit the official website for \href{https://omnipathdb.org/}{Omnipath}.} 9 | \usage{ 10 | get_resource(name, organism = "human", ...) 11 | } 12 | \arguments{ 13 | \item{name}{Name of the resource to query.} 14 | 15 | \item{organism}{Organism name or NCBI Taxonomy ID.} 16 | 17 | \item{...}{Passed to \code{OmnipathR::import_omnipath_annotations}.} 18 | } 19 | \description{ 20 | Wrapper to access resources inside Omnipath. 21 | This wrapper allows to easily query different prior knowledge resources. 22 | To check available resources run \code{decoupleR::show_resources()}. For more 23 | information visit the official website for \href{https://omnipathdb.org/}{Omnipath}. 
24 | } 25 | \examples{ 26 | df <- decoupleR::get_resource('SIGNOR') 27 | } 28 | -------------------------------------------------------------------------------- /man/get_toy_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-pre.R 3 | \name{get_toy_data} 4 | \alias{get_toy_data} 5 | \title{Generate a toy \code{mat} and \code{network}.} 6 | \usage{ 7 | get_toy_data(n_samples = 24, seed = 42) 8 | } 9 | \arguments{ 10 | \item{n_samples}{Number of samples to simulate.} 11 | 12 | \item{seed}{A single value, interpreted as an integer, or NULL for random 13 | number generation.} 14 | } 15 | \value{ 16 | List containing \code{mat} and \code{network}. 17 | } 18 | \description{ 19 | Generate a toy \code{mat} and \code{network}. 20 | } 21 | \examples{ 22 | data <- get_toy_data() 23 | mat <- data$mat 24 | network <- data$network 25 | } 26 | -------------------------------------------------------------------------------- /man/intersect_regulons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-pre.R 3 | \name{intersect_regulons} 4 | \alias{intersect_regulons} 5 | \title{Intersect network target features with input matrix.} 6 | \usage{ 7 | intersect_regulons(mat, network, .source, .target, minsize) 8 | } 9 | \arguments{ 10 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 11 | Target nodes in rows and conditions in columns. 
12 | \code{rownames(mat)} must have at least one intersection with the elements 13 | in \code{network} \code{.target} column.} 14 | 15 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 16 | 17 | \item{.source}{Column with source nodes.} 18 | 19 | \item{.target}{Column with target nodes.} 20 | 21 | \item{minsize}{Minimum number of targets per source allowed.} 22 | } 23 | \value{ 24 | Filtered tibble. 25 | } 26 | \description{ 27 | Keep only edges whose target features belong to the input matrix. 28 | } 29 | \examples{ 30 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 31 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 32 | net <- readRDS(file.path(inputs_dir, "net.rds")) 33 | intersect_regulons(mat, net, source, target, minsize=4) 34 | } 35 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \value{ 10 | Pipe an object forward into a function or call expression. 11 | } 12 | \description{ 13 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
14 | } 15 | \examples{ 16 | c(1, 2, 3) \%>\% sum() 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/pivot_wider_profile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-profiles.R 3 | \name{pivot_wider_profile} 4 | \alias{pivot_wider_profile} 5 | \title{Pivot a data frame to wider and convert it to matrix} 6 | \usage{ 7 | pivot_wider_profile( 8 | data, 9 | id_cols, 10 | names_from, 11 | values_from, 12 | values_fill = NA, 13 | to_matrix = FALSE, 14 | to_sparse = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{A data frame to pivot.} 20 | 21 | \item{id_cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A set of columns that 22 | uniquely identify each observation. Typically used when you have 23 | redundant variables, i.e. variables whose values are perfectly correlated 24 | with existing variables. 25 | 26 | Defaults to all columns in \code{data} except for the columns specified through 27 | \code{names_from} and \code{values_from}. If a tidyselect expression is supplied, it 28 | will be evaluated on \code{data} after removing the columns specified through 29 | \code{names_from} and \code{values_from}.} 30 | 31 | \item{names_from, values_from}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A pair of 32 | arguments describing which column (or columns) to get the name of the 33 | output column (\code{names_from}), and which column (or columns) to get the 34 | cell values from (\code{values_from}). 35 | 36 | If \code{values_from} contains multiple values, the value will be added to the 37 | front of the output column.} 38 | 39 | \item{values_fill}{Optionally, a (scalar) value that specifies what each 40 | \code{value} should be filled in with when missing. 
41 | 42 | This can be a named list if you want to apply different fill values to 43 | different value columns.} 44 | 45 | \item{to_matrix}{Logical value indicating if the result should be a matrix. 46 | Parameter is ignored in case \code{to_sparse} is \code{TRUE}.} 47 | 48 | \item{to_sparse}{Logical value indicating whether the resulting matrix 49 | should be sparse or not.} 50 | 51 | \item{...}{Additional arguments passed on to methods.} 52 | } 53 | \value{ 54 | "widened" data; it is increasing the number of columns and 55 | decreasing the number of rows. 56 | } 57 | \description{ 58 | Generates a kind of table where the rows come from \code{id_cols}, 59 | the columns from \code{names_from} and the values from \code{values_from}. 60 | } 61 | \details{ 62 | In the current state of the function, to ensure its operation, 63 | the \code{id_cols} parameter is a single selector. 64 | } 65 | \examples{ 66 | \dontrun{ 67 | df <- tibble::tibble( 68 | tf = c("tf_1", "tf_1", "tf_2", "tf_2"), 69 | gene = c("gene_1", "gene_2", "gene_1", "gene_2"), 70 | mor = c(1, -1, 1, -1) 71 | ) 72 | 73 | # Return a tibble 74 | pivot_wider_profile( 75 | data = df, 76 | id_cols = tf, 77 | names_from = gene, 78 | values_from = mor 79 | ) 80 | 81 | # Return a matrix 82 | pivot_wider_profile( 83 | data = df, 84 | id_cols = tf, 85 | names_from = gene, 86 | values_from = mor, 87 | to_matrix = TRUE 88 | ) 89 | # Return a sparse Matrix of class "dgCMatrix" 90 | pivot_wider_profile( 91 | data = df, 92 | id_cols = tf, 93 | names_from = gene, 94 | values_from = mor, 95 | to_sparse = TRUE 96 | ) 97 | } 98 | } 99 | \keyword{internal} 100 | -------------------------------------------------------------------------------- /man/randomize_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-randomize-matrix.R 3 | \name{randomize_matrix} 4 | \alias{randomize_matrix} 5 |
\title{Randomize matrix} 6 | \usage{ 7 | randomize_matrix(mat, randomize_type = c("rows", "cols_independently")) 8 | } 9 | \arguments{ 10 | \item{mat}{Matrix to randomize.} 11 | 12 | \item{randomize_type}{How to randomize.} 13 | } 14 | \value{ 15 | Randomized matrix 16 | } 17 | \description{ 18 | Utility function used in functions that require permutations of the 19 | expression matrix. 20 | } 21 | \examples{ 22 | \dontrun{ 23 | mat <- matrix(seq_len(9), ncol = 3) 24 | mat 25 | 26 | set.seed(42) 27 | randomize_matrix(mat, randomize_type = "rows") 28 | 29 | set.seed(42) 30 | randomize_matrix(mat, randomize_type = "cols_independently") 31 | } 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/rename_net.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-dataset-converters.R 3 | \name{rename_net} 4 | \alias{rename_net} 5 | \title{Rename network} 6 | \usage{ 7 | rename_net( 8 | network, 9 | .source, 10 | .target, 11 | .mor = NULL, 12 | .likelihood = NULL, 13 | def_mor = 1 14 | ) 15 | } 16 | \arguments{ 17 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 18 | 19 | \item{.source}{Column with source nodes.} 20 | 21 | \item{.target}{Column with target nodes.} 22 | 23 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 24 | 25 | \item{.likelihood}{Deprecated argument. Now it will always be set to 1.} 26 | 27 | \item{def_mor}{Default value for .mor when not provided.} 28 | } 29 | \description{ 30 | Renames a given network to these column names: .source, .target, .mor. If 31 | .mor is not provided, then the function sets it to a default value.
32 | } 33 | \examples{ 34 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 35 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 36 | net <- readRDS(file.path(inputs_dir, "net.rds")) 37 | rename_net(net, source, target, mor) 38 | } 39 | -------------------------------------------------------------------------------- /man/run_aucell.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-aucell.R 3 | \name{run_aucell} 4 | \alias{run_aucell} 5 | \title{AUCell} 6 | \usage{ 7 | run_aucell( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | aucMaxRank = ceiling(0.05 * nrow(rankings)), 13 | nproc = availableCores(), 14 | seed = 42, 15 | minsize = 5 16 | ) 17 | } 18 | \arguments{ 19 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 20 | Target nodes in rows and conditions in columns. 21 | \code{rownames(mat)} must have at least one intersection with the elements 22 | in \code{network} \code{.target} column.} 23 | 24 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 25 | 26 | \item{.source}{Column with source nodes.} 27 | 28 | \item{.target}{Column with target nodes.} 29 | 30 | \item{aucMaxRank}{Threshold to calculate the AUC.} 31 | 32 | \item{nproc}{Number of cores to use for computation.} 33 | 34 | \item{seed}{A single value, interpreted as an integer, or NULL for random 35 | number generation.} 36 | 37 | \item{minsize}{Integer indicating the minimum number of targets per source.} 38 | } 39 | \description{ 40 | Calculates regulatory activities using AUCell. 41 | } 42 | \details{ 43 | AUCell (Aibar et al., 2017) uses the Area Under the Curve (AUC) to calculate 44 | whether a set of targets is enriched within the molecular readouts of each 45 | sample. 
To do so, AUCell first ranks the molecular features of each sample 46 | from highest to lowest value, resolving ties randomly. Then, an AUC can be 47 | calculated using by default the top 5\% molecular features in the ranking. 48 | Therefore, this metric, \code{aucell}, represents the proportion of 49 | abundant molecular features in the target set, and their relative abundance 50 | value compared to the other features within the sample. 51 | 52 | Aibar S. et al. (2017) Scenic: single-cell regulatory network inference and 53 | clustering. Nat. Methods, 14, 1083–1086. 54 | } 55 | \examples{ 56 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 57 | 58 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 59 | net <- readRDS(file.path(inputs_dir, "net.rds")) 60 | 61 | run_aucell(mat, net, minsize=0, nproc=1, aucMaxRank=3) 62 | } 63 | \seealso{ 64 | Other decoupleR statistics: 65 | \code{\link{decouple}()}, 66 | \code{\link{run_fgsea}()}, 67 | \code{\link{run_gsva}()}, 68 | \code{\link{run_mdt}()}, 69 | \code{\link{run_mlm}()}, 70 | \code{\link{run_ora}()}, 71 | \code{\link{run_udt}()}, 72 | \code{\link{run_ulm}()}, 73 | \code{\link{run_viper}()}, 74 | \code{\link{run_wmean}()}, 75 | \code{\link{run_wsum}()}, 76 | \code{\link{run_zscore}()} 77 | } 78 | \concept{decoupleR statistics} 79 | -------------------------------------------------------------------------------- /man/run_consensus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-consensus.R 3 | \name{run_consensus} 4 | \alias{run_consensus} 5 | \title{Consensus score between methods} 6 | \usage{ 7 | run_consensus(df, include_time = FALSE, seed = NULL) 8 | } 9 | \arguments{ 10 | \item{df}{\code{decouple} data frame result} 11 | 12 | \item{include_time}{Should the time per statistic evaluated be informed?} 13 | 14 | \item{seed}{Deprecated parameter.} 15 | } 16 | 
\value{ 17 | Updated tibble with the computed consensus score between methods 18 | } 19 | \description{ 20 | Function to generate a consensus score between methods from the 21 | result of the \code{decouple} function. 22 | } 23 | \examples{ 24 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 25 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 26 | net <- readRDS(file.path(inputs_dir, "net.rds")) 27 | 28 | results <- decouple( 29 | mat = mat, 30 | network = net, 31 | .source = "source", 32 | .target = "target", 33 | statistics = c("wmean", "ulm"), 34 | args = list( 35 | wmean = list(.mor = "mor", .likelihood = "likelihood"), 36 | ulm = list(.mor = "mor", .likelihood = "likelihood") 37 | ), 38 | consensus_score = FALSE, 39 | minsize = 0 40 | ) 41 | run_consensus(results) 42 | } 43 | -------------------------------------------------------------------------------- /man/run_fgsea.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-fgsea.R 3 | \name{run_fgsea} 4 | \alias{run_fgsea} 5 | \title{Fast Gene Set Enrichment Analysis (FGSEA)} 6 | \usage{ 7 | run_fgsea( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | times = 100, 13 | nproc = availableCores(), 14 | seed = 42, 15 | minsize = 5, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 21 | Target nodes in rows and conditions in columns. 
22 | \code{rownames(mat)} must have at least one intersection with the elements 23 | in \code{network} \code{.target} column.} 24 | 25 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 26 | 27 | \item{.source}{Column with source nodes.} 28 | 29 | \item{.target}{Column with target nodes.} 30 | 31 | \item{times}{How many permutations to do?} 32 | 33 | \item{nproc}{Number of cores to use for computation.} 34 | 35 | \item{seed}{A single value, interpreted as an integer, or NULL.} 36 | 37 | \item{minsize}{Integer indicating the minimum number of targets per source.} 38 | 39 | \item{...}{ 40 | Arguments passed on to \code{\link[fgsea:fgseaMultilevel]{fgsea::fgseaMultilevel}} 41 | \describe{ 42 | \item{\code{sampleSize}}{The size of a random set of genes which in turn has size = pathwaySize} 43 | \item{\code{minSize}}{Minimal size of a gene set to test. All pathways below the threshold are excluded.} 44 | \item{\code{maxSize}}{Maximal size of a gene set to test. All pathways above the threshold are excluded.} 45 | \item{\code{eps}}{This parameter sets the boundary for calculating the p value.} 46 | \item{\code{scoreType}}{This parameter defines the GSEA score type. 47 | Possible options are ("std", "pos", "neg"). 48 | By default ("std") the enrichment score is computed as in the original GSEA. 49 | The "pos" and "neg" score types are intended to be used for one-tailed tests 50 | (i.e. when one is interested only in positive ("pos") or negative ("neg") enrichment).} 51 | \item{\code{gseaParam}}{GSEA parameter value, all gene-level statistics are raised to the power of `gseaParam` 52 | before calculation of GSEA enrichment scores.} 53 | \item{\code{BPPARAM}}{Parallelization parameter used in bplapply. 54 | Can be used to specify cluster to run.
If not initialized explicitly or 55 | by setting `nproc` default value `bpparam()` is used.} 56 | \item{\code{absEps}}{deprecated, use `eps` parameter instead} 57 | }} 58 | } 59 | \value{ 60 | A long format tibble of the enrichment scores for each source 61 | across the samples. Resulting tibble contains the following columns: 62 | \enumerate{ 63 | \item \code{statistic}: Indicates which method is associated with which score. 64 | \item \code{source}: Source nodes of \code{network}. 65 | \item \code{condition}: Condition representing each column of \code{mat}. 66 | \item \code{score}: Regulatory activity (enrichment score). 67 | } 68 | } 69 | \description{ 70 | Calculates regulatory activities using FGSEA. 71 | } 72 | \details{ 73 | GSEA (Subramanian et al., 2005) starts by transforming the input molecular 74 | readouts in mat to ranks for each sample. Then, an enrichment score 75 | \code{fgsea} is calculated by walking down the list of features, increasing 76 | a running-sum statistic when a feature in the target feature set is 77 | encountered and decreasing it when it is not. The final score is the maximum 78 | deviation from zero encountered in the random walk. Finally, a normalized 79 | score \code{norm_fgsea} can be obtained by computing the z-score of the estimate 80 | compared to a null distribution obtained from N random permutations. The 81 | implementation used is taken from the package \code{fgsea} (Korotkevich et al., 2021). 82 | 83 | Subramanian A. et al. (2005) Gene set enrichment analysis: A knowledge-based 84 | approach for interpreting genome-wide expression profiles. PNAS. 102, 43. 85 | 86 | Korotkevich G. et al. (2021) Fast gene set enrichment analysis. bioRxiv. 87 | DOI: https://doi.org/10.1101/060012.
88 | } 89 | \examples{ 90 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 91 | 92 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 93 | net <- readRDS(file.path(inputs_dir, "net.rds")) 94 | 95 | run_fgsea(mat, net, minsize=0, nproc=1) 96 | } 97 | \seealso{ 98 | Other decoupleR statistics: 99 | \code{\link{decouple}()}, 100 | \code{\link{run_aucell}()}, 101 | \code{\link{run_gsva}()}, 102 | \code{\link{run_mdt}()}, 103 | \code{\link{run_mlm}()}, 104 | \code{\link{run_ora}()}, 105 | \code{\link{run_udt}()}, 106 | \code{\link{run_ulm}()}, 107 | \code{\link{run_viper}()}, 108 | \code{\link{run_wmean}()}, 109 | \code{\link{run_wsum}()}, 110 | \code{\link{run_zscore}()} 111 | } 112 | \concept{decoupleR statistics} 113 | -------------------------------------------------------------------------------- /man/run_gsva.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-gsva.R 3 | \name{run_gsva} 4 | \alias{run_gsva} 5 | \title{Gene Set Variation Analysis (GSVA)} 6 | \usage{ 7 | run_gsva( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | verbose = FALSE, 13 | method = c("gsva", "plage", "ssgsea", "zscore"), 14 | minsize = 5L, 15 | maxsize = Inf, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 21 | Target nodes in rows and conditions in columns. 22 | \code{rownames(mat)} must have at least one intersection with the elements 23 | in \code{network} \code{.target} column.} 24 | 25 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 26 | 27 | \item{.source}{Column with source nodes.} 28 | 29 | \item{.target}{Column with target nodes.} 30 | 31 | \item{verbose}{Gives information about each calculation step. Default: FALSE.} 32 | 33 | \item{method}{Method to employ in the estimation of gene-set enrichment 34 | scores per sample.
By default this is set to gsva (Hänzelmann et al., 2013). 35 | Further available methods are "plage", "ssgsea" and "zscore". Read more in 36 | the manual of \code{\link[GSVA:gsva]{GSVA::gsva}}.} 37 | 38 | \item{minsize}{Integer indicating the minimum number of targets per source. 39 | Must be greater than 0.} 40 | 41 | \item{maxsize}{Integer indicating the maximum number of targets per source.} 42 | 43 | \item{...}{ 44 | Arguments passed on to \code{\link[GSVA:gsvaParam-class]{GSVA::gsvaParam}}, \code{\link[GSVA:ssgseaParam-class]{GSVA::ssgseaParam}} 45 | \describe{ 46 | \item{\code{assay}}{The name of the assay to use in case \code{exprData} is a multi-assay 47 | container, otherwise ignored. By default, the first assay is used.} 48 | \item{\code{annotation}}{The name of a Bioconductor annotation package for the gene 49 | identifiers occurring in the row names of the expression data matrix. This 50 | can be used to map gene identifiers occurring in the gene sets if those are 51 | provided in a \code{\link{GeneSetCollection}}. By default gene identifiers used in 52 | expression data matrix and gene sets are matched directly.} 53 | \item{\code{kcdf}}{Character vector of length 1 denoting the kernel to use during 54 | the non-parametric estimation of the cumulative distribution function of 55 | expression levels across samples. By default, \code{kcdf="Gaussian"} which is 56 | suitable when input expression values are continuous, such as microarray 57 | fluorescent units in logarithmic scale, RNA-seq log-CPMs, log-RPKMs or 58 | log-TPMs. When input expression values are integer counts, such as those 59 | derived from RNA-seq experiments, then this argument should be set to 60 | \code{kcdf="Poisson"}.} 61 | \item{\code{tau}}{Numeric vector of length 1. The exponent defining the weight of 62 | the tail in the random walk performed by the \code{GSVA} (Hänzelmann et al., 63 | 2013) method.
The default value is 1 as described in the paper.} 64 | \item{\code{maxDiff}}{Logical vector of length 1 which offers two approaches to 65 | calculate the enrichment statistic (ES) from the KS random walk statistic. 66 | \itemize{ 67 | \item \code{FALSE}: ES is calculated as the maximum distance of the random walk 68 | from 0. This approach produces a distribution of enrichment scores that is 69 | bimodal, but it can give large enrichment scores to gene sets whose genes 70 | are not concordantly activated in one direction only. 71 | \item \code{TRUE} (the default): ES is calculated as the magnitude difference between 72 | the largest positive and negative random walk deviations. This default value 73 | gives larger enrichment scores to gene sets whose genes are concordantly 74 | activated in one direction only. 75 | }} 76 | \item{\code{absRanking}}{Logical vector of length 1 used only when \code{maxDiff=TRUE}. 77 | When \code{absRanking=FALSE} (default) a modified Kuiper statistic is used to 78 | calculate enrichment scores, taking the magnitude difference between the 79 | largest positive and negative random walk deviations. When 80 | \code{absRanking=TRUE} the original Kuiper statistic that sums the largest 81 | positive and negative random walk deviations is used.} 82 | \item{\code{alpha}}{Numeric vector of length 1. The exponent defining the 83 | weight of the tail in the random walk performed by the \code{ssGSEA} (Barbie et 84 | al., 2009) method. The default value is 0.25 as described in the paper.} 85 | \item{\code{normalize}}{Logical vector of length 1; if \code{TRUE} runs the \code{ssGSEA} method 86 | from Barbie et al. (2009) normalizing the scores by the absolute difference 87 | between the minimum and the maximum, as described in their paper. Otherwise 88 | this last normalization step is skipped.} 89 | }} 90 | } 91 | \value{ 92 | A long format tibble of the enrichment scores for each source 93 | across the samples. 
Resulting tibble contains the following columns: 94 | \enumerate{ 95 | \item \code{statistic}: Indicates which method is associated with which score. 96 | \item \code{source}: Source nodes of \code{network}. 97 | \item \code{condition}: Condition representing each column of \code{mat}. 98 | \item \code{score}: Regulatory activity (enrichment score). 99 | } 100 | } 101 | \description{ 102 | Calculates regulatory activities using GSVA. 103 | } 104 | \details{ 105 | GSVA (Hänzelmann et al., 2013) starts by transforming the input molecular 106 | readouts in mat to a readout-level statistic using Gaussian kernel estimation 107 | of the cumulative density function. Then, readout-level statistics are 108 | ranked per sample and normalized to up-weight the two tails of the rank 109 | distribution. Afterwards, an enrichment score \code{gsva} is calculated 110 | using a running sum statistic that is normalized by subtracting the largest 111 | negative estimate from the largest positive one. 112 | 113 | Hänzelmann S. et al. (2013) GSVA: gene set variation analysis for microarray 114 | and RNA-seq data. BMC Bioinformatics, 14, 7. 
115 | } 116 | \examples{ 117 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 118 | 119 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 120 | net <- readRDS(file.path(inputs_dir, "net.rds")) 121 | 122 | run_gsva(mat, net, minsize=1, verbose = FALSE) 123 | } 124 | \seealso{ 125 | Other decoupleR statistics: 126 | \code{\link{decouple}()}, 127 | \code{\link{run_aucell}()}, 128 | \code{\link{run_fgsea}()}, 129 | \code{\link{run_mdt}()}, 130 | \code{\link{run_mlm}()}, 131 | \code{\link{run_ora}()}, 132 | \code{\link{run_udt}()}, 133 | \code{\link{run_ulm}()}, 134 | \code{\link{run_viper}()}, 135 | \code{\link{run_wmean}()}, 136 | \code{\link{run_wsum}()}, 137 | \code{\link{run_zscore}()} 138 | } 139 | \concept{decoupleR statistics} 140 | -------------------------------------------------------------------------------- /man/run_mdt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-mdt.R 3 | \name{run_mdt} 4 | \alias{run_mdt} 5 | \title{Multivariate Decision Trees (MDT)} 6 | \usage{ 7 | run_mdt( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | sparse = FALSE, 15 | center = FALSE, 16 | na.rm = FALSE, 17 | trees = 10, 18 | min_n = 20, 19 | nproc = availableCores(), 20 | seed = 42, 21 | minsize = 5 22 | ) 23 | } 24 | \arguments{ 25 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 26 | Target nodes in rows and conditions in columns. 27 | \code{rownames(mat)} must have at least one intersection with the elements 28 | in \code{network} \code{.target} column.} 29 | 30 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 31 | 32 | \item{.source}{Column with source nodes.} 33 | 34 | \item{.target}{Column with target nodes.} 35 | 36 | \item{.mor}{Column with edge mode of regulation (i.e. 
mor).} 37 | 38 | \item{.likelihood}{Deprecated argument. Now it will always be set to 1.} 39 | 40 | \item{sparse}{Deprecated parameter.} 41 | 42 | \item{center}{Logical value indicating if \code{mat} must be centered by 43 | \code{\link[base:colSums]{base::rowMeans()}}.} 44 | 45 | \item{na.rm}{Should missing values (including NaN) be omitted from the 46 | calculations of \code{\link[base:colSums]{base::rowMeans()}}?} 47 | 48 | \item{trees}{An integer for the number of trees contained in the ensemble.} 49 | 50 | \item{min_n}{An integer for the minimum number of data points in a node that 51 | are required for the node to be split further.} 52 | 53 | \item{nproc}{Number of cores to use for computation.} 54 | 55 | \item{seed}{A single value, interpreted as an integer, or NULL for random 56 | number generation.} 57 | 58 | \item{minsize}{Integer indicating the minimum number of targets per source.} 59 | } 60 | \value{ 61 | A long format tibble of the enrichment scores for each source 62 | across the samples. Resulting tibble contains the following columns: 63 | \enumerate{ 64 | \item \code{statistic}: Indicates which method is associated with which score. 65 | \item \code{source}: Source nodes of \code{network}. 66 | \item \code{condition}: Condition representing each column of \code{mat}. 67 | \item \code{score}: Regulatory activity (enrichment score). 68 | } 69 | } 70 | \description{ 71 | Calculates regulatory activities using MDT. 72 | } 73 | \details{ 74 | MDT fits a multivariate regression random forest for each sample, where the 75 | observed molecular readouts in mat are the response variable and the 76 | regulator weights in net are the covariates. Target features with no 77 | associated weight are set to zero. The obtained feature importances from the 78 | fitted model are the activities \code{mdt} of the regulators in net. 
79 | } 80 | \examples{ 81 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 82 | 83 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 84 | net <- readRDS(file.path(inputs_dir, "net.rds")) 85 | 86 | run_mdt(mat, net, minsize=0) 87 | } 88 | \seealso{ 89 | Other decoupleR statistics: 90 | \code{\link{decouple}()}, 91 | \code{\link{run_aucell}()}, 92 | \code{\link{run_fgsea}()}, 93 | \code{\link{run_gsva}()}, 94 | \code{\link{run_mlm}()}, 95 | \code{\link{run_ora}()}, 96 | \code{\link{run_udt}()}, 97 | \code{\link{run_ulm}()}, 98 | \code{\link{run_viper}()}, 99 | \code{\link{run_wmean}()}, 100 | \code{\link{run_wsum}()}, 101 | \code{\link{run_zscore}()} 102 | } 103 | \concept{decoupleR statistics} 104 | -------------------------------------------------------------------------------- /man/run_mlm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-mlm.R 3 | \name{run_mlm} 4 | \alias{run_mlm} 5 | \title{Multivariate Linear Model (MLM)} 6 | \usage{ 7 | run_mlm( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | sparse = FALSE, 15 | center = FALSE, 16 | na.rm = FALSE, 17 | minsize = 5 18 | ) 19 | } 20 | \arguments{ 21 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 22 | Target nodes in rows and conditions in columns. 23 | \code{rownames(mat)} must have at least one intersection with the elements 24 | in \code{network} \code{.target} column.} 25 | 26 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 27 | 28 | \item{.source}{Column with source nodes.} 29 | 30 | \item{.target}{Column with target nodes.} 31 | 32 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 33 | 34 | \item{.likelihood}{Deprecated argument. 
Now it will always be set to 1.} 35 | 36 | \item{sparse}{Deprecated parameter.} 37 | 38 | \item{center}{Logical value indicating if \code{mat} must be centered by 39 | \code{\link[base:colSums]{base::rowMeans()}}.} 40 | 41 | \item{na.rm}{Should missing values (including NaN) be omitted from the 42 | calculations of \code{\link[base:colSums]{base::rowMeans()}}?} 43 | 44 | \item{minsize}{Integer indicating the minimum number of targets per source.} 45 | } 46 | \value{ 47 | A long format tibble of the enrichment scores for each source 48 | across the samples. Resulting tibble contains the following columns: 49 | \enumerate{ 50 | \item \code{statistic}: Indicates which method is associated with which score. 51 | \item \code{source}: Source nodes of \code{network}. 52 | \item \code{condition}: Condition representing each column of \code{mat}. 53 | \item \code{score}: Regulatory activity (enrichment score). 54 | } 55 | } 56 | \description{ 57 | Calculates regulatory activities using MLM. 58 | } 59 | \details{ 60 | MLM fits a multivariate linear model for each sample, where the observed 61 | molecular readouts in mat are the response variable and the regulator weights 62 | in net are the covariates. Target features with no associated weight are set 63 | to zero. The obtained t-values from the fitted model are the activities 64 | (\code{mlm}) of the regulators in net. 
65 | } 66 | \examples{ 67 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 68 | 69 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 70 | net <- readRDS(file.path(inputs_dir, "net.rds")) 71 | 72 | run_mlm(mat, net, minsize=0) 73 | } 74 | \seealso{ 75 | Other decoupleR statistics: 76 | \code{\link{decouple}()}, 77 | \code{\link{run_aucell}()}, 78 | \code{\link{run_fgsea}()}, 79 | \code{\link{run_gsva}()}, 80 | \code{\link{run_mdt}()}, 81 | \code{\link{run_ora}()}, 82 | \code{\link{run_udt}()}, 83 | \code{\link{run_ulm}()}, 84 | \code{\link{run_viper}()}, 85 | \code{\link{run_wmean}()}, 86 | \code{\link{run_wsum}()}, 87 | \code{\link{run_zscore}()} 88 | } 89 | \concept{decoupleR statistics} 90 | -------------------------------------------------------------------------------- /man/run_ora.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-ora.R 3 | \name{run_ora} 4 | \alias{run_ora} 5 | \title{Over Representation Analysis (ORA)} 6 | \usage{ 7 | run_ora( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | n_up = ceiling(0.05 * nrow(mat)), 13 | n_bottom = 0, 14 | n_background = 20000, 15 | with_ties = TRUE, 16 | seed = 42, 17 | minsize = 5, 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 23 | Target nodes in rows and conditions in columns. 
24 | \code{rownames(mat)} must have at least one intersection with the elements 25 | in \code{network} \code{.target} column.} 26 | 27 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 28 | 29 | \item{.source}{Column with source nodes.} 30 | 31 | \item{.target}{Column with target nodes.} 32 | 33 | \item{n_up}{Integer indicating the number of top targets to slice from mat.} 34 | 35 | \item{n_bottom}{Integer indicating the number of bottom targets to slice from 36 | mat.} 37 | 38 | \item{n_background}{Integer indicating the background size of the sliced 39 | targets. If not specified the number of background targets is determined by 40 | the total number of unique targets in the union of \code{mat} and \code{network}.} 41 | 42 | \item{with_ties}{Should ties be kept together? The default, \code{TRUE}, 43 | may return more rows than you request. Use \code{FALSE} to ignore ties, 44 | and return the first \code{n} rows.} 45 | 46 | \item{seed}{A single value, interpreted as an integer, or NULL for random 47 | number generation.} 48 | 49 | \item{minsize}{Integer indicating the minimum number of targets per source.} 50 | 51 | \item{...}{ 52 | Arguments passed on to \code{\link[stats:fisher.test]{stats::fisher.test}} 53 | \describe{ 54 | \item{\code{workspace}}{an integer specifying the size of the workspace 55 | used in the network algorithm. In units of 4 bytes. Only used for 56 | non-simulated p-values larger than \eqn{2 \times 2}{2 by 2} tables. 57 | Since \R version 3.5.0, this also increases the internal stack size 58 | which allows larger problems to be solved, however sometimes needing 59 | hours. In such cases, \code{simulate.p.value = TRUE} may be more 60 | reasonable.} 61 | \item{\code{hybrid}}{a logical. 
Only used for larger than \eqn{2 \times 2}{2 by 2} 62 | tables, in which cases it indicates whether the exact probabilities 63 | (default) or a hybrid approximation thereof should be computed.} 64 | \item{\code{hybridPars}}{a numeric vector of length 3, by default describing 65 | \dQuote{Cochran's conditions} for the validity of the chi-squared 66 | approximation, see \sQuote{Details}.} 67 | \item{\code{control}}{a list with named components for low level algorithm 68 | control. At present the only one used is \code{"mult"}, a positive 69 | integer \eqn{\ge 2} with default 30 used only for larger than 70 | \eqn{2 \times 2}{2 by 2} tables. This says how many times as much 71 | space should be allocated to paths as to keys: see file 72 | \file{fexact.c} in the sources of this package.} 73 | \item{\code{or}}{the hypothesized odds ratio. Only used in the 74 | \eqn{2 \times 2}{2 by 2} case.} 75 | \item{\code{alternative}}{indicates the alternative hypothesis and must be 76 | one of \code{"two.sided"}, \code{"greater"} or \code{"less"}. 77 | You can specify just the initial letter. Only used in the 78 | \eqn{2 \times 2}{2 by 2} case.} 79 | \item{\code{conf.int}}{logical indicating if a confidence interval for the 80 | odds ratio in a \eqn{2 \times 2}{2 by 2} table should be 81 | computed (and returned).} 82 | \item{\code{conf.level}}{confidence level for the returned confidence 83 | interval. Only used in the \eqn{2 \times 2}{2 by 2} case and if 84 | \code{conf.int = TRUE}.} 85 | \item{\code{simulate.p.value}}{a logical indicating whether to compute 86 | p-values by Monte Carlo simulation, in larger than \eqn{2 \times 87 | 2}{2 by 2} tables.} 88 | \item{\code{B}}{an integer specifying the number of replicates used in the 89 | Monte Carlo test.} 90 | }} 91 | } 92 | \value{ 93 | A long format tibble of the enrichment scores for each source 94 | across the samples. 
Resulting tibble contains the following columns: 95 | \enumerate{ 96 | \item \code{statistic}: Indicates which method is associated with which score. 97 | \item \code{source}: Source nodes of \code{network}. 98 | \item \code{condition}: Condition representing each column of \code{mat}. 99 | \item \code{score}: Regulatory activity (enrichment score). 100 | } 101 | } 102 | \description{ 103 | Calculates regulatory activities using ORA. 104 | } 105 | \details{ 106 | ORA measures the overlap between the target feature set and a list of most 107 | altered molecular features in mat. The most altered molecular features can 108 | be selected from the top and/or bottom of the molecular readout distribution; 109 | by default it is the top 5\% positive values. With these, a contingency table 110 | is built and a one-tailed Fisher’s exact test is computed to determine if a 111 | regulator’s set of features is over-represented in the selected features 112 | from the data. The resulting score, \code{ora}, is the minus log10 of the 113 | obtained p-value. 
114 | } 115 | \examples{ 116 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 117 | 118 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 119 | net <- readRDS(file.path(inputs_dir, "net.rds")) 120 | 121 | run_ora(mat, net, minsize=0) 122 | } 123 | \seealso{ 124 | Other decoupleR statistics: 125 | \code{\link{decouple}()}, 126 | \code{\link{run_aucell}()}, 127 | \code{\link{run_fgsea}()}, 128 | \code{\link{run_gsva}()}, 129 | \code{\link{run_mdt}()}, 130 | \code{\link{run_mlm}()}, 131 | \code{\link{run_udt}()}, 132 | \code{\link{run_ulm}()}, 133 | \code{\link{run_viper}()}, 134 | \code{\link{run_wmean}()}, 135 | \code{\link{run_wsum}()}, 136 | \code{\link{run_zscore}()} 137 | } 138 | \concept{decoupleR statistics} 139 | -------------------------------------------------------------------------------- /man/run_udt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-udt.R 3 | \name{run_udt} 4 | \alias{run_udt} 5 | \title{Univariate Decision Tree (UDT)} 6 | \usage{ 7 | run_udt( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | sparse = FALSE, 15 | center = FALSE, 16 | na.rm = FALSE, 17 | min_n = 20, 18 | seed = 42, 19 | minsize = 5 20 | ) 21 | } 22 | \arguments{ 23 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 24 | Target nodes in rows and conditions in columns. 25 | \code{rownames(mat)} must have at least one intersection with the elements 26 | in \code{network} \code{.target} column.} 27 | 28 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 29 | 30 | \item{.source}{Column with source nodes.} 31 | 32 | \item{.target}{Column with target nodes.} 33 | 34 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 35 | 36 | \item{.likelihood}{Deprecated argument. 
Now it will always be set to 1.} 37 | 38 | \item{sparse}{Deprecated parameter.} 39 | 40 | \item{center}{Logical value indicating if \code{mat} must be centered by 41 | \code{\link[base:colSums]{base::rowMeans()}}.} 42 | 43 | \item{na.rm}{Should missing values (including NaN) be omitted from the 44 | calculations of \code{\link[base:colSums]{base::rowMeans()}}?} 45 | 46 | \item{min_n}{An integer for the minimum number of data points in a node that 47 | are required for the node to be split further.} 48 | 49 | \item{seed}{A single value, interpreted as an integer, or NULL for random 50 | number generation.} 51 | 52 | \item{minsize}{Integer indicating the minimum number of targets per source.} 53 | } 54 | \value{ 55 | A long format tibble of the enrichment scores for each source 56 | across the samples. Resulting tibble contains the following columns: 57 | \enumerate{ 58 | \item \code{statistic}: Indicates which method is associated with which score. 59 | \item \code{source}: Source nodes of \code{network}. 60 | \item \code{condition}: Condition representing each column of \code{mat}. 61 | \item \code{score}: Regulatory activity (enrichment score). 62 | } 63 | } 64 | \description{ 65 | Calculates regulatory activities by using UDT. 66 | } 67 | \details{ 68 | UDT fits a single regression decision tree for each sample and regulator, 69 | where the observed molecular readouts in mat are the response variable and 70 | the regulator weights in net are the explanatory one. Target features with 71 | no associated weight are set to zero. The obtained feature importance from 72 | the fitted model is the activity \code{udt} of a given regulator. 
73 | } 74 | \examples{ 75 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 76 | 77 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 78 | net <- readRDS(file.path(inputs_dir, "net.rds")) 79 | 80 | run_udt(mat, net, minsize=0) 81 | } 82 | \seealso{ 83 | Other decoupleR statistics: 84 | \code{\link{decouple}()}, 85 | \code{\link{run_aucell}()}, 86 | \code{\link{run_fgsea}()}, 87 | \code{\link{run_gsva}()}, 88 | \code{\link{run_mdt}()}, 89 | \code{\link{run_mlm}()}, 90 | \code{\link{run_ora}()}, 91 | \code{\link{run_ulm}()}, 92 | \code{\link{run_viper}()}, 93 | \code{\link{run_wmean}()}, 94 | \code{\link{run_wsum}()}, 95 | \code{\link{run_zscore}()} 96 | } 97 | \concept{decoupleR statistics} 98 | -------------------------------------------------------------------------------- /man/run_ulm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-ulm.R 3 | \name{run_ulm} 4 | \alias{run_ulm} 5 | \title{Univariate Linear Model (ULM)} 6 | \usage{ 7 | run_ulm( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | sparse = FALSE, 15 | center = FALSE, 16 | na.rm = FALSE, 17 | minsize = 5L 18 | ) 19 | } 20 | \arguments{ 21 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 22 | Target nodes in rows and conditions in columns. 23 | \code{rownames(mat)} must have at least one intersection with the elements 24 | in \code{network} \code{.target} column.} 25 | 26 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 27 | 28 | \item{.source}{Column with source nodes.} 29 | 30 | \item{.target}{Column with target nodes.} 31 | 32 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 33 | 34 | \item{.likelihood}{Deprecated argument. 
Now it will always be set to 1.} 35 | 36 | \item{sparse}{Deprecated parameter.} 37 | 38 | \item{center}{Logical value indicating if \code{mat} must be centered by 39 | \code{\link[base:colSums]{base::rowMeans()}}.} 40 | 41 | \item{na.rm}{Should missing values (including NaN) be omitted from the 42 | calculations of \code{\link[base:colSums]{base::rowMeans()}}?} 43 | 44 | \item{minsize}{Integer indicating the minimum number of targets per source.} 45 | } 46 | \value{ 47 | A long format tibble of the enrichment scores for each source 48 | across the samples. Resulting tibble contains the following columns: 49 | \enumerate{ 50 | \item \code{statistic}: Indicates which method is associated with which score. 51 | \item \code{source}: Source nodes of \code{network}. 52 | \item \code{condition}: Condition representing each column of \code{mat}. 53 | \item \code{score}: Regulatory activity (enrichment score). 54 | } 55 | } 56 | \description{ 57 | Calculates regulatory activities using ULM. 58 | } 59 | \details{ 60 | ULM fits a linear model for each sample and regulator, where the observed 61 | molecular readouts in mat are the response variable and the regulator weights 62 | in net are the explanatory one. Target features with no associated weight 63 | are set to zero. The obtained t-value from the fitted model is the activity 64 | \code{ulm} of a given regulator. 
65 | } 66 | \examples{ 67 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 68 | 69 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 70 | net <- readRDS(file.path(inputs_dir, "net.rds")) 71 | 72 | run_ulm(mat, net, minsize=0) 73 | } 74 | \seealso{ 75 | Other decoupleR statistics: 76 | \code{\link{decouple}()}, 77 | \code{\link{run_aucell}()}, 78 | \code{\link{run_fgsea}()}, 79 | \code{\link{run_gsva}()}, 80 | \code{\link{run_mdt}()}, 81 | \code{\link{run_mlm}()}, 82 | \code{\link{run_ora}()}, 83 | \code{\link{run_udt}()}, 84 | \code{\link{run_viper}()}, 85 | \code{\link{run_wmean}()}, 86 | \code{\link{run_wsum}()}, 87 | \code{\link{run_zscore}()} 88 | } 89 | \concept{decoupleR statistics} 90 | -------------------------------------------------------------------------------- /man/run_viper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-viper.R 3 | \name{run_viper} 4 | \alias{run_viper} 5 | \title{Virtual Inference of Protein-activity by Enriched Regulon analysis (VIPER)} 6 | \usage{ 7 | run_viper( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | verbose = FALSE, 15 | minsize = 5, 16 | pleiotropy = TRUE, 17 | eset.filter = FALSE, 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 23 | Target nodes in rows and conditions in columns. 24 | \code{rownames(mat)} must have at least one intersection with the elements 25 | in \code{network} \code{.target} column.} 26 | 27 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 28 | 29 | \item{.source}{Column with source nodes.} 30 | 31 | \item{.target}{Column with target nodes.} 32 | 33 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 34 | 35 | \item{.likelihood}{Deprecated argument. 
Now it will always be set to 1.} 36 | 37 | \item{verbose}{Logical, whether progression messages should be printed in 38 | the terminal.} 39 | 40 | \item{minsize}{Integer indicating the minimum number of targets per source.} 41 | 42 | \item{pleiotropy}{Logical, whether correction for pleiotropic regulation 43 | should be performed.} 44 | 45 | \item{eset.filter}{Logical, whether the dataset should be limited only to 46 | the genes represented in the interactome.} 47 | 48 | \item{...}{ 49 | Arguments passed on to \code{\link[viper:viper]{viper::viper}} 50 | \describe{ 51 | \item{\code{dnull}}{Numeric matrix for the null model, usually generated by \code{nullTtest}} 52 | \item{\code{nes}}{Logical, whether the enrichment score reported should be normalized} 53 | \item{\code{method}}{Character string indicating the method for computing the single samples signature, either scale, rank, mad, ttest or none} 54 | \item{\code{bootstraps}}{Integer indicating the number of bootstrap iterations to perform. Only the scale method is implemented with bootstraps.} 55 | \item{\code{adaptive.size}}{Logical, whether the weighting scores should be taken into account for computing the regulon size} 56 | \item{\code{pleiotropyArgs}}{list of 5 numbers for the pleiotropy correction indicating: regulators p-value threshold, pleiotropic interaction p-value threshold, minimum number of targets in the overlap between pleiotropic regulators, penalty for the pleiotropic interactions and the method for computing the pleiotropy, either absolute or adaptive} 57 | \item{\code{cores}}{Integer indicating the number of cores to use (only 1 in Windows-based systems)} 58 | }} 59 | } 60 | \value{ 61 | A long format tibble of the enrichment scores for each source 62 | across the samples. Resulting tibble contains the following columns: 63 | \enumerate{ 64 | \item \code{statistic}: Indicates which method is associated with which score. 65 | \item \code{source}: Source nodes of \code{network}. 
66 | \item \code{condition}: Condition representing each column of \code{mat}. 67 | \item \code{score}: Regulatory activity (enrichment score). 68 | } 69 | } 70 | \description{ 71 | Calculates regulatory activities using VIPER. 72 | } 73 | \details{ 74 | VIPER (Alvarez et al., 2016) estimates biological activities by performing a 75 | three-tailed enrichment score calculation. For further information check the 76 | supplementary information of the decoupleR manuscript or the original 77 | publication. 78 | 79 | Alvarez M.J. et al. (2016) Functional characterization of somatic mutations 80 | in cancer using network-based inference of protein activity. Nat. Genet., 81 | 48, 838–847. 82 | } 83 | \examples{ 84 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 85 | 86 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 87 | net <- readRDS(file.path(inputs_dir, "net.rds")) 88 | 89 | run_viper(mat, net, minsize=0, verbose = FALSE) 90 | } 91 | \seealso{ 92 | Other decoupleR statistics: 93 | \code{\link{decouple}()}, 94 | \code{\link{run_aucell}()}, 95 | \code{\link{run_fgsea}()}, 96 | \code{\link{run_gsva}()}, 97 | \code{\link{run_mdt}()}, 98 | \code{\link{run_mlm}()}, 99 | \code{\link{run_ora}()}, 100 | \code{\link{run_udt}()}, 101 | \code{\link{run_ulm}()}, 102 | \code{\link{run_wmean}()}, 103 | \code{\link{run_wsum}()}, 104 | \code{\link{run_zscore}()} 105 | } 106 | \concept{decoupleR statistics} 107 | -------------------------------------------------------------------------------- /man/run_wmean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-wmean.R 3 | \name{run_wmean} 4 | \alias{run_wmean} 5 | \title{Weighted Mean (WMEAN)} 6 | \usage{ 7 | run_wmean( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | times = 100, 15 | seed = 42, 16 | sparse = 
TRUE, 17 | randomize_type = "rows", 18 | minsize = 5 19 | ) 20 | } 21 | \arguments{ 22 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 23 | Target nodes in rows and conditions in columns. 24 | \code{rownames(mat)} must have at least one intersection with the elements 25 | in \code{network} \code{.target} column.} 26 | 27 | \item{network}{Tibble or dataframe with edges and it's associated metadata.} 28 | 29 | \item{.source}{Column with source nodes.} 30 | 31 | \item{.target}{Column with target nodes.} 32 | 33 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 34 | 35 | \item{.likelihood}{Deprecated argument. Now it will always be set to 1.} 36 | 37 | \item{times}{How many permutations to do?} 38 | 39 | \item{seed}{A single value, interpreted as an integer, or NULL for random 40 | number generation.} 41 | 42 | \item{sparse}{Should the matrices used for the calculation be sparse?} 43 | 44 | \item{randomize_type}{How to randomize the expression matrix.} 45 | 46 | \item{minsize}{Integer indicating the minimum number of targets per source.} 47 | } 48 | \value{ 49 | A long format tibble of the enrichment scores for each source 50 | across the samples. Resulting tibble contains the following columns: 51 | \enumerate{ 52 | \item \code{statistic}: Indicates which method is associated with which score. 53 | \item \code{source}: Source nodes of \code{network}. 54 | \item \code{condition}: Condition representing each column of \code{mat}. 55 | \item \code{score}: Regulatory activity (enrichment score). 56 | \item \code{p_value}: p-value for the score of the method. 57 | } 58 | } 59 | \description{ 60 | Calculates regulatory activities using WMEAN. 61 | } 62 | \details{ 63 | WMEAN infers regulator activities by first multiplying each target feature by 64 | its associated weight which then are summed to an enrichment score 65 | \code{wmean}. 
Furthermore, permutations of random target features can 66 | be performed to obtain a null distribution that can be used to compute a 67 | z-score \code{norm_wmean}, or a corrected estimate \code{corr_wmean} by multiplying 68 | \code{wmean} by the minus log10 of the obtained empirical p-value. 69 | } 70 | \examples{ 71 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 72 | 73 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 74 | net <- readRDS(file.path(inputs_dir, "net.rds")) 75 | 76 | run_wmean(mat, net, minsize=0) 77 | } 78 | \seealso{ 79 | Other decoupleR statistics: 80 | \code{\link{decouple}()}, 81 | \code{\link{run_aucell}()}, 82 | \code{\link{run_fgsea}()}, 83 | \code{\link{run_gsva}()}, 84 | \code{\link{run_mdt}()}, 85 | \code{\link{run_mlm}()}, 86 | \code{\link{run_ora}()}, 87 | \code{\link{run_udt}()}, 88 | \code{\link{run_ulm}()}, 89 | \code{\link{run_viper}()}, 90 | \code{\link{run_wsum}()}, 91 | \code{\link{run_zscore}()} 92 | } 93 | \concept{decoupleR statistics} 94 | -------------------------------------------------------------------------------- /man/run_wsum.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-wsum.R 3 | \name{run_wsum} 4 | \alias{run_wsum} 5 | \title{Weighted Sum (WSUM)} 6 | \usage{ 7 | run_wsum( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | times = 100, 15 | seed = 42, 16 | sparse = TRUE, 17 | randomize_type = "rows", 18 | minsize = 5 19 | ) 20 | } 21 | \arguments{ 22 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 23 | Target nodes in rows and conditions in columns. 
24 | \code{rownames(mat)} must have at least one intersection with the elements 25 | in \code{network} \code{.target} column.} 26 | 27 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 28 | 29 | \item{.source}{Column with source nodes.} 30 | 31 | \item{.target}{Column with target nodes.} 32 | 33 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 34 | 35 | \item{.likelihood}{Deprecated argument. Now it will always be set to 1.} 36 | 37 | \item{times}{How many permutations to do?} 38 | 39 | \item{seed}{A single value, interpreted as an integer, or NULL for random 40 | number generation.} 41 | 42 | \item{sparse}{Should the matrices used for the calculation be sparse?} 43 | 44 | \item{randomize_type}{How to randomize the expression matrix.} 45 | 46 | \item{minsize}{Integer indicating the minimum number of targets per source.} 47 | } 48 | \value{ 49 | A long format tibble of the enrichment scores for each source 50 | across the samples. Resulting tibble contains the following columns: 51 | \enumerate{ 52 | \item \code{statistic}: Indicates which method is associated with which score. 53 | \item \code{source}: Source nodes of \code{network}. 54 | \item \code{condition}: Condition representing each column of \code{mat}. 55 | \item \code{score}: Regulatory activity (enrichment score). 56 | \item \code{p_value}: p-value for the score of the method. 57 | } 58 | } 59 | \description{ 60 | Calculates regulatory activities using WSUM. 61 | } 62 | \details{ 63 | WSUM infers regulator activities by first multiplying each target feature by 64 | its associated weight which then are summed to an enrichment score 65 | \code{wsum}. Furthermore, permutations of random target features can be 66 | performed to obtain a null distribution that can be used to compute a z-score 67 | \code{norm_wsum}, or a corrected estimate \code{corr_wsum} by multiplying 68 | \code{wsum} by the minus log10 of the obtained empirical p-value.
69 | } 70 | \examples{ 71 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 72 | 73 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 74 | net <- readRDS(file.path(inputs_dir, "net.rds")) 75 | 76 | run_wsum(mat, net, minsize=0) 77 | } 78 | \seealso{ 79 | Other decoupleR statistics: 80 | \code{\link{decouple}()}, 81 | \code{\link{run_aucell}()}, 82 | \code{\link{run_fgsea}()}, 83 | \code{\link{run_gsva}()}, 84 | \code{\link{run_mdt}()}, 85 | \code{\link{run_mlm}()}, 86 | \code{\link{run_ora}()}, 87 | \code{\link{run_udt}()}, 88 | \code{\link{run_ulm}()}, 89 | \code{\link{run_viper}()}, 90 | \code{\link{run_wmean}()}, 91 | \code{\link{run_zscore}()} 92 | } 93 | \concept{decoupleR statistics} 94 | -------------------------------------------------------------------------------- /man/run_zscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistic-zscore.R 3 | \name{run_zscore} 4 | \alias{run_zscore} 5 | \title{z-score} 6 | \usage{ 7 | run_zscore( 8 | mat, 9 | network, 10 | .source = source, 11 | .target = target, 12 | .mor = mor, 13 | .likelihood = likelihood, 14 | sparse = FALSE, 15 | center = FALSE, 16 | na.rm = FALSE, 17 | minsize = 5L, 18 | flavor = "RoKAI" 19 | ) 20 | } 21 | \arguments{ 22 | \item{mat}{Matrix to evaluate (e.g. expression matrix). 23 | Target nodes in rows and conditions in columns. 24 | \code{rownames(mat)} must have at least one intersection with the elements 25 | in \code{network} \code{.target} column.} 26 | 27 | \item{network}{Tibble or dataframe with edges and its associated metadata.} 28 | 29 | \item{.source}{Column with source nodes.} 30 | 31 | \item{.target}{Column with target nodes.} 32 | 33 | \item{.mor}{Column with edge mode of regulation (i.e. mor).} 34 | 35 | \item{.likelihood}{Deprecated argument.
Now it will always be set to 1.} 36 | 37 | \item{sparse}{Deprecated parameter.} 38 | 39 | \item{center}{Logical value indicating if \code{mat} must be centered by 40 | \code{\link[base:colSums]{base::rowMeans()}}.} 41 | 42 | \item{na.rm}{Should missing values (including NaN) be omitted from the 43 | calculations of \code{\link[base:colSums]{base::rowMeans()}}?} 44 | 45 | \item{minsize}{Integer indicating the minimum number of targets per source.} 46 | 47 | \item{flavor}{Whether the calculation should be based on RoKAI (default) or 48 | KSEA.} 49 | } 50 | \value{ 51 | A long format tibble of the enrichment scores for each source 52 | across the samples. Resulting tibble contains the following columns: 53 | \enumerate{ 54 | \item \code{statistic}: Indicates which method is associated with which score. 55 | \item \code{source}: Source nodes of \code{network}. 56 | \item \code{condition}: Condition representing each column of \code{mat}. 57 | \item \code{score}: Regulatory activity (enrichment score). 58 | } 59 | } 60 | \description{ 61 | Calculates regulatory activities using a z-score as described in KSEA or RoKAI. 62 | } 63 | \details{ 64 | The z-score calculates the mean of the molecular features of the known targets 65 | for each regulator and adjusts it for the number of identified targets for the 66 | regulator, the standard deviation of all molecular features (RoKAI), as well as 67 | the mean of all molecular features (KSEA).
68 | } 69 | \examples{ 70 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 71 | 72 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 73 | net <- readRDS(file.path(inputs_dir, "net.rds")) 74 | 75 | run_zscore(mat, net, minsize=0) 76 | } 77 | \seealso{ 78 | Other decoupleR statistics: 79 | \code{\link{decouple}()}, 80 | \code{\link{run_aucell}()}, 81 | \code{\link{run_fgsea}()}, 82 | \code{\link{run_gsva}()}, 83 | \code{\link{run_mdt}()}, 84 | \code{\link{run_mlm}()}, 85 | \code{\link{run_ora}()}, 86 | \code{\link{run_udt}()}, 87 | \code{\link{run_ulm}()}, 88 | \code{\link{run_viper}()}, 89 | \code{\link{run_wmean}()}, 90 | \code{\link{run_wsum}()} 91 | } 92 | \concept{decoupleR statistics} 93 | -------------------------------------------------------------------------------- /man/show_methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoupleR-pre.R 3 | \name{show_methods} 4 | \alias{show_methods} 5 | \title{Show methods} 6 | \usage{ 7 | show_methods() 8 | } 9 | \description{ 10 | Prints the methods available in decoupleR. The first column corresponds to 11 | the function name in decoupleR and the second to the method's full name. 12 | } 13 | \examples{ 14 | show_methods() 15 | } 16 | -------------------------------------------------------------------------------- /man/show_resources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-omnipath.R 3 | \name{show_resources} 4 | \alias{show_resources} 5 | \title{Shows available resources in Omnipath. For more information visit the 6 | official website for \href{https://omnipathdb.org/}{Omnipath}.} 7 | \usage{ 8 | show_resources() 9 | } 10 | \description{ 11 | Shows available resources in Omnipath.
For more information visit the 12 | official website for \href{https://omnipathdb.org/}{Omnipath}. 13 | } 14 | \examples{ 15 | decoupleR::show_resources() 16 | } 17 | -------------------------------------------------------------------------------- /man/tidyeval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-tidy-eval.R 3 | \name{tidyeval} 4 | \alias{tidyeval} 5 | \alias{expr} 6 | \alias{enquo} 7 | \alias{enquos} 8 | \alias{sym} 9 | \alias{syms} 10 | \alias{.data} 11 | \alias{:=} 12 | \alias{as_name} 13 | \alias{as_label} 14 | \alias{quo_is_null} 15 | \alias{quo_is_missing} 16 | \alias{abort} 17 | \alias{exec} 18 | \title{Tidy eval helpers} 19 | \description{ 20 | \itemize{ 21 | \item \code{\link[rlang:sym]{rlang::sym()}} creates a symbol from a string and 22 | \code{\link[rlang:sym]{syms()}} creates a list of symbols from a 23 | character vector. 24 | \item \code{\link[rlang:topic-defuse]{enquo()}} and 25 | \code{\link[rlang:topic-defuse]{enquos()}} delay the execution of one or 26 | several function arguments. \code{enquo()} returns a single quoted 27 | expression, which is like a blueprint for the delayed computation. 28 | \code{enquos()} returns a list of such quoted expressions. 29 | \item \code{\link[rlang:topic-defuse]{expr()}} quotes a new expression \emph{locally}. It 30 | is mostly useful to build new expressions around arguments 31 | captured with \code{\link[=enquo]{enquo()}} or \code{\link[=enquos]{enquos()}}: 32 | \code{expr(mean(!!enquo(arg), na.rm = TRUE))}. 33 | \item \code{\link[rlang:as_name]{rlang::as_name()}} transforms a quoted variable name 34 | into a string. Supplying something else than a quoted variable 35 | name is an error. 
36 | 37 | That's unlike \code{\link[rlang:as_label]{rlang::as_label()}} which also returns 38 | a single string but supports any kind of R object as input, 39 | including quoted function calls and vectors. Its purpose is to 40 | summarise that object into a single label. That label is often 41 | suitable as a default name. 42 | 43 | If you don't know what a quoted expression contains (for instance 44 | expressions captured with \code{enquo()} could be a variable 45 | name, a call to a function, or an unquoted constant), then use 46 | \code{as_label()}. If you know you have quoted a simple variable 47 | name, or would like to enforce this, use \code{as_name()}. 48 | } 49 | 50 | To learn more about tidy eval and how to use these tools, visit 51 | \url{https://tidyeval.tidyverse.org} and the 52 | \href{https://adv-r.hadley.nz/metaprogramming.html}{Metaprogramming section} of 53 | \href{https://adv-r.hadley.nz}{Advanced R}. 54 | } 55 | \examples{ 56 | if (FALSE) { 57 | help("nse-defuse", package = "rlang") 58 | } 59 | } 60 | \keyword{internal} 61 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | .contents p:first-of-type img { 2 | background-color: white; 3 | padding: 10px; 4 | margin-top: 15px; 5 | scale: 1.1; 6 | position: relative; 7 | left: 10%; 8 | } 9 | 10 | body { 11 | font-size: 117%!important; 12 | } 13 | 14 | .bg-primary { 15 | background-color: #A6A6A6!important; 16 | } 17 | 18 | nav[data-toggle="toc"] .nav > li > a { 19 | border-radius: 0px!important; 20 | padding-left: 1rem!important; 21 | } 22 | 23 | pre { 24 | border-radius: 0px!important; 25 | background-color: white!important; 26 | } 27 | 28 | code span, code a:any-link { 29 | color: #1f1c1b!important; 30 | }/* Normal */ 31 | code span.al, code span.al a:any-link { 32 | color: #bf0303!important; 33 | background-color: #f7e6e6!important; 34 | font-weight: bold; 35 | } 
/* Alert */ 36 | code span.an, code span.an a:any-link { 37 | color: #ca60ca!important; 38 | } /* Annotation */ 39 | code span.at, code span.at a:any-link { 40 | color: #0057ae!important; 41 | } /* Attribute */ 42 | code span.bn, code span.bn a:any-link { 43 | color: #b08000!important; 44 | } /* BaseN */ 45 | code span.bu, code span.bu a:any-link { 46 | color: #644a9b!important; 47 | font-weight: bold; 48 | } /* BuiltIn */ 49 | code span.cf, code span.cf a:any-link { 50 | color: #1f1c1b!important; 51 | font-weight: bold; 52 | } /* ControlFlow */ 53 | code span.ch, code span.ch a:any-link { 54 | color: #924c9d!important; 55 | } /* Char */ 56 | code span.cn, code span.cn a:any-link { 57 | color: #aa5500!important; 58 | }/* Constant */ 59 | code span.co, code span.co a:any-link { 60 | color: #898887!important; 61 | }/* Comment */ 62 | code span.cv, code span.cv a:any-link { 63 | color: #0095ff!important; 64 | }/* CommentVar */ 65 | code span.do, code span.do a:any-link { 66 | color: #607880!important; 67 | }/* Documentation */ 68 | code span.dt, code span.dt a:any-link { 69 | color: #0057ae!important; 70 | }/* DataType */ 71 | code span.dv, code span.dv a:any-link { 72 | color: #b08000!important; 73 | }/* DecVal */ 74 | code span.er, code span.er a:any-link { 75 | color: #bf0303!important; 76 | text-decoration: underline; 77 | } /* Error */ 78 | code span.ex, code span.ex a:any-link { 79 | color: #0095ff!important; 80 | font-weight: bold; 81 | } /* Extension */ 82 | code span.fl, code span.fl a:any-link { 83 | color: #b08000!important; 84 | }/* Float */ 85 | code span.fu, 86 | code span.fu a:any-link { 87 | color: #644a9b!important; 88 | } /* Function */ 89 | code span.im, code span.im a:any-link { 90 | color: #ff5500!important; 91 | }/* Import */ 92 | code span.in, code span.in a:any-link { 93 | color: #b08000!important; 94 | }/* Information */ 95 | code span.kw, code span.kw a:any-link { 96 | /*color: #1f1c1b!important;*/ 97 | color: #007BA5!important; 98 | 
font-weight: bold; 99 | } /* Keyword */ 100 | code span.op, code span.op a:any-link { 101 | /* color: #1f1c1b!important; */ 102 | color: #5E5E5E!important; 103 | }/* Operator */ 104 | code span.ot, code span.ot a:any-link { 105 | color: #006e28!important; 106 | }/* Other */ 107 | code span.pp, code span.pp a:any-link { 108 | color: #006e28!important; 109 | }/* Preprocessor */ 110 | code span.re, code span.re a:any-link { 111 | color: #0057ae!important; 112 | background-color: #e0e9f8!important; 113 | } /* RegionMarker */ 114 | code span.sc, code span.sc a:any-link { 115 | color: #3daee9!important; 116 | }/* SpecialChar */ 117 | code span.ss, code span.ss a:any-link { 118 | color: #ff5500!important; 119 | }/* SpecialString */ 120 | /* code span.st, code span.st a:any-link { 121 | color: #bf0303!important; 122 | }/* String */ 123 | code span.st, code span.st a:any-link { 124 | color: #20794d!important; 125 | }/* String */ 126 | code span.va, code span.va a:any-link { 127 | color: #0057ae!important; 128 | }/* Variable */ 129 | code span.vs, code span.vs a:any-link { 130 | color: #bf0303!important; 131 | }/* VerbatimString */ 132 | code span.wa, code span.wa a:any-link { 133 | color: #bf0303!important; 134 | }/* Warning */ 135 | 136 | .nav-text.text-muted { 137 | color: #ffffff!important; 138 | } 139 | 140 | .nav-item.active > .nav-link { 141 | background-color: #e9ecef!important; 142 | color: #A6A6A6!important; 143 | } 144 | 145 | .navbar-dark input[type="search"] { 146 | background-color: #e9ecef!important; 147 | color: #212529!important; 148 | } 149 | 150 | .template-home > .row > #main > p > img { 151 | background-color: #ffffff!important; 152 | padding-top: 20px; 153 | } 154 | 155 | .template-home > .row > #main > .section > .page-header > img { 156 | display: none!important; 157 | } 158 | 159 | .template-home h1, .template-home h2, .template-home h3, .template-home h4, 160 | .template-home h5, .template-home h6, .template-article .page-header h1 { 161 | 
font-weight: 700!important; 162 | } 163 | 164 | h1#omnipathr { 165 | font-size: 4.125rem!important; 166 | } 167 | 168 | img.logo { 169 | background-color: #ffffff!important; 170 | } 171 | 172 | p.abstract { 173 | font-size: calc(1.375rem + 1.5vw)!important; 174 | } 175 | 176 | h4.author { 177 | font-size: 1.25rem!important; 178 | margin-top: 1rem!important; 179 | } 180 | 181 | @media (min-width: 1200px){ 182 | p.abstract { 183 | font-size: 2.5rem!important; 184 | } 185 | } 186 | 187 | .author_afil { 188 | font-size: small!important; 189 | } 190 | 191 | .nav-item { 192 | margin-left: 10px!important; 193 | } 194 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saezlab/decoupleR/ee886864dd8fdb3bbbe1b752f872e8474ecf795e/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(decoupleR) 
3 | 4 | test_check("decoupleR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-decoupleR-decouple.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Common expressions ------------------------------------------------------ 20 | 21 | # Available statistics 22 | statistics <- c( 23 | 'udt', 24 | 'mdt', 25 | 'aucell', 26 | 'wmean', 27 | 'wsum', 28 | 'ulm', 29 | 'mlm', 30 | 'viper', 31 | 'gsva', 32 | 'ora', 33 | 'fgsea' 34 | ) 35 | 36 | # Arguments for statistics; same order as statistics vector. 37 | args <- list( 38 | udt = list(), 39 | mdt = list(trees=1000), 40 | aucell = list(nproc=1, aucMaxRank=3), 41 | wmean = list(), 42 | wsum = list(), 43 | ulm = list(), 44 | viper = list(), 45 | gsva = list(), 46 | ora = list(n_up=3, n_bottom=3), 47 | fgsea = list() 48 | ) 49 | 50 | partial_decouple <- purrr::partial( 51 | .f = decouple, 52 | mat = mat, 53 | network = net, 54 | .source = source, 55 | .target = target, 56 | statistics = statistics, 57 | minsize = 0, 58 | args = args 59 | ) 60 | 61 | # decouple() -------------------------------------------------------------- 62 | 63 | test_that("decouple same results as independent functions", { 64 | 65 | # Choose the same defaults as in the section on generating expected results. 
66 | res_decouple_defaults <- suppressWarnings(partial_decouple( 67 | show_toy_call = FALSE, 68 | include_time = TRUE 69 | ) %>% 70 | dplyr::select(-.data$run_id, -statistic_time) %>% 71 | dplyr::filter(statistic != 'consensus') %>% 72 | dplyr::arrange(.data$statistic, .data$source, .data$condition)) 73 | 74 | exp_decouple_defaults <- file.path( 75 | expected_dir, 76 | "decouple", 77 | "output-decouple.rds" 78 | ) %>% 79 | readRDS() %>% 80 | dplyr::arrange(.data$statistic, .data$source, .data$condition) 81 | 82 | expect_equal(res_decouple_defaults, exp_decouple_defaults, tolerance=0.1) 83 | }) 84 | -------------------------------------------------------------------------------- /tests/testthat/test-decoupleR-pre.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | test_that("test intersect_regulons", { 4 | inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") 5 | 6 | mat <- readRDS(file.path(inputs_dir, "mat.rds")) 7 | net <- readRDS(file.path(inputs_dir, "net.rds")) 8 | 9 | f_net <- intersect_regulons(mat, net, source, target, minsize = 4) 10 | expect_lt(nrow(f_net), nrow(net)) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-omnipath.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | test_that("test show_resources", { 4 | lst <- show_resources() 5 | testthat::expect_true(length(lst) > 0) 6 | }) 7 | 8 | test_that("test get_resource", { 9 | df <- get_resource('TFcensus') 10 | testthat::expect_true(nrow(df) > 0) 11 | }) 12 | 13 | test_that("test get_progeny human", { 14 | df <- get_progeny(organism = 'human') 15 | testthat::expect_true(nrow(df) > 0) 16 | }) 17 | 18 | test_that("test get_progeny mouse", { 19 | df <- get_progeny(organism = 'mouse') 20 | testthat::expect_true(nrow(df) > 0L) 21 | }) 22 | 23 | test_that("test get_dorothea human", { 24 | df <- 
get_dorothea(organism = 'human', levels = c('A', 'B')) 25 | testthat::expect_true(nrow(df) > 0) 26 | }) 27 | 28 | test_that("test get_dorothea mouse", { 29 | df <- get_dorothea(organism = 'mouse', levels = c('A', 'B')) 30 | testthat::expect_true(nrow(df) > 0) 31 | }) 32 | 33 | test_that("test get_collectri", { 34 | df <- get_collectri(split_complexes=FALSE) 35 | testthat::expect_true(nrow(df) > 0) 36 | df_split <- get_collectri(split_complexes=TRUE) 37 | testthat::expect_true(nrow(df) < nrow(df_split)) 38 | df_meta <- get_collectri(load_meta=TRUE) 39 | testthat::expect_true(ncol(df) < ncol(df_meta)) 40 | }) 41 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-aucell.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "aucell", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | mat <- file.path(input_dir, "mat.rds") %>% 13 | readRDS() 14 | 15 | net <- file.path(input_dir, "net.rds") %>% 16 | readRDS() 17 | 18 | # Test for run_aucell function --------------------------------------------- 19 | 20 | test_that("test run_aucell", { 21 | res_1 <- run_aucell(mat, net, minsize=0, nproc=1, aucMaxRank=3) 22 | exp_1 <- file.path(expected_dir, "output-aucell.rds") %>% 23 | readRDS() 24 | 25 | expect_equal(res_1, exp_1, tolerance=1) 26 | }) 27 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-fgsea.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | 
# Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "fgsea", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_fgsea function --------------------------------------------- 20 | 21 | test_that("test run_fgsea", { 22 | res_1 <- run_fgsea(mat, net, minsize=0, nproc=1) 23 | exp_1 <- file.path(expected_dir, "output-fgsea.rds") %>% 24 | readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.01) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-gsva.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "gsva", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_gsva function --------------------------------------------- 20 | 21 | test_that("test run_gsva", { 22 | res_1 <- run_gsva(mat, net, minsize=1L, verbose = FALSE) 23 | exp_1 <- file.path(expected_dir, "output-gsva.rds") %>% 24 | readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.01) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-mdt.R: -------------------------------------------------------------------------------- 1 | 
library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "mdt", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_mdt function --------------------------------------------- 20 | 21 | test_that("test run_mdt", { 22 | res_1 <- run_mdt(mat, net, minsize=0, trees=1000) 23 | exp_1 <- file.path(expected_dir, "output-mdt.rds") %>% 24 | readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.1) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-mlm.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "mlm", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_mlm function --------------------------------------------- 20 | 21 | test_that("test run_mlm", { 22 | res_1 <- run_mlm(mat, net, minsize=0) 23 | exp_1 <- file.path(expected_dir, "output-mlm.rds") %>% 24 | readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.01) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-ora.R: 
-------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "ora", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_ora function ----------------------------------------------- 20 | test_that("test run_ora", { 21 | res_1 <- run_ora(mat, net, minsize=0, n_up=3, n_bottom=3) 22 | exp_1 <- file.path(expected_dir, "output-ora.rds") %>% 23 | readRDS() 24 | 25 | expect_equal(res_1, exp_1, tolerance=0.01) 26 | expect_error( 27 | object = run_ora(mat, net, minsize=0, n_background = -1), 28 | regexp = "`n` must be a non-missing positive number." 
29 | ) 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-udt.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "udt", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_udt function --------------------------------------------- 20 | 21 | test_that("test run_udt", { 22 | res_1 <- run_udt(mat, net, minsize=0) 23 | exp_1 <- file.path(expected_dir, "output-udt.rds") %>% 24 | readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.01) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-ulm.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "ulm", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_ulm function --------------------------------------------- 20 | 21 | test_that("test run_ulm", { 22 | res_1 <- run_ulm(mat, net, minsize=0) 23 | exp_1 <- file.path(expected_dir, "output-ulm.rds") %>% 24 
| readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.01) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-viper.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "viper", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_viper() ---------------------------------------------------- 20 | 21 | test_that("test run_viper", { 22 | res_1 <- run_viper(mat, net, minsize=0, verbose = FALSE) 23 | exp_1 <- file.path(expected_dir, "output-viper.rds") %>% 24 | readRDS() 25 | 26 | expect_equal(res_1, exp_1, tolerance=0.01) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-wmean.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "wmean", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_wmean function --------------------------------------------- 20 | 21 | test_that("test run_wmean", { 22 | res_1 
<- run_wmean(mat, net, minsize=0) 23 | exp_1 <- file.path(expected_dir, "output-wmean.rds") %>% 24 | readRDS() 25 | 26 | expect_error( 27 | run_wmean(mat, net, minsize=0, times = 1), 28 | "Parameter 'times' must be greater than or equal to 2, but 1 was passed." 29 | ) 30 | expect_equal(res_1, exp_1, tolerance=0.01) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-wsum.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | # Outputs 9 | expected_dir <- system.file("testdata", "outputs", "wsum", package = "decoupleR") 10 | 11 | # Data to run ------------------------------------------------------------- 12 | 13 | mat <- file.path(input_dir, "mat.rds") %>% 14 | readRDS() 15 | 16 | net <- file.path(input_dir, "net.rds") %>% 17 | readRDS() 18 | 19 | # Test for run_wsum function --------------------------------------------- 20 | 21 | test_that("test run_wsum", { 22 | res_1 <- run_wsum(mat, net, minsize=0) 23 | exp_1 <- file.path(expected_dir, "output-wsum.rds") %>% 24 | readRDS() 25 | 26 | expect_error( 27 | run_wsum(mat, net, minsize=0, times = 1), 28 | "Parameter 'times' must be greater than or equal to 2, but 1 was passed." 
29 | ) 30 | expect_equal(res_1, exp_1, tolerance=0.01) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-statistic-zscore.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | # Directories ------------------------------------------------------------- 4 | 5 | # Inputs 6 | input_dir <- system.file("testdata", "inputs", package = "decoupleR") 7 | 8 | 9 | # Data to run ------------------------------------------------------------- 10 | 11 | mat <- file.path(input_dir, "mat.rds") %>% 12 | readRDS() 13 | 14 | net <- file.path(input_dir, "net.rds") %>% 15 | readRDS() 16 | 17 | # Test for run_zscore function -------------------------------------------- 18 | 19 | test_that("test run_zscore", { 20 | res_1 <- run_zscore(mat, net, minsize=0) 21 | 22 | expect_equal(res_1$score[1], 3.52, tolerance=0.01) 23 | expect_equal(sign(range(res_1$score)), c(-1, 1)) 24 | 25 | net_2 <- net 26 | net_2$mor <- 1 27 | res_2 <- run_zscore(mat, net_2, minsize=0) 28 | 29 | expect_equal(res_2$score[1], 3.89, tolerance=0.01) 30 | expect_equal(sign(range(res_2$score)), c(1, 1)) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-utils-dataset-converters.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | library(dplyr) 3 | 4 | # Data set to test --------------------------------------------------------- 5 | 6 | net <- readRDS( 7 | system.file("testdata/inputs", "net.rds", package = "decoupleR") 8 | ) 9 | 10 | # convert_f_defaults ------------------------------------------------------ 11 | 12 | test_that("convert_f_defaults (select-transmute)-like property", { 13 | res_1 <- convert_f_defaults( 14 | .data = net, 15 | source = source, 16 | target = target 17 | ) 18 | 19 | exp_1 <- net %>% 20 | select(source, target) 21 | 22 | expect_equal(res_1, exp_1) 23 | })
24 | 25 | test_that("convert_f_defaults swap property for single.", { 26 | 27 | # Normal rename 28 | res_1 <- convert_f_defaults( 29 | .data = net, 30 | source = target, 31 | target = source 32 | ) 33 | 34 | exp_1 <- net %>% 35 | select(source = target, target = source) 36 | 37 | expect_equal(res_1, exp_1) 38 | 39 | # Default dplyr::rename doesn't allow this. 40 | res_1 <- convert_f_defaults( 41 | .data = net, 42 | source = target, 43 | .use_dots = TRUE 44 | ) 45 | 46 | exp_1 <- net %>% 47 | select(source = target) 48 | 49 | expect_equal(res_1, exp_1) 50 | 51 | # If .use_dots is FALSE, it behaves like a normal rename. 52 | expect_error(convert_f_defaults( 53 | .data = net, 54 | source = target, 55 | .use_dots = FALSE 56 | ), 57 | regexp = "Names must be unique.", 58 | class = "vctrs_error_names_must_be_unique" 59 | ) 60 | }) 61 | 62 | test_that("convert_f_defaults add columns with defaults", { 63 | res_1 <- convert_f_defaults( 64 | .data = net, 65 | source = source, 66 | target = target, 67 | mor = NULL, 68 | likelihood = NULL, 69 | .def_col_val = c(mor = 0, likelihood = 1) 70 | ) 71 | 72 | exp_1 <- net %>% 73 | dplyr::select(source, target) %>% 74 | dplyr::mutate(mor = 0, likelihood = 1) 75 | 76 | expect_equal(res_1, exp_1) 77 | }) 78 | -------------------------------------------------------------------------------- /tests/testthat/test-utils-profiles.R: -------------------------------------------------------------------------------- 1 | library(decoupleR) 2 | 3 | network <- tibble::tribble( 4 | ~tf, ~target, ~mor, 5 | 1, 1, 1, 6 | 2, 1, 0, 7 | 2, 2, 0 8 | ) 9 | 10 | test_that("test get_profile_of", { 11 | partial_get_profile_of <- purrr::partial( 12 | .f = get_profile_of, 13 | data = network, 14 | sources = list(tf = c(1, 2), target = c(1, 2)) 15 | ) 16 | 17 | expected_network <- tibble::tribble( 18 | ~tf, ~target, ~mor, 19 | 1, 1, 1, 20 | 1, 2, NA, 21 | 2, 1, 0, 22 | 2, 2, 0 23 | ) 24 | 25 | expect_equal(partial_get_profile_of(), expected_network) 26 | 27 | expect_equal(
28 | partial_get_profile_of(values_fill = 0), 29 | replace_na(expected_network, list(mor = 0)) 30 | ) 31 | 32 | expect_equal( 33 | partial_get_profile_of(values_fill = list(mor = 0)), 34 | replace_na(expected_network, list(mor = 0)) 35 | ) 36 | }) 37 | 38 | test_that("test pivot_wider_profile", { 39 | partial_pivot <- purrr::partial( 40 | .f = pivot_wider_profile, 41 | data = network, 42 | id_cols = tf, 43 | names_from = target, 44 | values_from = mor 45 | ) 46 | 47 | df_mat <- partial_pivot(to_matrix = FALSE, to_sparse = FALSE) 48 | mat_mat <- partial_pivot(to_matrix = TRUE) 49 | sparse_mat <- partial_pivot(to_sparse = TRUE) 50 | 51 | expect_true(is.data.frame(df_mat)) 52 | expect_true(is.matrix(mat_mat)) 53 | expect_true(is(sparse_mat, "dsCMatrix") || is(sparse_mat, "dtCMatrix")) 54 | }) 55 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | --------------------------------------------------------------------------------