├── .Rbuildignore ├── .github └── workflows │ └── R-CMD-check.yaml ├── CODE_OF_CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── correlatePCs.R ├── deprecated.R ├── distro_expr.R ├── genespca.R ├── get_annotation.R ├── hi_loadings.R ├── makeds.R ├── pair_corr.R ├── pca2go.R ├── pcaExplorer-pkg.R ├── pcaExplorer.R ├── pcaplot.R ├── profile_explore.R └── zzz.R ├── README.md ├── _pkgdown.yml ├── inst ├── CITATION ├── extdata │ ├── about.md │ ├── datainput.md │ ├── instructions.md │ ├── instructions_unr.md │ └── reportTemplate.Rmd └── www │ ├── help_dataformats.png │ └── pcaExplorer.png ├── man ├── correlatePCs.Rd ├── deprecated.Rd ├── distro_expr.Rd ├── figures │ └── pcaExplorer.png ├── geneprofiler.Rd ├── genespca.Rd ├── get_annotation.Rd ├── get_annotation_orgdb.Rd ├── hi_loadings.Rd ├── limmaquickpca2go.Rd ├── makeExampleDESeqDataSet_multifac.Rd ├── pair_corr.Rd ├── pca2go.Rd ├── pcaExplorer-pkg.Rd ├── pcaExplorer.Rd ├── pcaplot.Rd ├── pcaplot3d.Rd ├── pcascree.Rd ├── plotPCcorrs.Rd └── topGOtable.Rd ├── tests ├── testthat.R └── testthat │ ├── setuptests_pcaExplorer.R │ ├── test_annotations.R │ ├── test_correlatePCs.R │ ├── test_hiloadings.R │ ├── test_makeDS.R │ ├── test_moreplots.R │ ├── test_pca2go.R │ ├── test_pcagenes.R │ ├── test_pcasamples.R │ └── test_shiny.R └── vignettes ├── newsnap_01_upload.png ├── newsnap_02_instructions.png ├── newsnap_03_countstable.png ├── newsnap_04_overview.png ├── newsnap_05_samples.png ├── newsnap_06_genes.png ├── newsnap_07_finder.png ├── newsnap_08_pca2go.png ├── newsnap_09_multifac.png ├── newsnap_10_editor.png ├── newsnap_11_about.png ├── pcaExplorer.Rmd ├── unr_00_demo_loaded.png ├── unr_01_splom.png ├── unr_02_sts_heatmap.png ├── unr_03_summary_counts.png ├── unr_04a_samplespca.png ├── unr_04b_samples_dex.png ├── unr_05_loadings.png ├── unr_06a_genefinder_dusp1.png ├── unr_06b_genefinder_per1.png ├── unr_06c_genefinder_ddx3y.png ├── unr_06c_genefinder_ddx3y_dex.png ├── 
unr_07_genespca.png ├── unr_08_pca2go_topgo.png ├── unr_90_exitsave.png ├── unr_99_editreport.png └── upandrunning.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.gitignore$ 4 | ^.travis.yml$ 5 | ^codecov\.yml$ 6 | ^appveyor.yml$ 7 | .circleci 8 | _pkgdown.yml 9 | docs/* 10 | ^docs$ 11 | ^\.github$ 12 | ^CODE_OF_CONDUCT\.md$ 13 | ^LICENSE\.md$ 14 | ^TODO\.md$ 15 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | pull_request: 4 | branches: 5 | - master 6 | schedule: 7 | - cron: '0 8 * * 5' 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | container: ${{ matrix.config.image }} 15 | 16 | name: ${{ matrix.config.os }} (${{ matrix.config.bioc }} - ${{ matrix.config.image }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | config: 22 | - { os: windows-latest, bioc: 'devel'} 23 | - { os: macOS-latest, bioc: 'devel', curlConfigPath: '/usr/bin/'} 24 | - { os: ubuntu-latest, bioc: 'devel'} 25 | # - { os: ubuntu-latest, bioc: 'devel', cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} 26 | - { os: ubuntu-latest, image: 'bioconductor/bioconductor_docker:devel'} 27 | 28 | env: 29 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 30 | CRAN: ${{ matrix.config.cran }} 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 33 | CURL_CONFIG: ${{ matrix.config.curlConfigPath }}curl-config 34 | 35 | steps: 36 | - name: Check out repo 37 | uses: actions/checkout@v2 38 | 39 | - name: Set up R and install BiocManager 40 | uses: grimbough/bioc-actions/setup-bioc@v1 41 | if: matrix.config.image == null 42 | with: 43 | bioc-version: ${{ matrix.config.bioc }} 44 | 45 | - name: Set up pandoc 46 | uses: 
r-lib/actions/setup-pandoc@v2 47 | if: matrix.config.image == null 48 | 49 | - name: Install remotes 50 | run: | 51 | install.packages('remotes') 52 | shell: Rscript {0} 53 | 54 | - name: Query dependencies 55 | run: | 56 | saveRDS(remotes::dev_package_deps(dependencies = TRUE, repos = c(getOption('repos'), BiocManager::repositories())), 'depends.Rds', version = 2) 57 | shell: Rscript {0} 58 | 59 | - name: Cache R packages 60 | if: runner.os != 'Windows' && matrix.config.image == null 61 | uses: actions/cache@v4 62 | with: 63 | path: ${{ env.R_LIBS_USER }} 64 | key: ${{ runner.os }}-bioc-${{ matrix.config.bioc }}-${{ hashFiles('depends.Rds') }} 65 | restore-keys: ${{ runner.os }}-bioc-${{ matrix.config.bioc }}- 66 | 67 | - name: Install system dependencies 68 | if: runner.os == 'Linux' 69 | env: 70 | RHUB_PLATFORM: linux-x86_64-ubuntu-gcc 71 | uses: r-lib/actions/setup-r-dependencies@v2 72 | with: 73 | extra-packages: any::rcmdcheck 74 | pak-version: devel 75 | 76 | - name: Install system dependencies (macOS) 77 | if: runner.os == 'macOS' 78 | run: | 79 | brew install cairo 80 | brew install --cask xquartz 81 | brew install harfbuzz 82 | brew install fribidi 83 | brew install fftw 84 | 85 | # - name: Set up gfortran symlinks (macOS) 86 | # if: runner.os == 'macOS' 87 | # run: | 88 | # set -x 89 | # sudo ln -s /usr/local/Cellar/gcc@8/8.4.0_2/lib/gcc/8 /usr/local/gfortran/lib 90 | # gfortran --version 91 | 92 | - name: Install dependencies 93 | run: | 94 | local_deps <- remotes::local_package_deps(dependencies = TRUE) 95 | deps <- remotes::dev_package_deps(dependencies = TRUE, repos = BiocManager::repositories()) 96 | BiocManager::install(local_deps[local_deps %in% deps$package[deps$diff != 0]], Ncpu = 2L) 97 | BiocManager::install(c("GenomeInfoDbData", "GO.db"), Ncpu = 2L) 98 | BiocManager::install("markdown") 99 | remotes::install_cran('rcmdcheck', Ncpu = 2L) 100 | shell: Rscript {0} 101 | 102 | - name: Session info 103 | run: | 104 | options(width = 100) 105 | 
pkgs <- installed.packages()[, "Package"] 106 | sessioninfo::session_info(pkgs, include_base = TRUE) 107 | shell: Rscript {0} 108 | 109 | - name: Build, Install, Check 110 | uses: grimbough/bioc-actions/build-install-check@v1 111 | 112 | - name: Run BiocCheck 113 | uses: grimbough/bioc-actions/run-BiocCheck@v1 114 | with: 115 | arguments: '--no-check-bioc-views --no-check-bioc-help' 116 | error-on: 'error' 117 | 118 | - name: Upload check results 119 | if: failure() 120 | uses: actions/upload-artifact@master 121 | with: 122 | name: ${{ runner.os }}-results 123 | path: check 124 | 125 | - name: Show testthat output 126 | if: always() 127 | run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true 128 | shell: bash 129 | 130 | - name: Upload check results 131 | if: failure() 132 | uses: actions/upload-artifact@master 133 | with: 134 | name: ${{ runner.os }}-bioc-${{ matrix.config.bioc }}-results 135 | path: check 136 | 137 | - name: Test coverage 138 | if: matrix.config.os == 'macOS-latest' 139 | run: | 140 | install.packages("covr") 141 | covr::codecov(token = "${{secrets.CODECOV_TOKEN}}") 142 | shell: Rscript {0} 143 | 144 | - name: Deploy 145 | if: github.event_name == 'push' && github.ref == 'refs/heads/devel' && matrix.config.os == 'macOS-latest' 146 | run: | 147 | R CMD INSTALL . 
148 | Rscript -e "remotes::install_dev('pkgdown'); pkgdown::deploy_to_branch(new_process = FALSE)" 149 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## 
Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards 42 | of acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies 54 | when an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail 56 | address, posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at marinif@uni-mainz.de. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 
81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.0, 118 | available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 119 | 120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 121 | enforcement ladder](https://github.com/mozilla/diversity). 122 | 123 | [homepage]: https://www.contributor-covenant.org 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 
127 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: pcaExplorer 2 | Type: Package 3 | Title: Interactive Visualization of RNA-seq Data Using a Principal Components Approach 4 | Version: 3.3.0 5 | Date: 2024-12-19 6 | Authors@R: c(person("Federico", "Marini", role = c("aut", "cre"), 7 | email ="marinif@uni-mainz.de", 8 | comment = c(ORCID = '0000-0003-3252-7758'))) 9 | Description: This package provides functionality for interactive visualization 10 | of RNA-seq datasets based on Principal Components Analysis. The methods provided 11 | allow for quick information extraction and effective data exploration. A Shiny 12 | application encapsulates the whole analysis. 13 | License: MIT + file LICENSE 14 | Imports: 15 | DESeq2, 16 | SummarizedExperiment, 17 | mosdef (>= 1.1.0), 18 | GenomicRanges, 19 | IRanges, 20 | S4Vectors, 21 | genefilter, 22 | ggplot2 (>= 2.0.0), 23 | heatmaply, 24 | plotly, 25 | scales, 26 | NMF, 27 | plyr, 28 | topGO, 29 | limma, 30 | GOstats, 31 | GO.db, 32 | AnnotationDbi, 33 | shiny (>= 0.12.0), 34 | shinydashboard, 35 | shinyBS, 36 | ggrepel, 37 | DT, 38 | shinyAce, 39 | threejs, 40 | biomaRt, 41 | pheatmap, 42 | knitr, 43 | rmarkdown, 44 | base64enc, 45 | tidyr, 46 | grDevices, 47 | methods 48 | Suggests: 49 | testthat, 50 | BiocStyle, 51 | markdown, 52 | airway, 53 | org.Hs.eg.db, 54 | htmltools 55 | URL: https://github.com/federicomarini/pcaExplorer, 56 | https://federicomarini.github.io/pcaExplorer/ 57 | BugReports: https://github.com/federicomarini/pcaExplorer/issues 58 | biocViews: ImmunoOncology, Visualization, RNASeq, DimensionReduction, 59 | PrincipalComponent, QualityControl, GUI, ReportWriting, ShinyApps 60 | VignetteBuilder: knitr 61 | RoxygenNote: 7.3.2 62 | Encoding: UTF-8 63 | NeedsCompilation: no 64 | Roxygen: list(markdown = TRUE) 65 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2024 2 | COPYRIGHT HOLDER: pcaExplorer authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2024 pcaExplorer authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(correlatePCs) 4 | export(distro_expr) 5 | export(geneprofiler) 6 | export(genespca) 7 | export(get_annotation) 8 | export(get_annotation_orgdb) 9 | export(hi_loadings) 10 | export(limmaquickpca2go) 11 | export(makeExampleDESeqDataSet_multifac) 12 | export(pair_corr) 13 | export(pca2go) 14 | export(pcaExplorer) 15 | export(pcaplot) 16 | export(pcaplot3d) 17 | export(pcascree) 18 | export(plotPCcorrs) 19 | export(topGOtable) 20 | import(DESeq2) 21 | import(GO.db) 22 | import(SummarizedExperiment) 23 | import(biomaRt) 24 | import(ggplot2) 25 | import(knitr) 26 | import(methods) 27 | import(plyr) 28 | import(rmarkdown) 29 | import(shiny) 30 | import(shinydashboard) 31 | import(threejs) 32 | import(topGO) 33 | importFrom(AnnotationDbi,Term) 34 | importFrom(AnnotationDbi,mapIds) 35 | importFrom(AnnotationDbi,select) 36 | importFrom(DT,datatable) 37 | importFrom(GenomicRanges,GRanges) 38 | importFrom(IRanges,IRanges) 39 | importFrom(NMF,aheatmap) 40 | importFrom(S4Vectors,DataFrame) 41 | importFrom(base64enc,dataURI) 42 | importFrom(genefilter,rowVars) 43 | importFrom(ggrepel,geom_label_repel) 44 | importFrom(grDevices,colorRamp) 45 | importFrom(grDevices,dev.off) 46 | importFrom(grDevices,pdf) 47 | importFrom(grDevices,rainbow) 48 | importFrom(grDevices,rgb) 49 | importFrom(heatmaply,heatmaply) 50 | importFrom(limma,goana) 51 | importFrom(limma,topGO) 52 | importFrom(mosdef,gene_plot) 53 | importFrom(mosdef,run_topGO) 54 | importFrom(pheatmap,pheatmap) 55 | importFrom(plotly,plotlyOutput) 56 | importFrom(plotly,renderPlotly) 57 | importFrom(scales,brewer_pal) 58 | importFrom(scales,hue_pal) 59 | importFrom(shiny,addResourcePath) 60 | importFrom(shinyAce,aceAutocomplete) 61 | importFrom(shinyAce,aceEditor) 62 | 
importFrom(shinyAce,getAceModes) 63 | importFrom(shinyAce,getAceThemes) 64 | importFrom(shinyAce,updateAceEditor) 65 | importFrom(shinyBS,bsCollapse) 66 | importFrom(shinyBS,bsCollapsePanel) 67 | importFrom(shinyBS,bsTooltip) 68 | importFrom(tidyr,gather) 69 | importMethodsFrom(GOstats,hyperGTest) 70 | importMethodsFrom(GOstats,summary) 71 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # pcaExplorer 3.2.0 2 | 3 | ## Other notes 4 | 5 | * Some changes in the source of the documentation, providing anchors to all function calls. This avoids the new note in R CMD check in the new major release 6 | 7 | # pcaExplorer 2.99.0 8 | 9 | ## New features 10 | 11 | * The `pcaplot()` function now provides a clever default for the `intgroup` parameter, if some content (as it should) is provided in the `colData` slot of the main input object 12 | 13 | ## Other notes 14 | 15 | * The transition to the functions available in the `mosdef` Bioconductor is complete, with the original functions now being deprecated. This applies to `topGOtable()` (now replaced by `mosdef::run_topGO()`) 16 | * The gene plot widgets now also use the `gene_plot()` function from `mosdef`, instead of the previous undocumented internal function 17 | * The Roxygen-based documentation now supports markdown. No visible changes should appear to the user, as the content should have stayed fairly the same 18 | * Although no visible changes for the end user are expected, the incoming major version bump will reflect the change in the dependency graph, ensuring that this is noticed at least at the version numbering level 19 | 20 | # pcaExplorer 2.22.0 21 | 22 | ## Other notes 23 | 24 | * `get_annotation_orgdb()` gains an additional argument, `key_for_genenames`, which defaults to "SYMBOL". 
This should not change the behavior of the function, if not specified, but accommodates for the use of annotation packages where the information has been encoded differently (e.g. org.Sc.sgd.db where the info is contained in the "ORF" column) 25 | 26 | ## Bug fixes 27 | 28 | * `pcaplot` correctly returns the values for the percent of explained variance, which were correctly displayed on the plot but not stored as they should in the attribute slot 29 | 30 | # pcaExplorer 2.20.0 31 | 32 | ## Other notes 33 | 34 | * the tables in the PCA2GO tab panel can be compacted only if they are computed via the `pca2go` function (offline) - at runtime, `limmaquickpca2go` is used and no compaction is required 35 | * if an annotation is provided with a column `gene_id`, these values are actually overwriting the rownames (makes the object more robust with respect to its provenance) 36 | 37 | # pcaExplorer 2.16.0 38 | 39 | ## Other notes 40 | 41 | * Replaced dependency from `d3heatmap` with the functionality of `heatmaply` 42 | 43 | # pcaExplorer 2.12.0 44 | 45 | ## Bug fixes 46 | 47 | * Fixed an error in the initialization of the app due to a new behavior introduced by `shinyAce` in version >= 0.4.0 48 | * `topGOtable` does not generate rows with NAs if providing a too high number for the categories to report 49 | 50 | ## Other notes 51 | 52 | * The type of the columns in the data.frame returned by `topGOtable` are now correctly referring to the type they contain - e.g. 
the p values are now stored as numeric values 53 | * Citation now refers to the published manuscript - https://doi.org/10.1186/s12859-019-2879-1 54 | 55 | # pcaExplorer 2.10.0 56 | 57 | ## New features 58 | 59 | * Added extra parameters to `topGOtable` to offer more control on the method used, and the option to correct the p-values for multiple testing (via the Benjamini-Yekutieli procedure) 60 | * `pca2go` has now an option to return (early) a list with vectors of genes with high loadings 61 | * Better preview of the uploaded data with modal dialog windows triggered by buttons which appear once corresponding inputs are available 62 | * Improved notification system: in addition to the progress bar, info that all input is correctly specified, or suggest optional annotation loading 63 | * Added flexibility to select separator for each of the uploadable files 64 | * The pairwise correlation plots can now use logarithmic scale in the axes, use smaller subsets of the data for quicker inspection, and resizes the correlation info proportionally to its intensity 65 | * The sample to sample heatmap supports additionally manhattan and correlation-based distances 66 | * There is a new vignette with a detailed quick start, "Up and running with pcaExplorer", specifying how the workflow with `pcaExplorer` can look like, demonstrated on the `airway` dataset 67 | * In the Instructions panel, we added buttons to access the fully rendered documentation, either local or online if e.g. deployed to a server. Related to this, `pcaExplorer` has a new parameter, `runLocal`, to control this behavior 68 | * An additional parameter, `annopkg`, has been added to `pca2go()` to override the behavior with the `organism` parameter (this is useful when the name of the annotation package is not conform to the classical `org.Xx.eg.db`, e.g. 
for Arabidopsis Thaliana); a detailed use case has been added in the main vignette 69 | 70 | ## Other notes 71 | 72 | * The computing of the required objects now requires the explicit action on the dedicated button, and the tooltip informs the user on what steps are taken (including normalization) 73 | * An information box has been added to provide detailed information on the required input formats 74 | * Added notification to specify how to install the airway package for demonstration purposes if not already available 75 | * Added startup message upon loading the package 76 | * The content in the Instructions tab is now contained in collapsible elements 77 | * The file formats accepted by `pcaExplorer` are now specified both in the vignette text, as well as in the app at runtime 78 | * The content of the Instructions tab is now more compact, containing the rendered "Up and running with pcaExplorer" vignette. The full vignettes can be accessed via buttons in the same panel 79 | * Added instructions to install phantomJS via the `webshot` package - would raise an error when previewing the report 80 | 81 | # pcaExplorer 2.8.0 82 | 83 | ## New features 84 | 85 | * Added a `NEWS.md` file to track changes to the package 86 | * PCA plots now are correctly generated with fixed coordinates 87 | * Introduced use of conditionalPanels for better handling of errors in the app tabs 88 | * Added possibility to use different transformations, also reflected in the change of one of the main arguments (previously `rlt`, now `dst`, i.e. `DESeqTransform`): rlog, vst, shifted log, ... The transformation type is tracked in the reactive values. 
89 | * More modular loading of data, by splitting generation of `dds` and `dst` 90 | * `pca2go` is now also picking values from the input widgets 91 | 92 | ## Other notes 93 | 94 | * Built project website via pkgdown, with customized reference structure 95 | * Correctly adding the resources to shinyBS, loaded via `.onLoad`, and also better placement for bstooltips 96 | * Editor options start collapsed in the Report Editor tab 97 | * Vignette and template report are updated to reflect the new parameter names 98 | * Uniformed style for ggplot2 plots 99 | * Better tooltip placement in the main page 100 | * Replaced `print` calls with more appropriate `message`s 101 | * Displaying user returned messages in long (plotting) operations 102 | 103 | ## Bug fixes 104 | 105 | * Fixed behavior of rendering inline the content of the report - did not work properly for server deployed instances 106 | 107 | # pcaExplorer 2.6.0 108 | 109 | ## New features 110 | 111 | * Automatically computing size factors where required 112 | * Added progress indication when compiling the report 113 | 114 | ## Bug fixes 115 | 116 | * Fixed after changes in threejs package 117 | * Edited dropdown menu to remove unused green badge 118 | * Menus start expanded on the side, again 119 | * `theme_bw` applied when needed, corrected previous behavior 120 | 121 | ## Other notes 122 | 123 | * Updated citation infos 124 | * Slight difference in handling validate/need errors 125 | 126 | # pcaExplorer 2.2.0 127 | 128 | ## New features 129 | 130 | * Added Demo data, loadable via demo button 131 | 132 | ## Bug fixes 133 | 134 | * Plots work now without cutting out points when zooming in 135 | 136 | ## Other notes 137 | 138 | * Saved reactive values are now exported to dedicate environments (instead of assigning to global) 139 | 140 | # pcaExplorer 1.99.0 141 | 142 | ## Other notes 143 | 144 | * Reflecting the major feature added, will trigger a major version number bump. Welcome soon, pcaExplorer 2.0.0! 
145 | 146 | # pcaExplorer 1.1.5 147 | 148 | ## New features 149 | 150 | * Automated report generation - template available + editor in the app tab for advanced user customization 151 | * Support for state saving, in the global environment as well as with binary data 152 | * All plots generated can be now exported with the dedicated button 153 | * Added confidence ellipse for PCA plot 154 | * Added 3d pca plot 155 | * Added functions to automatically retrieve the annotation in format ready to use for the app 156 | * Added profile explorer function, for plotting behaviour across all samples for subset of genes 157 | * Added distribution plots 158 | * Added pairwise correlation plot 159 | * Added table to enhance readability of the gene finder plot, also by annotating sample names 160 | 161 | ## Bug fixes 162 | 163 | * Minor typos fixed in the tabs 164 | * Added option row.names to read.delim for allowing row names when uploading the data 165 | 166 | ## Other notes 167 | 168 | * Added extra info in the about section 169 | * Instructions and vignette rewritten to reflect new design of the app 170 | 171 | # pcaExplorer 1.1.3 172 | 173 | ## Bug fixes 174 | 175 | * Remove y axis limits to gene boxplots 176 | * Fixed: correct labels and color assignments for genespca 177 | 178 | # pcaExplorer 1.0.0 179 | 180 | ## Other notes 181 | 182 | * Released in Bioconductor 3.3 183 | 184 | # pcaExplorer 0.99.1 185 | 186 | ## Other notes 187 | 188 | * Changed format of the NEWS file 189 | 190 | # pcaExplorer 0.99.0 191 | 192 | ## Other notes 193 | 194 | * Ready for submission to Bioconductor 195 | 196 | # pcaExplorer 0.9.0 197 | 198 | ## Other notes 199 | 200 | * Added TravisCI integration for both branches 201 | * Added appveyor integration - plus badges in the README.md 202 | * Code cleanup 203 | * Added screenshots for the vignette 204 | * Removed some lengthy tests 205 | 206 | # pcaExplorer 0.8.0 207 | 208 | ## New features 209 | 210 | * Selection of identifier type available in 
pca2go 211 | 212 | ## Bug fixes 213 | 214 | * Couple of layout fixes 215 | 216 | ## Other notes 217 | 218 | * MIT license 219 | * Added TravisCI integration 220 | * Added codecov integration 221 | * Enhanced documentation 222 | 223 | # pcaExplorer 0.7.0 224 | 225 | ## New features 226 | 227 | * Vignette full draft done 228 | 229 | # pcaExplorer 0.6.4 230 | 231 | ## Other notes 232 | 233 | * Updated NEWS file 234 | 235 | # pcaExplorer 0.6.3 236 | 237 | ## New features 238 | 239 | * About and Instructions done by now 240 | * Added some missing details on the documentations 241 | 242 | # pcaExplorer 0.6.2 243 | 244 | ## Other notes 245 | 246 | * Corrected wordings for (cor)relations of principal components with covariates 247 | * Added a couple of checks if correct objects are provided 248 | 249 | # pcaExplorer 0.6.1 250 | 251 | ## New features 252 | 253 | * Added function to remove selected samples suspected to be deemed as outliers, in order to see the effect of clustering on the good ones 254 | 255 | # pcaExplorer 0.6.0 256 | 257 | ## Other notes 258 | 259 | * Documentation completed 260 | * Examples fully working, cleaned up further a little more. 
#' Principal components (cor)relation with experimental covariates
#'
#' Computes the significance of (cor)relations between PCA scores and the sample
#' experimental covariates, using Kruskal-Wallis test for categorical variables
#' (factor or character columns) and the `cor.test` based on Spearman's
#' correlation for all other (continuous) variables
#'
#' @param pcaobj A `prcomp` object
#' @param coldata A `data.frame` object containing the experimental
#' covariates
#' @param pcs A numeric vector, containing the corresponding PC number. Any
#' subset of the available components can be requested (e.g. `c(2, 4)`)
#'
#' @return A numeric matrix with computed p values, with one row for each
#' requested principal component (named `PC_<number>`) and one column for each
#' covariate. Entries are `NA` for categorical covariates with fewer than two
#' distinct observed values, where no test is performed
#'
#' @examples
#' library(DESeq2)
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' pcaobj <- prcomp(t(assay(rlt)))
#' correlatePCs(pcaobj, colData(dds))
#'
#' @export
correlatePCs <- function(pcaobj, coldata, pcs = 1:4) {
  # extract the scores from the pc object
  x <- pcaobj$x

  # fail early, and clearly, if a requested component does not exist
  if (!is.numeric(pcs) || anyNA(pcs) || any(pcs < 1 | pcs > ncol(x))) {
    stop("'pcs' must contain valid principal component numbers between 1 and ",
         ncol(x), call. = FALSE)
  }

  # one row per requested PC, one column per covariate
  res <- matrix(NA_real_, nrow = length(pcs), ncol = ncol(coldata))
  colnames(res) <- colnames(coldata)
  rownames(res) <- paste0("PC_", pcs)

  # seq_len() keeps the loop empty when there are no covariates
  for (i in seq_len(ncol(coldata))) {
    covariate <- coldata[[i]]
    # is.factor() also covers ordered factors, whose class() has length 2
    is_categorical <- is.factor(covariate) || is.character(covariate)
    # a grouping variable needs at least two observed groups to be testable;
    # counting observed values (not levels) also handles character columns
    # and factors with unused levels
    n_groups <- length(unique(covariate[!is.na(covariate)]))

    # rows are addressed by position in `pcs`, not by the PC number itself,
    # so that e.g. pcs = c(2, 4) fills rows 1 and 2
    for (k in seq_along(pcs)) {
      scores <- x[, pcs[k]]
      if (is_categorical) {
        if (n_groups > 1) {
          res[k, i] <- kruskal.test(scores, covariate)$p.value
        }
      } else {
        res[k, i] <- cor.test(scores, covariate, method = "spearman")$p.value
      }
    }
  }
  res
}


#' Plot significance of (cor)relations of covariates VS principal components
#'
#' Plots the significance of the (cor)relation of each covariate vs a principal component
#'
#' @param pccorrs A matrix of p values generated by [correlatePCs], with rows
#' named `PC_<number>`
#' @param pc An integer number, corresponding to the principal component of
#' interest
#' @param logp Logical, defaults to `TRUE`, displays the -`log10` of
#' the pvalue instead of the p value itself
#'
#' @return Called for its side effect of drawing a bar plot on the active
#' graphics device; the value of the underlying `barplot()` call is returned
#'
#' @examples
#' library(DESeq2)
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- rlogTransformation(dds)
#' pcaobj <- prcomp(t(assay(rlt)))
#' res <- correlatePCs(pcaobj, colData(dds))
#' plotPCcorrs(res)
#'
#' @export
plotPCcorrs <- function(pccorrs, pc = 1, logp = TRUE) {
  selectedPC <- paste0("PC_", pc)
  # report a missing component explicitly instead of "subscript out of bounds"
  if (!all(selectedPC %in% rownames(pccorrs))) {
    stop("Principal component ", paste(pc, collapse = ", "),
         " is not available in 'pccorrs'; available rows: ",
         paste(rownames(pccorrs), collapse = ", "), call. = FALSE)
  }
  pvals <- pccorrs[selectedPC, ]
  # with a single covariate, `[` drops the name: restore it to label the bar
  if (is.null(dim(pvals)) && is.null(names(pvals)) && ncol(pccorrs) == 1) {
    names(pvals) <- colnames(pccorrs)
  }

  if (logp) pvals <- -log10(pvals)

  barplot(pvals, las = 2, col = "steelblue",
          main = paste0("Significance of the relations between PC ", pc, " vs covariates"),
          ylab = if (logp) "-log10(pval)" else "pval")
}
#' Deprecated functions in pcaExplorer
#'
#' Functions that are on their way to the function afterlife.
#' Their successors are also listed.
#'
#' The successors of these functions are likely coming after the rework that
#' led to the creation of the `mosdef` package. See its documentation for
#' more details.
#'
#' @param ... Ignored arguments.
#'
#' @return All functions throw a warning, with a deprecation message pointing
#' towards its descendant (if available).
#'
#' @name deprecated
#'
#' @section Transitioning to the mosdef framework:
#'
#' - [topGOtable()] is now being replaced by the more flexible
#' [mosdef::run_topGO()] function
#'
#' @author Federico Marini
#'
#' @examples
#' # try(topGOtable())
#'
NULL


## #' @export
## #' @rdname defunct
## trendVar <- function(...) {
##   .Defunct("fitTrendVar")
## }


#' Plot distribution of expression values
#'
#' @param rld A [DESeq2::DESeqTransform()] object.
#' @param plot_type Character, choose one of `boxplot`, `violin` or
#' `density`. Defaults to `density`. Any other value raises an error.
#'
#' @return A plot with the distribution of the expression values
#' @export
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' distro_expr(rlt)
distro_expr <- function(rld, plot_type = "density") {
  # fail early with an informative message, instead of reaching the end of
  # the function with no plot object defined
  plot_type <- match.arg(plot_type, c("density", "boxplot", "violin"))

  # long format: one row per (sample, gene) expression value
  allrld <- tidyr::gather(as.data.frame(assay(rld)))
  names(allrld) <- c("Sample", "rlogExpression")

  p <- switch(
    plot_type,
    boxplot = ggplot(allrld, aes_string(x = "Sample", y = "rlogExpression")) +
      geom_boxplot(aes_string(col = "Sample", fill = "Sample"), alpha = 0.5),
    violin = ggplot(allrld, aes_string(x = "Sample", y = "rlogExpression")) +
      geom_violin(aes_string(col = "Sample", fill = "Sample"), alpha = 0.5),
    density = ggplot(allrld, aes_string(x = "rlogExpression")) +
      geom_density(aes_string(color = "Sample"), alpha = 0.1)
  )

  p + theme_bw()
}
#' Principal components analysis on the genes
#'
#' Computes and plots the principal components of the genes, optionally displaying
#' the samples as in a typical biplot visualization.
#'
#' The implementation of this function is based on the beautiful `ggbiplot`
#' package developed by Vince Vu, available at https://github.com/vqv/ggbiplot.
#' The adaptation and additional parameters are tailored to display typical genomics data
#' such as the transformed counts of RNA-seq experiments
#'
#' @param x A [DESeq2::DESeqTransform()] object, with data in `assay(x)`,
#' produced for example by either [DESeq2::rlog()] or
#' [DESeq2::varianceStabilizingTransformation()]
#' @param ntop Number of top genes to use for principal components,
#' selected by highest row variance
#' @param choices Vector of two numeric values, to select on which principal components to plot
#' @param arrowColors Vector of character, either as long as the number of the samples, or one single value
#' @param groupNames Factor containing the groupings for the input data. Is efficiently chosen
#' as the (interaction of more) factors in the colData for the object provided
#' @param biplot Logical, whether to additionally draw the samples labels as in a biplot representation
#' @param scale Covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1,
#' the inner product between the variables approximates the covariance and the
#' distance between the points approximates the Mahalanobis distance.
#' @param pc.biplot Logical, for compatibility with biplot.princomp()
#' @param obs.scale Scale factor to apply to observations
#' @param var.scale Scale factor to apply to variables
#' @param groups Optional factor variable indicating the groups that the observations
#' belong to. If provided the points will be colored according to groups
#' @param ellipse Logical, draw a normal data ellipse for each group
#' @param ellipse.prob Size of the ellipse in Normal probability
#' @param labels optional Vector of labels for the observations
#' @param labels.size Size of the text used for the labels
#' @param alpha Alpha transparency value for the points (0 = transparent, 1 = opaque)
#' @param var.axes Logical, draw arrows for the variables?
#' @param circle Logical, draw a correlation circle? (only applies when prcomp
#' was called with scale = TRUE and when var.scale = 1)
#' @param circle.prob Size of the correlation circle in Normal probability
#' @param varname.size Size of the text for variable names
#' @param varname.adjust Adjustment factor the placement of the variable names,
#' '>= 1' means farther from the arrow
#' @param varname.abbrev Logical, whether or not to abbreviate the variable names
#' @param returnData Logical, if TRUE returns a data.frame for further use, containing the
#' selected principal components for custom plotting
#' @param coordEqual Logical, default FALSE, for allowing brushing. If TRUE, plot using
#' equal scale cartesian coordinates
#' @param scaleArrow Multiplicative factor, usually >=1, only for visualization purposes,
#' to allow for distinguishing where the variables are plotted
#' @param useRownamesAsLabels Logical, if TRUE uses the row names as labels for plotting
#' @param point_size Size of the points to be plotted for the observations (genes)
#' @param annotation A `data.frame` object, with row.names as gene identifiers (e.g. ENSEMBL ids)
#' and a column, `gene_name`, containing e.g. HGNC-based gene symbols
#'
#' @return An object created by `ggplot`, which can be assigned and further customized.
#' If `returnData` is TRUE, a `data.frame` with the coordinates of the genes
#' on the selected components is returned instead.
#'
#' @examples
#'
#' library(DESeq2)
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- rlogTransformation(dds)
#' groups <- colData(dds)$condition
#' groups <- factor(groups, levels = unique(groups))
#' cols <- scales::hue_pal()(2)[groups]
#' genespca(rlt, ntop=100, arrowColors = cols, groupNames = groups)
#'
#' groups_multi <- interaction(as.data.frame(colData(rlt)[, c("condition", "tissue")]))
#' groups_multi <- factor(groups_multi, levels = unique(groups_multi))
#' cols_multi <- scales::hue_pal()(length(levels(groups_multi)))[factor(groups_multi)]
#' genespca(rlt, ntop = 100, arrowColors = cols_multi, groupNames = groups_multi)
#'
#' @export
genespca <- function(x,
                     ntop,
                     choices = c(1, 2),
                     arrowColors = "steelblue",
                     groupNames = "group",
                     biplot = TRUE,
                     scale = 1, pc.biplot = TRUE,
                     obs.scale = 1 - scale, var.scale = scale, groups = NULL,
                     ellipse = FALSE, ellipse.prob = 0.68, labels = NULL, labels.size = 3,
                     alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69,
                     varname.size = 4, varname.adjust = 1.5, varname.abbrev = FALSE,
                     returnData = FALSE, coordEqual = FALSE, scaleArrow = 1,
                     useRownamesAsLabels = TRUE, point_size = 2, annotation = NULL) {

  stopifnot(length(choices) == 2)
  # scalar condition: use the short-circuit operator
  if (length(arrowColors) != 1 && length(arrowColors) != ncol(x)) {
    stop("Please provide either one color or a vector as long as the number of samples")
  }

  # PCA with the genes as observations, restricted to the most variable ones
  rv <- rowVars(assay(x))
  select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  pca <- prcomp(assay(x)[select, ])

  percentVar <- pca$sdev^2 / sum(pca$sdev^2)
  nobs.factor <- sqrt(nrow(pca$x) - 1)

  if (!biplot) {
    devs <- pca$sdev
    pcast <- pca
    pcast$x <- sweep(pca$x, 2, 1 / (devs * nobs.factor), FUN = "*") * nobs.factor
    d <- data.frame(PC1 = pcast$x[, choices[1]],
                    PC2 = pcast$x[, choices[2]],
                    names = rownames(assay(x)[select, ]))

    if (returnData) {
      attr(d, "percentVar") <- percentVar
      return(d)
    }

    return(
      ggplot(data = d, aes_string(x = "PC1", y = "PC2")) +
        geom_point(size = 3) +
        xlab(paste0("PC", choices[1], ": ", round(percentVar[choices[1]] * 100), "% variance")) +
        ylab(paste0("PC", choices[2], ": ", round(percentVar[choices[2]] * 100), "% variance")) +
        # geom_text(aes(label=names),hjust=0.25, vjust=-0.5, show.legend = F) +
        ggtitle("title") + theme_bw()
    )
  }

  # biplot representation, adapted from ggbiplot
  d <- pca$sdev
  u <- sweep(pca$x, 2, 1 / (d * nobs.factor), FUN = "*")
  v <- pca$rotation

  choices <- pmin(choices, ncol(u))
  df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale,
                              FUN = "*"))
  v <- sweep(v, 2, d^var.scale, FUN = "*")
  df.v <- as.data.frame(v[, choices])
  names(df.u) <- c("xvar", "yvar")
  names(df.v) <- names(df.u)
  if (pc.biplot) {
    df.u <- df.u * nobs.factor
  }

  # radius used both for the correlation circle and to rescale the arrows
  r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1 / 4)
  v.scale <- rowSums(v^2)
  df.v <- r * df.v / sqrt(max(v.scale))

  u.axis.labs <- paste0(if (obs.scale == 0) "standardized PC" else "PC", choices)
  u.axis.labs <- paste(u.axis.labs, sprintf("(%0.1f%% explained var.)",
                                            100 * pca$sdev[choices]^2 / sum(pca$sdev^2)))
  if (!is.null(labels)) {
    df.u$labels <- labels
  }
  if (!is.null(groups)) {
    df.u$groups <- groups
  }

  # additionally: gene identifiers, mapped to gene names if an annotation is given
  df.u$ids <- rownames(df.u)
  if (!is.null(annotation)) {
    df.u$geneNames <- annotation$gene_name[match(df.u$ids, rownames(annotation))]
  } else {
    df.u$geneNames <- df.u$ids
  }
  if (varname.abbrev) {
    df.v$varname <- abbreviate(rownames(v))
  } else {
    df.v$varname <- rownames(v)
  }
  df.v$angle <- with(df.v, (180 / pi) * atan(yvar / xvar))
  df.v$hjust <- with(df.v, (1 - varname.adjust * sign(xvar)) / 2)

  if (returnData) {
    return(df.u)
  }

  g <- ggplot(data = df.u, aes_string(x = "xvar", y = "yvar")) +
    xlab(u.axis.labs[1]) +
    ylab(u.axis.labs[2]) # + coord_equal() # REMOVED OTHERWISE BRUSH DOES NOT WORK PROPERLY
  if (coordEqual) {
    g <- g + coord_equal()
  }

  if (!is.null(df.u$labels)) {
    if (!is.null(df.u$groups)) {
      g <- g + geom_text(aes(label = labels, color = groups),
                         size = labels.size)
    } else {
      g <- g + geom_text(aes(label = labels), size = labels.size)
    }
  } else {
    if (!is.null(df.u$groups)) {
      g <- g + geom_point(aes(color = groups), size = point_size, alpha = alpha)
    } else {
      g <- g + geom_point(size = point_size, alpha = alpha)
    }
  }

  if (useRownamesAsLabels) {
    g <- g + geom_text(aes_string(label = "geneNames"), size = labels.size,
                       hjust = 0.25, vjust = -0.75)
  }

  if (!is.null(df.u$groups) && ellipse) {
    theta <- c(seq(-pi, pi, length.out = 50), seq(pi, -pi, length.out = 50))
    # kept in its own variable: the logical argument `circle` is still
    # needed further down to decide whether to draw the correlation circle
    unit_circle <- cbind(cos(theta), sin(theta))
    ell <- ddply(df.u, "groups", function(grp) {
      if (nrow(grp) <= 2) {
        return(NULL)
      }
      sigma <- var(cbind(grp$xvar, grp$yvar))
      mu <- c(mean(grp$xvar), mean(grp$yvar))
      ed <- sqrt(qchisq(ellipse.prob, df = 2))
      data.frame(sweep(unit_circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
                 groups = grp$groups[1])
    })
    names(ell)[1:2] <- c("xvar", "yvar")
    g <- g + geom_path(data = ell, aes(color = groups, group = groups))
  }

  # moved down to have the arrows drawn on top of the points and not vice versa
  if (var.axes) {
    if (circle) {
      theta <- c(seq(-pi, pi, length.out = 50), seq(pi, -pi, length.out = 50))
      circle_df <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
      g <- g + geom_path(data = circle_df, color = "steelblue",
                         size = 1 / 2, alpha = 1 / 3)
    }
    df.v$scaleArrow <- scaleArrow # quick fix for mapping scaling of the arrows
    arrowColors <- factor(arrowColors, levels = unique(arrowColors))
    df.v$arrowColors <- factor(arrowColors, levels = unique(arrowColors))
    df.v$groupNames <- factor(groupNames, levels = unique(groupNames))
    df.v$sca_x <- df.v$xvar * scaleArrow
    df.v$sca_y <- df.v$yvar * scaleArrow
    df.v$sta_x <- 0
    df.v$sta_y <- 0
    g <- g +
      geom_segment(data = df.v,
                   aes_string(x = "sta_x", y = "sta_y", xend = "sca_x", yend = "sca_y",
                              color = "arrowColors"),
                   arrow = arrow(length = unit(1 / 2, "picas"))) +
      scale_color_manual(values = levels(arrowColors), name = "Group",
                         labels = levels(groupNames))

    g <- g + geom_text(data = df.v,
                       aes_string(label = "varname",
                                  x = "sca_x", y = "sca_y", # angle = angle,
                                  hjust = "hjust"),
                       color = arrowColors, size = varname.size)
  }

  g + theme_bw()
}
#' Get an annotation data frame from biomaRt
#'
#' @param dds A [DESeq2::DESeqDataSet()] object
#' @param biomart_dataset A biomaRt dataset to use. To see the list, type
#' `mart = useMart('ensembl')`, followed by `listDatasets(mart)`.
#' @param idtype Character, the ID type of the genes as in the row names of
#' `dds`, to be used for the call to [biomaRt::getBM()]
#'
#' @return A data frame for ready use in `pcaExplorer`, retrieved from biomaRt.
#' It has one row per row of `dds` (in the same order, with the same row names)
#' and the columns `gene_id`, `gene_name` and `description`.
#' @export
#'
#' @examples
#' library("airway")
#' data("airway", package = "airway")
#' airway
#' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
#'                                              colData = colData(airway),
#'                                              design = ~dex+cell)
#' \dontrun{
#' get_annotation(dds_airway, "hsapiens_gene_ensembl", "ensembl_gene_id")
#' }
get_annotation <- function(dds, biomart_dataset, idtype) {
  if (is.null(biomart_dataset)) {
    stop("Select a species to generate the corresponding annotation. ",
         "To obtain a list, type mart = useMart('ensembl'), followed by listDatasets(mart).")
  }

  mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                  host = "www.ensembl.org",
                  dataset = biomart_dataset)
  anns <- getBM(attributes = c(idtype, "external_gene_name", "description"),
                filters = idtype,
                values = rownames(dds),
                mart = mart)

  # keep and match with the ones that are actually there
  anns2 <- anns[match(rownames(dds), anns[, 1]), ]
  rownames(anns2) <- rownames(dds)
  # rename the columns rsp. add row names to be consistent with other function
  colnames(anns2) <- c("gene_id", "gene_name", "description")

  anns2
}


#' Get an annotation data frame from org db packages
#'
#' @param dds A [DESeq2::DESeqDataSet()] object
#' @param orgdb_species Character string, named as the `org.XX.eg.db`
#' package which should be available in Bioconductor
#' @param idtype Character, the ID type of the genes as in the row names of
#' `dds`, to be used for the call to [AnnotationDbi::mapIds()]
#' @param key_for_genenames Character, corresponding to the column name for the
#' key in the orgDb package containing the official gene name (often called
#' gene symbol).
#' This parameter defaults to "SYMBOL", but can be adjusted in case the key is not
#' found in the annotation package (e.g. for `org.Sc.sgd.db`).
#'
#' @return A data frame for ready use in `pcaExplorer`, retrieved from the
#' org db packages, with the columns `gene_id` and `gene_name` and the row
#' names of `dds`
#' @export
#'
#' @examples
#' library("airway")
#' data("airway", package = "airway")
#' airway
#' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
#'                                              colData = colData(airway),
#'                                              design = ~dex+cell)
#' anno_df <- get_annotation_orgdb(dds_airway, "org.Hs.eg.db", "ENSEMBL")
#' head(anno_df)
get_annotation_orgdb <- function(dds, orgdb_species, idtype, key_for_genenames = "SYMBOL") {
  if (is.null(orgdb_species)) {
    stop("Select a species to generate the corresponding annotation")
  }

  # organism annotation packages known to this function; an unknown name only
  # triggers a hint, the availability check below is the one that stops
  known_orgdb_pkgs <- c(
    "org.Ag.eg.db", "org.At.tair.db", "org.Bt.eg.db", "org.Ce.eg.db",
    "org.Cf.eg.db", "org.Dm.eg.db", "org.Dr.eg.db", "org.EcK12.eg.db",
    "org.EcSakai.eg.db", "org.Gg.eg.db", "org.Hs.eg.db", "org.Hs.ipi.db",
    "org.Mm.eg.db", "org.Mmu.eg.db", "org.Pf.plasmo.db", "org.Pt.eg.db",
    "org.Rn.eg.db", "org.Sc.sgd.db", "org.Sco.eg.db", "org.Ss.eg.db",
    "org.Tgondii.eg.db", "org.Xl.eg.db"
  )

  if (!(orgdb_species %in% known_orgdb_pkgs)) {
    message("The orgDB package is most likely not existent in Bioconductor")
    message("It should be one of ", paste(known_orgdb_pkgs, collapse = ", "))
  }

  if (!requireNamespace(orgdb_species, quietly = TRUE)) {
    stop("The package ", orgdb_species,
         " is not installed/available. Try installing it with BiocManager::install('",
         orgdb_species, "')")
  }

  # the annotation object carries the same name as its package; fetch it once
  # from the package namespace instead of evaluating parsed text repeatedly
  orgdb <- getExportedValue(orgdb_species, orgdb_species)
  available_keys <- AnnotationDbi::keytypes(orgdb)

  if (!(idtype %in% available_keys)) {
    stop("The key you provided is not listed as key for the annotation package. Please try one of ",
         paste(available_keys, collapse = ","))
  }

  if (!(key_for_genenames %in% available_keys)) {
    stop("The key specified for containing gene names is not included in the annotation package. Please try one of ",
         paste(available_keys, collapse = ","))
  }

  if (idtype == "SYMBOL") {
    warning("You probably do not need to convert symbol to symbol") # the performance would somehow be affected
  }

  anns_vec <- AnnotationDbi::mapIds(orgdb, keys = rownames(dds),
                                    column = key_for_genenames,
                                    keytype = idtype)

  data.frame(
    gene_id = rownames(dds),
    gene_name = anns_vec,
    stringsAsFactors = FALSE,
    row.names = rownames(dds)
  )
}
#' Extract genes with highest loadings
#'
#' @param pcaobj A `prcomp` object
#' @param whichpc An integer number, corresponding to the principal component of
#' interest. Must be between 1 and the number of components in `pcaobj`.
#' @param topN Integer, number of genes with top and bottom loadings
#' @param exprTable A `matrix` object, e.g. the counts of a [DESeq2::DESeqDataSet()].
#' If not NULL, returns the counts matrix for the selected genes
#' @param annotation A `data.frame` object, with row.names as gene identifiers (e.g. ENSEMBL ids)
#' and a column, `gene_name`, containing e.g. HGNC-based gene symbols
#' @param title The title of the plot
#'
#' @return If `exprTable` is not NULL, the rows of `exprTable` for the selected
#' genes (top loadings first, then bottom loadings). Otherwise a base graphics
#' bar plot of the loadings is drawn and the value of `barplot()` is returned.
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
#' hi_loadings(pcaobj, topN = 20)
#' hi_loadings(pcaobj, topN = 10, exprTable = dds)
#' hi_loadings(pcaobj, topN = 10, exprTable = counts(dds))
#'
#' @export
hi_loadings <- function(pcaobj, whichpc = 1, topN = 10, exprTable = NULL,
                        annotation = NULL, title = "Top/bottom loadings") {
  # component numbers start at 1, so 0 has to be rejected as well
  if (whichpc < 1) {
    stop("Use a positive integer value for the principal component to select")
  }
  # the loadings live in the columns of the rotation matrix; its column count
  # is the number of available components
  if (whichpc > ncol(pcaobj$rotation)) {
    stop("You can not explore a principal component that is not in the data")
  }

  geneloadings_sorted <- sort(pcaobj$rotation[, whichpc])
  geneloadings_extreme <- c(tail(geneloadings_sorted, topN),
                            head(geneloadings_sorted, topN))

  if (!is.null(exprTable)) {
    tab <- exprTable[names(geneloadings_extreme), ]
    if (!is.null(annotation)) {
      rownames(tab) <- annotation$gene_name[match(rownames(tab), rownames(annotation))]
    }
    return(tab)
  }

  if (!is.null(annotation)) {
    names(geneloadings_extreme) <-
      annotation$gene_name[match(names(geneloadings_extreme), rownames(annotation))]
  }

  barplot(geneloadings_extreme, las = 2,
          col = c(rep("steelblue", topN), rep("coral", topN)),
          main = paste0(title, " - PC", whichpc))
  # mydf <- data.frame(loadings=geneloadings_extreme,
  #                    geneID=names(geneloadings_extreme),
  #                    mycol = c(rep("steelblue",topN),rep("coral",topN)))
  # mydf$geneID <- factor(mydf$geneID, levels = mydf$geneID)
  # p <- ggplot(mydf,aes_string(x="geneID",y="loadings")) + geom_col(aes_string(fill = "mycol")) + theme_bw() +
  #   theme(axis.text.x=element_text(angle = 90, vjust = 0.5)) + guides(fill = FALSE) +
  #   ggtitle(paste0(title, " - PC", whichpc))
  # p
}
#' Make a simulated DESeqDataSet for two or more experimental factors
#'
#' Constructs a simulated dataset of Negative Binomial data from different conditions.
#' The fold changes between the conditions can be adjusted with the `betaSD_condition`
#' and the `betaSD_tissue` arguments.
#'
#' This function is designed and inspired following the proposal of
#' [DESeq2::makeExampleDESeqDataSet()] from the `DESeq2` package. Credits are given
#' to Mike Love for the nice initial implementation
#'
#' @param n number of rows (genes)
#' @param m number of columns (samples)
#' @param betaSD_condition the standard deviation for condition betas, i.e. beta ~ N(0,betaSD)
#' @param betaSD_tissue the standard deviation for tissue betas, i.e. beta ~ N(0,betaSD)
#' @param interceptMean the mean of the intercept betas (log2 scale)
#' @param interceptSD the standard deviation of the intercept betas (log2 scale)
#' @param dispMeanRel a function specifying the relationship of the dispersions on
#' `2^trueIntercept`
#' @param sizeFactors multiplicative factors for each sample
#'
#' @return a [DESeq2::DESeqDataSet()] with true dispersion,
#' intercept for two factors (condition and tissue) and beta values in the
#' metadata columns. Note that the true betas are provided on the log2 scale.
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' dds
#' dds2 <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1, betaSD_tissue = 4)
#' dds2
#'
#' @export
makeExampleDESeqDataSet_multifac <- function(n = 1000,
                                             m = 12,
                                             betaSD_condition = 1,
                                             betaSD_tissue = 3,
                                             interceptMean = 4,
                                             interceptSD = 2,
                                             dispMeanRel = function(x) 4 / x + 0.1,
                                             sizeFactors = rep(1, m)) {
  beta <- cbind(rnorm(n, interceptMean, interceptSD),
                rnorm(n, 0, betaSD_condition),
                rnorm(n, 0, betaSD_tissue)) # added a tissue covariate

  dispersion <- dispMeanRel(2^(beta[, 1]))

  # samples per condition (A first, then B); the tissue labels are assigned
  # within each condition, so that both vectors always have length m. For even
  # m this reproduces the layout obtained by repeating one half twice.
  n_per_condition <- c(ceiling(m / 2), floor(m / 2))
  tissue_labels <- unlist(lapply(n_per_condition, function(k) {
    rep(c("t1", "t2"), times = c(ceiling(k / 2), floor(k / 2)))
  }))
  colData <- S4Vectors::DataFrame(
    condition = factor(rep(c("A", "B"), times = n_per_condition)),
    tissue = factor(tissue_labels)
  )

  # design matrix with intercept, condition B and tissue t2 indicator columns
  # (one column per column of `beta`); built by hand so that it is also
  # defined when a factor has a single observed level (very small m)
  x <- cbind(1,
             as.numeric(colData$condition == "B"),
             as.numeric(colData$tissue == "t2"))

  mu <- t(2^(x %*% t(beta)) * sizeFactors)
  countData <- matrix(rnbinom(m * n, mu = mu, size = 1 / dispersion),
                      ncol = m)
  mode(countData) <- "integer"
  colnames(countData) <- paste0("sample", seq_len(m))
  rowRanges <- GRanges("1", IRanges(start = (seq_len(n) - 1) * 100 + 1,
                                    width = 100))
  names(rowRanges) <- paste0("gene", seq_len(n))
  design <- if (m > 1) {
    as.formula("~ condition", env = .GlobalEnv)
  } else {
    as.formula("~ 1", env = .GlobalEnv)
  }
  object <- DESeqDataSetFromMatrix(countData = countData, colData = colData,
                                   design = design, rowRanges = rowRanges)
  trueVals <- DataFrame(trueIntercept = beta[, 1],
                        trueBeta_condition = beta[, 2],
                        trueBeta_tissue = beta[, 3],
                        trueDisp = dispersion)
  mcols(trueVals) <- DataFrame(type = rep("input", ncol(trueVals)),
                               description = c("simulated intercept values",
                                               "simulated beta values for the condition",
                                               "simulated beta values for the tissue",
                                               "simulated dispersion values"))
  mcols(object) <- cbind(mcols(object), trueVals)
  object
}


#' Pairwise scatter and correlation plot of counts
#'
#' @param df A data frame, containing the (raw/normalized/transformed) counts
#' @param log Logical, whether to convert the input values to log2 (with addition
#' of a pseudocount). Defaults to FALSE.
#' @param method Character string, one of `pearson` (default), `kendall`, or
#' `spearman` as in `cor`
#' @param use_subset Logical value. If TRUE, only 1000 values per sample will be used
#' to speed up the plotting operations. The subset is drawn with a fixed seed;
#' the state of the random number generator is restored afterwards.
#'
#' @return A plot with pairwise scatter plots and correlation coefficients
#' @export
#'
#' @examples
#' library("airway")
#' data("airway", package = "airway")
#' airway
#' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
#'                                              colData = colData(airway),
#'                                              design = ~dex+cell)
#' pair_corr(counts(dds_airway)[1:100, ]) # use just a subset for the example
pair_corr <- function(df, log = FALSE, method = "pearson", use_subset = TRUE) {
  if (log) {
    df <- log2(1 + df)
  }

  if (use_subset) {
    # the subset is drawn with a fixed seed to be reproducible; remember the
    # caller's RNG state and put it back when the function exits
    had_seed <- exists(".Random.seed", envir = globalenv(), inherits = FALSE)
    old_seed <- if (had_seed) {
      get(".Random.seed", envir = globalenv(), inherits = FALSE)
    } else {
      NULL
    }
    on.exit({
      if (had_seed) {
        assign(".Random.seed", old_seed, envir = globalenv())
      } else if (exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
        rm(".Random.seed", envir = globalenv())
      }
    }, add = TRUE)

    set.seed(42)
    df <- df[sample(seq_len(nrow(df)), min(nrow(df), 1000)), ]
  }

  # get min and max count values for axis range.
  rangeMin <- min(df)
  rangeMax <- max(df)

  colorFunction <- colorRamp(c("black", "red"))
  # colorFunction() expects values from 0 to 1.
  zMatrix <- colorFunction(seq(0, 1, by = .01))
  # one colour per step of the 0-1 sequence above
  zColors <- sort(rgb(zMatrix[, 1], zMatrix[, 2], zMatrix[, 3], maxColorValue = 255))
  labelSize <- 1
  title <- "Pairwise Correlations"
  # Modified from R pairs() documentation
  panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
    usr <- par("usr")
    on.exit(par(usr = usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y, method = method))
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste0(prefix, txt)

    # change size of text based on r value (larger text for larger r value).
    cex.cor <- labelSize / strwidth(txt)
    # color text based on r value (red is r=1); the index is kept at 1 or
    # above so that correlations below 0.01 still map to a colour
    text(0.5, 0.5, txt, cex = cex.cor * r * 0.7, col = zColors[max(1, r * 100)])
  }
  # par(mar = c(0,0,0,0))

  pairs(df, pch = 20, col = grDevices::adjustcolor("black", alpha.f = 0.4),
        cex.labels = labelSize,
        main = title,
        upper.panel = panel.cor,
        xlim = c(rangeMin, rangeMax),
        ylim = c(rangeMin, rangeMax))
}
#' pcaExplorer: interactive visualization of RNA-seq datasets based on Principal Components Analysis
#'
#' pcaExplorer provides functionality for interactive visualization of RNA-seq datasets
#' based on Principal Components Analysis. The methods provided allow for quick information
#' extraction and effective data exploration. A Shiny application encapsulates the whole analysis.
#'
#' @import DESeq2
#' @import SummarizedExperiment
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @importFrom S4Vectors DataFrame
#' @importFrom genefilter rowVars
#' @importFrom heatmaply heatmaply
#' @importFrom plotly renderPlotly plotlyOutput
#' @importFrom scales brewer_pal hue_pal
#' @importFrom NMF aheatmap
#' @import plyr
#' @importFrom limma goana topGO
#' @importFrom AnnotationDbi select Term mapIds
#' @importMethodsFrom GOstats hyperGTest summary
#' @import GO.db
#' @import shiny
#' @import shinydashboard
#' @importFrom shinyBS bsTooltip bsCollapse bsCollapsePanel
#' @import ggplot2
#' @importFrom ggrepel geom_label_repel
#' @importFrom DT datatable
#' @importFrom shinyAce aceAutocomplete aceEditor getAceModes getAceThemes
#' updateAceEditor
#' @import threejs
#' @import biomaRt
#' @importFrom pheatmap pheatmap
#' @importFrom base64enc dataURI
#' @importFrom tidyr gather
#' @import knitr
#' @import rmarkdown
#' @importFrom grDevices dev.off pdf rainbow colorRamp rgb
#' @import methods
#'
#' @author
#' Federico Marini \email{marinif@@uni-mainz.de}, 2016
#'
#' Maintainer: Federico Marini \email{marinif@@uni-mainz.de}
#' @name pcaExplorer-pkg
#' @docType package
"_PACKAGE"

# Startup hook: emits the welcome/citation text as a package startup message,
# so that it can be silenced with suppressPackageStartupMessages().
.onAttach <- function(libname, pkgname) {
  # look the version up under the name the package is being attached as
  pkgVersion <- utils::packageDescription(pkgname, fields = "Version")
  msg <- paste0("Welcome to pcaExplorer v", pkgVersion, "\n\n")
  citation <- paste0("If you use pcaExplorer in your work, please cite:\n\n",
                     "pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components\n",
                     "Federico Marini, Harald Binder\n",
                     "BMC Bioinformatics, 2019 - https://doi.org/10.1186/s12859-019-2879-1\n")
  packageStartupMessage(paste0(msg, citation))
}
-------------------------------------------------------------------------------- /R/pcaplot.R: -------------------------------------------------------------------------------- 1 | #' Sample PCA plot for transformed data 2 | #' 3 | #' Plots the results of PCA on a 2-dimensional space 4 | #' 5 | #' @param x A [DESeq2::DESeqTransform()] object, with data in `assay(x)`, 6 | #' produced for example by either [DESeq2::rlog()] or 7 | #' [DESeq2::varianceStabilizingTransformation()]/[DESeq2::vst()] 8 | #' @param intgroup Interesting groups: a character vector of 9 | #' names in `colData(x)` to use for grouping. Defaults to NULL, which would then 10 | #' select the first column of the `colData` slot 11 | #' @param ntop Number of top genes to use for principal components, 12 | #' selected by highest row variance 13 | #' @param returnData logical, if TRUE returns a data.frame for further use, containing the 14 | #' selected principal components and intgroup covariates for custom plotting 15 | #' @param title The plot title 16 | #' @param pcX The principal component to display on the x axis 17 | #' @param pcY The principal component to display on the y axis 18 | #' @param text_labels Logical, whether to display the labels with the sample identifiers 19 | #' @param point_size Integer, the size of the points for the samples 20 | #' @param ellipse Logical, whether to display the confidence ellipse for the selected groups 21 | #' @param ellipse.prob Numeric, a value in the interval [0;1) 22 | #' 23 | #' @return An object created by `ggplot`, which can be assigned and further customized. 
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' pcaplot(rlt, ntop = 200)
#'
#' @export
pcaplot <- function(x,
                    intgroup = NULL,
                    ntop = 500,
                    returnData = FALSE,
                    title = NULL,
                    pcX = 1,
                    pcY = 2,
                    text_labels = TRUE,
                    point_size = 3,
                    ellipse = TRUE,
                    ellipse.prob = 0.95) {
  # PCA on the samples, restricted to the rows with the highest variance
  row_variances <- rowVars(assay(x))
  n_selected <- min(ntop, length(row_variances))
  top_rows <- order(row_variances, decreasing = TRUE)[seq_len(n_selected)]
  pca <- prcomp(t(assay(x)[top_rows, ]))

  # fraction of the total variance carried by each component
  percentVar <- pca$sdev^2 / sum(pca$sdev^2)

  if (is.null(intgroup)) {
    # no grouping requested: use the first colData column when there is one
    if (length(names(colData(x))) == 0) {
      stop("No colData has been provided, therefore `intgroup` cannot be selected properly")
    }
    intgroup <- names(colData(x))[1]
    message("Defaulting to '", intgroup, "' as the `intgroup` parameter...")
  }

  if (any(!(intgroup %in% names(colData(x))))) {
    stop("the argument 'intgroup' should specify columns of colData(x)")
  }

  # one combined grouping factor out of all requested covariates
  covariates <- as.data.frame(colData(x)[, intgroup, drop = FALSE])
  group <- factor(apply(covariates, 1, paste, collapse = " : "))

  x_name <- paste0("PC", pcX)
  y_name <- paste0("PC", pcY)
  d <- data.frame(PC1 = pca$x[, pcX], PC2 = pca$x[, pcY], group = group,
                  covariates, names = colnames(x))
  colnames(d)[1] <- x_name
  colnames(d)[2] <- y_name

  if (returnData) {
    attr(d, "percentVar") <- percentVar[c(pcX, pcY)]
    return(d)
  }

  # horizontal justification hint kept in the plot data (labels are placed by ggrepel)
  d$hjust <- ifelse(sign(d[, x_name]) == 1, 0.9, 0.1)

  # axis label: component number plus its share of explained variance
  axis_label <- function(pc) {
    paste0("PC", pc, ": ", round(percentVar[pc] * 100, digits = 2), "% variance")
  }

  g <- ggplot(data = d, aes_string(x = x_name, y = y_name, color = "group")) +
    geom_point(size = point_size) +
    xlab(axis_label(pcX)) +
    ylab(axis_label(pcY))

  ## confidence ellipse per group
  # credit to vince vu, author of ggbiplot
  if (ellipse) {
    angles <- c(seq(-pi, pi, length.out = 50), seq(pi, -pi, length.out = 50))
    unit_circle <- cbind(cos(angles), sin(angles))

    ell <- ddply(d, "group", function(grp) {
      # groups with one or two samples get no ellipse
      if (nrow(grp) <= 2) {
        return(NULL)
      }
      sigma <- var(cbind(grp[[x_name]], grp[[y_name]]))
      mu <- c(mean(grp[[x_name]]), mean(grp[[y_name]]))
      ed <- sqrt(qchisq(ellipse.prob, df = 2))
      data.frame(sweep(unit_circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
                 groups = grp$group[1])
    })

    if (nrow(ell) > 0) {
      g <- g + geom_path(data = ell,
                         aes_string(x = "X1", y = "X2", color = "groups", group = "groups"))
    }
  }

  if (text_labels) {
    g <- g + geom_label_repel(mapping = aes_string(label = "names", fill = "group"),
                              color = "white", show.legend = TRUE)
  }

  # fall back to an automatic title when none is supplied
  plot_title <- paste0("PCA plot - top ", length(top_rows), " variable genes")
  g <- g + ggtitle(if (is.null(title)) plot_title else title)

  g <- g + theme_bw()
  # as in http://www.huber.embl.de/msmb/Chap-Graphics.html
  # "well-made PCA plots usually have a width that’s larger than the height"
  g <- g + coord_fixed()
  g
}


#' Scree plot of the PCA on the samples
#'
#' Produces a scree plot for investigating the proportion of explained variance, or
#' alternatively the cumulative value
#'
#' @param obj A `prcomp` object
#' @param type Display absolute proportions or cumulative proportion.
#' Possible values:
#' "pev" or "cev"
#' @param pc_nr How many principal components to display max
#' @param title Title of the plot
#'
#' @return An object created by `ggplot`, which can be assigned and further customized.
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
#' pcascree(pcaobj, type = "pev")
#' pcascree(pcaobj, type = "cev", title = "Cumulative explained proportion of variance - Test dataset")
#'
#' @export
pcascree <- function(obj, type = c("pev", "cev"), pc_nr = NULL, title = NULL) {
  type <- match.arg(type)

  # variance of each component, then either its share or the running share
  d <- obj$sdev^2
  yvar <- switch(type, pev = d / sum(d), cev = cumsum(d) / sum(d))
  yvar.lab <- switch(type, pev = "proportion of explained variance",
                     cev = "cumulative proportion of explained variance")

  # Number of components to show. A request larger than what the PCA holds is
  # clamped (previously this produced NA rows), and anything that is not a
  # single value >= 1 is rejected (previously `1:pc_nr` silently counted down).
  if (is.null(pc_nr)) {
    colsize <- length(d)
  } else {
    n_requested <- suppressWarnings(as.integer(pc_nr))
    if (length(n_requested) != 1L || is.na(n_requested) || n_requested < 1L) {
      stop("`pc_nr` must be a single number greater than or equal to 1")
    }
    colsize <- min(n_requested, length(d))
  }

  shown <- seq_len(colsize)
  pc_df <- data.frame(PC_count = shown, var = yvar[shown])

  p <- ggplot(pc_df, aes_string(x = "PC_count", y = "var"))
  if (type == "pev") {
    # individual proportions as bars
    p <- p + geom_bar(stat = "identity")
  } else {
    # cumulative proportion as a connected line, y axis anchored at 0
    p <- p + geom_point() + geom_path() + ylim(0, max(pc_df$var))
  }
  # one tick per displayed component
  p <- p + scale_x_continuous(breaks = shown) +
    ylab(yvar.lab) + xlab("principal components")

  if (!is.null(title)) {
    p <- p + ggtitle(title)
  }
  p <- p + theme_bw()
  p
}


#' Sample PCA plot for transformed data
#'
#' Plots the results of PCA on a
#' 3-dimensional space, interactively
#'
#' @param x A [DESeq2::DESeqTransform()] object, with data in `assay(x)`,
#' produced for example by either [DESeq2::rlog()] or
#' [DESeq2::varianceStabilizingTransformation()]
#' @param intgroup Interesting groups: a character vector of
#' names in `colData(x)` to use for grouping
#' @param ntop Number of top genes to use for principal components,
#' selected by highest row variance
#' @param returnData logical, if TRUE returns a data.frame for further use, containing the
#' selected principal components and intgroup covariates for custom plotting
#' @param title The plot title
#' @param pcX The principal component to display on the x axis
#' @param pcY The principal component to display on the y axis
#' @param pcZ The principal component to display on the z axis
#' @param text_labels Logical, whether to display the labels with the sample identifiers
#' @param point_size Integer, the size of the points for the samples
#'
#' @return A html-based visualization of the 3d PCA plot
#' @export
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' pcaplot3d(rlt, ntop = 200)
pcaplot3d <- function(x, intgroup = "condition", ntop = 500, returnData = FALSE, title = NULL,
                      pcX = 1, pcY = 2, pcZ = 3, text_labels = TRUE, point_size = 3) {
  # NOTE(review): `title`, `text_labels` and `point_size` are accepted but not
  # used anywhere in this body (the point size passed below is fixed at 1.3).

  # PCA on the samples, restricted to the rows with the highest variance
  rv <- rowVars(assay(x))
  select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  pca <- prcomp(t(assay(x)[select, ]))

  # fraction of the total variance carried by each component
  percentVar <- pca$sdev^2 / sum(pca$sdev^2)

  if (!all(intgroup %in% names(colData(x)))) {
    stop("the argument 'intgroup' should specify columns of colData(x)")
  }

  # one combined grouping factor out of all requested covariates
  intgroup.df <- as.data.frame(colData(x)[, intgroup, drop = FALSE])
  group <- factor(apply(intgroup.df, 1, paste, collapse = " : "))
  d <- data.frame(PC1 = pca$x[, pcX], PC2 = pca$x[, pcY], PC3 = pca$x[, pcZ],
                  group = group,
                  intgroup.df, names = colnames(x))

  # the first three column names double as axis labels in the 3d widget
  colnames(d)[1] <- paste0("PC", pcX, ": ", round(percentVar[pcX] * 100, digits = 2), "% variance")
  colnames(d)[2] <- paste0("PC", pcY, ": ", round(percentVar[pcY] * 100, digits = 2), "% variance")
  colnames(d)[3] <- paste0("PC", pcZ, ": ", round(percentVar[pcZ] * 100, digits = 2), "% variance")

  if (returnData) {
    # report the variance of the components actually selected, not always the
    # first three
    attr(d, "percentVar") <- percentVar[c(pcX, pcY, pcZ)]
    return(d)
  }

  # one hue per group level, expanded to one colour per sample
  nrgroups <- length(levels(d$group))
  cols <- hue_pal()(nrgroups)[d$group]

  scatterplot3js(as.matrix(d[, 1:3]),
                 color = cols,
                 # renderer = "canvas",
                 size = 1.3,
                 labels = rownames(d), label.margin = "50px 50px 50px 50px")
}

--------------------------------------------------------------------------------
/R/profile_explore.R:
--------------------------------------------------------------------------------
#' Extract and plot the expression profile of genes
#'
#' @param se A [DESeq2::DESeqDataSet()] object, or a
#' [DESeq2::DESeqTransform()] object.
#' @param genelist An array of characters, including the names of the genes of
#' interest of which the profile is to be plotted
#' @param intgroup A factor, needs to be in the `colnames` of `colData(se)`
#' @param plotZ Logical, whether to plot the scaled expression values.
#' Defaults to
#' `FALSE`
#'
#' @return A plot of the expression profile for the genes
#' @export
#'
#' @examples
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' geneprofiler(rlt, paste0("gene", sample(1:1000, 20)))
#' geneprofiler(rlt, paste0("gene", sample(1:1000, 20)), plotZ = TRUE)
geneprofiler <- function(se, genelist = NULL, intgroup = "condition", plotZ = FALSE) {
  if (is.null(genelist))
    stop("Provide at least one gene to the genelist parameter")
  # check that at least one gene is found
  genelist <- unique(genelist)
  message("You provided ", length(genelist), " unique identifiers")
  inthedata <- genelist %in% rownames(se)
  if (sum(inthedata) == 0)
    stop("None of the provided genes were found in the experiment data")
  message(sum(inthedata), " out of ", length(genelist), " provided genes were found in the data")

  # Continue with the genes that are present only: indexing with identifiers
  # that are not row names would fail with a subscript error.
  genelist <- genelist[inthedata]

  # samples in rows, genes in columns; drop = FALSE keeps the matrix shape
  # when a single gene is left
  mydata <- as.data.frame(t(assay(se)[genelist, , drop = FALSE]))

  # resort the order of the rows according to the groups that are selected
  mygroups <- interaction(as.data.frame(colData(se)[intgroup]))
  mydata <- mydata[order(mygroups), , drop = FALSE]

  if (plotZ) {
    # remove 0 variance genes (they cannot be scaled)
    rv <- rowVars(t(mydata))
    mydata <- mydata[, rv > 0, drop = FALSE]
    if (ncol(mydata) == 0)
      stop("None of the provided genes has a non-zero variance, scaled values cannot be computed")

    mydata <- scale(mydata, center = TRUE, scale = TRUE)
    # was...
    # mydata <- NMF:::scale_mat(mydata,"col")
  }
  mylabels <- colnames(se)[order(mygroups)]
  # one hue per group level, expanded to one colour per (sorted) sample
  mycols <- scales::hue_pal()(length(levels(mygroups)))[sort(mygroups)]

  # wide bottom margin for the rotated sample labels
  # NOTE(review): the graphical parameter is left modified after the call, as before
  par(mar = c(7.1, 4.1, 2.1, 2.1))
  y_label <- if (plotZ) "scaled expression value" else "expression value"
  plot(mydata[, 1], type = "l", xaxt = "n", las = 2, ylim = range(mydata), xlab = "", ylab = y_label)
  # one axis call per sample so that each label can carry its group colour
  Map(function(x, y, z)
    axis(1, at = x, col.axis = y, labels = z, lwd = 0, las = 2),
    seq_len(nrow(mydata)),
    mycols,
    mylabels
  )
  axis(1, at = seq_len(nrow(mydata)), labels = FALSE)

  # remaining genes, from the second up to and including the last one; the
  # sequence is empty when only one gene is plotted
  for (i in seq_len(ncol(mydata))[-1]) {
    lines(mydata[, i], type = "l", xaxt = "n", las = 2, col = i)
  }
  ## TODO: if desired, plot only the avg pro group -> maybe as boxplot?
  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
#' @importFrom shiny addResourcePath

.onLoad <- function(libname, pkgname) {
  # Create link to logo
  # shiny::addResourcePath("pcaExplorer", system.file("www", package="pcaExplorer"))

  shiny::addResourcePath("sbs", system.file("www", package = "shinyBS"))
}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------


# pcaExplorer - Interactive exploration of Principal Components of Samples and Genes in RNA-seq data




## Software status

[![R build status](https://github.com/federicomarini/pcaExplorer/workflows/R-CMD-check/badge.svg)](https://github.com/federicomarini/pcaExplorer/actions)

| Platforms | OS | R CMD check |
|:----------------:|:----------------:|:----------------:|
| Bioc ([_devel_](http://bioconductor.org/packages/devel/bioc/html/pcaExplorer.html)) | Multiple | [![Bioconductor-devel Build
Status](http://bioconductor.org/shields/build/devel/bioc/pcaExplorer.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/pcaExplorer) | 15 | | Bioc ([_release_](http://bioconductor.org/packages/release/bioc/html/pcaExplorer.html)) | Multiple | [![Bioconductor-release Build Status](http://bioconductor.org/shields/build/release/bioc/pcaExplorer.svg)](http://bioconductor.org/checkResults/release/bioc-LATEST/pcaExplorer) | 16 | 17 | [![codecov.io](https://codecov.io/github/federicomarini/pcaExplorer/coverage.svg?branch=master)](https://codecov.io/github/federicomarini/pcaExplorer?branch=master) 18 | 19 | `pcaExplorer` is a Bioconductor package containing a Shiny application for 20 | analyzing expression data in different conditions and experimental factors. 21 | 22 | It is a general-purpose interactive companion tool for RNA-seq analysis, which 23 | guides the user in exploring the Principal Components of the data under inspection. 24 | 25 | `pcaExplorer` provides tools and functionality to detect outlier samples, genes 26 | that show particular patterns, and additionally provides a functional interpretation of 27 | the principal components for further quality assessment and hypothesis generation 28 | on the input data. 29 | 30 | Moreover, a novel visualization approach is presented to simultaneously assess 31 | the effect of more than one experimental factor on the expression levels. 32 | 33 | Thanks to its interactive/reactive design, it is designed to become a practical 34 | companion to any RNA-seq dataset analysis, making exploratory data analysis 35 | accessible also to the bench biologist, while providing additional insight also 36 | for the experienced data analyst. 
37 | 38 | ## Installation 39 | 40 | `pcaExplorer` can be easily installed using `BiocManager::install()`: 41 | 42 | ``` r 43 | if (!requireNamespace("BiocManager", quietly=TRUE)) 44 | install.packages("BiocManager") 45 | BiocManager::install("pcaExplorer") 46 | ``` 47 | 48 | or, optionally, 49 | 50 | ``` r 51 | BiocManager::install("federicomarini/pcaExplorer") 52 | # or alternatively... 53 | devtools::install_github("federicomarini/pcaExplorer") 54 | ``` 55 | 56 | ## Quick start 57 | 58 | This command loads the `pcaExplorer` package 59 | 60 | ``` r 61 | library("pcaExplorer") 62 | ``` 63 | 64 | The `pcaExplorer` app can be launched in different modes: 65 | 66 | - `pcaExplorer(dds = dds, dst = dst)`, where `dds` is a `DESeqDataSet` object and `dst` is a `DESeqTransform` 67 | object, which were created during an existing session for the analysis of an RNA-seq 68 | dataset with the `DESeq2` package 69 | 70 | - `pcaExplorer(dds = dds)`, where `dds` is a `DESeqDataSet` object. The `dst` object is automatically 71 | computed upon launch. 72 | 73 | - `pcaExplorer(countmatrix = countmatrix, coldata = coldata)`, where `countmatrix` is a count matrix, generated 74 | after assigning reads to features such as genes via tools such as `HTSeq-count` or `featureCounts`, and `coldata` 75 | is a data frame containing the experimental covariates of the experiments, such as condition, tissue, cell line, 76 | run batch and so on. 77 | 78 | - `pcaExplorer()`, and then subsequently uploading the count matrix and the covariates data frame through the 79 | user interface. These files need to be formatted as tab separated files, which is a common format for storing 80 | such count values. 
81 | 82 | Additional parameters and objects that can be provided to the main `pcaExplorer` function are: 83 | 84 | - `pca2go`, which is an object created by the `pca2go` function, which scans the genes with high loadings in 85 | each principal component and each direction, and looks for functions (such as GO Biological Processes) that 86 | are enriched above the background. The offline `pca2go` function is based on the routines and algorithms of 87 | the `topGO` package, but as an alternative, this object can be computed live during the execution of the app 88 | exploiting the `goana` function, provided by the `limma` package. Although this likely provides more general 89 | (and probably less informative) functions, it is a good compromise for obtaining a further data interpretation. 90 | 91 | - `annotation`, a data frame object, with `row.names` as gene identifiers (e.g. ENSEMBL ids) identical to the 92 | row names of the count matrix or `dds` object, and an extra column `gene_name`, containing e.g. HGNC-based 93 | gene symbols. This can be used for making information extraction easier, as ENSEMBL ids (a usual choice when 94 | assigning reads to features) do not provide an immediate readout for which gene they refer to. This can be 95 | either passed as a parameter when launching the app, or also uploaded as a tab separated text file. 96 | 97 | ## Contact 98 | 99 | For additional details regarding the functions of **pcaExplorer**, please consult the documentation or 100 | write an email to marinif@uni-mainz.de. 101 | 102 | ## Code of Conduct 103 | 104 | Please note that the pcaExplorer project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 
105 | 106 | ### Bug reports/Issues/New features 107 | 108 | Please use https://github.com/federicomarini/pcaExplorer/issues for reporting bugs, issues or for 109 | suggesting new features to be implemented. 110 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | Federico Marini: 3 | href: https://federicomarini.github.io 4 | 5 | template: 6 | params: 7 | bootswatch: cosmo 8 | 9 | navbar: 10 | structure: 11 | left: 12 | - home 13 | - intro 14 | - reference 15 | - articles 16 | - tutorials 17 | - news 18 | right: github 19 | components: 20 | home: 21 | icon: fa-home fa-lg 22 | href: index.html 23 | reference: 24 | text: Reference 25 | href: reference/index.html 26 | intro: 27 | text: User guide 28 | href: articles/pcaExplorer.html 29 | github: 30 | icon: fa-github fa-lg 31 | href: https://github.com/federicomarini/pcaExplorer 32 | 33 | reference: 34 | - title: The pcaExplorer package 35 | desc: Main functions and info on the `pcaExplorer` package 36 | contents: 37 | - '`pcaExplorer-pkg`' 38 | - '`pcaExplorer`' 39 | - title: Working with Principal Components 40 | desc: Functions for working efficiently with Principal Components 41 | contents: 42 | - '`pcaplot`' 43 | - '`pcaplot3d`' 44 | - '`pcascree`' 45 | - '`correlatePCs`' 46 | - '`genespca`' 47 | - '`hi_loadings`' 48 | - '`plotPCcorrs`' 49 | - title: Annotation and Functional interpretation 50 | desc: Functions for annotating and performing functional interpretation 51 | contents: 52 | - '`get_annotation`' 53 | - '`get_annotation_orgdb`' 54 | - '`topGOtable`' 55 | - '`pca2go`' 56 | - '`limmaquickpca2go`' 57 | - title: Expression data exploration 58 | desc: ~ 59 | contents: 60 | - '`pair_corr`' 61 | - '`distro_expr`' 62 | - '`geneprofiler`' 63 | - '`makeExampleDESeqDataSet_multifac`' 64 | - title: Deprecated functions in pcaExplorer 65 | desc: ~ 66 | contents: 67 | - 
'`deprecated`'
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
# Citation entries returned by citation("pcaExplorer").
# bibentry() and c() on person objects are used in place of the old-style
# citEntry() / personList() helpers.
# NOTE(review): the last element of each textVersion was empty (", ." / ".")
# in this copy; it is filled in from the entry's own `doi` field - confirm
# against the upstream file.
citHeader("Please cite the articles below for the 'pcaExplorer' software itself, or its usage in combined workflows with the 'ideal' or 'GeneTonic' software packages:")

bibentry(
  bibtype = "Article",
  title = "pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components",
  journal = "BMC Bioinformatics",
  author = c(as.person("Federico Marini"),
             as.person("Harald Binder")),
  volume = "20",
  number = "1",
  pages = "331",
  year = "2019",
  month = "Jun",
  day = "13",
  doi = "10.1186/s12859-019-2879-1",
  url = "https://bioconductor.org/packages/pcaExplorer/",

  textVersion =
    paste("Federico Marini, Harald Binder (2019).",
          "pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.",
          "BMC Bioinformatics, 20 (1), 331,",
          "<doi:10.1186/s12859-019-2879-1>.")
)

bibentry(
  bibtype = "Article",
  title = "Interactive and Reproducible Workflows for Exploring and Modeling RNA-seq Data with pcaExplorer, ideal, and GeneTonic",
  journal = "Current Protocols",
  author = c(
    as.person("Annekathrin Ludt"),
    as.person("Arsenij Ustjanzew"),
    as.person("Harald Binder"),
    as.person("Konstantin Strauch"),
    as.person("Federico Marini")
  ),
  volume = "2",
  number = "4",
  pages = "e411",
  year = "2022",
  month = "Apr",
  doi = "10.1002/cpz1.411",

  textVersion =
    paste("Annekathrin Ludt, Arsenij Ustjanzew, Harald Binder, Konstantin Strauch, Federico Marini (2022).",
          "Interactive and Reproducible Workflows for Exploring and Modeling RNA-seq Data with pcaExplorer, ideal, and GeneTonic.",
          "Current Protocols, 2 (4), e411,",
          "<doi:10.1002/cpz1.411>.")
)

-------------------------------------------------------------------------------- /inst/extdata/about.md: -------------------------------------------------------------------------------- 1 | # About pcaExplorer 2 | 3 | `pcaExplorer` is a Bioconductor package containing a Shiny application for 4 | analyzing expression data in different conditions and experimental factors. 5 | 6 | `pcaExplorer` guides the user in exploring the Principal Components of the data, 7 | providing tools and functionality to detect outlier samples, genes that show 8 | particular patterns, and additionally provides a functional interpretation of 9 | the principal components for further quality assessment and hypothesis generation 10 | on the input data. 11 | 12 | Thanks to its interactive/reactive design, it is designed to become a practical 13 | companion to any RNA-seq dataset analysis, making exploratory data analysis 14 | accessible also to the bench biologist, while providing additional insight also 15 | for the experienced data analyst. 16 | 17 | Moreover, `pcaExplorer` supports reproducible research with state saving and automated 18 | report generation. 19 | 20 | `pcaExplorer` was developed in the Bioinformatics Division led by Harald Binder 21 | at the IMBEI (Institut für Medizinische Biometrie, Epidemiologie und Informatik) 22 | in the University Medical Center of the Johannes Gutenberg University Mainz. 23 | 24 | ## Developers 25 | 26 | Federico Marini 27 | 28 | ## Code 29 | 30 | All code for `pcaExplorer` is available on 31 | GitHub. 32 | 33 | # Citation info 34 | 35 | If you use `pcaExplorer` for your analysis, please cite it as here below: 36 | 37 | ```r 38 | citation("pcaExplorer") 39 | ``` 40 | 41 | ``` 42 | To cite package ‘pcaExplorer’ in publications use: 43 | 44 | Federico Marini (2018). pcaExplorer: Interactive Visualization of RNA-seq Data Using 45 | a Principal Components Approach. R package version 2.6.0. 
46 | https://github.com/federicomarini/pcaExplorer 47 | 48 | A BibTeX entry for LaTeX users is 49 | 50 | @Manual{, 51 | title = {pcaExplorer: Interactive Visualization of RNA-seq Data Using a Principal Components Approach}, 52 | author = {Federico Marini}, 53 | year = {2018}, 54 | note = {R package version 2.6.0}, 55 | url = {https://github.com/federicomarini/pcaExplorer}, 56 | } 57 | ``` 58 | -------------------------------------------------------------------------------- /inst/extdata/datainput.md: -------------------------------------------------------------------------------- 1 | - `pcaExplorer` accepts only **text files, either comma, semicolon, or tab-separated**. This avoids proprietary formats such as Excel, which can also inadvertently convert your gene identifiers to dates or floating point numbers (SEPT2, see more [here](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1044-7)) 2 | - by default, tab-separated values are expected, but you can change this for each input file with the related radio buttons 3 | - for **count data**: features are in the rows, samples are stored in the columns 4 | - for the **metadata**: each row stores the relevant experimental variables of each sample 5 | - for the **gene annotation**: each row relates to a feature (i.e. a gene), with its id in the row names, and at least a column called gene_name, containing a more readable format (e.g. HGNC gene symbols) 6 | - it is important to have **headers**, as they are used in constructing the `dds` and `dst` objects and checking their validity: for example, the column names of the count matrix have to be identical to the row names of the sample metadata. 
This small constraint guarantees a higher degree of robustness for all the subsequent steps 7 | - if your data is stored in Excel sheets, export them to csv format (specify the correct separator accordingly when loading the data - you can always open them in a text editor to check) 8 | - general reminder: if you launch `pcaExplorer` directly from the terminal/RStudio IDE, you can pre-compute the objects and pass them to the app, which can speed things up 9 | -------------------------------------------------------------------------------- /inst/extdata/instructions.md: -------------------------------------------------------------------------------- 1 | *This information is also contained in the `pcaExplorer` package vignette. For more 2 | information on the functions of the `pcaExplorer` package, please refer to the 3 | vignette and/or the documentation.* 4 | 5 | ## Getting started 6 | 7 | `pcaExplorer` is an R package distributed as part of the [Bioconductor](http://bioconductor.org) 8 | project. To install the package, start R and enter: 9 | 10 | ```r 11 | if (!requireNamespace("BiocManager", quietly=TRUE)) 12 | install.packages("BiocManager") 13 | BiocManager::install("pcaExplorer") 14 | ``` 15 | 16 | If you prefer, you can install and use the development version, which can be 17 | retrieved via GitHub (https://github.com/federicomarini/pcaExplorer). To do so, use 18 | 19 | ```r 20 | library("devtools") 21 | install_github("federicomarini/pcaExplorer") 22 | ``` 23 | 24 | Once `pcaExplorer` is installed, it can be loaded by the following command. 25 | 26 | ```r 27 | library("pcaExplorer") 28 | ``` 29 | 30 | ## Introduction 31 | 32 | `pcaExplorer` is a Bioconductor package containing a Shiny application for 33 | analyzing expression data in different conditions and experimental factors. 34 | 35 | It is a general-purpose interactive companion tool for RNA-seq analysis, which 36 | guides the user in exploring the Principal Components of the data under inspection. 
37 | 38 | `pcaExplorer` provides tools and functionality to detect outlier samples, genes 39 | that show particular patterns, and additionally provides a functional interpretation of 40 | the principal components for further quality assessment and hypothesis generation 41 | on the input data. 42 | 43 | Moreover, a novel visualization approach is presented to simultaneously assess 44 | the effect of more than one experimental factor on the expression levels. 45 | 46 | Thanks to its interactive/reactive design, it is designed to become a practical 47 | companion to any RNA-seq dataset analysis, making exploratory data analysis 48 | accessible also to the bench biologist, while providing additional insight also 49 | for the experienced data analyst. 50 | 51 | Starting from development version 1.1.3, `pcaExplorer` supports reproducible 52 | research with state saving and automated report generation. 53 | 54 | ## Citation info 55 | 56 | If you use `pcaExplorer` for your analysis, please cite it as here below: 57 | 58 | ```r 59 | citation("pcaExplorer") 60 | ``` 61 | 62 | ``` 63 | ## 64 | ## To cite package 'pcaExplorer' in publications use: 65 | ## 66 | ## Federico Marini (2016). pcaExplorer: Interactive Visualization 67 | ## of RNA-seq Data Using a Principal Components Approach. R package 68 | ## version 1.1.3. 
https://github.com/federicomarini/pcaExplorer 69 | ## 70 | ## A BibTeX entry for LaTeX users is 71 | ## 72 | ## @Manual{, 73 | ## title = {pcaExplorer: Interactive Visualization of RNA-seq Data Using a Principal Components Approach}, 74 | ## author = {Federico Marini}, 75 | ## year = {2016}, 76 | ## note = {R package version 1.1.3}, 77 | ## url = {https://github.com/federicomarini/pcaExplorer}, 78 | ## } 79 | ``` 80 | 81 | ## Launching the application 82 | 83 | After loading the package, the `pcaExplorer` app can be launched in different modes: 84 | 85 | - `pcaExplorer(dds = dds, dst = dst)`, where `dds` is a `DESeqDataSet` object and `dst` is a `DESeqTransform` 86 | object, which were created during an existing session for the analysis of an RNA-seq 87 | dataset with the `DESeq2` package 88 | 89 | - `pcaExplorer(dds = dds)`, where `dds` is a `DESeqDataSet` object. The `dst` object is automatically 90 | computed upon launch. 91 | 92 | - `pcaExplorer(countmatrix = countmatrix, coldata = coldata)`, where `countmatrix` is a count matrix, generated 93 | after assigning reads to features such as genes via tools such as `HTSeq-count` or `featureCounts`, and `coldata` 94 | is a data frame containing the experimental covariates of the experiments, such as condition, tissue, cell line, 95 | run batch and so on. 96 | 97 | - `pcaExplorer()`, and then subsequently uploading the count matrix and the covariates data frame through the 98 | user interface. These files need to be formatted as tab separated files, which is a common format for storing 99 | such count values. 100 | 101 | Additional parameters and objects that can be provided to the main `pcaExplorer` function are: 102 | 103 | - `pca2go`, which is an object created by the `pca2go` function, which scans the genes with high loadings in 104 | each principal component and each direction, and looks for functions (such as GO Biological Processes) that 105 | are enriched above the background. 
The offline `pca2go` function is based on the routines and algorithms of 106 | the `topGO` package, but as an alternative, this object can be computed live during the execution of the app 107 | exploiting the `goana` function, provided by the `limma` package. Although this likely provides more general 108 | (and probably less informative) functions, it is a good compromise for obtaining a further data interpretation. 109 | 110 | - `annotation`, a data frame object, with `row.names` as gene identifiers (e.g. ENSEMBL ids) identical to the 111 | row names of the count matrix or `dds` object, and an extra column `gene_name`, containing e.g. HGNC-based 112 | gene symbols. This can be used for making information extraction easier, as ENSEMBL ids (a usual choice when 113 | assigning reads to features) do not provide an immediate readout for which gene they refer to. This can be 114 | either passed as a parameter when launching the app, or also uploaded as a tab separated text file. The package 115 | provides two functions, `get_annotation` and `get_annotation_orgdb`, as a convenient wrapper to obtain the updated 116 | annotation information, respectively from `biomaRt` or via the `org.XX.eg.db` packages. 117 | 118 | ## The controls sidebar 119 | 120 | Most of the input controls are located in the sidebar, some are as well in the individual tabs of the app. 121 | By changing one or more of the input parameters, the user can get a fine control on what is displayed. 122 | 123 | ### App settings 124 | 125 | Here are the parameters that set input values for most of the tabs. By hovering over with the mouse, 126 | the user can receive additional information on how to set the parameter, powered by the `shinyBS` package. 127 | 128 | - **x-axis PC** - Select the principal component to display on the x axis 129 | - **y-axis PC** - Select the principal component to display on the y axis 130 | - **Group/color by** - Select the group of samples to stratify the analysis. 
Can also assume multiple values. 131 | - **Nr of (most variable) genes** - Number of genes to select for computing the principal components. The top n genes are 132 | selected ranked by their variance inter-samples 133 | - **Alpha** - Color transparency for the plots. Can assume values from 0 (transparent) to 1 (opaque) 134 | - **Labels size** - Size of the labels for the samples in the principal components plots 135 | - **Points size** - Size of the points to be plotted in the principal components plots 136 | - **Variable name size** - Size of the labels for the genes PCA - correspond to the samples names 137 | - **Scaling factor** - Scale value for resizing the arrow corresponding to the variables in the PCA for the genes. It 138 | should be used for mere visualization purposes 139 | - **Color palette** - Select the color palette to be used in the principal components plots. The number of colors 140 | is selected automatically according to the number of samples and to the levels of the factors of interest 141 | and their interactions 142 | - **Plot style for gene counts** - Plot either boxplots or violin plots, with jittered points superimposed 143 | 144 | ### Plot export settings 145 | 146 | Width and height for the figures to export are input here in cm. 147 | 148 | Additional controls available in the single tabs are also assisted by tooltips that show on hovering the mouse. 149 | Normally they are tightly related to the plot/output they are placed nearby. 
150 | 151 | ## The task menu 152 | 153 | The task menu, accessible by clicking on the cog icon in the upper right part of the application, provides two 154 | functionalities: 155 | 156 | - `Exit pcaExplorer & save` will close the application and store the content of the `input` and `values` reactive 157 | objects in two list objects made available in the global environment, called `pcaExplorer_inputs_YYYYMMDD_HHMMSS` and 158 | `pcaExplorer_values_YYYYMMDD_HHMMSS` 159 | - `Save State as .RData` will similarly store `LiveInputs` and `r_data` in a binary file named 160 | `pcaExplorerState_YYYYMMDD_HHMMSS.Rdata`, without closing the application 161 | 162 | ## The app panels 163 | 164 | The `pcaExplorer` app is structured in different panels, each focused on a different aspect of the 165 | data exploration. 166 | 167 | Most of the panels work extensively with click-based and brush-based interactions, to gain additional 168 | depth in the explorations, for example by zooming, subsetting, selecting. This is possible thanks to the 169 | recent developments in the `shiny` package/framework. 170 | 171 | The available panels are described in the following subsections. 172 | 173 | ### Data Upload 174 | 175 | These **file input** controls are available when no `dds` or `countmatrix` + `coldata` are provided. Additionally, 176 | it is possible to upload the `annotation` data frame. 177 | 178 | When the objects are already passed as parameters, a brief overview/summary for them is displayed. 179 | 180 | ### Instructions 181 | 182 | This is where you most likely are reading this text (otherwise in the package vignette). 183 | 184 | ### Counts Table 185 | 186 | Interactive tables for the raw, normalized or (r)log-transformed counts are shown in this tab. 187 | The user can also generate a sample-to-sample correlation scatter plot with the selected data.
188 | 189 | ### Data Overview 190 | 191 | This panel displays information on the objects in use, either passed as parameters or 192 | generated from the count matrix provided. Displayed information comprise the design metadata, 193 | a sample to sample distance heatmap, the number of million of reads per sample and some 194 | basic summary for the counts. 195 | 196 | ### Samples View 197 | 198 | This panel displays the PCA projections of sample expression profiles onto any pair of components, 199 | a scree plot, a zoomed PCA plot, a plot of the genes with top and bottom loadings. Additionally, this section 200 | presents a PCA plot where it is possible to remove samples deemed to be outliers in the analysis, which is 201 | very useful to check the effect of excluding them. If needed, an interactive 3D visualization of the principal 202 | components is also available. 203 | 204 | ### Genes View 205 | 206 | This panel displays the PCA projections of genes abundances onto any pair of components, with samples 207 | as biplot variables, to identify interesting groups of genes. Zooming is also possible, and clicking on single 208 | genes, a boxplot is returned, grouped by the factors of interest. A static and an interactive heatmap are 209 | provided, including the subset of selected genes, also displayed as (standardized) expression profiles across the 210 | samples. These are also reported in `datatable` objects, accessible in the bottom part of the tab. 211 | 212 | ### GeneFinder 213 | 214 | The user can search and display the expression values of a gene of interest, either by ID or gene 215 | name, as provided in the `annotation`. A handy panel for quick screening of shortlisted genes, again grouped by 216 | the factors of interest. The graphic can be readily exported as it is, and this can be iterated on a shortlisted 217 | set of genes. For each of them, the underlying data is displayed in an interactive table, also exportable with a 218 | click. 
219 | 220 | ### PCA2GO 221 | 222 | This panel shows the functional annotation of the principal components, with GO functions enriched in the 223 | genes with high loadings on the selected principal components. It allows for the live computing of the object, 224 | which can otherwise be provided as a parameter when launching the app. The panel displays a PCA plot for the 225 | samples, surrounded on each side by the tables with the functions enriched in each component and direction. 226 | 227 | ### Multifactor Exploration 228 | 229 | This panel allows for the multifactor exploration of datasets with 2 or more experimental factors. The user has to select 230 | first the two factors and the levels for each. Then, it is possible to combine samples from Factor1-Level1 in the selected 231 | order by clicking on each sample name, one for each level available in the selected Factor2. In order to build the matrix, 232 | an equal number of samples for each level of Factor 1 is required, to keep the design somehow balanced. 233 | A typical case for choosing factors 1 and 2 is for example when different conditions and tissues are present. 234 | 235 | Once constructed, a plot is returned that tries to represent simultaneously the effect of the two factors on the data. 236 | Each gene is represented by a dot-line-dot structure, with the color indicating the tissue (factor 2) where the gene 237 | is mostly expressed. Each gene has two dots, one for each condition level (factor 1), and the position of the points is dictated 238 | by the scores of the principal components calculated on the matrix object. The line connecting the dots is darker when the 239 | tissue where the gene is mostly expressed varies throughout the conditions. 240 | 241 | This representation is under active development, and it is promising for identifying interesting sets or clusters of genes 242 | according to their behavior on the Principal Components subspaces.
Zooming and exporting of the underlying genes is also 243 | allowed by brushing on the main plot. 244 | 245 | ### Report Editor 246 | 247 | The report editor is the backbone for generating and editing the interactive report on the basis of the 248 | uploaded data and the current state of the application. General `Markdown options` and `Editor options` 249 | are available, and the text editor, based on the `shinyAce` package, contains a comprehensive template 250 | report, that can be edited to the best convenience of the user. 251 | 252 | The editor supports R code autocompletion, making it easy to add new code chunks for additional sections. 253 | A preview is available in the tab itself, and the report can be generated, saved and subsequently shared 254 | with simple mouse clicks. 255 | 256 | ### About 257 | 258 | Contains general information on `pcaExplorer`, including the developer's contact, the link to 259 | the development version on GitHub, as well as the output of `sessionInfo`, to use for reproducibility's sake - 260 | or bug reporting. Information for citing `pcaExplorer` is also reported. 261 | 262 | ## Running `pcaExplorer` on published datasets 263 | 264 | We can run `pcaExplorer` for demonstration purposes on published datasets that are available as `SummarizedExperiment` objects 265 | in Bioconductor experiment packages. 266 | 267 | We will use the `airway` dataset, which can be installed with this command 268 | 269 | ``` 270 | if (!requireNamespace("BiocManager", quietly=TRUE)) 271 | install.packages("BiocManager") 272 | BiocManager::install("airway") 273 | ``` 274 | 275 | This package provides a RangedSummarizedExperiment object of read counts in genes for an RNA-Seq experiment 276 | on four human airway smooth muscle cell lines treated with dexamethasone. More details such as gene models and 277 | count quantifications can be found in the `airway` package vignette.
278 | 279 | To run `pcaExplorer` on this dataset, the following commands are required 280 | 281 | ``` 282 | library("airway") 283 | 284 | data("airway", package = "airway") 285 | 286 | dds_airway <- DESeqDataSet(airway,design=~dex+cell) 287 | dds_airway 288 | rld_airway <- rlogTransformation(dds_airway) 289 | rld_airway 290 | pcaExplorer(dds = dds_airway, 291 | rlt = rld_airway) 292 | ``` 293 | The `annotation` for this dataset can be built by exploiting the `org.Hs.eg.db` package 294 | 295 | ``` 296 | library("org.Hs.eg.db") 297 | genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL") 298 | annotation_airway <- data.frame(gene_name = genenames_airway, 299 | row.names = rownames(dds_airway), 300 | stringsAsFactors = FALSE) 301 | head(annotation_airway) 302 | ``` 303 | 304 | or alternatively, by using the `get_annotation` or `get_annotation_orgdb` wrappers. 305 | 306 | ``` 307 | anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, 308 | orgdb_species = "org.Hs.eg.db", 309 | idtype = "ENSEMBL") 310 | 311 | anno_df_biomart <- get_annotation(dds = dds_airway, 312 | biomart_dataset = "hsapiens_gene_ensembl", 313 | idtype = "ensembl_gene_id") 314 | ``` 315 | 316 | Then again, the app can be launched with 317 | 318 | ``` 319 | pcaExplorer(dds = dds_airway, 320 | rlt = rld_airway, 321 | annotation = annotation_airway) 322 | ``` 323 | 324 | If desired, alternatives can be used. See the well written annotation workflow available at the Bioconductor site (https://bioconductor.org/help/workflows/annotation/annotation/). 325 | 326 | ## Running `pcaExplorer` on synthetic datasets 327 | 328 | For testing and demonstration purposes, a function is also available to generate synthetic datasets whose counts 329 | are generated based on two or more experimental factors.
330 | 331 | This can be called with the command 332 | 333 | ``` 334 | dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1) 335 | ``` 336 | 337 | See all the available parameters by typing `?makeExampleDESeqDataSet_multifac`. Credits are given to the initial 338 | implementation by Mike Love in the `DESeq2` package. 339 | 340 | The following steps run the app with the synthetic dataset 341 | 342 | ``` 343 | dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1,betaSD_tissue = 3) 344 | dds_multifac 345 | rld_multifac <- rlogTransformation(dds_multifac) 346 | rld_multifac 347 | ## checking how the samples cluster on the PCA plot 348 | pcaplot(rld_multifac,intgroup = c("condition","tissue")) 349 | ``` 350 | 351 | Launch the app for exploring this dataset with 352 | 353 | ``` 354 | pcaExplorer(dds = dds_multifac, 355 | rlt = rld_multifac) 356 | ``` 357 | 358 | When such a dataset is provided, the panel for multifactorial exploration is also usable at its best. 359 | 360 | ## Functions exported by the package for standalone usage 361 | 362 | The functions exported by the `pcaExplorer` package can be also used in a standalone scenario, 363 | provided the required objects are in the working environment. They are listed here for an overview, 364 | but please refer to the documentation for additional details. 365 | 366 | - `pcaplot` plots the sample PCA for `DESeqTransform` objects, such as rlog-transformed data. This is 367 | the workhorse of the Samples View tab 368 | - `pcaplot3d` - same as for `pcaplot`, but it uses the `threejs` package for the 3d interactive view. 369 | - `pcascree` produces a scree plot of the PC computed on the samples. A `prcomp` object needs to be 370 | passed as main argument 371 | - `correlatePCs` and `plotPCcorrs` respectively compute and plot significance of the (cor)relation 372 | of each covariate versus a principal component. 
The input for `correlatePCs` is a `prcomp` object 373 | - `hi_loadings` extracts and optionally plots the genes with the highest loadings 374 | - `genespca` computes and plots the principal components of the genes, optionally displaying 375 | the samples as in a typical biplot visualization. This is the function in action for the Genes View tab 376 | - `topGOtable` is a convenient wrapper for extracting functional GO terms enriched in a subset of genes 377 | (such as the differentially expressed genes), based on the algorithm and the implementation in the topGO package 378 | - `pca2go` provides a functional interpretation of the principal components, by extracting the genes 379 | with the highest loadings for each PC, and then runs internally `topGOtable` on them for efficient functional 380 | enrichment analysis. Needs a `DESeqTransform` object as main parameter 381 | - `limmaquickpca2go` is an alternative to `pca2go`, used in the live running app, thanks to its fast 382 | implementation based on the `limma::goana` function. 383 | - `makeExampleDESeqDataSet_multifac` constructs a simulated `DESeqDataSet` of Negative Binomial dataset 384 | from different conditions. The fold changes between the conditions can be adjusted with the `betaSD_condition` 385 | and `betaSD_tissue` arguments 386 | - `distro_expr` plots the distribution of expression values, either with density lines, boxplots or 387 | violin plots 388 | - `geneprofiler` plots the profile expression of a subset of genes, optionally as standardized values 389 | - `get_annotation` and `get_annotation_orgdb` retrieve the latest annotations for the `dds` object, to be 390 | used in the call to the `pcaExplorer` function. They use respectively the `biomaRt` package 391 | and the `org.XX.eg.db` packages 392 | - `pair_corr` plots the pairwise scatter plots and computes the correlation coefficient on the 393 | expression matrix provided.
394 | 395 | For more information on the functions of the `pcaExplorer` package, please refer to the 396 | vignette and/or the documentation. 397 | 398 | ## Further development 399 | 400 | Additional functionality for the `pcaExplorer` will be added in the future, as it is tightly related to a topic 401 | under current development research. 402 | 403 | Improvements, suggestions, bugs, issues and feedback of any type can be sent to marinif@uni-mainz.de. 404 | -------------------------------------------------------------------------------- /inst/extdata/instructions_unr.md: -------------------------------------------------------------------------------- 1 | Setup 2 | ===== 3 | 4 | First things first: install 5 | *[pcaExplorer](https://bioconductor.org/packages/3.9/pcaExplorer)* and 6 | load it into your R session. You should receive a message notification 7 | if this is completed without errors. 8 | 9 | ``` r 10 | BiocManager::install("pcaExplorer") 11 | library("pcaExplorer") 12 | ``` 13 | 14 | This document describes a use case for 15 | *[pcaExplorer](https://bioconductor.org/packages/3.9/pcaExplorer)*, 16 | based on the dataset in the 17 | *[airway](https://bioconductor.org/packages/3.9/airway)* package. If 18 | this package is not available on your machine, please install it by 19 | executing: 20 | 21 | ``` r 22 | BiocManager::install("airway") 23 | ``` 24 | 25 | This dataset consists of the gene-level expression measurements (as raw 26 | read counts) for an experiment where four different human airway smooth 27 | muscle cell lines are either treated with dexamethasone or left 28 | untreated. 
29 | 30 | Start exploring - the beauty of interactivity 31 | ============================================= 32 | 33 | To start the exploration, you just need the following lines: 34 | 35 | ``` r 36 | library("pcaExplorer") 37 | pcaExplorer() 38 | ``` 39 | 40 | The easiest way to explore the 41 | *[airway](https://bioconductor.org/packages/3.9/airway)* dataset is by 42 | clicking on the dedicated button in the **Data Upload** panel. This 43 | action will: 44 | 45 | - load the *[airway](https://bioconductor.org/packages/3.9/airway)* 46 | package 47 | - load the count matrix and the experimental metadata 48 | - compose the `dds` object, normalize the expression values (using the 49 | robust method proposed by Anders and Huber in the original DESeq 50 | manuscript), and compute the variance stabilizing transformed 51 | expression values (stored in the `dst` object) 52 | - retrieve the gene annotation information via the 53 | *[org.Hs.eg.db](https://bioconductor.org/packages/3.9/org.Hs.eg.db)*, 54 | adding gene symbols to the ENSEMBL ids - this step is optional, but 55 | recommended for more human-readable identifiers to be used. 56 | 57 | If you want to load your expression data, please refer to the [User 58 | Guide](https://bioconductor.org/packages/3.9/pcaExplorer/vignettes/pcaExplorer.html), 59 | which contains detailed information on the formats your data have to 60 | respect. 61 | 62 | Once the preprocessing of the input is done, you should get a 63 | notification in the lower right corner that you’re all set. The whole 64 | preprocessing should take around 5-6 seconds (tested on a MacBook Pro, 65 | with i7 and 16 Gb RAM). You can check how each component looks like by 66 | clicking on its respective button, once they appeared in the lower half 67 | of the panel. 68 | 69 | Overview of the Data Upload panel. 
After clicking on the 'Load the demo airway data' button, all widgets are automatically populated, and each data component (count matrix, experimental data, dds object, annotation) can be previewed in a modal window by clicking on its respective button. 70 |

71 | Overview of the Data Upload panel. After clicking on the ‘Load the demo 72 | airway data’ button, all widgets are automatically populated, and each 73 | data component (count matrix, experimental data, dds object, annotation) 74 | can be previewed in a modal window by clicking on its respective button. 75 |

76 | 77 | You can proceed to explore the expression values of your dataset in the 78 | **Counts Table** tab. You can change the data type you are displaying 79 | between raw counts, normalized, or transformed, and plot their values in 80 | a scatterplot matrix to explore their sample-to-sample correlations. To 81 | try this, select for example “Normalized counts”, change the correlation 82 | coefficient to “spearman”, and click on the `Run` action button. The 83 | correlation values will also be displayed as a heatmap. 84 | 85 | Screenshot of the sample to sample scatter plot matrix. The user can select the correlation method to use, the option to plot values on log2 scales, and the possibility to use a subset of genes (to obtain a quicker overview if many samples are provided). 86 |

87 | Screenshot of the sample to sample scatter plot matrix. The user can 88 | select the correlation method to use, the option to plot values on log2 89 | scales, and the possibility to use a subset of genes (to obtain a 90 | quicker overview if many samples are provided). 91 |

92 | 93 | Additional information, both for samples and for features, is displayed in 94 | the **Data overview** panel. A closer look at the metadata of the 95 | `airway` set highlights how each combination of cell type (`cell`) and 96 | dexamethasone treatment (`dex`) is represented by a single sequencing 97 | experiment. The 8 samples in the demo dataset are themselves a subsample 98 | of the [full GEO 99 | record](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52778), 100 | namely the ones not treated with albuterol (`alb` column). 101 | 102 | The relationship among samples can be seen in the sample-to-sample 103 | heatmap. For example, by selecting the Manhattan distance metric, it is 104 | evident how the samples cluster by dex treatment, yet they show a 105 | dendrogram structure that recalls the 4 different cell types used. The 106 | total sum of counts per sample is displayed as a bar plot. 107 | 108 | Screenshot of the sample to sample heatmap. Selected is the Manhattan distance, but Euclidean and correlation-based distance are also provided as options. In this case, the user has also selected the dex and cell factors in the 'Group/color by' widget in the sidebar menu, and these covariates decorate the heatmap to facilitate identification of patterns. 109 |

110 | Screenshot of the sample to sample heatmap. Selected is the Manhattan 111 | distance, but Euclidean and correlation-based distance are also provided 112 | as options. In this case, the user has also selected the dex and cell 113 | factors in the ‘Group/color by’ widget in the sidebar menu, and these 114 | covariates decorate the heatmap to facilitate identification of 115 | patterns. 116 |

117 | 118 | Patterns can become clearer after selecting, in the **App settings** on 119 | the left, an experimental factor to group and color by: try selecting 120 | `dex`, for example. If more than one covariate is selected, the 121 | interaction between these will be taken as a grouping factor. To remove 122 | one, simply click on it to highlight and press the del or backspace key 123 | to delete it. Try doing so by also clicking on `cell`, and then removing 124 | `dex` afterwards. 125 | 126 | Basic summary information is also displayed for the genes. In the count 127 | matrix provided, one can check how many genes were detected, by 128 | selecting a “Threshold on the row sums of the counts” or on the row 129 | means of the normalized counts (more stringent). For example, selecting 130 | 5 in both cases, only 24345 genes have a total number of counts, summed 131 | by row, above 5, and 17745 genes have more than 5 counts (normalized) on average. 132 | 133 | Screenshot of the Basic Summary of the counts in the Data Overview panel. General information is provided, together with an overview of detected genes according to different filtering criteria. 134 |

135 | Screenshot of the Basic Summary of the counts in the Data Overview 136 | panel. General information are provided, together with an overview on 137 | detected genes according to different filtering criteria. 138 |

139 | 140 | The **Samples View** and the **Genes View** are the tabs where most 141 | results coming from Principal Component Analysis, either performed on 142 | the samples or on the genes, can be explored in depth. Assuming you 143 | selected `cell` in the “Group/color by” option on the left, the Samples 144 | PCA plot should clearly display how the cell type explains a considerable 145 | portion of the variability in the dataset (corresponding to the second 146 | PC). To check that `dex` treatment is the main source of variability, 147 | select that instead of `cell`. 148 | 149 | The Samples View panel. Displayed are a PCA plot (left) and the corresponding scree plot (right), with the samples colored and labeled by cell type - separating on the second principal component. 150 |

151 | The Samples View panel. Displayed are a PCA plot (left) and the 152 | corresponding scree plot (right), with the samples colored and labeled 153 | by cell type - separating on the second principal component. 154 |

155 | 156 | The scree plot on the right shows how many components should be retained 157 | for a satisfactory reduced dimension view of the original set, with 158 | their eigenvalues from largest to smallest. To explore the PCs other 159 | than the first and the second one, you can just select them in the 160 | x-axis PC and y-axis PC widgets in the left sidebar. 161 | 162 | PCA plot for the samples, colored by dexamethasone treatment. The dex factor is the main driver of the variability in the data, and samples separate nicely on the first principal component. 163 |

164 | PCA plot for the samples, colored by dexamethasone treatment. The dex 165 | factor is the main driver of the variability in the data, and samples 166 | separate nicely on the first principal component. 167 |

168 | 169 | If you brush (left-click and hold) on the PCA plot, you can display a 170 | zoomed version of it in the frame below. If you suspect some samples 171 | might be outliers (this is not the case in the `airway` set, though), you 172 | can select them in the dedicated plot, and get a first look at what the 173 | remainder of the samples would look like. On the right side, you can 174 | quickly check which genes show the top and bottom loadings, split by 175 | principal component. First, change the value in the input widget to 20; 176 | then, select one from each list and try to check them in the **Gene 177 | Finder** tab; try for example with *DUSP1*, *PER1*, and *DDX3Y*. 178 | 179 | Genes with highest loadings on the first and second principal components. The user can select how many top and bottom genes will be displayed, and the gene names are printed below each gene's contribution on each PC. 180 |

181 | Genes with highest loadings on the first and second principal 182 | components. The user can select how many top and bottom genes will be 183 | displayed, and the gene names are printed below each gene’s contribution 184 | on each PC. 185 |

186 | 187 | While *DUSP1* and *PER1* clearly show a change in expression upon 188 | dexamethasone treatment (and indeed were reported among the well known 189 | glucocorticoid-responsive genes in the original publication of Himes et 190 | al., 2014), *DDX3Y* displays variability at the cell type level (select 191 | `cell` in the Group/color by widget): this gene is almost undetected in 192 | N061011 cells, and this high variance is what determines its high 193 | loading on the second principal component. 194 | 195 | Plot of the gene expression levels of DUSP1. Points are split according to dex treatment, and both graphics and table are displayed. 196 |

197 | Plot of the gene expression levels of DUSP1. Points are split according 198 | to dex treatment, and both graphics and table are displayed. 199 |

200 | 201 | Plot of the gene expression levels of PER1. Points are split according to dex treatment. 202 |

203 | Plot of the gene expression levels of PER1. Points are split according 204 | to dex treatment. 205 |

206 | 207 | Plot of the gene expression levels of DDX3Y. Points are split according to cell type, as this gene was highly variable across this experimental factor - indeed, in one cell type it is barely detected. 208 |

209 | Plot of the gene expression levels of DDX3Y. Points are split according 210 | to cell type, as this gene was highly variable across this experimental 211 | factor - indeed, in one cell type it is barely detected. 212 |

213 | 214 | You can see the single expression values in a table as well, and this 215 | information can be downloaded with a simple click. 216 | 217 | Back to the **Samples View**, you can experiment with the number of top 218 | variable genes to see how the results of PCA are in this case robust to 219 | a wide range of this value - this might not be the case with other 220 | datasets, and the simplicity of interacting with these parameters makes 221 | it easy to iterate in the exploration steps. 222 | 223 | Proceeding to the **Genes View**, you can see the dual of the Samples 224 | PCA: now the samples are displayed as arrows in the genes biplot, which 225 | can show which genes display a similar behaviour. You can capture this 226 | with a simple brushing action on the plot, and notice how their profiles 227 | throughout all samples are shown in the Profile explorer below; 228 | moreover, a static and an interactive heatmap, together with a table 229 | containing the underlying data, are generated in the rows below. 230 | 231 | The Genes View panel. Upper panel: the genes biplot, and its zoomed plot, with gene names displayed. Lower panel: the profile explorer of the selected subset of genes (corresponding to the zoomed window), and the boxplot for the gene selected by clicking close to a location in the zoomed window. 232 |

233 | The Genes View panel. Upper panel: the genes biplot, and its zoomed 234 | plot, with gene names displayed. Lower panel: the profile explorer of 235 | the selected subset of genes (corresponding to the zoomed window), and 236 | the boxplot for the gene selected by clicking close to a location in the 237 | zoomed window. 238 |

239 | 240 | Since we compute the gene annotation table as well, it’s nice to read 241 | the gene symbols in the zoomed window (instead of the ENSEMBL ids). By 242 | clicking close enough to any of these genes, the expression values are 243 | plotted, in a similar fashion as in the **Gene Finder**. 244 | 245 | The tab **PCA2GO** helps you understand which are the common 246 | biological themes (default: the Gene Ontology Biological Process terms) in 247 | the genes showing up in the top and in the bottom loadings for each 248 | principal component. Since we launched the `pcaExplorer` app without 249 | additional parameters, this information is not available, but can be 250 | computed live (this might take a while). 251 | 252 | The PCA2GO panel. Four tables (2 per dimension, here only 3 are displayed) decorate the PCA plot in the middle, and display the top enriched functional categories in each subset of genes with high loadings. 253 |

254 | The PCA2GO panel. Four tables (2 per dimension, here only 3 are 255 | displayed) decorate the PCA plot in the middle, and display the top 256 | enriched functional categories in each subset of genes with high 257 | loadings. 258 |

259 | 260 | Still, a previous call to `pca2go` is recommended, as it relies on the 261 | algorithm of the *[topGO](https://bioconductor.org/packages/3.9/topGO)* 262 | package: it will require some additional computing time, but it is 263 | likely to deliver more precise terms (i.e. in turn more relevant from 264 | a biological point of view). To do so, you should 265 | exit the live session, compute this object, and provide it in the call 266 | to `pcaExplorer` (see more on how to do so in [the main user 267 | guide](https://bioconductor.org/packages/3.9/pcaExplorer/vignettes/pcaExplorer.html)). 268 | 269 | When you’re done - the power of reproducibility 270 | =============================================== 271 | 272 | A typical session with `pcaExplorer` includes one or more iterations on 273 | each of these tabs. Once you are finished, you might want to store the 274 | results of your analysis in different formats. 275 | 276 | The pcaExplorer task menu. Buttons for saving the session to binary data or to a dedicated environment are displayed. 277 |

278 | The pcaExplorer task menu. Buttons for saving the session to binary data 279 | or to a dedicated environment are displayed. 280 |

281 | 282 | With `pcaExplorer` you can do all of the following: 283 | 284 | - save every plot and table by simply clicking on the respective 285 | button, below each element 286 | - save the state of the entire app and its reactive elements as a 287 | binary `.RData` file, as if it was a workspace (clicking on the cog 288 | icon in the right side of the task menu) 289 | - use the “Exit `pcaExplorer` and save” button, which saves the state in a 290 | specific environment of your R session, which you can later access 291 | by its name, which normally could look like 292 | `pcaExplorerState_YYYYMMDD_HHMMSS` (also accessible from the cog) 293 | - enjoy the beauty of reproducible research in the **Report Editor**: 294 | `pcaExplorer` comes with a template analysis, that picks the latest 295 | status of the app during your session, and combines these reactive 296 | values together in an R Markdown document, which you can first 297 | preview live in the app, and then download as a standalone HTML file - 298 | to store or share. This document stitches together narrative text, 299 | code, and output objects, and constitutes a compendium where all 300 | actions are recorded. If you are familiar with R, you can edit that 301 | live, with support for autocompletion, in the “Edit report” tab. 302 | 303 | The Report Editor tab. The collapsible elements control general markdown and editor options, which are taken into account when the report is compiled. Its content is specified in the Ace editor, integrated in the Shiny app. 304 |

305 | The Report Editor tab. The collapsible elements control general markdown 306 | and editor options, which are taken into account when the report is compiled. Its 307 | content is specified in the Ace editor, integrated in the Shiny app. 308 |

309 | -------------------------------------------------------------------------------- /inst/extdata/reportTemplate.Rmd: -------------------------------------------------------------------------------- 1 | # About this report 2 | 3 | This content has been loaded from the template report `.Rmd` file. Please edit it at your best convenience! 4 | 5 | If you are viewing this report in the Preview, you might require the installation of the PhantomJS to render correctly some HTML widgets. 6 | This can be done by using the `r BiocStyle::CRANpkg("webshot")` package and calling `webshot::install_phantomjs()`. 7 | Alternatively, the more recent `r BiocStyle::CRANpkg("webshot2")` package uses the headless Chrome browser (via the `r BiocStyle::CRANpkg("chromote")` package, requiring Google Chrome or other Chromium-based browser). 8 | 9 | ```{r setup, include=FALSE, eval = TRUE, echo = FALSE} 10 | opts_chunk$set( 11 | echo=input$report_echo, 12 | error=TRUE 13 | ) 14 | ``` 15 | 16 | # Overview on the data 17 | 18 | The data provided was used to construct the following objects 19 | 20 | ```{r} 21 | values$mydds 22 | 23 | values$mydst 24 | 25 | values$transformation_type 26 | 27 | head(values$myannotation) 28 | ``` 29 | 30 | The following design were used: 31 | 32 | ```{r} 33 | DT::datatable(as.data.frame(colData(values$mydds))) 34 | ``` 35 | 36 | An overview of the table for the features is shown here, by displaying the `r input$countstable_unit` 37 | 38 | ```{r} 39 | if(input$countstable_unit=="raw_counts") 40 | currentMat <- counts(values$mydds,normalized=FALSE) 41 | if(input$countstable_unit=="normalized_counts") 42 | currentMat <- counts(values$mydds,normalized=TRUE) 43 | if(input$countstable_unit=="rlog_counts") 44 | currentMat <- assay(values$mydst) 45 | if(input$countstable_unit=="log10_counts") 46 | currentMat <- log10(1 + counts(values$mydds,normalized=TRUE)) 47 | ``` 48 | 49 | ```{r, warning=FALSE} 50 | DT::datatable(currentMat) 51 | ``` 52 | 53 | This is how the 
samples cluster if we use euclidean distance on the rlog transformed values 54 | 55 | ```{r} 56 | if (!is.null(input$color_by)){ 57 | expgroups <- as.data.frame(colData(values$mydst)[,input$color_by]) 58 | # expgroups <- interaction(expgroups) 59 | rownames(expgroups) <- colnames(values$mydst) 60 | colnames(expgroups) <- input$color_by 61 | 62 | pheatmap(as.matrix(dist(t(assay(values$mydst)))),annotation_col = expgroups) 63 | } else { 64 | pheatmap(as.matrix(dist(t(assay(values$mydst))))) 65 | } 66 | 67 | ``` 68 | 69 | This is an overview of the number of available reads in each sample (normally these are only uniquely aligned reads) 70 | 71 | ```{r} 72 | rr <- colSums(counts(values$mydds))/1e6 73 | if(is.null(names(rr))) 74 | names(rr) <- paste0("sample_",1:length(rr)) 75 | rrdf <- data.frame(Reads=rr,Sample=names(rr),stringsAsFactors = FALSE) 76 | if (!is.null(input$color_by)) { 77 | selGroups <- as.data.frame(colData(values$mydds)[input$color_by]) 78 | rrdf$Group <- interaction(selGroups) 79 | p <- ggplot(rrdf,aes_string("Sample",weight="Reads")) + geom_bar(aes_string(fill="Group")) + theme_bw() 80 | p 81 | } else { 82 | p <- ggplot(rrdf,aes_string("Sample",weight="Reads")) + geom_bar() + theme_bw() 83 | p 84 | } 85 | 86 | print(colSums(counts(values$mydds))) 87 | summary(colSums(counts(values$mydds))/1e6) 88 | ``` 89 | 90 | This is a quick info on the number of detected genes 91 | 92 | ```{r} 93 | t1 <- rowSums(counts(values$mydds)) 94 | t2 <- rowMeans(counts(values$mydds,normalized=TRUE)) 95 | 96 | thresh_rowsums <- input$threshold_rowsums 97 | thresh_rowmeans <- input$threshold_rowmeans 98 | abs_t1 <- sum(t1 > thresh_rowsums) 99 | rel_t1 <- 100 * mean(t1 > thresh_rowsums) 100 | abs_t2 <- sum(t2 > thresh_rowmeans) 101 | rel_t2 <- 100 * mean(t2 > thresh_rowmeans) 102 | 103 | cat("Number of detected genes:\n") 104 | # TODO: parametrize the thresholds 105 | cat(abs_t1,"genes have at least a sample with more than",thresh_rowsums,"counts\n") 106 | 
cat(paste0(round(rel_t1,3),"%"), "of the",nrow(values$mydds),"genes have at least a sample with more than",thresh_rowsums,"counts\n") 107 | cat(abs_t2,"genes have more than",thresh_rowmeans,"counts (normalized) on average\n") 108 | cat(paste0(round(rel_t2,3),"%"), "of the",nrow(values$mydds),"genes have more than",thresh_rowmeans,"counts (normalized) on average\n") 109 | cat("Counts are ranging from", min(counts(values$mydds)),"to",max(counts(values$mydds))) 110 | ``` 111 | 112 | # PCA on the samples 113 | 114 | This plot shows how the samples are related to each other by plotting PC `r input$pc_x` vs PC `r input$pc_y`, using the top `r input$pca_nrgenes` most variable genes 115 | 116 | ```{r} 117 | res <- pcaplot(values$mydst,intgroup = input$color_by,ntop = input$pca_nrgenes, 118 | pcX = as.integer(input$pc_x),pcY = as.integer(input$pc_y), 119 | text_labels = input$sample_labels, 120 | point_size = input$pca_point_size, title="Samples PCA - zoom in", 121 | ellipse = input$pca_ellipse, ellipse.prob = input$pca_cislider 122 | ) 123 | res <- res + theme_bw() 124 | res 125 | ``` 126 | 127 | The scree plot helps determining the number of underlying principal components 128 | 129 | ```{r} 130 | rv <- rowVars(assay(values$mydst)) 131 | select <- order(rv, decreasing = TRUE)[seq_len(min(input$pca_nrgenes,length(rv)))] 132 | pca <- prcomp(t(assay(values$mydst)[select, ])) 133 | 134 | res <- pcascree(pca,type = input$scree_type, pc_nr = input$scree_pcnr, title="Scree plot for the samples PCA") 135 | res <- res + theme_bw() 136 | res 137 | ``` 138 | 139 | The genes with the highest loadings in the selected principal components are the following 140 | 141 | ```{r} 142 | rv <- rowVars(assay(values$mydst)) 143 | select <- order(rv, decreasing = TRUE)[seq_len(min(input$pca_nrgenes,length(rv)))] 144 | pca <- prcomp(t(assay(values$mydst)[select, ])) 145 | 146 | par(mfrow=c(2,1)) 147 | hi_loadings(pca,whichpc = as.integer(input$pc_x),topN = input$ntophiload,annotation = 
values$myannotation) 148 | hi_loadings(pca,whichpc = as.integer(input$pc_y),topN = input$ntophiload,annotation = values$myannotation) 149 | ``` 150 | 151 | # PCA on the genes 152 | 153 | This plot illustrates how the top `r input$pca_nrgenes` variant genes are distributed in PC `r input$pc_x` vs PC `r input$pc_y` 154 | 155 | ```{r} 156 | if(!is.null(input$color_by)) { 157 | expgroups <- as.data.frame(colData(values$mydst)[,input$color_by]) 158 | expgroups <- interaction(expgroups) 159 | expgroups <- factor(expgroups,levels=unique(expgroups)) 160 | 161 | } else { 162 | expgroups <- colnames(values$mydst) 163 | } 164 | colGroups <- colSel()[factor(expgroups)] 165 | 166 | res <- genespca(values$mydst, 167 | ntop = input$pca_nrgenes, 168 | choices = c(as.integer(input$pc_x),as.integer(input$pc_y)), 169 | biplot = TRUE, 170 | arrowColors = factor(colGroups,levels=unique(colGroups)), 171 | groupNames = expgroups, 172 | alpha=input$pca_point_alpha,coordEqual=FALSE,useRownamesAsLabels=FALSE,labels.size=input$pca_label_size, 173 | point_size=input$pca_point_size,varname.size=input$pca_varname_size, scaleArrow = input$pca_scale_arrow, 174 | annotation=values$myannotation) 175 | res 176 | ``` 177 | 178 | 179 | For the selected genes, this is the overall profile across all samples 180 | 181 | ```{r} 182 | if(!is.null(input$pcagenes_brush) & length(input$color_by)>0) 183 | geneprofiler(values$mydst, 184 | genelist = curData_brush()$ids, 185 | intgroup = input$color_by, 186 | plotZ = input$zprofile) 187 | ``` 188 | 189 | And here is an interactive heatmap for that subset 190 | 191 | ```{r} 192 | 193 | if(!is.null(input$pcagenes_brush)) 194 | { 195 | brushedObject <- curData_brush() 196 | if(nrow(brushedObject) > 1){ 197 | selectedGenes <- brushedObject$ids 198 | toplot <- assay(values$mydst)[selectedGenes,] 199 | rownames(toplot) <- values$myannotation$gene_name[match(rownames(toplot),rownames(values$myannotation))] 200 | 201 | mycolss <- 
c("#313695","#4575b4","#74add1","#abd9e9","#e0f3f8","#fee090","#fdae61","#f46d43","#d73027","#a50026") # to be consistent with red/blue usual coding 202 | 203 | heatmaply(toplot,Colv = as.logical(input$heatmap_colv),colors = mycolss) 204 | } 205 | } 206 | 207 | ``` 208 | 209 | # Shortlisted genes 210 | 211 | This gene was selected in the interactive session. 212 | 213 | ```{r} 214 | anno_id <- rownames(values$mydst) 215 | anno_gene <- values$myannotation$gene_name 216 | 217 | # if(is.null(input$color_by) & input$genefinder!="") 218 | # return(ggplot() + annotate("text",label="Select a factor to plot your gene",0,0) + theme_bw()) 219 | # if(is.null(input$color_by) & input$genefinder=="") 220 | # return(ggplot() + annotate("text",label="Select a gene and a factor to plot gene",0,0) + theme_bw()) 221 | # if(input$genefinder=="") 222 | # return(ggplot() + annotate("text",label="Type in a gene name/id",0,0) + theme_bw()) 223 | # if(!input$genefinder %in% anno_gene & !input$genefinder %in% anno_id) 224 | # return(ggplot() + annotate("text",label="Gene not found...",0,0) + theme_bw()) 225 | if(input$genefinder!="") { 226 | 227 | if (input$genefinder %in% anno_id) { 228 | selectedGene <- rownames(values$mydst)[match(input$genefinder,rownames(values$mydst))] 229 | selectedGeneSymbol <- values$myannotation$gene_name[match(selectedGene,rownames(values$myannotation))] 230 | } 231 | if (input$genefinder %in% anno_gene) { 232 | selectedGeneSymbol <- values$myannotation$gene_name[which(values$myannotation$gene_name==input$genefinder)] 233 | if (length(selectedGeneSymbol) > 1) return(ggplot() + annotate("text",label=paste0("Type in a gene name/id of the following:\n",paste(selectedGene,collapse=", ")),0,0) + theme_bw()) 234 | selectedGene <- rownames(values$myannotation)[which(values$myannotation$gene_name==input$genefinder)] 235 | } 236 | genedata <- plotCounts(values$mydds,gene=selectedGene,intgroup = input$color_by,returnData = TRUE) 237 | onlyfactors <- 
genedata[,match(input$color_by,colnames(genedata))] 238 | genedata$plotby <- interaction(onlyfactors) 239 | 240 | if (input$plot_style == "boxplot") { 241 | plot_style <- "boxplot" 242 | } else if (input$plot_style == "violin plot") { 243 | plot_style <- "violin" 244 | } else { 245 | plot_style <- "auto" 246 | } 247 | 248 | res <- mosdef::gene_plot(de_container = values$mydds, 249 | gene = selectedGene, 250 | intgroup = input$color_by, 251 | annotation_obj = values$myannotation, 252 | normalized = TRUE, 253 | labels_display = input$addsamplelabels, 254 | plot_type = plot_style) 255 | 256 | if (input$ylimZero) { 257 | res <- res + scale_y_log10(name = "Normalized counts - log10 scale", limits = c(0.4, NA)) 258 | } else { 259 | res <- res + scale_y_log10(name = "Normalized counts - log10 scale") 260 | } 261 | 262 | res <- res + 263 | labs(title = paste0("Normalized counts for ", selectedGeneSymbol, " - ", selectedGene)) + 264 | scale_x_discrete(name = "") + 265 | scale_fill_discrete(name = "Experimental\nconditions") 266 | 267 | exportPlots$genefinder_countsplot <- res 268 | 269 | res 270 | } 271 | ``` 272 | 273 | Repeat the same chunk of code and change the identifier of the gene to obtain the similar plot for the other candidates. 274 | 275 | # Functional interpretation of the principal components 276 | 277 | These tables report the functional categories enriched in the genes with the top and bottom loadings in the selected principal components. 
278 | 279 | ```{r} 280 | if(!is.null(values$mypca2go)) 281 | { 282 | goe <- values$mypca2go[[paste0("PC",input$pc_x)]][["posLoad"]] 283 | kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_x, "- positive loadings")) 284 | } 285 | 286 | if(!is.null(values$mypca2go)) 287 | { 288 | goe <- values$mypca2go[[paste0("PC",input$pc_x)]][["negLoad"]] 289 | kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_x, "- negative loadings")) 290 | } 291 | 292 | if(!is.null(values$mypca2go)) 293 | { 294 | goe <- values$mypca2go[[paste0("PC",input$pc_y)]][["posLoad"]] 295 | kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_y, "- positive loadings")) 296 | } 297 | 298 | if(!is.null(values$mypca2go)) 299 | { 300 | goe <- values$mypca2go[[paste0("PC",input$pc_y)]][["negLoad"]] 301 | kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_y, "- negative loadings")) 302 | } 303 | ``` 304 | 305 | # Multifactor exploration of the dataset 306 | 307 | ```{r} 308 | 309 | if(input$composemat > 0){ 310 | pcmat <- obj3()[[1]] 311 | tcol <- obj3()[[2]] 312 | tcol2 <- obj3()[[3]] 313 | pres <- prcomp(t(pcmat),scale=FALSE) 314 | 315 | plot.index <- c(as.integer(input$pc_x_multifac),as.integer(input$pc_y_multifac)) 316 | offset <- ncol(pcmat)/2 317 | gene.no <- offset 318 | pcx <- pres$x 319 | # set.seed(11) 320 | # for (i in 1:ncol(pcx)) { 321 | # pcx[,i] <- pcx[,i] + rnorm(nrow(pcx),sd=diff(range(pcx[,i]))/100) 322 | # } 323 | plot(pcx[(offset+1):ncol(pcmat),plot.index[1]][1:gene.no],pcx[(offset+1):ncol(pcmat),plot.index[2]][1:gene.no],xlim=range(pcx[,plot.index[1]]),ylim=range(pcx[,plot.index[2]]),pch=20,col=tcol,cex=0.3)#,type="n") 324 | #plot(0,type="n",xlim=range(pres$x[,plot.index]),ylim=range(pres$x[,plot.index])) 325 | lcol <- ifelse(tcol != tcol2,"black","grey") 326 | for (i in 1:gene.no) { 327 | lines(pcx[c(i,offset+i),plot.index[1]],pcx[c(i,offset+i),plot.index[2]],col=lcol[i]) 328 | } 329 | 
points(pcx[1:offset,plot.index[1]][1:gene.no],pcx[1:offset,plot.index[2]][1:gene.no],pch=20,col=tcol,cex=0.3) 330 | points(pcx[(offset+1):ncol(pcmat),plot.index[1]][1:gene.no],pcx[(offset+1):ncol(pcmat),plot.index[2]][1:gene.no],pch=20,col=tcol2,cex=0.3)} 331 | ``` 332 | 333 | # About pcaExplorer 334 | 335 | `pcaExplorer` is a Bioconductor package containing a Shiny application for 336 | analyzing expression data in different conditions and experimental factors. 337 | 338 | `pcaExplorer` guides the user in exploring the Principal Components of the data, 339 | providing tools and functionality to detect outlier samples, genes that show 340 | particular patterns, and additionally provides a functional interpretation of 341 | the principal components for further quality assessment and hypothesis generation 342 | on the input data. 343 | 344 | Thanks to its interactive/reactive design, it is designed to become a practical 345 | companion to any RNA-seq dataset analysis, making exploratory data analysis 346 | accessible also to the bench biologist, while providing additional insight also 347 | for the experienced data analyst. 348 | 349 | `pcaExplorer` was developed in the Bioinformatics Division led by Harald Binder 350 | at the IMBEI (Institut für Medizinische Biometrie, Epidemiologie und Informatik) 351 | in the University Medical Center of the Johannes Gutenberg University Mainz. 352 | 353 | ## Developers 354 | 355 | `pcaExplorer` is currently maintained by Federico Marini at the IMBEI (www.imbei.uni-mainz.de). 356 | You can contact him by clicking on the button below. 357 | 358 | Federico Marini 359 | 360 | ## Code 361 | 362 | `pcaExplorer` is a part of the Bioconductor project (www.bioconductor.org). 363 | All code for `pcaExplorer`, especially for the development version, is available 364 | on GitHub. 
365 | 366 | # Citation info 367 | 368 | If you use `pcaExplorer` for your analysis, please cite it as here below: 369 | 370 | ```{r} 371 | citation("pcaExplorer") 372 | ``` 373 | 374 | # Session Information 375 | 376 | ```{r} 377 | sessionInfo() 378 | ``` 379 | 380 | ```{r, echo = FALSE} 381 | library(shiny) 382 | footertemplate <- function(){ 383 | tags$div( 384 | class = "footer", 385 | style = "text-align:center", 386 | tags$div( 387 | class = "foot-inner", 388 | list( 389 | hr(), 390 | "This report was generated with", tags$a(href="http://bioconductor.org/packages/pcaExplorer/", "pcaExplorer"), br(), 391 | "pcaExplorer is a project developed by Federico Marini in the Bioinformatics division of the ", 392 | tags$a(href="http://www.unimedizin-mainz.de/imbei","IMBEI"),br(), 393 | "Development of the pcaExplorer package is on ", 394 | tags$a(href="https://github.com/federicomarini/pcaExplorer", "GitHub") 395 | ) 396 | ) 397 | ) 398 | } 399 | ``` 400 | 401 | ```{r, echo = FALSE} 402 | footertemplate() 403 | ``` 404 | -------------------------------------------------------------------------------- /inst/www/help_dataformats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/inst/www/help_dataformats.png -------------------------------------------------------------------------------- /inst/www/pcaExplorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/inst/www/pcaExplorer.png -------------------------------------------------------------------------------- /man/correlatePCs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/correlatePCs.R 3 | \name{correlatePCs} 4 | 
\alias{correlatePCs} 5 | \title{Principal components (cor)relation with experimental covariates} 6 | \usage{ 7 | correlatePCs(pcaobj, coldata, pcs = 1:4) 8 | } 9 | \arguments{ 10 | \item{pcaobj}{A \code{prcomp} object} 11 | 12 | \item{coldata}{A \code{data.frame} object containing the experimental 13 | covariates} 14 | 15 | \item{pcs}{A numeric vector, containing the corresponding PC number} 16 | } 17 | \value{ 18 | A \code{data.frame} object with computed p values for each covariate 19 | and for each principal component 20 | } 21 | \description{ 22 | Computes the significance of (cor)relations between PCA scores and the sample 23 | experimental covariates, using Kruskal-Wallis test for categorial variables 24 | and the \code{cor.test} based on Spearman's correlation for continuous 25 | variables 26 | } 27 | \examples{ 28 | library(DESeq2) 29 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 30 | rlt <- DESeq2::rlogTransformation(dds) 31 | pcaobj <- prcomp(t(assay(rlt))) 32 | correlatePCs(pcaobj, colData(dds)) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \name{deprecated} 4 | \alias{deprecated} 5 | \title{Deprecated functions in pcaExplorer} 6 | \arguments{ 7 | \item{...}{Ignored arguments.} 8 | } 9 | \value{ 10 | All functions throw a warning, with a deprecation message pointing 11 | towards its descendent (if available). 12 | } 13 | \description{ 14 | Functions that are on their way to the function afterlife. 15 | Their successors are also listed. 16 | } 17 | \details{ 18 | The successors of these functions are likely coming after the rework that 19 | led to the creation of the \code{mosdef} package. See more into its 20 | documentation for more details. 
21 | } 22 | \section{Transitioning to the mosdef framework}{ 23 | 24 | \itemize{ 25 | \item \code{\link[=topGOtable]{topGOtable()}} is now being replaced by the more flexible 26 | \code{\link[mosdef:run_topGO]{mosdef::run_topGO()}} function 27 | } 28 | } 29 | 30 | \examples{ 31 | # try(topGOtable()) 32 | 33 | } 34 | \author{ 35 | Federico Marini 36 | } 37 | -------------------------------------------------------------------------------- /man/distro_expr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distro_expr.R 3 | \name{distro_expr} 4 | \alias{distro_expr} 5 | \title{Plot distribution of expression values} 6 | \usage{ 7 | distro_expr(rld, plot_type = "density") 8 | } 9 | \arguments{ 10 | \item{rld}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object.} 11 | 12 | \item{plot_type}{Character, choose one of \code{boxplot}, \code{violin} or 13 | \code{density}. 
Defaults to \code{density}} 14 | } 15 | \value{ 16 | A plot with the distribution of the expression values 17 | } 18 | \description{ 19 | Plot distribution of expression values 20 | } 21 | \examples{ 22 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 23 | rlt <- DESeq2::rlogTransformation(dds) 24 | distro_expr(rlt) 25 | } 26 | -------------------------------------------------------------------------------- /man/figures/pcaExplorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/man/figures/pcaExplorer.png -------------------------------------------------------------------------------- /man/geneprofiler.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/profile_explore.R 3 | \name{geneprofiler} 4 | \alias{geneprofiler} 5 | \title{Extract and plot the expression profile of genes} 6 | \usage{ 7 | geneprofiler(se, genelist = NULL, intgroup = "condition", plotZ = FALSE) 8 | } 9 | \arguments{ 10 | \item{se}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object, or a 11 | \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object.} 12 | 13 | \item{genelist}{An array of characters, including the names of the genes of 14 | interest of which the profile is to be plotted} 15 | 16 | \item{intgroup}{A factor, needs to be in the \code{colnames} of \code{colData(se)}} 17 | 18 | \item{plotZ}{Logical, whether to plot the scaled expression values. 
Defaults to 19 | \code{FALSE}} 20 | } 21 | \value{ 22 | A plot of the expression profile for the genes 23 | } 24 | \description{ 25 | Extract and plot the expression profile of genes 26 | } 27 | \examples{ 28 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 29 | rlt <- DESeq2::rlogTransformation(dds) 30 | geneprofiler(rlt, paste0("gene", sample(1:1000, 20))) 31 | geneprofiler(rlt, paste0("gene", sample(1:1000, 20)), plotZ = TRUE) 32 | } 33 | -------------------------------------------------------------------------------- /man/genespca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/genespca.R 3 | \name{genespca} 4 | \alias{genespca} 5 | \title{Principal components analysis on the genes} 6 | \usage{ 7 | genespca( 8 | x, 9 | ntop, 10 | choices = c(1, 2), 11 | arrowColors = "steelblue", 12 | groupNames = "group", 13 | biplot = TRUE, 14 | scale = 1, 15 | pc.biplot = TRUE, 16 | obs.scale = 1 - scale, 17 | var.scale = scale, 18 | groups = NULL, 19 | ellipse = FALSE, 20 | ellipse.prob = 0.68, 21 | labels = NULL, 22 | labels.size = 3, 23 | alpha = 1, 24 | var.axes = TRUE, 25 | circle = FALSE, 26 | circle.prob = 0.69, 27 | varname.size = 4, 28 | varname.adjust = 1.5, 29 | varname.abbrev = FALSE, 30 | returnData = FALSE, 31 | coordEqual = FALSE, 32 | scaleArrow = 1, 33 | useRownamesAsLabels = TRUE, 34 | point_size = 2, 35 | annotation = NULL 36 | ) 37 | } 38 | \arguments{ 39 | \item{x}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(x)}, 40 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or 41 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}} 42 | 43 | \item{ntop}{Number of top genes to use for principal components, 44 | selected by highest row variance} 45 | 46 | \item{choices}{Vector of two 
numeric values, to select on which principal components to plot} 47 | 48 | \item{arrowColors}{Vector of character, either as long as the number of the samples, or one single value} 49 | 50 | \item{groupNames}{Factor containing the groupings for the input data. Is efficiently chosen 51 | as the (interaction of more) factors in the colData for the object provided} 52 | 53 | \item{biplot}{Logical, whether to additionally draw the samples labels as in a biplot representation} 54 | 55 | \item{scale}{Covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1, 56 | the inner product between the variables approximates the covariance and the 57 | distance between the points approximates the Mahalanobis distance.} 58 | 59 | \item{pc.biplot}{Logical, for compatibility with biplot.princomp()} 60 | 61 | \item{obs.scale}{Scale factor to apply to observations} 62 | 63 | \item{var.scale}{Scale factor to apply to variables} 64 | 65 | \item{groups}{Optional factor variable indicating the groups that the observations 66 | belong to. If provided the points will be colored according to groups} 67 | 68 | \item{ellipse}{Logical, draw a normal data ellipse for each group} 69 | 70 | \item{ellipse.prob}{Size of the ellipse in Normal probability} 71 | 72 | \item{labels}{optional Vector of labels for the observations} 73 | 74 | \item{labels.size}{Size of the text used for the labels} 75 | 76 | \item{alpha}{Alpha transparency value for the points (0 = transparent, 1 = opaque)} 77 | 78 | \item{var.axes}{Logical, draw arrows for the variables?} 79 | 80 | \item{circle}{Logical, draw a correlation circle? 
(only applies when prcomp 81 | was called with scale = TRUE and when var.scale = 1)} 82 | 83 | \item{circle.prob}{Size of the correlation circle in Normal probability} 84 | 85 | \item{varname.size}{Size of the text for variable names} 86 | 87 | \item{varname.adjust}{Adjustment factor the placement of the variable names, 88 | '>= 1' means farther from the arrow} 89 | 90 | \item{varname.abbrev}{Logical, whether or not to abbreviate the variable names} 91 | 92 | \item{returnData}{Logical, if TRUE returns a data.frame for further use, containing the 93 | selected principal components for custom plotting} 94 | 95 | \item{coordEqual}{Logical, default FALSE, for allowing brushing. If TRUE, plot using 96 | equal scale cartesian coordinates} 97 | 98 | \item{scaleArrow}{Multiplicative factor, usually >=1, only for visualization purposes, 99 | to allow for distinguishing where the variables are plotted} 100 | 101 | \item{useRownamesAsLabels}{Logical, if TRUE uses the row names as labels for plotting} 102 | 103 | \item{point_size}{Size of the points to be plotted for the observations (genes)} 104 | 105 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids) 106 | and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols} 107 | } 108 | \value{ 109 | An object created by \code{ggplot}, which can be assigned and further customized. 110 | } 111 | \description{ 112 | Computes and plots the principal components of the genes, eventually displaying 113 | the samples as in a typical biplot visualization. 114 | } 115 | \details{ 116 | The implementation of this function is based on the beautiful \code{ggbiplot} 117 | package developed by Vince Vu, available at https://github.com/vqv/ggbiplot. 
118 | The adaptation and additional parameters are tailored to display typical genomics data 119 | such as the transformed counts of RNA-seq experiments 120 | } 121 | \examples{ 122 | 123 | library(DESeq2) 124 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 125 | rlt <- rlogTransformation(dds) 126 | groups <- colData(dds)$condition 127 | groups <- factor(groups, levels = unique(groups)) 128 | cols <- scales::hue_pal()(2)[groups] 129 | genespca(rlt, ntop=100, arrowColors = cols, groupNames = groups) 130 | 131 | groups_multi <- interaction(as.data.frame(colData(rlt)[, c("condition", "tissue")])) 132 | groups_multi <- factor(groups_multi, levels = unique(groups_multi)) 133 | cols_multi <- scales::hue_pal()(length(levels(groups_multi)))[factor(groups_multi)] 134 | genespca(rlt, ntop = 100, arrowColors = cols_multi, groupNames = groups_multi) 135 | 136 | } 137 | -------------------------------------------------------------------------------- /man/get_annotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_annotation.R 3 | \name{get_annotation} 4 | \alias{get_annotation} 5 | \title{Get an annotation data frame from biomaRt} 6 | \usage{ 7 | get_annotation(dds, biomart_dataset, idtype) 8 | } 9 | \arguments{ 10 | \item{dds}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object} 11 | 12 | \item{biomart_dataset}{A biomaRt dataset to use. To see the list, type 13 | \code{mart = useMart('ensembl')}, followed by \code{listDatasets(mart)}.} 14 | 15 | \item{idtype}{Character, the ID type of the genes as in the row names of 16 | \code{dds}, to be used for the call to \code{\link[biomaRt:getBM]{biomaRt::getBM()}}} 17 | } 18 | \value{ 19 | A data frame for ready use in \code{pcaExplorer}, retrieved from biomaRt. 
20 | } 21 | \description{ 22 | Get an annotation data frame from biomaRt 23 | } 24 | \examples{ 25 | library("airway") 26 | data("airway", package = "airway") 27 | airway 28 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway), 29 | colData = colData(airway), 30 | design = ~dex+cell) 31 | \dontrun{ 32 | get_annotation(dds_airway, "hsapiens_gene_ensembl", "ensembl_gene_id") 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/get_annotation_orgdb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_annotation.R 3 | \name{get_annotation_orgdb} 4 | \alias{get_annotation_orgdb} 5 | \title{Get an annotation data frame from org db packages} 6 | \usage{ 7 | get_annotation_orgdb(dds, orgdb_species, idtype, key_for_genenames = "SYMBOL") 8 | } 9 | \arguments{ 10 | \item{dds}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object} 11 | 12 | \item{orgdb_species}{Character string, named as the \code{org.XX.eg.db} 13 | package which should be available in Bioconductor} 14 | 15 | \item{idtype}{Character, the ID type of the genes as in the row names of 16 | \code{dds}, to be used for the call to \code{\link[AnnotationDbi:AnnotationDb-class]{AnnotationDbi::mapIds()}}} 17 | 18 | \item{key_for_genenames}{Character, corresponding to the column name for the 19 | key in the orgDb package containing the official gene name (often called 20 | gene symbol). 21 | This parameter defaults to "SYMBOL", but can be adjusted in case the key is not 22 | found in the annotation package (e.g. 
for \code{org.Sc.sgd.db}).} 23 | } 24 | \value{ 25 | A data frame for ready use in \code{pcaExplorer}, retrieved from the 26 | org db packages 27 | } 28 | \description{ 29 | Get an annotation data frame from org db packages 30 | } 31 | \examples{ 32 | library("airway") 33 | data("airway", package = "airway") 34 | airway 35 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway), 36 | colData = colData(airway), 37 | design = ~dex+cell) 38 | anno_df <- get_annotation_orgdb(dds_airway, "org.Hs.eg.db", "ENSEMBL") 39 | head(anno_df) 40 | } 41 | -------------------------------------------------------------------------------- /man/hi_loadings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hi_loadings.R 3 | \name{hi_loadings} 4 | \alias{hi_loadings} 5 | \title{Extract genes with highest loadings} 6 | \usage{ 7 | hi_loadings( 8 | pcaobj, 9 | whichpc = 1, 10 | topN = 10, 11 | exprTable = NULL, 12 | annotation = NULL, 13 | title = "Top/bottom loadings" 14 | ) 15 | } 16 | \arguments{ 17 | \item{pcaobj}{A \code{prcomp} object} 18 | 19 | \item{whichpc}{An integer number, corresponding to the principal component of 20 | interest} 21 | 22 | \item{topN}{Integer, number of genes with top and bottom loadings} 23 | 24 | \item{exprTable}{A \code{matrix} object, e.g. the counts of a \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}}. 25 | If not NULL, returns the counts matrix for the selected genes} 26 | 27 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids) 28 | and a column, \code{gene_name}, containing e.g. 
HGNC-based gene symbols} 29 | 30 | \item{title}{The title of the plot} 31 | } 32 | \value{ 33 | A ggplot2 object, or a \code{matrix}, if \code{exprTable} is not null 34 | } 35 | \description{ 36 | Extract genes with highest loadings 37 | } 38 | \examples{ 39 | dds <- makeExampleDESeqDataSet_multifac(betaSD = 3, betaSD_tissue = 1) 40 | rlt <- DESeq2::rlogTransformation(dds) 41 | pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt))) 42 | hi_loadings(pcaobj, topN = 20) 43 | hi_loadings(pcaobj, topN = 10, exprTable = dds) 44 | hi_loadings(pcaobj, topN = 10, exprTable = counts(dds)) 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/limmaquickpca2go.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pca2go.R 3 | \name{limmaquickpca2go} 4 | \alias{limmaquickpca2go} 5 | \title{Functional interpretation of the principal components, based on simple 6 | overrepresentation analysis} 7 | \usage{ 8 | limmaquickpca2go( 9 | se, 10 | pca_ngenes = 10000, 11 | inputType = "ENSEMBL", 12 | organism = "Mm", 13 | loadings_ngenes = 500, 14 | background_genes = NULL, 15 | scale = FALSE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{se}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(se)}, 21 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or 22 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}} 23 | 24 | \item{pca_ngenes}{Number of genes to use for the PCA} 25 | 26 | \item{inputType}{Input format type of the gene identifiers. Defaults to \code{ENSEMBL}, which will then 27 | be converted to ENTREZ ids.
Can assume values such as \code{ENTREZID}, \code{GENENAME} or \code{SYMBOL}, 28 | as normally used with the \code{select} function of \code{AnnotationDbi}} 29 | 30 | \item{organism}{Character abbreviation for the species, using \code{org.XX.eg.db} for annotation} 31 | 32 | \item{loadings_ngenes}{Number of genes to extract the loadings (in each direction)} 33 | 34 | \item{background_genes}{Which genes to consider as background.} 35 | 36 | \item{scale}{Logical, defaults to FALSE, scale values for the PCA} 37 | 38 | \item{...}{Further parameters to be passed to the goana routine} 39 | } 40 | \value{ 41 | A nested list object containing for each principal component the terms enriched 42 | in each direction. This object is intended to be used in combination with the display feature 43 | of the main \code{\link[=pcaExplorer]{pcaExplorer()}} function 44 | } 45 | \description{ 46 | Extracts the genes with the highest loadings for each principal component, and 47 | performs functional enrichment analysis on them using the simple and quick routine 48 | provided by the \code{limma} package 49 | } 50 | \examples{ 51 | library("airway") 52 | library("DESeq2") 53 | library("limma") 54 | data("airway", package = "airway") 55 | airway 56 | dds_airway <- DESeqDataSet(airway, design = ~ cell + dex) 57 | \dontrun{ 58 | rld_airway <- rlogTransformation(dds_airway) 59 | goquick_airway <- limmaquickpca2go(rld_airway, 60 | pca_ngenes = 10000, 61 | inputType = "ENSEMBL", 62 | organism = "Hs") 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /man/makeExampleDESeqDataSet_multifac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/makeds.R 3 | \name{makeExampleDESeqDataSet_multifac} 4 | \alias{makeExampleDESeqDataSet_multifac} 5 | \title{Make a simulated DESeqDataSet for two or more experimental factors} 6 |
\usage{ 7 | makeExampleDESeqDataSet_multifac( 8 | n = 1000, 9 | m = 12, 10 | betaSD_condition = 1, 11 | betaSD_tissue = 3, 12 | interceptMean = 4, 13 | interceptSD = 2, 14 | dispMeanRel = function(x) 4/x + 0.1, 15 | sizeFactors = rep(1, m) 16 | ) 17 | } 18 | \arguments{ 19 | \item{n}{number of rows (genes)} 20 | 21 | \item{m}{number of columns (samples)} 22 | 23 | \item{betaSD_condition}{the standard deviation for condition betas, i.e. beta ~ N(0,betaSD)} 24 | 25 | \item{betaSD_tissue}{the standard deviation for tissue betas, i.e. beta ~ N(0,betaSD)} 26 | 27 | \item{interceptMean}{the mean of the intercept betas (log2 scale)} 28 | 29 | \item{interceptSD}{the standard deviation of the intercept betas (log2 scale)} 30 | 31 | \item{dispMeanRel}{a function specifying the relationship of the dispersions on 32 | \code{2^trueIntercept}} 33 | 34 | \item{sizeFactors}{multiplicative factors for each sample} 35 | } 36 | \value{ 37 | a \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} with true dispersion, 38 | intercept for two factors (condition and tissue) and beta values in the 39 | metadata columns. Note that the true betas are provided on the log2 scale. 40 | } 41 | \description{ 42 | Constructs a simulated dataset of Negative Binomial data from different conditions. 43 | The fold changes between the conditions can be adjusted with the \code{betaSD_condition} 44 | and the \code{betaSD_tissue} arguments. 45 | } 46 | \details{ 47 | This function is designed and inspired following the proposal of 48 | \code{\link[DESeq2:makeExampleDESeqDataSet]{DESeq2::makeExampleDESeqDataSet()}} from the \code{DESeq2} package. 
Credits are given 49 | to Mike Love for the nice initial implementation 50 | } 51 | \examples{ 52 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 53 | dds 54 | dds2 <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1, betaSD_tissue = 4) 55 | dds2 56 | 57 | } 58 | -------------------------------------------------------------------------------- /man/pair_corr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pair_corr.R 3 | \name{pair_corr} 4 | \alias{pair_corr} 5 | \title{Pairwise scatter and correlation plot of counts} 6 | \usage{ 7 | pair_corr(df, log = FALSE, method = "pearson", use_subset = TRUE) 8 | } 9 | \arguments{ 10 | \item{df}{A data frame, containing the (raw/normalized/transformed) counts} 11 | 12 | \item{log}{Logical, whether to convert the input values to log2 (with addition 13 | of a pseudocount). Defaults to FALSE.} 14 | 15 | \item{method}{Character string, one of \code{pearson} (default), \code{kendall}, or 16 | \code{spearman} as in \code{cor}} 17 | 18 | \item{use_subset}{Logical value. 
If TRUE, only 1000 values per sample will be used 19 | to speed up the plotting operations.} 20 | } 21 | \value{ 22 | A plot with pairwise scatter plots and correlation coefficients 23 | } 24 | \description{ 25 | Pairwise scatter and correlation plot of counts 26 | } 27 | \examples{ 28 | library("airway") 29 | data("airway", package = "airway") 30 | airway 31 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway), 32 | colData = colData(airway), 33 | design = ~dex+cell) 34 | pair_corr(counts(dds_airway)[1:100, ]) # use just a subset for the example 35 | } 36 | -------------------------------------------------------------------------------- /man/pca2go.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pca2go.R 3 | \name{pca2go} 4 | \alias{pca2go} 5 | \title{Functional interpretation of the principal components} 6 | \usage{ 7 | pca2go( 8 | se, 9 | pca_ngenes = 10000, 10 | annotation = NULL, 11 | inputType = "geneSymbol", 12 | organism = "Mm", 13 | ensToGeneSymbol = FALSE, 14 | loadings_ngenes = 500, 15 | background_genes = NULL, 16 | scale = FALSE, 17 | return_ranked_gene_loadings = FALSE, 18 | annopkg = NULL, 19 | ... 20 | ) 21 | } 22 | \arguments{ 23 | \item{se}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(se)}, 24 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or 25 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}} 26 | 27 | \item{pca_ngenes}{Number of genes to use for the PCA} 28 | 29 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids) 30 | and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols} 31 | 32 | \item{inputType}{Input format type of the gene identifiers. 
Will be used by the routines of \code{topGO}} 33 | 34 | \item{organism}{Character abbreviation for the species, using \code{org.XX.eg.db} for annotation} 35 | 36 | \item{ensToGeneSymbol}{Logical, whether to expect ENSEMBL gene identifiers, to convert to gene symbols 37 | with the \code{annotation} provided} 38 | 39 | \item{loadings_ngenes}{Number of genes to extract the loadings (in each direction)} 40 | 41 | \item{background_genes}{Which genes to consider as background.} 42 | 43 | \item{scale}{Logical, defaults to FALSE, scale values for the PCA} 44 | 45 | \item{return_ranked_gene_loadings}{Logical, defaults to FALSE. If TRUE, simply returns 46 | a list containing the top ranked genes with hi loadings in each PC and in each direction} 47 | 48 | \item{annopkg}{String containing the name of the organism annotation package. Can be used to 49 | override the \code{organism} parameter, e.g. in case of alternative identifiers used 50 | in the annotation package (Arabidopsis with TAIR)} 51 | 52 | \item{...}{Further parameters to be passed to the topGO routine} 53 | } 54 | \value{ 55 | A nested list object containing for each principal component the terms enriched 56 | in each direction. 
This object is intended to be used in combination with the display feature 57 | of the main \code{\link[=pcaExplorer]{pcaExplorer()}} function 58 | } 59 | \description{ 60 | Extracts the genes with the highest loadings for each principal component, and 61 | performs functional enrichment analysis on them using routines and algorithms from 62 | the \code{topGO} package 63 | } 64 | \examples{ 65 | library("airway") 66 | library("DESeq2") 67 | data("airway", package = "airway") 68 | airway 69 | dds_airway <- DESeqDataSet(airway, design= ~ cell + dex) 70 | \dontrun{ 71 | rld_airway <- rlogTransformation(dds_airway) 72 | # constructing the annotation object 73 | anno_df <- data.frame(gene_id = rownames(dds_airway), 74 | stringsAsFactors = FALSE) 75 | library("AnnotationDbi") 76 | library("org.Hs.eg.db") 77 | anno_df$gene_name <- mapIds(org.Hs.eg.db, 78 | keys = anno_df$gene_id, 79 | column = "SYMBOL", 80 | keytype = "ENSEMBL", 81 | multiVals = "first") 82 | rownames(anno_df) <- anno_df$gene_id 83 | bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0] 84 | library(topGO) 85 | pca2go_airway <- pca2go(rld_airway, 86 | annotation = anno_df, 87 | organism = "Hs", 88 | ensToGeneSymbol = TRUE, 89 | background_genes = bg_ids) 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /man/pcaExplorer-pkg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcaExplorer-pkg.R 3 | \docType{package} 4 | \name{pcaExplorer-pkg} 5 | \alias{pcaExplorer-package} 6 | \alias{pcaExplorer-pkg} 7 | \title{pcaExplorer: interactive visualization of RNA-seq datasets based on Principal Components Analysis} 8 | \description{ 9 | pcaExplorer provides functionality for interactive visualization of RNA-seq datasets 10 | based on Principal Components Analysis.
The methods provided allow for quick information 11 | extraction and effective data exploration. A Shiny application encapsulates the whole analysis. 12 | } 13 | \details{ 14 | pcaExplorer provides functionality for interactive visualization of RNA-seq datasets 15 | based on Principal Components Analysis. The methods provided allow for quick information 16 | extraction and effective data exploration. A Shiny application encapsulates the whole analysis. 17 | } 18 | \seealso{ 19 | Useful links: 20 | \itemize{ 21 | \item \url{https://github.com/federicomarini/pcaExplorer} 22 | \item \url{https://federicomarini.github.io/pcaExplorer/} 23 | \item Report bugs at \url{https://github.com/federicomarini/pcaExplorer/issues} 24 | } 25 | 26 | } 27 | \author{ 28 | Federico Marini \email{marinif@uni-mainz.de}, 2016 29 | 30 | Maintainer: Federico Marini \email{marinif@uni-mainz.de} 31 | } 32 | -------------------------------------------------------------------------------- /man/pcaExplorer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcaExplorer.R 3 | \name{pcaExplorer} 4 | \alias{pcaExplorer} 5 | \title{Explore a dataset from a PCA perspective} 6 | \usage{ 7 | pcaExplorer( 8 | dds = NULL, 9 | dst = NULL, 10 | countmatrix = NULL, 11 | coldata = NULL, 12 | pca2go = NULL, 13 | annotation = NULL, 14 | runLocal = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{dds}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object. If not provided, then a \code{countmatrix} 19 | and a \code{coldata} need to be provided. If none of the above is provided, it is possible 20 | to upload the data during the execution of the Shiny App} 21 | 22 | \item{dst}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object. Can be computed from the \code{dds} object 23 | if left NULL. 
If none is provided, then a \code{countmatrix} 24 | and a \code{coldata} need to be provided. If none of the above is provided, it is possible 25 | to upload the data during the execution of the Shiny App} 26 | 27 | \item{countmatrix}{A count matrix, with genes as rows and samples as columns. If not provided, it is possible 28 | to upload the data during the execution of the Shiny App} 29 | 30 | \item{coldata}{A data.frame containing the info on the covariates of each sample. If not provided, it is possible 31 | to upload the data during the execution of the Shiny App} 32 | 33 | \item{pca2go}{An object generated by the \code{\link[=pca2go]{pca2go()}} function, which contains 34 | the information on enriched functional categories in the genes that show the top or bottom loadings 35 | in each principal component of interest. If not provided, it is possible 36 | to compute live during the execution of the Shiny App} 37 | 38 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids) 39 | and a column, \code{gene_name}, containing e.g. 
HGNC-based gene symbols} 40 | 41 | \item{runLocal}{A logical indicating whether the app is to be run locally or remotely on a server, which determines how documentation will be accessed.} 42 | } 43 | \value{ 44 | A Shiny App is launched for interactive data exploration 45 | } 46 | \description{ 47 | Launch a Shiny App for interactive exploration of a dataset from the perspective 48 | of Principal Components Analysis 49 | } 50 | \examples{ 51 | library("airway") 52 | data("airway", package = "airway") 53 | airway 54 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway), 55 | colData = colData(airway), 56 | design = ~dex+cell) 57 | \dontrun{ 58 | rld_airway <- DESeq2::rlogTransformation(dds_airway) 59 | 60 | pcaExplorer(dds_airway, rld_airway) 61 | 62 | pcaExplorer(countmatrix = counts(dds_airway), coldata = colData(dds_airway)) 63 | 64 | pcaExplorer() # and then upload count matrix, covariate matrix (and eventual annotation) 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /man/pcaplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcaplot.R 3 | \name{pcaplot} 4 | \alias{pcaplot} 5 | \title{Sample PCA plot for transformed data} 6 | \usage{ 7 | pcaplot( 8 | x, 9 | intgroup = NULL, 10 | ntop = 500, 11 | returnData = FALSE, 12 | title = NULL, 13 | pcX = 1, 14 | pcY = 2, 15 | text_labels = TRUE, 16 | point_size = 3, 17 | ellipse = TRUE, 18 | ellipse.prob = 0.95 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(x)}, 23 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or 24 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}/\code{\link[DESeq2:vst]{DESeq2::vst()}}} 25 | 26 | \item{intgroup}{Interesting groups: a character 
vector of 27 | names in \code{colData(x)} to use for grouping. Defaults to NULL, which would then 28 | select the first column of the \code{colData} slot} 29 | 30 | \item{ntop}{Number of top genes to use for principal components, 31 | selected by highest row variance} 32 | 33 | \item{returnData}{logical, if TRUE returns a data.frame for further use, containing the 34 | selected principal components and intgroup covariates for custom plotting} 35 | 36 | \item{title}{The plot title} 37 | 38 | \item{pcX}{The principal component to display on the x axis} 39 | 40 | \item{pcY}{The principal component to display on the y axis} 41 | 42 | \item{text_labels}{Logical, whether to display the labels with the sample identifiers} 43 | 44 | \item{point_size}{Integer, the size of the points for the samples} 45 | 46 | \item{ellipse}{Logical, whether to display the confidence ellipse for the selected groups} 47 | 48 | \item{ellipse.prob}{Numeric, a value in the interval [0;1)} 49 | } 50 | \value{ 51 | An object created by \code{ggplot}, which can be assigned and further customized. 
52 | } 53 | \description{ 54 | Plots the results of PCA on a 2-dimensional space 55 | } 56 | \examples{ 57 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 58 | rlt <- DESeq2::rlogTransformation(dds) 59 | pcaplot(rlt, ntop = 200) 60 | 61 | } 62 | -------------------------------------------------------------------------------- /man/pcaplot3d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcaplot.R 3 | \name{pcaplot3d} 4 | \alias{pcaplot3d} 5 | \title{Sample PCA plot for transformed data} 6 | \usage{ 7 | pcaplot3d( 8 | x, 9 | intgroup = "condition", 10 | ntop = 500, 11 | returnData = FALSE, 12 | title = NULL, 13 | pcX = 1, 14 | pcY = 2, 15 | pcZ = 3, 16 | text_labels = TRUE, 17 | point_size = 3 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(x)}, 22 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or 23 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}} 24 | 25 | \item{intgroup}{Interesting groups: a character vector of 26 | names in \code{colData(x)} to use for grouping} 27 | 28 | \item{ntop}{Number of top genes to use for principal components, 29 | selected by highest row variance} 30 | 31 | \item{returnData}{logical, if TRUE returns a data.frame for further use, containing the 32 | selected principal components and intgroup covariates for custom plotting} 33 | 34 | \item{title}{The plot title} 35 | 36 | \item{pcX}{The principal component to display on the x axis} 37 | 38 | \item{pcY}{The principal component to display on the y axis} 39 | 40 | \item{pcZ}{The principal component to display on the z axis} 41 | 42 | \item{text_labels}{Logical, whether to display the labels with the sample identifiers} 43 | 44 | \item{point_size}{Integer, the 
size of the points for the samples} 45 | } 46 | \value{ 47 | A html-based visualization of the 3d PCA plot 48 | } 49 | \description{ 50 | Plots the results of PCA on a 3-dimensional space, interactively 51 | } 52 | \examples{ 53 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 54 | rlt <- DESeq2::rlogTransformation(dds) 55 | pcaplot3d(rlt, ntop = 200) 56 | } 57 | -------------------------------------------------------------------------------- /man/pcascree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pcaplot.R 3 | \name{pcascree} 4 | \alias{pcascree} 5 | \title{Scree plot of the PCA on the samples} 6 | \usage{ 7 | pcascree(obj, type = c("pev", "cev"), pc_nr = NULL, title = NULL) 8 | } 9 | \arguments{ 10 | \item{obj}{A \code{prcomp} object} 11 | 12 | \item{type}{Display absolute proportions or cumulative proportion. Possible values: 13 | "pev" or "cev"} 14 | 15 | \item{pc_nr}{How many principal components to display max} 16 | 17 | \item{title}{Title of the plot} 18 | } 19 | \value{ 20 | An object created by \code{ggplot}, which can be assigned and further customized. 
21 | } 22 | \description{ 23 | Produces a scree plot for investigating the proportion of explained variance, or 24 | alternatively the cumulative value 25 | } 26 | \examples{ 27 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 28 | rlt <- DESeq2::rlogTransformation(dds) 29 | pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt))) 30 | pcascree(pcaobj, type = "pev") 31 | pcascree(pcaobj, type = "cev", title = "Cumulative explained proportion of variance - Test dataset") 32 | 33 | } 34 | -------------------------------------------------------------------------------- /man/plotPCcorrs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/correlatePCs.R 3 | \name{plotPCcorrs} 4 | \alias{plotPCcorrs} 5 | \title{Plot significance of (cor)relations of covariates VS principal components} 6 | \usage{ 7 | plotPCcorrs(pccorrs, pc = 1, logp = TRUE) 8 | } 9 | \arguments{ 10 | \item{pccorrs}{A \code{data.frame} object generated by \link{correlatePCs}} 11 | 12 | \item{pc}{An integer number, corresponding to the principal component of 13 | interest} 14 | 15 | \item{logp}{Logical, defaults to \code{TRUE}, displays the -\code{log10} of 16 | the pvalue instead of the p value itself} 17 | } 18 | \value{ 19 | A base plot object 20 | } 21 | \description{ 22 | Plots the significance of the (cor)relation of each covariate vs a principal component 23 | } 24 | \examples{ 25 | library(DESeq2) 26 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 27 | rlt <- rlogTransformation(dds) 28 | pcaobj <- prcomp(t(assay(rlt))) 29 | res <- correlatePCs(pcaobj, colData(dds)) 30 | plotPCcorrs(res) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/topGOtable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do 
not edit by hand 2 | % Please edit documentation in R/pca2go.R 3 | \name{topGOtable} 4 | \alias{topGOtable} 5 | \title{Extract functional terms enriched in the DE genes, based on topGO} 6 | \usage{ 7 | topGOtable( 8 | DEgenes, 9 | BGgenes, 10 | ontology = "BP", 11 | annot = annFUN.org, 12 | mapping = "org.Mm.eg.db", 13 | geneID = "symbol", 14 | topTablerows = 200, 15 | fullNamesInRows = TRUE, 16 | addGeneToTerms = TRUE, 17 | plotGraph = FALSE, 18 | plotNodes = 10, 19 | writeOutput = FALSE, 20 | outputFile = "", 21 | topGO_method2 = "elim", 22 | do_padj = FALSE 23 | ) 24 | } 25 | \arguments{ 26 | \item{DEgenes}{A vector of (differentially expressed) genes} 27 | 28 | \item{BGgenes}{A vector of background genes, e.g. all (expressed) genes in the assays} 29 | 30 | \item{ontology}{Which Gene Ontology domain to analyze: \code{BP} (Biological Process), \code{MF} (Molecular Function), or \code{CC} (Cellular Component)} 31 | 32 | \item{annot}{Which function to use for annotating genes to GO terms. Defaults to \code{annFUN.org}} 33 | 34 | \item{mapping}{Which \code{org.XX.eg.db} to use for annotation - select according to the species} 35 | 36 | \item{geneID}{In which format the genes are provided.
Defaults to \code{symbol}, could also be 37 | \code{entrez} or \code{ENSEMBL}} 38 | 39 | \item{topTablerows}{How many rows to report before any filtering} 40 | 41 | \item{fullNamesInRows}{Logical, whether to display or not the full names for the GO terms} 42 | 43 | \item{addGeneToTerms}{Logical, whether to add a column with all genes annotated to each GO term} 44 | 45 | \item{plotGraph}{Logical, if TRUE additionally plots a graph on the identified GO terms} 46 | 47 | \item{plotNodes}{Number of nodes to plot} 48 | 49 | \item{writeOutput}{Logical, if TRUE additionally writes out the result to a file} 50 | 51 | \item{outputFile}{Name of the file the result should be written into} 52 | 53 | \item{topGO_method2}{Character, specifying which of the methods implemented by \code{topGO} should be used, in addition to the \code{classic} algorithm. Defaults to \code{elim}} 54 | 55 | \item{do_padj}{Logical, whether to perform the adjustment on the p-values from the specific 56 | topGO method, based on the FDR correction. Defaults to FALSE, since the assumption of 57 | independent hypotheses is somewhat violated by the intrinsic DAG-structure of the Gene 58 | Ontology Terms} 59 | } 60 | \value{ 61 | A table containing the computed GO Terms and related enrichment scores 62 | } 63 | \description{ 64 | A wrapper for extracting functional GO terms enriched in the DE genes, based on 65 | the algorithm and the implementation in the topGO package 66 | } 67 | \details{ 68 | Allowed values assumed by the \code{topGO_method2} parameter are one of the 69 | following: \code{elim}, \code{weight}, \code{weight01}, \code{lea}, 70 | \code{parentchild}. 
For more details on this, please refer to the original 71 | documentation of the \code{topGO} package itself 72 | } 73 | \examples{ 74 | library("airway") 75 | library("DESeq2") 76 | data("airway", package = "airway") 77 | airway 78 | dds_airway <- DESeqDataSet(airway, design= ~ cell + dex) 79 | # Example, performing extraction of enriched functional categories in 80 | # detected significantly expressed genes 81 | \dontrun{ 82 | dds_airway <- DESeq(dds_airway) 83 | res_airway <- results(dds_airway) 84 | library("AnnotationDbi") 85 | library("org.Hs.eg.db") 86 | res_airway$symbol <- mapIds(org.Hs.eg.db, 87 | keys = row.names(res_airway), 88 | column = "SYMBOL", 89 | keytype = "ENSEMBL", 90 | multiVals = "first") 91 | res_airway$entrez <- mapIds(org.Hs.eg.db, 92 | keys = row.names(res_airway), 93 | column = "ENTREZID", 94 | keytype = "ENSEMBL", 95 | multiVals = "first") 96 | resOrdered <- as.data.frame(res_airway[order(res_airway$padj),]) 97 | de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj),] 98 | de_symbols <- de_df$symbol 99 | bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0] 100 | bg_symbols <- mapIds(org.Hs.eg.db, 101 | keys = bg_ids, 102 | column = "SYMBOL", 103 | keytype = "ENSEMBL", 104 | multiVals = "first") 105 | library(topGO) 106 | topgoDE_airway <- topGOtable(de_symbols, bg_symbols, 107 | ontology = "BP", 108 | mapping = "org.Hs.eg.db", 109 | geneID = "symbol") 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(pcaExplorer) 3 | 4 | test_check("pcaExplorer") 5 | -------------------------------------------------------------------------------- /tests/testthat/setuptests_pcaExplorer.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages({ 2 | library("DESeq2") 3 | 
library("SummarizedExperiment") 4 | library("airway") 5 | library("AnnotationDbi") 6 | library("org.Hs.eg.db") 7 | }) 8 | 9 | # prepping the test datasets only once ----------------------------------------- 10 | dds <- makeExampleDESeqDataSet(n = 1000, m = 8) 11 | rlt <- rlogTransformation(dds) 12 | cm <- counts(dds) 13 | cd <- colData(dds) 14 | 15 | 16 | dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 17 | rlt_multifac <- rlogTransformation(dds_multifac) 18 | 19 | 20 | data("airway", package = "airway") 21 | dds_airway <- DESeqDataSet(airway, design = ~ cell + dex) 22 | dds_airway <- DESeq(dds_airway) 23 | res_airway <- results(dds_airway) 24 | 25 | rld_airway <- rlogTransformation(dds_airway) 26 | 27 | res_airway$symbol <- mapIds(org.Hs.eg.db, 28 | keys = row.names(res_airway), 29 | column = "SYMBOL", 30 | keytype = "ENSEMBL", 31 | multiVals = "first") 32 | res_airway$entrez <- mapIds(org.Hs.eg.db, 33 | keys = row.names(res_airway), 34 | column = "ENTREZID", 35 | keytype = "ENSEMBL", 36 | multiVals = "first") 37 | -------------------------------------------------------------------------------- /tests/testthat/test_annotations.R: -------------------------------------------------------------------------------- 1 | test_that("Annotation data frame retrieval", { 2 | anno_df <- get_annotation_orgdb(dds_airway, "org.Hs.eg.db", "ENSEMBL") 3 | 4 | expect_s3_class(anno_df, "data.frame") 5 | expect_true(all(dim(anno_df) == c(63677, 2))) 6 | }) -------------------------------------------------------------------------------- /tests/testthat/test_correlatePCs.R: -------------------------------------------------------------------------------- 1 | test_that("Correlation of the PCs", { 2 | pcaobj <- prcomp(t(assay(rlt_multifac))) 3 | res <- correlatePCs(pcaobj, colData(dds_multifac)) 4 | 5 | expect_equal(dim(res), c(4, 2)) 6 | expect_equal(colnames(res), colnames(colData(dds_multifac))) 7 | 8 | plotPCcorrs(res) 9 | 10 | plotPCcorrs(res, 
logp = FALSE, pc = 2) 11 | }) 12 | 13 | -------------------------------------------------------------------------------- /tests/testthat/test_hiloadings.R: -------------------------------------------------------------------------------- 1 | test_that("Check that genes with hi loadings are extracted", { 2 | pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt))) 3 | anno <- data.frame(gene_id = rownames(dds), 4 | gene_name = toupper(rownames(dds)), 5 | stringsAsFactors = FALSE, 6 | row.names = rownames(dds)) 7 | 8 | hi_loadings(pcaobj, 1) 9 | 10 | expect_is(hi_loadings(pcaobj, 1, exprTable = counts(dds)), "matrix") 11 | expect_true( 12 | all(rownames(hi_loadings(pcaobj, 1, exprTable = counts(dds), annotation = NULL)) %in% rownames(dds)) 13 | ) 14 | expect_true( 15 | all(rownames(hi_loadings(pcaobj, 1, exprTable = counts(dds), annotation = anno)) %in% anno$gene_name) 16 | ) 17 | }) 18 | 19 | -------------------------------------------------------------------------------- /tests/testthat/test_makeDS.R: -------------------------------------------------------------------------------- 1 | test_that("Check that example dds is generated correctly", { 2 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1) 3 | 4 | expect_equal(names(colData(dds)), c("condition", "tissue")) 5 | }) 6 | -------------------------------------------------------------------------------- /tests/testthat/test_moreplots.R: -------------------------------------------------------------------------------- 1 | test_that("Gene profiler does its job properly", { 2 | geneprofiler(rlt, paste0("gene", sample(1:1000, 20))) 3 | 4 | expect_error( 5 | expect_message( 6 | geneprofiler(rlt, "gene_fake") 7 | ) 8 | ) 9 | }) 10 | 11 | test_that("Distribution of expression", { 12 | p <- distro_expr(rlt) 13 | expect_true(is(p, "gg")) 14 | }) 15 | 16 | test_that("Correlation scatter plot matrix works", { 17 | pair_corr(counts(dds)[1:100, ]) 18 | expect_error(pair_corr(dds)) 19 | }) 20 | 
-------------------------------------------------------------------------------- /tests/testthat/test_pca2go.R: -------------------------------------------------------------------------------- 1 | test_that("Checks on the functional enrichment of subset of genes/genes with hi loadings",{ 2 | resOrdered <- as.data.frame(res_airway[order(res_airway$padj), ]) 3 | de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj), ] 4 | de_symbols <- de_df$symbol 5 | bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0] 6 | bg_symbols <- mapIds(org.Hs.eg.db, 7 | keys = bg_ids, 8 | column = "SYMBOL", 9 | keytype = "ENSEMBL", 10 | multiVals = "first") 11 | library(topGO) 12 | 13 | expect_is(de_symbols, "character") 14 | expect_is(bg_symbols, "character") 15 | 16 | # topgoDE_airway <- topGOtable(de_symbols, bg_symbols, 17 | # ontology = "BP", 18 | # mapping = "org.Hs.eg.db", 19 | # geneID = "symbol") 20 | # 21 | # expect_is(topgoDE_airway,"data.frame") 22 | ngenes_pca <- 500 23 | 24 | goquick_airway <- limmaquickpca2go(rld_airway, 25 | pca_ngenes = ngenes_pca, 26 | inputType = "ENSEMBL", 27 | organism = "Hs") 28 | 29 | expect_type(goquick_airway, "list") 30 | expect_equal(length(goquick_airway), 4) 31 | sapply(goquick_airway, names) 32 | expect_equal(attr(goquick_airway, "n_genesforpca"), ngenes_pca) 33 | 34 | expect_error( 35 | expect_warning( 36 | limmaquickpca2go(rld_airway, 37 | pca_ngenes = ngenes_pca, 38 | inputType = "ENSEMBL", 39 | organism = "foo") 40 | ) 41 | ) # additionally throws a warning 42 | }) 43 | -------------------------------------------------------------------------------- /tests/testthat/test_pcagenes.R: -------------------------------------------------------------------------------- 1 | test_that("Checks on the pca on the genes", { 2 | groups <- colData(dds_multifac)$condition 3 | cols <- scales::hue_pal()(2)[groups] 4 | p <- genespca(rlt_multifac, ntop = 100, arrowColors = cols, groupNames = groups) 5 | 6 | expect_true(is(p, "gg")) 7 | 
8 | dat <- genespca(rlt_multifac, ntop = 100, arrowColors = cols, groupNames = groups, returnData = TRUE) 9 | 10 | p2 <- genespca(rlt_multifac, ntop = 100) 11 | p3 <- genespca(rlt_multifac, ntop = 100, arrowColors = "green") 12 | 13 | expect_error(genespca(rlt_multifac, ntop = 100, arrowColors = c("green", "red"))) 14 | 15 | groups_multi <- interaction(as.data.frame(colData(rlt_multifac)[, c("condition", "tissue")])) 16 | cols_multi <- scales::hue_pal()(length(levels(groups_multi)))[factor(groups_multi)] 17 | p4 <- genespca(rlt_multifac, ntop = 100, arrowColors = cols_multi, groupNames = groups_multi) 18 | 19 | expect_true(is(p4, "gg")) 20 | }) 21 | 22 | -------------------------------------------------------------------------------- /tests/testthat/test_pcasamples.R: -------------------------------------------------------------------------------- 1 | test_that("Checks on the pca on the samples", { 2 | pcaobj <- prcomp(t(assay(rlt_multifac))) 3 | 4 | colData(dds_multifac) 5 | 6 | pcaplot(rlt_multifac) 7 | dat <- pcaplot(rlt_multifac, returnData = TRUE) 8 | 9 | p <- pcaplot(rlt_multifac, intgroup = c("condition", "tissue")) 10 | expect_true(is(p, "gg")) 11 | 12 | expect_message({ 13 | p_def <- pcaplot(rlt_multifac) 14 | expect_true(is(p_def, "gg")) 15 | }, "Defaulting to 'condition'") 16 | 17 | expect_error({ 18 | rlt_nocoldata <- rlt_multifac 19 | colData(rlt_nocoldata)$condition <- NULL 20 | colData(rlt_nocoldata)$tissue <- NULL 21 | colData(rlt_nocoldata)$sizeFactor <- NULL 22 | 23 | pcaplot(rlt_nocoldata) 24 | }, 25 | "No colData has been provided") 26 | 27 | dat <- pcaplot(rlt_multifac, intgroup = c("condition", "tissue"), returnData = TRUE) 28 | 29 | expect_error(pcaplot(rlt_multifac, intgroup = "foo")) 30 | 31 | p2 <- pcascree(pcaobj) 32 | expect_true(is(p2, "gg")) 33 | p3 <- pcascree(pcaobj, type = "cev") 34 | expect_true(is(p3, "gg")) 35 | expect_error(pcascree(pcaobj, type = "foo")) 36 | }) 37 | 
-------------------------------------------------------------------------------- /tests/testthat/test_shiny.R: -------------------------------------------------------------------------------- 1 | test_that("Shiny app is generated", { 2 | expect_is(pcaExplorer(), "shiny.appobj") 3 | 4 | expect_is(pcaExplorer(dds, rlt), "shiny.appobj") 5 | 6 | expect_is(pcaExplorer(countmatrix = cm, coldata = cd), "shiny.appobj") 7 | 8 | expect_is(pcaExplorer(dds = dds), "shiny.appobj") 9 | }) 10 | -------------------------------------------------------------------------------- /vignettes/newsnap_01_upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_01_upload.png -------------------------------------------------------------------------------- /vignettes/newsnap_02_instructions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_02_instructions.png -------------------------------------------------------------------------------- /vignettes/newsnap_03_countstable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_03_countstable.png -------------------------------------------------------------------------------- /vignettes/newsnap_04_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_04_overview.png -------------------------------------------------------------------------------- /vignettes/newsnap_05_samples.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_05_samples.png -------------------------------------------------------------------------------- /vignettes/newsnap_06_genes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_06_genes.png -------------------------------------------------------------------------------- /vignettes/newsnap_07_finder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_07_finder.png -------------------------------------------------------------------------------- /vignettes/newsnap_08_pca2go.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_08_pca2go.png -------------------------------------------------------------------------------- /vignettes/newsnap_09_multifac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_09_multifac.png -------------------------------------------------------------------------------- /vignettes/newsnap_10_editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_10_editor.png -------------------------------------------------------------------------------- /vignettes/newsnap_11_about.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_11_about.png -------------------------------------------------------------------------------- /vignettes/unr_00_demo_loaded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_00_demo_loaded.png -------------------------------------------------------------------------------- /vignettes/unr_01_splom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_01_splom.png -------------------------------------------------------------------------------- /vignettes/unr_02_sts_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_02_sts_heatmap.png -------------------------------------------------------------------------------- /vignettes/unr_03_summary_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_03_summary_counts.png -------------------------------------------------------------------------------- /vignettes/unr_04a_samplespca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_04a_samplespca.png -------------------------------------------------------------------------------- /vignettes/unr_04b_samples_dex.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_04b_samples_dex.png -------------------------------------------------------------------------------- /vignettes/unr_05_loadings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_05_loadings.png -------------------------------------------------------------------------------- /vignettes/unr_06a_genefinder_dusp1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06a_genefinder_dusp1.png -------------------------------------------------------------------------------- /vignettes/unr_06b_genefinder_per1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06b_genefinder_per1.png -------------------------------------------------------------------------------- /vignettes/unr_06c_genefinder_ddx3y.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06c_genefinder_ddx3y.png -------------------------------------------------------------------------------- /vignettes/unr_06c_genefinder_ddx3y_dex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06c_genefinder_ddx3y_dex.png 
-------------------------------------------------------------------------------- /vignettes/unr_07_genespca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_07_genespca.png -------------------------------------------------------------------------------- /vignettes/unr_08_pca2go_topgo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_08_pca2go_topgo.png -------------------------------------------------------------------------------- /vignettes/unr_90_exitsave.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_90_exitsave.png -------------------------------------------------------------------------------- /vignettes/unr_99_editreport.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_99_editreport.png -------------------------------------------------------------------------------- /vignettes/upandrunning.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: > 3 | Up and running with `r Biocpkg("pcaExplorer")` 4 | author: 5 | - name: Federico Marini 6 | affiliation: 7 | - &id1 Institute of Medical Biostatistics, Epidemiology and Informatics (IMBEI), Mainz 8 | - Center for Thrombosis and Hemostasis (CTH), Mainz 9 | email: marinif@uni-mainz.de 10 | - name: Harald Binder 11 | affiliation: *id1 12 | date: "`r BiocStyle::doc_date()`" 13 | package: "`r BiocStyle::pkg_ver('pcaExplorer')`" 14 | output: 15 | 
BiocStyle::html_document: 16 | toc_float: true 17 | fig_caption: TRUE 18 | vignette: > 19 | %\VignetteIndexEntry{Up and running with pcaExplorer} 20 | %\VignetteEngine{knitr::rmarkdown} 21 | %\VignetteEncoding{UTF-8} 22 | %\VignettePackage{pcaExplorer} 23 | %\VignetteKeywords{Visualization, RNASeq, DimensionReduction, PrincipalComponent, QualityControl, GUI, ReportWriting} 24 | --- 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | ```{r} 38 | knitr::opts_chunk$set(crop = NULL) 39 | ``` 40 | 41 | # Setup 42 | 43 | First things first: install `r Biocpkg("pcaExplorer")` and load it into your R session. 44 | You should receive a message notification if this is completed without errors. 45 | 46 | ```{r eval=FALSE} 47 | BiocManager::install("pcaExplorer") 48 | library("pcaExplorer") 49 | ``` 50 | 51 | This document describes a use case for `r Biocpkg("pcaExplorer")`, based on the dataset in the `r Biocpkg("airway")` package. 52 | If this package is not available on your machine, please install it by executing: 53 | 54 | ```{r eval=FALSE} 55 | BiocManager::install("airway") 56 | ``` 57 | 58 | This dataset consists of the gene-level expression measurements (as raw read counts) for an experiment where four different human airway smooth muscle cell lines are either treated with dexamethasone or left untreated. 59 | 60 | # Start exploring - the beauty of interactivity 61 | 62 | To start the exploration, you just need the following lines: 63 | 64 | ```{r, eval=FALSE} 65 | library("pcaExplorer") 66 | pcaExplorer() 67 | ``` 68 | 69 | The easiest way to explore the `r Biocpkg("airway")` dataset is by clicking on the dedicated button in the **Data Upload** panel. 
70 | This action will: 71 | 72 | - load the `r Biocpkg("airway")` package 73 | - load the count matrix and the experimental metadata 74 | - compose the `dds` object, normalize the expression values (using the robust method proposed by Anders and Huber in the original DESeq manuscript), and compute the variance stabilizing transformed expression values (stored in the `dst` object) 75 | - retrieve the gene annotation information via the `r Biocpkg("org.Hs.eg.db")`, adding gene symbols to the ENSEMBL ids - this step is optional, but recommended for more human-readable identifiers to be used. 76 | 77 | If you want to load your own expression data, please refer to the `r Biocpkg("pcaExplorer", vignette="pcaExplorer.html", label="User Guide")`, which contains detailed information on the formats your data have to respect. 78 | 79 | Once the preprocessing of the input is done, you should get a notification in the lower right corner that you're all set. 80 | The whole preprocessing should take around 5-6 seconds (tested on a MacBook Pro, with i7 and 16 GB RAM). 81 | You can check what each component looks like by clicking on its respective button, once the buttons have appeared in the lower half of the panel. 82 | 83 | ```{r ss00, echo=FALSE, fig.cap="Overview of the Data Upload panel. After clicking on the 'Load the demo airway data' button, all widgets are automatically populated, and each data component (count matrix, experimental data, dds object, annotation) can be previewed in a modal window by clicking on its respective button."} 84 | knitr::include_graphics("unr_00_demo_loaded.png") 85 | ``` 86 | 87 | You can proceed to explore the expression values of your dataset in the **Counts Table** tab. 88 | You can change the data type you are displaying between raw counts, normalized, or transformed, and plot their values in a scatterplot matrix to explore their sample-to-sample correlations.
89 | To try this, select for example "Normalized counts", change the correlation coefficient to "spearman", and click on the `Run` action button. 90 | The correlation values will also be displayed as a heatmap. 91 | 92 | ```{r ss01, echo=FALSE, fig.cap="Screenshot of the sample to sample scatter plot matrix. The user can select the correlation method to use, the option to plot values on log2 scales, and the possibility to use a subset of genes (to obtain a quicker overview if many samples are provided)."} 93 | knitr::include_graphics("unr_01_splom.png") 94 | ``` 95 | 96 | Additional information, both for samples and for features, is displayed in the **Data overview** panel. 97 | A closer look at the metadata of the `airway` set highlights how each combination of cell type (`cell`) and dexamethasone treatment (`dex`) is represented by a single sequencing experiment. 98 | The 8 samples in the demo dataset are themselves a subsample of the [full GEO record](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52778), namely the ones not treated with albuterol (`alb` column). 99 | 100 | The relationship among samples can be seen in the sample-to-sample heatmap. 101 | For example, by selecting the Manhattan distance metric, it is evident how the samples cluster by dex treatment, yet they show a dendrogram structure that recalls the 4 different cell types used. 102 | The total sum of counts per sample is displayed as a bar plot. 103 | 104 | ```{r ss02, echo=FALSE, fig.cap="Screenshot of the sample to sample heatmap. Selected is the Manhattan distance, but Euclidean and correlation-based distance are also provided as options.
In this case, the user has also selected the dex and cell factors in the 'Group/color by' widget in the sidebar menu, and these covariates decorate the heatmap to facilitate identification of patterns."} 105 | knitr::include_graphics("unr_02_sts_heatmap.png") 106 | ``` 107 | 108 | Patterns can become clearer after selecting, in the **App settings** on the left, an experimental factor to group and color by: try selecting `dex`, for example. 109 | If more than one covariate is selected, the interaction between these will be taken as a grouping factor. 110 | To remove one, simply click on it to highlight and press the del or backspace key to delete it. 111 | Try doing so by also clicking on `cell`, and then removing `dex` afterwards. 112 | 113 | Basic summary information is also displayed for the genes. 114 | In the count matrix provided, one can check how many genes were detected, by selecting a "Threshold on the row sums of the counts" or on the row means of the normalized counts (more stringent). 115 | For example, selecting 5 in both cases, only 24345 genes have a total number of counts, summed by row, greater than 5, and 17745 genes have more than 5 counts (normalized) on average. 116 | 117 | ```{r ss03, echo=FALSE, fig.cap="Screenshot of the Basic Summary of the counts in the Data Overview panel. General information is provided, together with an overview on detected genes according to different filtering criteria."} 118 | knitr::include_graphics("unr_03_summary_counts.png") 119 | ``` 120 | 121 | The **Samples View** and the **Genes View** are the tabs where most results coming from Principal Component Analysis, either performed on the samples or on the genes, can be explored in depth. 122 | Assuming you selected `cell` in the "Group/color by" option on the left, the Samples PCA plot should clearly display how the cell type explains a considerable portion of the variability in the dataset (corresponding to the second PC).
123 | To check that `dex` treatment is the main source of variability, select that instead of `cell`. 124 | 125 | ```{r ss04a, echo=FALSE, fig.cap="The Samples View panel. Displayed are a PCA plot (left) and the corresponding scree plot (right), with the samples colored and labeled by cell type - separating on the second principal component."} 126 | knitr::include_graphics("unr_04a_samplespca.png") 127 | ``` 128 | 129 | The scree plot on the right shows how many components should be retained for a satisfactory reduced dimension view of the original set, with their eigenvalues from largest to smallest. 130 | To explore the PCs other than the first and the second one, you can just select them in the x-axis PC and y-axis PC widgets in the left sidebar. 131 | 132 | ```{r ss04b, echo=FALSE, fig.cap="PCA plot for the samples, colored by dexamethasone treatment. The dex factor is the main driver of the variability in the data, and samples separate nicely on the first principal component."} 133 | knitr::include_graphics("unr_04b_samples_dex.png") 134 | ``` 135 | 136 | If you brush (left-click and hold) on the PCA plot, you can display a zoomed version of it in the frame below. 137 | If you suspect some samples might be outliers (though this is not the case in the `airway` set), you can select them in the dedicated plot, and get a first impression of what the remainder of the samples would look like. 138 | On the right side, you can quickly check which genes show the top and bottom loadings, split by principal component. 139 | First, change the value in the input widget to 20; then, select one from each list and try to check them in the **Gene Finder** tab; try for example with *DUSP1*, *PER1*, and *DDX3Y*. 140 | 141 | ```{r ss05, echo=FALSE, fig.cap="Genes with highest loadings on the first and second principal components.
The user can select how many top and bottom genes will be displayed, and the gene names are printed below each gene's contribution on each PC."} 142 | knitr::include_graphics("unr_05_loadings.png") 143 | ``` 144 | 145 | While *DUSP1* and *PER1* clearly show a change in expression upon dexamethasone treatment (and indeed were reported among the well-known glucocorticoid-responsive genes in the original publication of Himes et al., 2014), *DDX3Y* displays variability at the cell type level (select `cell` in the Group/color by widget): this gene is almost undetected in N061011 cells, and this high variance is what determines its high loading on the second principal component. 146 | 147 | ```{r ss06a, echo=FALSE, fig.cap="Plot of the gene expression levels of DUSP1. Points are split according to dex treatment, and both graphics and table are displayed."} 148 | knitr::include_graphics("unr_06a_genefinder_dusp1.png") 149 | ``` 150 | 151 | ```{r ss06b, echo=FALSE, fig.cap="Plot of the gene expression levels of PER1. Points are split according to dex treatment."} 152 | knitr::include_graphics("unr_06b_genefinder_per1.png") 153 | ``` 154 | 155 | ```{r ss06c, echo=FALSE, fig.cap="Plot of the gene expression levels of DDX3Y. Points are split according to cell type, as this gene was highly variable across this experimental factor - indeed, in one cell type it is barely detected."} 156 | knitr::include_graphics("unr_06c_genefinder_ddx3y.png") 157 | ``` 158 | 159 | You can see the single expression values in a table as well, and this information can be downloaded with a simple click. 160 | 161 | Back to the **Samples View**, you can experiment with the number of top variable genes to see how the results of PCA are in this case robust to a wide range of this value - this might not be the case with other datasets, and the simplicity of interacting with these parameters makes it easy to iterate in the exploration steps.
162 | 163 | Proceeding to the **Genes View**, you can see the dual of the Samples PCA: now the samples are displayed as arrows in the genes biplot, which can show which genes display a similar behaviour. 164 | You can capture this with a simple brushing action on the plot, and notice how their profiles throughout all samples are shown in the Profile explorer below; moreover, a static and an interactive heatmap, together with a table containing the underlying data, are generated in the rows below. 165 | 166 | ```{r ss07, echo=FALSE, fig.cap="The Genes View panel. Upper panel: the genes biplot, and its zoomed plot, with gene names displayed. Lower panel: the profile explorer of the selected subset of genes (corresponding to the zoomed window), and the boxplot for the gene selected by clicking close to a location in the zoomed window."} 167 | knitr::include_graphics("unr_07_genespca.png") 168 | ``` 169 | 170 | Since we compute the gene annotation table as well, it's nice to read the gene symbols in the zoomed window (instead of the ENSEMBL ids). 171 | By clicking close enough to any of these genes, the expression values are plotted, in a similar fashion as in the **Gene Finder**. 172 | 173 | The tab **PCA2GO** helps you understand which are the common biological themes (default: the Gene Ontology Biological Process terms) in the genes showing up in the top and in the bottom loadings for each principal component. 174 | Since we launched the `pcaExplorer` app without additional parameters, this information is not available, but can be computed live (this might take a while). 175 | 176 | ```{r ss08, echo=FALSE, fig.cap="The PCA2GO panel.
Four tables (2 per dimension, here only 3 are displayed) decorate the PCA plot in the middle, and display the top enriched functional categories in each subset of genes with high loadings."} 177 | knitr::include_graphics("unr_08_pca2go_topgo.png") 178 | ``` 179 | 180 | Still, a previous call to `pca2go` is recommended, as it relies on the algorithm of the `r Biocpkg("topGO")` package: it will require some additional computing time, but it is likely to deliver more precise terms (i.e. in turn more relevant from a biological point of view). To do so, you should exit the live session, compute this object, and provide it in the call to `pcaExplorer` (see more on how to do so in `r Biocpkg("pcaExplorer", vignette="pcaExplorer.html", label="the main user guide")`). 181 | 182 | # When you're done - the power of reproducibility 183 | 184 | A typical session with `pcaExplorer` includes one or more iterations on each of these tabs. 185 | Once you are finished, you might want to store the results of your analysis in different formats. 186 | 187 | ```{r ss90, echo=FALSE, fig.cap="The pcaExplorer task menu.
Buttons for saving the session to binary data or to a dedicated environment are displayed.",out.width="80%"} 188 | knitr::include_graphics("unr_90_exitsave.png") 189 | ``` 190 | 191 | With `pcaExplorer` you can do all of the following: 192 | 193 | - save every plot and table by simply clicking on the respective button, below each element 194 | - save the state of the entire app and its reactive elements as a binary `.RData` file, as if it was a workspace (clicking on the cog icon on the right side of the task menu) 195 | - use the "Exit `pcaExplorer` and save" button, which saves the state in a specific environment of your R session that you can later access by its name, which would normally look like `pcaExplorerState_YYYYMMDD_HHMMSS` (also accessible from the cog) 196 | - enjoy the beauty of reproducible research in the **Report Editor**: `pcaExplorer` comes with a template analysis, that picks the latest status of the app during your session, and combines these reactive values together in an R Markdown document, which you can first preview live in the app, and then download as a standalone HTML file - to store or share. 197 | This document stitches together narrative text, code, and output objects, and constitutes a compendium where all actions are recorded. 198 | If you are familiar with R, you can edit that live, with support for autocompletion, in the "Edit report" tab. 199 | 200 | ```{r ss99, echo=FALSE, fig.cap="The Report Editor tab. The collapsible elements control general markdown and editor options, which are regarded when the report is compiled. Its content is specified in the Ace editor, integrated in the Shiny app."} 201 | knitr::include_graphics("unr_99_editreport.png") 202 | ``` 203 | 204 | The functionality to display the report preview is based on `knit2html`, and some elements such as `DataTable` objects might not render correctly. 205 | To render them correctly, please install the PhantomJS executable before launching the app.
206 | This can be done by using the `r CRANpkg("webshot")` package and calling `webshot::install_phantomjs()` - HTML widgets will be rendered automatically as screenshots. 207 | Alternatively, the more recent `r BiocStyle::CRANpkg("webshot2")` package uses the headless Chrome browser (via the `r BiocStyle::CRANpkg("chromote")` package, requiring Google Chrome or other Chromium-based browser). 208 | Keep in mind that the fully rendered report (the one you can obtain with the "Generate & Save" button) is not affected by this, since it uses `rmarkdown::render()`. 209 | 210 | # Session Info {.unnumbered} 211 | 212 | ```{r sessioninfo} 213 | sessionInfo() 214 | ``` 215 | 216 | --------------------------------------------------------------------------------