├── .Rbuildignore
├── .github
    └── workflows
    │   └── R-CMD-check.yaml
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── correlatePCs.R
    ├── deprecated.R
    ├── distro_expr.R
    ├── genespca.R
    ├── get_annotation.R
    ├── hi_loadings.R
    ├── makeds.R
    ├── pair_corr.R
    ├── pca2go.R
    ├── pcaExplorer-pkg.R
    ├── pcaExplorer.R
    ├── pcaplot.R
    ├── profile_explore.R
    └── zzz.R
├── README.md
├── _pkgdown.yml
├── inst
    ├── CITATION
    ├── extdata
    │   ├── about.md
    │   ├── datainput.md
    │   ├── instructions.md
    │   ├── instructions_unr.md
    │   └── reportTemplate.Rmd
    └── www
    │   ├── help_dataformats.png
    │   └── pcaExplorer.png
├── man
    ├── correlatePCs.Rd
    ├── deprecated.Rd
    ├── distro_expr.Rd
    ├── figures
    │   └── pcaExplorer.png
    ├── geneprofiler.Rd
    ├── genespca.Rd
    ├── get_annotation.Rd
    ├── get_annotation_orgdb.Rd
    ├── hi_loadings.Rd
    ├── limmaquickpca2go.Rd
    ├── makeExampleDESeqDataSet_multifac.Rd
    ├── pair_corr.Rd
    ├── pca2go.Rd
    ├── pcaExplorer-pkg.Rd
    ├── pcaExplorer.Rd
    ├── pcaplot.Rd
    ├── pcaplot3d.Rd
    ├── pcascree.Rd
    ├── plotPCcorrs.Rd
    └── topGOtable.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── setuptests_pcaExplorer.R
    │   ├── test_annotations.R
    │   ├── test_correlatePCs.R
    │   ├── test_hiloadings.R
    │   ├── test_makeDS.R
    │   ├── test_moreplots.R
    │   ├── test_pca2go.R
    │   ├── test_pcagenes.R
    │   ├── test_pcasamples.R
    │   └── test_shiny.R
└── vignettes
    ├── newsnap_01_upload.png
    ├── newsnap_02_instructions.png
    ├── newsnap_03_countstable.png
    ├── newsnap_04_overview.png
    ├── newsnap_05_samples.png
    ├── newsnap_06_genes.png
    ├── newsnap_07_finder.png
    ├── newsnap_08_pca2go.png
    ├── newsnap_09_multifac.png
    ├── newsnap_10_editor.png
    ├── newsnap_11_about.png
    ├── pcaExplorer.Rmd
    ├── unr_00_demo_loaded.png
    ├── unr_01_splom.png
    ├── unr_02_sts_heatmap.png
    ├── unr_03_summary_counts.png
    ├── unr_04a_samplespca.png
    ├── unr_04b_samples_dex.png
    ├── unr_05_loadings.png
    ├── unr_06a_genefinder_dusp1.png
    ├── unr_06b_genefinder_per1.png
    ├── unr_06c_genefinder_ddx3y.png
    ├── unr_06c_genefinder_ddx3y_dex.png
    ├── unr_07_genespca.png
    ├── unr_08_pca2go_topgo.png
    ├── unr_90_exitsave.png
    ├── unr_99_editreport.png
    └── upandrunning.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^\.gitignore$
 4 | ^\.travis\.yml$
 5 | ^codecov\.yml$
 6 | ^appveyor\.yml$
 7 | ^\.circleci$
 8 | ^_pkgdown\.yml$
 9 | ^docs/
10 | ^docs$
11 | ^\.github$
12 | ^CODE_OF_CONDUCT\.md$
13 | ^LICENSE\.md$
14 | ^TODO\.md$
15 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
  1 | on:
  2 |   push:  # no branch filter: runs on pushes to any branch
  3 |   pull_request:
  4 |     branches:
  5 |       - master  # only pull requests targeting master
  6 |   schedule:
  7 |     - cron: '0 8 * * 5'  # weekly, Fridays at 08:00 UTC
  8 | 
  9 | name: R-CMD-check
 10 | 
 11 | jobs:
 12 |   R-CMD-check:
 13 |     runs-on: ${{ matrix.config.os }}
 14 |     container: ${{ matrix.config.image }}
 15 | 
 16 |     name: ${{ matrix.config.os }} (${{ matrix.config.bioc }} - ${{ matrix.config.image }})
 17 | 
 18 |     strategy:
 19 |       fail-fast: false  # let the remaining matrix jobs finish when one fails
 20 |       matrix:
 21 |         config:
 22 |         - { os: windows-latest, bioc: 'devel'}
 23 |         - { os: macOS-latest, bioc: 'devel', curlConfigPath: '/usr/bin/'}  # path is prepended to 'curl-config' in CURL_CONFIG
 24 |         - { os: ubuntu-latest, bioc: 'devel'}
 25 |         # - { os: ubuntu-latest, bioc: 'devel', cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
 26 |         - { os: ubuntu-latest, image: 'bioconductor/bioconductor_docker:devel'}  # container job: steps guarded by 'matrix.config.image == null' are skipped
 27 | 
 28 |     env:
 29 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
 30 |       CRAN: ${{ matrix.config.cran }}
 31 |       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 32 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 33 |       CURL_CONFIG: ${{ matrix.config.curlConfigPath }}curl-config
 34 | 
 35 |     steps:
 36 |       - name: Check out repo
 37 |         uses: actions/checkout@v4  # current maintained major, in line with actions/cache@v4 below
 38 | 
 39 |       - name: Set up R and install BiocManager
 40 |         uses: grimbough/bioc-actions/setup-bioc@v1
 41 |         if: matrix.config.image == null
 42 |         with:
 43 |           bioc-version: ${{ matrix.config.bioc }}
 44 | 
 45 |       - name: Set up pandoc
 46 |         uses: r-lib/actions/setup-pandoc@v2
 47 |         if: matrix.config.image == null
 48 | 
 49 |       - name: Install remotes
 50 |         run: |
 51 |           install.packages('remotes')
 52 |         shell: Rscript {0}
 53 | 
 54 |       - name: Query dependencies
 55 |         run: |
 56 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE, repos = c(getOption('repos'), BiocManager::repositories())), 'depends.Rds', version = 2)
 57 |         shell: Rscript {0}
 58 | 
 59 |       - name: Cache R packages
 60 |         if: runner.os != 'Windows' && matrix.config.image == null
 61 |         uses: actions/cache@v4
 62 |         with:
 63 |           path: ${{ env.R_LIBS_USER }}
 64 |           key: ${{ runner.os }}-bioc-${{ matrix.config.bioc }}-${{ hashFiles('depends.Rds') }}
 65 |           restore-keys: ${{ runner.os }}-bioc-${{ matrix.config.bioc }}-
 66 | 
 67 |       - name: Install system dependencies
 68 |         if: runner.os == 'Linux'
 69 |         env:
 70 |           RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
 71 |         uses: r-lib/actions/setup-r-dependencies@v2
 72 |         with:
 73 |           extra-packages: any::rcmdcheck
 74 |           pak-version: devel
 75 |           
 76 |       - name: Install system dependencies (macOS)
 77 |         if: runner.os == 'macOS'
 78 |         run: |
 79 |           brew install cairo
 80 |           brew install --cask xquartz
 81 |           brew install harfbuzz
 82 |           brew install fribidi
 83 |           brew install fftw
 84 | 
 85 |       # - name: Set up gfortran symlinks (macOS)
 86 |       #   if: runner.os == 'macOS'
 87 |       #   run: |
 88 |       #     set -x
 89 |       #     sudo ln -s /usr/local/Cellar/gcc@8/8.4.0_2/lib/gcc/8 /usr/local/gfortran/lib
 90 |       #     gfortran --version
 91 | 
 92 |       - name: Install dependencies
 93 |         run: |
 94 |           local_deps <- remotes::local_package_deps(dependencies = TRUE)
 95 |           deps <- remotes::dev_package_deps(dependencies = TRUE, repos = BiocManager::repositories())
 96 |           BiocManager::install(local_deps[local_deps %in% deps$package[deps$diff != 0]], Ncpu = 2L)
 97 |           BiocManager::install(c("GenomeInfoDbData", "GO.db"), Ncpu = 2L)
 98 |           BiocManager::install("markdown")
 99 |           remotes::install_cran('rcmdcheck', Ncpu = 2L)
100 |         shell: Rscript {0}
101 | 
102 |       - name: Session info
103 |         run: |
104 |           options(width = 100)
105 |           pkgs <- installed.packages()[, "Package"]
106 |           sessioninfo::session_info(pkgs, include_base = TRUE)
107 |         shell: Rscript {0}
108 | 
109 |       - name: Build, Install, Check
110 |         uses: grimbough/bioc-actions/build-install-check@v1
111 | 
112 |       - name: Run BiocCheck
113 |         uses: grimbough/bioc-actions/run-BiocCheck@v1
114 |         with:
115 |           arguments: '--no-check-bioc-views --no-check-bioc-help'
116 |           error-on: 'error'
117 | 
118 |       - name: Upload check results
119 |         if: failure()
120 |         uses: actions/upload-artifact@v4  # pinned release instead of a moving branch
121 |         with:
122 |           name: ${{ runner.os }}-${{ strategy.job-index }}-results  # unique per matrix job: v4 rejects duplicate artifact names within a run
123 |           path: check
124 | 
125 |       - name: Show testthat output
126 |         if: always()
127 |         run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
128 |         shell: bash
129 | 
130 |       - name: Upload check results
131 |         if: failure()
132 |         uses: actions/upload-artifact@v4  # pinned release instead of a moving branch
133 |         with:
134 |           name: ${{ runner.os }}-bioc-${{ matrix.config.bioc }}-results  # 'bioc' is empty for the container job, so the name stays unique per matrix entry
135 |           path: check
136 | 
137 |       - name: Test coverage
138 |         if: matrix.config.os == 'macOS-latest'
139 |         run: |
140 |           install.packages("covr")
141 |           covr::codecov(token = "${{secrets.CODECOV_TOKEN}}")
142 |         shell: Rscript {0}
143 | 
144 |       - name: Deploy
145 |         if: github.event_name == 'push' && github.ref == 'refs/heads/devel' && matrix.config.os == 'macOS-latest'  # NOTE(review): deploys only from 'devel', while the pull_request trigger filters on 'master' - confirm the intended branch
146 |         run: |
147 |           R CMD INSTALL .
148 |           Rscript -e "remotes::install_dev('pkgdown'); pkgdown::deploy_to_branch(new_process = FALSE)"
149 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity and
 10 | orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 | and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the overall
 26 | community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 | advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 | address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 | professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards
 42 | of acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies
 54 | when an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail
 56 | address, posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at marinif@uni-mainz.de. 
 63 | All complaints will be reviewed and investigated promptly and fairly.
 64 | 
 65 | All community leaders are obligated to respect the privacy and security of the
 66 | reporter of any incident.
 67 | 
 68 | ## Enforcement Guidelines
 69 | 
 70 | Community leaders will follow these Community Impact Guidelines in determining
 71 | the consequences for any action they deem in violation of this Code of Conduct:
 72 | 
 73 | ### 1. Correction
 74 | 
 75 | **Community Impact**: Use of inappropriate language or other behavior deemed
 76 | unprofessional or unwelcome in the community.
 77 | 
 78 | **Consequence**: A private, written warning from community leaders, providing
 79 | clarity around the nature of the violation and an explanation of why the
 80 | behavior was inappropriate. A public apology may be requested.
 81 | 
 82 | ### 2. Warning
 83 | 
 84 | **Community Impact**: A violation through a single incident or series of
 85 | actions.
 86 | 
 87 | **Consequence**: A warning with consequences for continued behavior. No
 88 | interaction with the people involved, including unsolicited interaction with
 89 | those enforcing the Code of Conduct, for a specified period of time. This
 90 | includes avoiding interactions in community spaces as well as external channels
 91 | like social media. Violating these terms may lead to a temporary or permanent
 92 | ban.
 93 | 
 94 | ### 3. Temporary Ban
 95 | 
 96 | **Community Impact**: A serious violation of community standards, including
 97 | sustained inappropriate behavior.
 98 | 
 99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 | 
105 | ### 4. Permanent Ban
106 | 
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 | 
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 | 
114 | ## Attribution
115 | 
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0,
118 | available at <https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>.
119 | 
120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
121 | enforcement ladder](https://github.com/mozilla/diversity).
122 | 
123 | [homepage]: https://www.contributor-covenant.org
124 | 
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | <https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.
127 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: pcaExplorer
 2 | Type: Package
 3 | Title: Interactive Visualization of RNA-seq Data Using a Principal Components Approach
 4 | Version: 3.3.0
 5 | Date: 2024-12-19
 6 | Authors@R: c(person("Federico", "Marini", role = c("aut", "cre"), 
 7 |     email ="marinif@uni-mainz.de", 
 8 |     comment = c(ORCID = '0000-0003-3252-7758')))
 9 | Description: This package provides functionality for interactive visualization 
10 |     of RNA-seq datasets based on Principal Components Analysis. The methods provided
11 |     allow for quick information extraction and effective data exploration. A Shiny
12 |     application encapsulates the whole analysis.
13 | License: MIT + file LICENSE
14 | Imports: 
15 |     DESeq2, 
16 |     SummarizedExperiment, 
17 |     mosdef (>= 1.1.0),
18 |     GenomicRanges, 
19 |     IRanges,
20 |     S4Vectors, 
21 |     genefilter, 
22 |     ggplot2 (>= 2.0.0), 
23 |     heatmaply,
24 |     plotly,
25 |     scales,
26 |     NMF, 
27 |     plyr, 
28 |     topGO, 
29 |     limma, 
30 |     GOstats, 
31 |     GO.db, 
32 |     AnnotationDbi, 
33 |     shiny (>= 0.12.0), 
34 |     shinydashboard, 
35 |     shinyBS, 
36 |     ggrepel, 
37 |     DT, 
38 |     shinyAce,
39 |     threejs, 
40 |     biomaRt, 
41 |     pheatmap, 
42 |     knitr, 
43 |     rmarkdown, 
44 |     base64enc,
45 |     tidyr, 
46 |     grDevices, 
47 |     methods
48 | Suggests: 
49 |     testthat, 
50 |     BiocStyle, 
51 |     markdown, 
52 |     airway, 
53 |     org.Hs.eg.db, 
54 |     htmltools
55 | URL: https://github.com/federicomarini/pcaExplorer,
56 |     https://federicomarini.github.io/pcaExplorer/
57 | BugReports: https://github.com/federicomarini/pcaExplorer/issues
58 | biocViews: ImmunoOncology, Visualization, RNASeq, DimensionReduction,
59 |     PrincipalComponent, QualityControl, GUI, ReportWriting, ShinyApps
60 | VignetteBuilder: knitr
61 | RoxygenNote: 7.3.2
62 | Encoding: UTF-8
63 | NeedsCompilation: no
64 | Roxygen: list(markdown = TRUE)
65 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2024
2 | COPYRIGHT HOLDER: pcaExplorer authors
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2024 pcaExplorer authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(correlatePCs)
 4 | export(distro_expr)
 5 | export(geneprofiler)
 6 | export(genespca)
 7 | export(get_annotation)
 8 | export(get_annotation_orgdb)
 9 | export(hi_loadings)
10 | export(limmaquickpca2go)
11 | export(makeExampleDESeqDataSet_multifac)
12 | export(pair_corr)
13 | export(pca2go)
14 | export(pcaExplorer)
15 | export(pcaplot)
16 | export(pcaplot3d)
17 | export(pcascree)
18 | export(plotPCcorrs)
19 | export(topGOtable)
20 | import(DESeq2)
21 | import(GO.db)
22 | import(SummarizedExperiment)
23 | import(biomaRt)
24 | import(ggplot2)
25 | import(knitr)
26 | import(methods)
27 | import(plyr)
28 | import(rmarkdown)
29 | import(shiny)
30 | import(shinydashboard)
31 | import(threejs)
32 | import(topGO)
33 | importFrom(AnnotationDbi,Term)
34 | importFrom(AnnotationDbi,mapIds)
35 | importFrom(AnnotationDbi,select)
36 | importFrom(DT,datatable)
37 | importFrom(GenomicRanges,GRanges)
38 | importFrom(IRanges,IRanges)
39 | importFrom(NMF,aheatmap)
40 | importFrom(S4Vectors,DataFrame)
41 | importFrom(base64enc,dataURI)
42 | importFrom(genefilter,rowVars)
43 | importFrom(ggrepel,geom_label_repel)
44 | importFrom(grDevices,colorRamp)
45 | importFrom(grDevices,dev.off)
46 | importFrom(grDevices,pdf)
47 | importFrom(grDevices,rainbow)
48 | importFrom(grDevices,rgb)
49 | importFrom(heatmaply,heatmaply)
50 | importFrom(limma,goana)
51 | importFrom(limma,topGO)
52 | importFrom(mosdef,gene_plot)
53 | importFrom(mosdef,run_topGO)
54 | importFrom(pheatmap,pheatmap)
55 | importFrom(plotly,plotlyOutput)
56 | importFrom(plotly,renderPlotly)
57 | importFrom(scales,brewer_pal)
58 | importFrom(scales,hue_pal)
59 | importFrom(shiny,addResourcePath)
60 | importFrom(shinyAce,aceAutocomplete)
61 | importFrom(shinyAce,aceEditor)
62 | importFrom(shinyAce,getAceModes)
63 | importFrom(shinyAce,getAceThemes)
64 | importFrom(shinyAce,updateAceEditor)
65 | importFrom(shinyBS,bsCollapse)
66 | importFrom(shinyBS,bsCollapsePanel)
67 | importFrom(shinyBS,bsTooltip)
68 | importFrom(tidyr,gather)
69 | importMethodsFrom(GOstats,hyperGTest)
70 | importMethodsFrom(GOstats,summary)
71 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
  1 | # pcaExplorer 3.2.0
  2 | 
  3 | ## Other notes
  4 | 
  5 | * Some changes in the source of the documentation, providing anchors to all function calls. This avoids the new note in R CMD check in the new major release
  6 | 
  7 | # pcaExplorer 2.99.0
  8 | 
  9 | ## New features
 10 | 
 11 | * The `pcaplot()` function now provides a clever default for the `intgroup` parameter, if some content (as it should) is provided in the `colData` slot of the main input object
 12 | 
 13 | ## Other notes
 14 | 
 15 | * The transition to the functions available in the `mosdef` Bioconductor package is complete, with the original functions now being deprecated. This applies to `topGOtable()` (now replaced by `mosdef::run_topGO()`)
 16 | * The gene plot widgets now also use the `gene_plot()` function from `mosdef`, instead of the previous undocumented internal function
 17 | * The Roxygen-based documentation now supports markdown. No visible changes should appear to the user, as the content should have stayed fairly the same
 18 | * Although no visible changes for the end user are expected, the incoming major version bump will reflect the change in the dependency graph, ensuring that this is noticed at least at the version numbering level
 19 | 
 20 | # pcaExplorer 2.22.0
 21 | 
 22 | ## Other notes
 23 | 
 24 | * `get_annotation_orgdb()` gains an additional argument, `key_for_genenames`, which defaults to "SYMBOL". This should not change the behavior of the function, if not specified, but accommodates the use of annotation packages where the information has been encoded differently (e.g. org.Sc.sgd.db where the info is contained in the "ORF" column)
 25 | 
 26 | ## Bug fixes
 27 | 
 28 | * `pcaplot` correctly returns the values for the percent of explained variance, which were correctly displayed on the plot but not stored as they should in the attribute slot
 29 | 
 30 | # pcaExplorer 2.20.0
 31 | 
 32 | ## Other notes
 33 | 
 34 | * the tables in the PCA2GO tab panel can be compacted only if they are computed via the `pca2go` function (offline) - at runtime, `limmaquickpca2go` is used and no compaction is required
 35 | * if an annotation is provided with a column `gene_id`, these values are actually overwriting the rownames (makes the object more robust with respect to its provenance)
 36 | 
 37 | # pcaExplorer 2.16.0
 38 | 
 39 | ## Other notes
 40 | 
 41 | * Replaced dependency from `d3heatmap` with the functionality of `heatmaply`
 42 | 
 43 | # pcaExplorer 2.12.0
 44 | 
 45 | ## Bug fixes
 46 | 
 47 | * Fixed an error in the initialization of the app due to a new behavior introduced by `shinyAce` in version >= 0.4.0
 48 | * `topGOtable` does not generate rows with NAs if providing a too high number for the categories to report
 49 | 
 50 | ## Other notes
 51 | 
 52 | * The type of the columns in the data.frame returned by `topGOtable` are now correctly referring to the type they contain - e.g. the p values are now stored as numeric values
 53 | * Citation now refers to the published manuscript - https://doi.org/10.1186/s12859-019-2879-1
 54 | 
 55 | # pcaExplorer 2.10.0
 56 | 
 57 | ## New features
 58 | 
 59 | * Added extra parameters to `topGOtable` to offer more control on the method used, and the option to correct the p-values for multiple testing (via the Benjamini-Yekutieli procedure)
 60 | * `pca2go` has now an option to return (early) a list with vectors of genes with high loadings
 61 | * Better preview of the uploaded data with modal dialog windows triggered by buttons which appear once corresponding inputs are available
 62 | * Improved notification system: in addition to the progress bar, info that all input is correctly specified, or suggest optional annotation loading
 63 | * Added flexibility to select separator for each of the uploadable files
 64 | * The pairwise correlation plots can now use a logarithmic scale on the axes, use smaller subsets of the data for quicker inspection, and resize the correlation info proportionally to its intensity
 65 | * The sample to sample heatmap supports additionally manhattan and correlation-based distances
 66 | * There is a new vignette with a detailed quick start, "Up and running with pcaExplorer", showing what the workflow with `pcaExplorer` can look like, demonstrated on the `airway` dataset
 67 | * In the Instructions panel, we added buttons to access the fully rendered documentation, either local or online if e.g. deployed to a server. Related to this, `pcaExplorer` has a new parameter, `runLocal`, to control this behavior
 68 | * An additional parameter, `annopkg`, has been added to `pca2go()` to override the behavior with the `organism` parameter (this is useful when the name of the annotation package does not conform to the classical `org.Xx.eg.db` pattern, e.g. for Arabidopsis thaliana); a detailed use case has been added in the main vignette
 69 | 
 70 | ## Other notes
 71 | 
 72 | * The computing of the required objects now requires the explicit action on the dedicated button, and the tooltip informs the user on what steps are taken (including normalization)
 73 | * An information box has been added to provide detailed information on the required input formats
 74 | * Added notification to specify how to install the airway package for demonstration purposes if not already available
 75 | * Added startup message upon loading the package
 76 | * The content in the Instructions tab is now contained in collapsible elements
 77 | * The file formats accepted by `pcaExplorer` are now specified both in the vignette text, as well as in the app at runtime
 78 | * The content of the Instructions tab is now more compact, containing the rendered "Up and running with pcaExplorer" vignette. The full vignettes can be accessed via buttons in the same panel
 79 | * Added instructions to install phantomJS via the `webshot` package - would raise an error when previewing the report
 80 | 
 81 | # pcaExplorer 2.8.0
 82 | 
 83 | ## New features
 84 | 
 85 | * Added a `NEWS.md` file to track changes to the package
 86 | * PCA plots now are correctly generated with fixed coordinates
 87 | * Introduced use of conditionalPanels for better handling of errors in the app tabs
 88 | * Added possibility to use different transformations, also reflected in the change of one of the main arguments (previously `rlt`, now `dst`, i.e. `DESeqTransform`): rlog, vst, shifted log, ... The transformation type is tracked in the reactive values. 
 89 | * More modular loading of data, by splitting generation of `dds` and `dst`
 90 | * `pca2go` is now also picking values from the input widgets
 91 | 
 92 | ## Other notes
 93 | 
 94 | * Built project website via pkgdown, with customized reference structure
 95 | * Correctly adding the resources to shinyBS, loaded via `.onLoad`, and also better placement for bstooltips
 96 | * Editor options start collapsed in the Report Editor tab
 97 | * Vignette and template report are updated to reflect the new parameter names
 98 | * Unified style for ggplot2 plots
 99 | * Better tooltip placement in the main page
100 | * Replaced `print` calls with more appropriate `message`s
101 | * Displaying user returned messages in long (plotting) operations
102 | 
103 | ## Bug fixes
104 | 
105 | * Fixed behavior of rendering inline the content of the report - did not work properly for server deployed instances
106 | 
107 | # pcaExplorer 2.6.0
108 | 
109 | ## New features
110 | 
111 | * Automatically computing size factors where required
112 | * Added progress indication when compiling the report
113 | 
114 | ## Bug fixes
115 | 
116 | * Fixed after changes in threejs package
117 | * Edited dropdown menu to remove unused green badge
118 | * Menus start expanded on the side, again
119 | * `theme_bw` applied when needed, corrected previous behavior
120 | 
121 | ## Other notes
122 | 
123 | * Updated citation infos
124 | * Slight difference in handling validate/need errors
125 | 
126 | # pcaExplorer 2.2.0
127 | 
128 | ## New features
129 | 
130 | * Added Demo data, loadable via demo button
131 | 
132 | ## Bug fixes
133 | 
134 | * Plots work now without cutting out points when zooming in
135 | 
136 | ## Other notes
137 | 
138 | * Saved reactive values are now exported to dedicated environments (instead of being assigned to the global environment)
139 | 
140 | # pcaExplorer 1.99.0
141 | 
142 | ## Other notes
143 | 
144 | * Reflecting the major feature added, will trigger a major version number bump. Welcome soon, pcaExplorer 2.0.0!
145 | 
146 | # pcaExplorer 1.1.5
147 | 
148 | ## New features
149 | 
150 | * Automated report generation - template available + editor in the app tab for advanced user customization
151 | * Support for state saving, in the global environment as well as with binary data
152 | * All plots generated can be now exported with the dedicated button 
153 | * Added confidence ellipse for PCA plot
154 | * Added 3d pca plot
155 | * Added functions to automatically retrieve the annotation in format ready to use for the app
156 | * Added profile explorer function, for plotting behaviour across all samples for subset of genes
157 | * Added distribution plots
158 | * Added pairwise correlation plot
159 | * Added table to enhance readability of the gene finder plot, also by annotating sample names
160 | 
161 | ## Bug fixes
162 | 
163 | * Minor typos fixed in the tabs
164 | * Added option row.names to read.delim for allowing row names when uploading the data
165 | 
166 | ## Other notes
167 | 
168 | * Added extra info in the about section
169 | * Instructions and vignette rewritten to reflect new design of the app
170 | 
171 | # pcaExplorer 1.1.3
172 | 
173 | ## Bug fixes
174 | 
175 | * Remove y axis limits to gene boxplots
176 | * Fixed: correct label and color assignments for genespca
177 | 
178 | # pcaExplorer 1.0.0
179 | 
180 | ## Other notes
181 | 
182 | * Released in Bioconductor 3.3
183 | 
184 | # pcaExplorer 0.99.1
185 | 
186 | ## Other notes
187 | 
188 | * Changed format of the NEWS file
189 | 
190 | # pcaExplorer 0.99.0
191 | 
192 | ## Other notes
193 | 
194 | * Ready for submission to Bioconductor
195 | 
196 | # pcaExplorer 0.9.0
197 | 
198 | ## Other notes
199 | 
200 | * Added TravisCI integration for both branches
201 | * Added appveyor integration - plus badges in the README.md
202 | * Code cleanup
203 | * Added screenshots for the vignette
204 | * Removed some lengthy tests
205 | 
206 | # pcaExplorer 0.8.0
207 | 
208 | ## New features
209 | 
210 | * Selection of identifier type available in pca2go
211 | 
212 | ## Bug fixes
213 | 
214 | * Couple of layout fixes
215 | 
216 | ## Other notes
217 | 
218 | * MIT license
219 | * Added TravisCI integration
220 | * Added codecov integration
221 | * Enhanced documentation
222 | 
223 | # pcaExplorer 0.7.0
224 | 
225 | ## New features
226 | 
227 | * Vignette full draft done
228 | 
229 | # pcaExplorer 0.6.4
230 | 
231 | ## Other notes
232 | 
233 | * Updated NEWS file
234 | 
235 | # pcaExplorer 0.6.3
236 | 
237 | ## New features
238 | 
239 | * About and Instructions done by now
240 | * Added some missing details to the documentation
241 | 
242 | # pcaExplorer 0.6.2
243 | 
244 | ## Other notes
245 | 
246 | * Corrected wordings for (cor)relations of principal components with covariates
247 | * Added a couple of checks if correct objects are provided
248 | 
249 | # pcaExplorer 0.6.1
250 | 
251 | ## New features
252 | 
253 | * Added function to remove selected samples that are suspected outliers, in order to see the effect of clustering on the good ones
254 | 
255 | # pcaExplorer 0.6.0
256 | 
257 | ## Other notes
258 | 
259 | * Documentation completed
260 | * Examples fully working and cleaned up a little further.
261 | 
262 | # pcaExplorer 0.5.0
263 | 
264 | ## Other notes
265 | 
266 | * Further steps in direction of R CMD check
267 | 
268 | # pcaExplorer 0.4.0
269 | 
270 | ## New features
271 | 
272 | * Added pca2go live functionality
273 | 
274 | # pcaExplorer 0.3.0
275 | 
276 | ## New features
277 | 
278 | * Added color palette to choose, and dependent on the samples and factors available/selected
279 | 
280 | # pcaExplorer 0.2.0
281 | 
282 | ## New features
283 | 
284 | * Multifactorial exploration completed and adaptable to each dataset
285 | 
286 | # pcaExplorer 0.1.0
287 | 
288 | ## New features
289 | 
290 | * Restyling and (re)packaging mostly completed
291 | 


--------------------------------------------------------------------------------
/R/correlatePCs.R:
--------------------------------------------------------------------------------
 1 | #' Principal components (cor)relation with experimental covariates
 2 | #'
 3 | #' Computes the significance of (cor)relations between PCA scores and the sample
 4 | #' experimental covariates, using Kruskal-Wallis test for categorical variables
 5 | #' (factor or character columns) and the `cor.test` based on Spearman's
 6 | #' correlation for all other (continuous) variables
 7 | #'
 8 | #' @param pcaobj A `prcomp` object
 9 | #' @param coldata A `data.frame` object containing the experimental
10 | #' covariates
11 | #' @param pcs A numeric vector, containing the numbers of the PCs to test
12 | #'
13 | #' @return A numeric `matrix` with computed p values, one row for each
14 | #' principal component in `pcs` (named `PC_<number>`) and one column for each
15 | #' covariate. Entries are `NA` for categorical covariates with a single level
16 | #'
17 | #' @examples
18 | #' library(DESeq2)
19 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
20 | #' rlt <- DESeq2::rlogTransformation(dds)
21 | #' pcaobj <- prcomp(t(assay(rlt)))
22 | #' correlatePCs(pcaobj, colData(dds))
23 | #'
24 | #' @export
25 | correlatePCs <- function(pcaobj, coldata, pcs = 1:4) {
26 |   # extract the scores from the pc object
27 |   x <- pcaobj$x
28 | 
29 |   # one row per requested PC, one column per covariate; NA where no test is run
30 |   res <- matrix(NA_real_, nrow = length(pcs), ncol = ncol(coldata),
31 |                 dimnames = list(paste0("PC_", pcs), colnames(coldata)))
32 | 
33 |   for (i in seq_len(ncol(coldata))) {
34 |     # for each covariate...
35 |     covar <- coldata[, i]
36 |     # character columns carry no levels: convert so they get tested as well
37 |     is_categorical <- is.factor(covar) || is.character(covar)
38 |     if (is_categorical) covar <- factor(covar)
39 |     # rows of res follow the position in pcs, columns of x the PC number itself
40 |     for (j in seq_along(pcs)) {
41 |       if (!is_categorical) {
42 |         res[j, i] <- cor.test(x[, pcs[j]], covar, method = "spearman")$p.value
43 |       } else if (nlevels(covar) > 1) {
44 |         res[j, i] <- kruskal.test(x[, pcs[j]], covar)$p.value
45 |       }
46 |     }
47 |   }
48 |   res
49 | }
50 | 
51 | 
52 | #' Plot significance of (cor)relations of covariates VS principal components
53 | #'
54 | #' Plots the significance of the (cor)relation of each covariate vs a principal component
55 | #'
56 | #' @param pccorrs A `matrix` of p values, as generated by [correlatePCs]
57 | #' @param pc An integer number, corresponding to the principal component of interest
58 | #' @param logp Logical, defaults to `TRUE`, displays the -`log10` of
59 | #' the pvalue instead of the p value itself
60 | #'
61 | #' @return A base plot object
62 | #'
63 | #' @examples
64 | #' library(DESeq2)
65 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
66 | #' rlt <- rlogTransformation(dds)
67 | #' pcaobj <- prcomp(t(assay(rlt)))
68 | #' res <- correlatePCs(pcaobj, colData(dds))
69 | #' plotPCcorrs(res)
70 | #'
71 | #' @export
72 | plotPCcorrs <- function(pccorrs, pc = 1, logp = TRUE) {
73 |   selectedPC <- paste0("PC_", pc)
74 |   # fail early with a readable message instead of "subscript out of bounds"
75 |   if (!all(selectedPC %in% rownames(pccorrs)))
76 |     stop("The selected principal component is not included in pccorrs")
77 |   pvals <- pccorrs[selectedPC, ]
78 |   if (logp) pvals <- -log10(pvals)
79 |   barplot(pvals, las = 2, col = "steelblue",
80 |           main = paste0("Significance of the relations between PC ", pc, " vs covariates"),
81 |           ylab = if (logp) "-log10(pval)" else "pval")
82 | }
83 | 


--------------------------------------------------------------------------------
/R/deprecated.R:
--------------------------------------------------------------------------------
 1 | #' Deprecated functions in pcaExplorer
 2 | #'
 3 | #' Functions that are on their way to the function afterlife.
 4 | #' Their successors are also listed.
 5 | #' 
 6 | #' The successors of these functions most likely result from the rework that
 7 | #' led to the creation of the `mosdef` package. Refer to its
 8 | #' documentation for more details.
 9 | #' 
10 | #' @param ... Ignored arguments.
11 | #' 
12 | #' @return All functions throw a warning, with a deprecation message pointing
13 | #' towards their successor (if available).
14 | #' 
15 | #' @name deprecated
16 | #' 
17 | #' @section Transitioning to the mosdef framework:
18 | #' 
19 | #' - [topGOtable()] is now being replaced by the more flexible 
20 | #' [mosdef::run_topGO()] function
21 | #' 
22 | #' @author Federico Marini
23 | #' 
24 | #' @examples
25 | #' # try(topGOtable())
26 | #' 
27 | NULL
28 | 
29 | 
30 | ## #' @export
31 | ## #' @rdname defunct
32 | ## trendVar <- function(...) {
33 | ##   .Defunct("fitTrendVar")
34 | ## }
35 | 


--------------------------------------------------------------------------------
/R/distro_expr.R:
--------------------------------------------------------------------------------
 1 | #' Plot distribution of expression values
 2 | #'
 3 | #' @param rld A [DESeq2::DESeqTransform()] object.
 4 | #' @param plot_type Character, choose one of `boxplot`, `violin` or
 5 | #' `density`. Defaults to `density`. Any other value raises an error
 6 | #'
 7 | #' @return A plot with the distribution of the expression values
 8 | #' @export
 9 | #'
10 | #' @examples
11 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
12 | #' rlt <- DESeq2::rlogTransformation(dds)
13 | #' distro_expr(rlt)
14 | distro_expr <- function(rld, plot_type="density") {
15 |   # validate up front: an unknown type used to fail later with "object 'p' not found"
16 |   plot_type <- match.arg(plot_type, c("density", "boxplot", "violin"))
17 | 
18 |   allrld <- tidyr::gather(as.data.frame(assay(rld)))
19 |   names(allrld) <- c("Sample", "rlogExpression")
20 | 
21 |   p <- switch(
22 |     plot_type,
23 |     boxplot = ggplot(allrld, aes_string(x = "Sample", y = "rlogExpression")) +
24 |       geom_boxplot(aes_string(col = "Sample", fill = "Sample"), alpha = 0.5),
25 |     violin = ggplot(allrld, aes_string(x = "Sample", y = "rlogExpression")) +
26 |       geom_violin(aes_string(col = "Sample", fill = "Sample"), alpha = 0.5),
27 |     density = ggplot(allrld, aes_string(x = "rlogExpression")) +
28 |       geom_density(aes_string(color = "Sample"), alpha = 0.1)
29 |   )
30 |   p + theme_bw()
31 | }
32 | 


--------------------------------------------------------------------------------
/R/genespca.R:
--------------------------------------------------------------------------------
  1 | #' Principal components analysis on the genes
  2 | #'
  3 | #' Computes and plots the principal components of the genes, optionally displaying
  4 | #' the samples as in a typical biplot visualization.
  5 | #'
  6 | #' The implementation of this function is based on the beautiful `ggbiplot`
  7 | #' package developed by Vince Vu, available at https://github.com/vqv/ggbiplot.
  8 | #' The adaptation and additional parameters are tailored to display typical genomics data
  9 | #' such as the transformed counts of RNA-seq experiments
 10 | #'
 11 | #' @param x A [DESeq2::DESeqTransform()] object, with data in `assay(x)`,
 12 | #' produced for example by either [DESeq2::rlog()] or
 13 | #' [DESeq2::varianceStabilizingTransformation()]
 14 | #' @param ntop Number of top genes to use for principal components,
 15 | #' selected by highest row variance
 16 | #' @param choices Vector of two numeric values, to select on which principal components to plot
 17 | #' @param arrowColors Vector of character, either as long as the number of the samples, or one single value
 18 | #' @param groupNames Factor containing the groupings for the input data. Is efficiently chosen
 19 | #' as the (interaction of more) factors in the colData for the object provided
 20 | #' @param biplot Logical, whether to additionally draw the samples labels as in a biplot representation
 21 | #' @param scale Covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1,
 22 | #' the inner product between the variables approximates the covariance and the
 23 | #' distance between the points approximates the Mahalanobis distance.
 24 | #' @param pc.biplot Logical, for compatibility with biplot.princomp()
 25 | #' @param obs.scale Scale factor to apply to observations
 26 | #' @param var.scale Scale factor to apply to variables
 27 | #' @param groups Optional factor variable indicating the groups that the observations
 28 | #' belong to. If provided the points will be colored according to groups
 29 | #' @param ellipse Logical, draw a normal data ellipse for each group
 30 | #' @param ellipse.prob Size of the ellipse in Normal probability
 31 | #' @param labels optional Vector of labels for the observations
 32 | #' @param labels.size Size of the text used for the labels
 33 | #' @param alpha Alpha transparency value for the points (0 = transparent, 1 = opaque)
 34 | #' @param var.axes Logical, draw arrows for the variables?
 35 | #' @param circle Logical, draw a correlation circle? (only applies when prcomp
 36 | #' was called with scale = TRUE and when var.scale = 1)
 37 | #' @param circle.prob Size of the correlation circle in Normal probability
 38 | #' @param varname.size Size of the text for variable names
 39 | #' @param varname.adjust  Adjustment factor for the placement of the variable names,
 40 | #' '>= 1' means farther from the arrow
 41 | #' @param varname.abbrev  Logical, whether or not to abbreviate the variable names
 42 | #' @param returnData Logical, if TRUE returns a data.frame for further use, containing the
 43 | #' selected principal components for custom plotting
 44 | #' @param coordEqual Logical, default FALSE, for allowing brushing. If TRUE, plot using
 45 | #' equal scale cartesian coordinates
 46 | #' @param scaleArrow Multiplicative factor, usually >=1, only for visualization purposes,
 47 | #' to allow for distinguishing where the variables are plotted
 48 | #' @param useRownamesAsLabels Logical, if TRUE uses the row names as labels for plotting
 49 | #' @param point_size Size of the points to be plotted for the observations (genes)
 50 | #' @param annotation A `data.frame` object, with row.names as gene identifiers (e.g. ENSEMBL ids)
 51 | #' and a column, `gene_name`, containing e.g. HGNC-based gene symbols
 52 | #'
 53 | #' @return An object created by `ggplot`, which can be assigned and further customized.
 54 | #'
 55 | #' @examples
 56 | #'
 57 | #' library(DESeq2)
 58 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
 59 | #' rlt <- rlogTransformation(dds)
 60 | #' groups <- colData(dds)$condition
 61 | #' groups <- factor(groups, levels = unique(groups))
 62 | #' cols <- scales::hue_pal()(2)[groups]
 63 | #' genespca(rlt, ntop=100, arrowColors = cols, groupNames = groups)
 64 | #'
 65 | #' groups_multi <- interaction(as.data.frame(colData(rlt)[, c("condition", "tissue")]))
 66 | #' groups_multi <- factor(groups_multi, levels = unique(groups_multi))
 67 | #' cols_multi <- scales::hue_pal()(length(levels(groups_multi)))[factor(groups_multi)]
 68 | #' genespca(rlt, ntop = 100, arrowColors = cols_multi, groupNames = groups_multi)
 69 | #'
 70 | #' @export
 71 | genespca <- function(x,
 72 |                      ntop,
 73 |                      choices = c(1, 2),
 74 |                      arrowColors = "steelblue",
 75 |                      groupNames="group",
 76 |                      biplot = TRUE,
 77 |                      scale = 1, pc.biplot = TRUE,
 78 |                      obs.scale = 1 - scale, var.scale = scale, groups = NULL,
 79 |                      ellipse = FALSE, ellipse.prob = 0.68, labels = NULL, labels.size = 3,
 80 |                      alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69,
 81 |                      varname.size = 4, varname.adjust = 1.5, varname.abbrev = FALSE,
 82 |                      returnData = FALSE, coordEqual = FALSE, scaleArrow = 1,
 83 |                      useRownamesAsLabels = TRUE, point_size = 2, annotation = NULL) {
 84 | 
 85 |   stopifnot(length(choices) == 2)
 86 |   if (length(arrowColors) != 1 && length(arrowColors) != ncol(x))
 87 |     stop("Please provide either one color or a vector as long as the number of samples")
 88 |   # PCA with the ntop most variable genes as observations and the samples as variables
 89 |   rv <- rowVars(assay(x))
 90 |   select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
 91 |   pca <- prcomp((assay(x)[select, ]))
 92 | 
 93 |   percentVar <- pca$sdev^2 / sum(pca$sdev^2)
 94 | 
 95 |   if (!biplot) {
 96 |     nobs.factor <- sqrt(nrow(pca$x) - 1)
 97 |     devs <- pca$sdev
 98 |     pcast <- pca
 99 |     pcast$x <- sweep(pca$x, 2, 1 / (devs * nobs.factor), FUN = "*") * nobs.factor
100 |     d <- data.frame(PC1 = pcast$x[, choices[1]],
101 |                     PC2 = pcast$x[, choices[2]],
102 |                     names = rownames((assay(x)[select, ])))
103 | 
104 |     if (returnData) {
105 |       attr(d, "percentVar") <- percentVar
106 |       return(d)
107 |     }
108 | 
109 |     ggplot(data = d, aes_string(x = "PC1", y = "PC2")) +
110 |       geom_point(size = 3) +
111 |       xlab(paste0("PC", choices[1], ": ", round(percentVar[choices[1]] * 100), "% variance")) +
112 |       ylab(paste0("PC", choices[2], ": ", round(percentVar[choices[2]] * 100), "% variance")) +
113 |       # geom_text(aes(label=names),hjust=0.25, vjust=-0.5, show.legend = F) +
114 |       ggtitle("title") + theme_bw()
115 |   } else {
116 |     if (inherits(pca, "prcomp")) {
117 |       nobs.factor <- sqrt(nrow(pca$x) - 1)
118 |       d <- pca$sdev
119 |       u <- sweep(pca$x, 2, 1 / (d * nobs.factor), FUN = "*")
120 |       v <- pca$rotation
121 |     }
122 | 
123 |     choices <- pmin(choices, ncol(u))
124 |     df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale,
125 |                                 FUN = "*"))
126 |     v <- sweep(v, 2, d^var.scale, FUN = "*")
127 |     df.v <- as.data.frame(v[, choices])
128 |     names(df.u) <- c("xvar", "yvar")
129 |     names(df.v) <- names(df.u)
130 |     if (pc.biplot) {
131 |       df.u <- df.u * nobs.factor
132 |     }
133 | 
134 |     r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2)) ^ (1/4)
135 |     v.scale <- rowSums(v^2)
136 |     df.v <- r * df.v / sqrt(max(v.scale))
137 |     if (obs.scale == 0) {
138 |       u.axis.labs <- paste("standardized PC", choices, sep = "")
139 |     } else {
140 |       u.axis.labs <- paste("PC", choices, sep = "")
141 |     }
142 |     u.axis.labs <- paste(u.axis.labs, sprintf("(%0.1f%% explained var.)",
143 |                                               100 * pca$sdev[choices]^2 / sum(pca$sdev^2)))
144 |     if (!is.null(labels)) {
145 |       df.u$labels <- labels
146 |     }
147 |     if (!is.null(groups)) {
148 |       df.u$groups <- groups
149 |     }
150 | 
151 |     # additionally...
152 |     df.u$ids <- rownames(df.u)
153 |     if(!is.null(annotation)) {
154 |       df.u$geneNames <- annotation$gene_name[match(df.u$ids, rownames(annotation))]
155 |     } else {
156 |       df.u$geneNames <- df.u$ids
157 |     }
158 |     if (varname.abbrev) {
159 |       df.v$varname <- abbreviate(rownames(v))
160 |     } else {
161 |       df.v$varname <- rownames(v)
162 |     }
163 |     df.v$angle <- with(df.v, (180/pi) * atan(yvar/xvar))
164 |     df.v$hjust <- with(df.v, (1 - varname.adjust * sign(xvar)) / 2)
165 | 
166 |     if (returnData) {
167 |       return(df.u)
168 |     }
169 | 
170 |     # from here on the plot is assembled: df.u holds the genes (points/labels),
171 |     # df.v holds the samples (arrows)
172 |     g <- ggplot(data = df.u, aes_string(x = "xvar", y = "yvar")) + xlab(u.axis.labs[1]) +
173 |       ylab(u.axis.labs[2]) # + coord_equal() # REMOVED OTHERWISE BRUSH DOES NOT WORK PROPERLY
174 |     if (coordEqual) g <- g + coord_equal()
175 | 
176 |     if (!is.null(df.u$labels)) {
177 |       if (!is.null(df.u$groups)) {
178 |         g <- g + geom_text(aes(label = labels, color = groups),
179 |                            size = labels.size)
180 |       } else {
181 |         g <- g + geom_text(aes(label = labels), size = labels.size)
182 |       }
183 |     } else {
184 |       if (!is.null(df.u$groups)) {
185 |         g <- g + geom_point(aes(color = groups), size = point_size, alpha = alpha)
186 |       } else {
187 |         g <- g + geom_point(size = point_size, alpha = alpha)
188 |       }
189 |     }
190 | 
191 |     if (useRownamesAsLabels) {
192 |       g <- g + geom_text(aes_string(label = "geneNames"), size = labels.size, hjust = 0.25, vjust = -0.75)
193 |     }
194 | 
195 |     if (!is.null(df.u$groups) && ellipse) {
196 |       theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
197 |       unit_circle <- cbind(cos(theta), sin(theta)) # must not overwrite the logical argument circle, tested below
198 |       ell <- ddply(df.u, "groups", function(x) {
199 |         if (nrow(x) <= 2) {
200 |           return(NULL)
201 |         }
202 |         sigma <- var(cbind(x$xvar, x$yvar))
203 |         mu <- c(mean(x$xvar), mean(x$yvar))
204 |         ed <- sqrt(qchisq(ellipse.prob, df = 2))
205 |         data.frame(sweep(unit_circle %*% chol(sigma) * ed, 2,
206 |                          mu, FUN = "+"), groups = x$groups[1])
207 |       })
208 |       names(ell)[1:2] <- c("xvar", "yvar")
209 |       g <- g + geom_path(data = ell, aes(color = groups, group = groups))
210 |     }
211 |     # moved down to have the arrows drawn on top of the points and not vice versa
212 |     if (var.axes) {
213 |       if (circle) {
214 |         theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi,
215 |                                                   length = 50))
216 |         corr_circle <- data.frame(xvar = r * cos(theta), yvar = r *
217 |                                     sin(theta))
218 |         g <- g + geom_path(data = corr_circle, color = "steelblue",
219 |                            size = 1/2, alpha = 1/3)
220 |       }
221 |       df.v$scaleArrow <- scaleArrow # quick fix for mapping scaling of the arrows
222 |       arrowColors <-  factor(arrowColors, levels = unique(arrowColors))
223 |       df.v$arrowColors <- factor(arrowColors, levels = unique(arrowColors))
224 |       df.v$groupNames <- factor(groupNames, levels = unique(groupNames))
225 |       df.v$sca_x <- df.v$xvar * scaleArrow
226 |       df.v$sca_y <- df.v$yvar * scaleArrow
227 |       df.v$sta_x <- 0
228 |       df.v$sta_y <- 0
229 |       g <- g + geom_segment(data = df.v, aes_string(x = "sta_x", y = "sta_y", xend = "sca_x", yend = "sca_y", color = "arrowColors"),
230 |                             arrow = arrow(length = unit(1/2, "picas"))) + # labels in order of appearance, like the colors
231 |         scale_color_manual(values = levels(arrowColors), name = "Group", labels = levels(df.v$groupNames))
232 |     }
233 | 
234 |     if (var.axes) {
235 |       g <- g + geom_text(data = df.v, aes_string(label = "varname",
236 |                                                  x = "sca_x", y = "sca_y", # angle = angle,
237 |                                                  hjust = "hjust"),
238 |                          color = arrowColors, size = varname.size)
239 |     }
240 |     g <- g + theme_bw()
241 |     return(g)
242 |   }
243 | }
244 | 


--------------------------------------------------------------------------------
/R/get_annotation.R:
--------------------------------------------------------------------------------
  1 | #' Get an annotation data frame from biomaRt
  2 | #'
  3 | #' @param dds A [DESeq2::DESeqDataSet()] object
  4 | #' @param biomart_dataset A biomaRt dataset to use. To see the list, type
  5 | #' `mart = useMart('ensembl')`, followed by `listDatasets(mart)`.
  6 | #' @param idtype Character, the ID type of the genes as in the row names of
  7 | #' `dds`, to be used for the call to [biomaRt::getBM()]
  8 | #'
  9 | #' @return A data frame for ready use in `pcaExplorer`, retrieved from biomaRt.
 10 | #' @export
 11 | #'
 12 | #' @examples
 13 | #' library("airway")
 14 | #' data("airway", package = "airway")
 15 | #' airway
 16 | #' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
 17 | #'                                              colData = colData(airway),
 18 | #'                                              design = ~dex+cell)
 19 | #' \dontrun{
 20 | #' get_annotation(dds_airway, "hsapiens_gene_ensembl", "ensembl_gene_id")
 21 | #' }
 22 | get_annotation <- function(dds, biomart_dataset, idtype) {
 23 |   if (is.null(biomart_dataset)) {
 24 |     stop("Select a species to generate the corresponding annotation.
 25 | To obtain a list, type mart = useMart('ensembl'), followed by listDatasets(mart).")
 26 |   }
 27 |   gene_ids <- rownames(dds)
 28 | 
 29 |   # query the Ensembl mart for gene name and description of every gene in dds
 30 |   ensembl <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org",
 31 |                      dataset = biomart_dataset)
 32 |   bm_fields <- c(idtype, "external_gene_name", "description")
 33 |   hits <- getBM(attributes = bm_fields, filters = idtype, values = gene_ids,
 34 |                 mart = ensembl)
 35 | 
 36 |   # one row per gene of dds, in the same order; genes without a hit become NA rows
 37 |   matched <- hits[match(gene_ids, hits[, 1]), ]
 38 |   # row and column names chosen to be consistent with the other annotation function
 39 |   rownames(matched) <- gene_ids
 40 |   colnames(matched) <- c("gene_id", "gene_name", "description")
 41 |   matched
 42 | }
 43 | 
 44 | 
 45 | #' Get an annotation data frame from org db packages
 46 | #'
 47 | #' @param dds A [DESeq2::DESeqDataSet()] object
 48 | #' @param orgdb_species Character string, named as the `org.XX.eg.db`
 49 | #' package which should be available in Bioconductor
 50 | #' @param idtype Character, the ID type of the genes as in the row names of
 51 | #' `dds`, to be used for the call to [AnnotationDbi::mapIds()]
 52 | #' @param key_for_genenames Character, corresponding to the column name for the 
 53 | #' key in the orgDb package containing the official gene name (often called 
 54 | #' gene symbol). 
 55 | #' This parameter defaults to "SYMBOL", but can be adjusted in case the key is not
 56 | #' found in the annotation package (e.g. for `org.Sc.sgd.db`).
 57 | #'
 58 | #' @return A data frame for ready use in `pcaExplorer`, retrieved from the
 59 | #' org db packages
 60 | #' @export
 61 | #'
 62 | #' @examples
 63 | #' library("airway")
 64 | #' data("airway", package = "airway")
 65 | #' airway
 66 | #' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
 67 | #'                                              colData = colData(airway),
 68 | #'                                              design = ~dex+cell)
 69 | #' anno_df <- get_annotation_orgdb(dds_airway, "org.Hs.eg.db", "ENSEMBL")
 70 | #' head(anno_df)
 71 | get_annotation_orgdb <- function(dds, orgdb_species, idtype, key_for_genenames = "SYMBOL") {
 72 |   if (is.null(orgdb_species))
 73 |     stop("Select a species to generate the corresponding annotation")
 74 | 
 75 |   orgdbpkgs <- data.frame(
 76 |     pkg = c("org.Ag.eg.db", "org.At.tair.db", "org.Bt.eg.db", "org.Ce.eg.db", "org.Cf.eg.db", "org.Dm.eg.db", "org.Dr.eg.db", "org.EcK12.eg.db",
 77 |             "org.EcSakai.eg.db", "org.Gg.eg.db", "org.Hs.eg.db", "org.Hs.ipi.db", "org.Mm.eg.db", "org.Mmu.eg.db", "org.Pf.plasmo.db",
 78 |             "org.Pt.eg.db", "org.Rn.eg.db", "org.Sc.sgd.db", "org.Sco.eg.db", "org.Ss.eg.db", "org.Tgondii.eg.db", "org.Xl.eg.db"),
 79 |     descr = c("Anopheles", "Arabidopsis", "Bovine", "Worm", "Canine", "Fly", "Zebrafish", "E coli strain K12", "E coli strain Sakai", "Chicken",
 80 |               "Human", "org.Hs.ipi.db", "Mouse", "Rhesus", "Malaria", "Chimp", "Rat", "Yeast", "Streptomyces coelicolor", "Pig", "Toxoplasma gondii",
 81 |               "Xenopus"),
 82 |     stringsAsFactors = FALSE
 83 |   )
 84 | 
 85 |   if (!(orgdb_species %in% orgdbpkgs$pkg)) {
 86 |     message("The orgDB package is most likely not existent in Bioconductor")
 87 |     message("It should be one of ", paste(orgdbpkgs$pkg, collapse = ", "))
 88 |   }
 89 | 
 90 |   if (!require(orgdb_species, character.only = TRUE))
 91 |     stop("The package ", orgdb_species, " is not installed/available. Try installing it with BiocManager::install('", orgdb_species, "')")
 92 | 
 93 |   # look up the annotation object exported under the package name, once, without eval(parse())
 94 |   pkg <- getExportedValue(orgdb_species, orgdb_species)
 95 |   available_keys <- keytypes(pkg)
 96 |   if (!(idtype %in% available_keys)) {
 97 |     stop("The key you provided is not listed as key for the annotation package. Please try one of ",
 98 |          paste(available_keys, collapse = ","))
 99 |   }
100 | 
101 |   if (!(key_for_genenames %in% available_keys)) {
102 |     stop("The key specified for containing gene names is not included in the annotation package. Please try one of ",
103 |          paste(available_keys, collapse = ","))
104 |   }
105 | 
106 |   if (idtype == "SYMBOL")
107 |     warning("You probably do not need to convert symbol to symbol") # the performance would somehow be affected
108 | 
109 |   anns_vec <- mapIds(pkg, keys = rownames(dds), column = key_for_genenames,
110 |                      keytype = idtype)
111 | 
112 |   anns <- data.frame(
113 |     gene_id = rownames(dds),
114 |     gene_name = anns_vec,
115 |     stringsAsFactors = FALSE,
116 |     row.names = rownames(dds)
117 |   )
118 |   return(anns)
119 | }
120 | 


--------------------------------------------------------------------------------
/R/hi_loadings.R:
--------------------------------------------------------------------------------
 1 | #' Extract genes with highest loadings
 2 | #'
 3 | #' @param pcaobj A `prcomp` object
 4 | #' @param whichpc An integer number, corresponding to the principal component of
 5 | #' interest
 6 | #' @param topN Integer, number of genes with top and bottom loadings
 7 | #' @param exprTable A `matrix` object, e.g. the counts of a [DESeq2::DESeqDataSet()].
 8 | #' If not NULL, returns the counts matrix for the selected genes
 9 | #' @param annotation A `data.frame` object, with row.names as gene identifiers (e.g. ENSEMBL ids)
10 | #' and a column, `gene_name`, containing e.g. HGNC-based gene symbols
11 | #' @param title The title of the plot
12 | #'
13 | #' @return A base plot drawn with `barplot`, or the rows of `exprTable` for the selected genes, if `exprTable` is not null
14 | #'
15 | #' @examples
16 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
17 | #' rlt <- DESeq2::rlogTransformation(dds)
18 | #' pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
19 | #' hi_loadings(pcaobj, topN = 20)
20 | #' hi_loadings(pcaobj, topN = 10, exprTable = dds)
21 | #' hi_loadings(pcaobj, topN = 10, exprTable = counts(dds))
22 | #'
23 | #' @export
24 | hi_loadings <- function(pcaobj, whichpc = 1, topN = 10, exprTable = NULL,
25 |                         annotation = NULL, title="Top/bottom loadings") {
26 |   if (whichpc < 1)
27 |     stop("Use a positive integer value for the principal component to select")
28 |   if (whichpc > ncol(pcaobj$rotation)) # one column of loadings per available PC
29 |     stop("You can not explore a principal component that is not in the data")
30 | 
31 |   geneloadings_sorted <- sort(pcaobj$rotation[, whichpc])
32 |   geneloadings_extreme <- c(tail(geneloadings_sorted, topN), head(geneloadings_sorted, topN))
33 | 
34 |   if (!is.null(exprTable)) {
35 |     tab <- exprTable[names(geneloadings_extreme), ]
36 |     if (!is.null(annotation))
37 |       rownames(tab) <- annotation$gene_name[match(rownames(tab), rownames(annotation))]
38 |     return(tab)
39 |   }
40 | 
41 |   if (!is.null(annotation))
42 |     names(geneloadings_extreme) <- annotation$gene_name[match(names(geneloadings_extreme), rownames(annotation))]
43 | 
44 |   barplot(geneloadings_extreme, las = 2, col = c(rep("steelblue", topN), rep("coral", topN)),
45 |           main = paste0(title, " - PC", whichpc))
46 |   # mydf <- data.frame(loadings=geneloadings_extreme,
47 |   #                    geneID=names(geneloadings_extreme),
48 |   #                    mycol = c(rep("steelblue",topN),rep("coral",topN)))
49 |   # mydf$geneID <- factor(mydf$geneID, levels = mydf$geneID)
50 |   # p <- ggplot(mydf,aes_string(x="geneID",y="loadings")) + geom_col(aes_string(fill = "mycol")) + theme_bw() +
51 |   #   theme(axis.text.x=element_text(angle = 90, vjust = 0.5)) + guides(fill = FALSE) +
52 |   #   ggtitle(paste0(title, " - PC", whichpc))
53 |   # p
54 | }
55 | 


--------------------------------------------------------------------------------
/R/makeds.R:
--------------------------------------------------------------------------------
 1 | #' Make a simulated DESeqDataSet for two or more experimental factors
 2 | #'
 3 | #' Constructs a simulated dataset of Negative Binomial data from different conditions.
 4 | #' The fold changes between the conditions can be adjusted with the `betaSD_condition`
 5 | #' and the `betaSD_tissue` arguments.
 6 | #'
 7 | #' This function is designed and inspired following the proposal of
 8 | #' [DESeq2::makeExampleDESeqDataSet()] from the `DESeq2` package. Credits are given
 9 | #' to Mike Love for the nice initial implementation
10 | #'
11 | #' @param n number of rows (genes)
12 | #' @param m number of columns (samples); even and odd values are both supported
13 | #' @param betaSD_condition the standard deviation for condition betas, i.e. beta ~ N(0,betaSD)
14 | #' @param betaSD_tissue the standard deviation for tissue betas, i.e. beta ~ N(0,betaSD)
15 | #' @param interceptMean the mean of the intercept betas (log2 scale)
16 | #' @param interceptSD the standard deviation of the intercept betas (log2 scale)
17 | #' @param dispMeanRel a function specifying the relationship of the dispersions on
18 | #' `2^trueIntercept`
19 | #' @param sizeFactors multiplicative factors for each sample
20 | #'
21 | #' @return a [DESeq2::DESeqDataSet()] with true dispersion,
22 | #' intercept for two factors (condition and tissue) and beta values in the
23 | #'  metadata columns.  Note that the true betas are provided on the log2 scale.
24 | #'
25 | #' @examples
26 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
27 | #' dds
28 | #' dds2 <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1, betaSD_tissue = 4)
29 | #' dds2
30 | #'
31 | #' @export
32 | makeExampleDESeqDataSet_multifac <- function(n = 1000,
33 |                                              m = 12,
34 |                                              betaSD_condition = 1,
35 |                                              betaSD_tissue = 3,
36 |                                              interceptMean = 4,
37 |                                              interceptSD = 2,
38 |                                              dispMeanRel = function(x) 4/x + 0.1,
39 |                                              sizeFactors = rep(1, m)) {
40 |   beta <- cbind(rnorm(n, interceptMean, interceptSD),
41 |                 rnorm(n, 0, betaSD_condition),
42 |                 rnorm(n, 0, betaSD_tissue)) # added a tissue covariate
43 | 
44 |   dispersion <- dispMeanRel(2^(beta[, 1]))
45 |   # samples per condition; the tissues are split within each condition, so that
46 |   # both covariates always have length m (the former m/4 split failed for odd m)
47 |   n_cond <- c(ceiling(m/2), floor(m/2))
48 |   colData <- S4Vectors::DataFrame(
49 |     condition = factor(rep(c("A", "B"), times = n_cond)),
50 |     tissue = factor(rep(c("t1", "t2", "t1", "t2"),
51 |                         times = c(ceiling(n_cond[1]/2), floor(n_cond[1]/2),
52 |                                   ceiling(n_cond[2]/2), floor(n_cond[2]/2))))
53 |   )
54 |   # design matrix with one column per column of beta: intercept, condition, tissue
55 |   x <- if (m > 1) {
56 |     model.matrix(~colData$condition + colData$tissue)
57 |   } else {
58 |     cbind(rep(1, m), rep(0, m), rep(0, m)) # single sample: intercept only
59 |   }
60 |   mu <- t(2^(x %*% t(beta)) * sizeFactors)
61 |   countData <- matrix(rnbinom(m * n, mu = mu, size = 1/dispersion),
62 |                       ncol = m)
63 |   mode(countData) <- "integer"
64 |   colnames(countData) <- paste("sample", seq_len(m), sep = "")
65 |   rowRanges <- GRanges("1", IRanges(start = (seq_len(n) - 1) * 100 +
66 |                                       1, width = 100))
67 |   names(rowRanges) <- paste0("gene", seq_len(n))
68 |   design <- as.formula(if (m > 1) "~ condition" else "~ 1", env = .GlobalEnv)
69 |   object <- DESeqDataSetFromMatrix(countData = countData, colData = colData,
70 |                                    design = design, rowRanges = rowRanges)
71 |   trueVals <- DataFrame(trueIntercept = beta[, 1],
72 |                         trueBeta_condition = beta[, 2],
73 |                         trueBeta_tissue = beta[, 3],
74 |                         trueDisp = dispersion)
75 |   mcols(trueVals) <- DataFrame(type = rep("input", ncol(trueVals)),
76 |                                description = c("simulated intercept values",
77 |                                                "simulated beta values for the condition",
78 |                                                "simulated beta values for the tissue",
79 |                                                "simulated dispersion values"))
80 |   mcols(object) <- cbind(mcols(object), trueVals)
81 |   return(object)
82 | }
83 | 


--------------------------------------------------------------------------------
/R/pair_corr.R:
--------------------------------------------------------------------------------
 1 | #' Pairwise scatter and correlation plot of counts
 2 | #'
 3 | #' @param df A data frame, containing the (raw/normalized/transformed) counts
 4 | #' @param log Logical, whether to convert the input values to log2 (with addition
 5 | #' of a pseudocount). Defaults to FALSE.
 6 | #' @param method Character string, one of `pearson` (default), `kendall`, or
 7 | #' `spearman` as in `cor`
 8 | #' @param use_subset Logical value. If TRUE, only 1000 values per sample will be used
 9 | #' to speed up the plotting operations.
10 | #'
11 | #' @return A plot with pairwise scatter plots and correlation coefficients
12 | #' @export
13 | #'
14 | #' @examples
15 | #' library("airway")
16 | #' data("airway", package = "airway")
17 | #' airway
18 | #' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
19 | #'                                              colData = colData(airway),
20 | #'                                              design = ~dex+cell)
21 | #' pair_corr(counts(dds_airway)[1:100, ]) # use just a subset for the example
22 | pair_corr <- function(df, log = FALSE, method = "pearson", use_subset = TRUE) {
23 |   if (log) {
24 |     df <- log2(1 + df)
25 |   }
26 | 
27 |   if (use_subset) {
28 |     # fixed seed, so that the same (at most 1000) rows are drawn on every call
29 |     set.seed(42)
30 |     df <- df[sample(seq_len(nrow(df)), min(nrow(df), 1000)), ]
31 |   }
32 |   # get min and max count values, to use one common axis range in all panels
33 |   rangeMin <- min(df)
34 |   rangeMax <- max(df)
35 | 
36 |   colorFunction <- colorRamp(c("black", "red"))
37 |   # colorFunction() expects values from 0 to 1.
38 |   zMatrix <- colorFunction(seq(0, 1, by = .01))
39 |   # zColors has 101 entries: entry k is the color for |r| = (k - 1) / 100
40 |   zColors <- sort(rgb(zMatrix[, 1], zMatrix[, 2], zMatrix[, 3], maxColorValue = 255))
41 |   labelSize <- 1
42 |   title <- "Pairwise Correlations"
43 |   # Modified from R pairs() documentation
44 |   panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
45 |     usr <- par("usr"); on.exit(par(usr = usr))
46 |     par(usr = c(0, 1, 0, 1))
47 |     r <- abs(cor(x, y, method = method))
48 |     txt <- format(c(r, 0.123456789), digits = digits)[1]
49 |     txt <- paste(prefix, txt, sep = "")
50 | 
51 |     # size of the text scales with r (larger text for larger r value)
52 |     cex.cor <- labelSize / strwidth(txt)
53 |     # color of the text scales with r (red is r = 1); the + 1 keeps the index within 1..101, also when r < 0.01
54 |     text(0.5, 0.5, txt, cex = cex.cor * r * 0.7, col = zColors[round(r * 100) + 1])
55 |   }
56 |   # upper panels show the correlation text, all other panels keep the default scatter plot
57 | 
58 |   pairs(df, pch = 20, col = alpha("black", 0.4),
59 |         cex.labels = labelSize,
60 |         main = title,
61 |         upper.panel = panel.cor,
62 |         xlim = c(rangeMin, rangeMax),
63 |         ylim = c(rangeMin, rangeMax))
64 | }
65 | 


--------------------------------------------------------------------------------
/R/pcaExplorer-pkg.R:
--------------------------------------------------------------------------------
 1 | #' pcaExplorer: interactive visualization of RNA-seq data using a Principal Components approach
 2 | #'
 3 | #' pcaExplorer provides functionality for interactive visualization of RNA-seq datasets
 4 | #' based on Principal Components Analysis. The methods provided allow for quick information
 5 | #' extraction and effective data exploration. A Shiny application encapsulates the whole analysis.
 6 | #'
 7 | #' pcaExplorer provides functionality for interactive visualization of RNA-seq datasets
 8 | #' based on Principal Components Analysis. The methods provided allow for quick information
 9 | #' extraction and effective data exploration. A Shiny application encapsulates the whole analysis.
10 | #'
11 | #' @import DESeq2
12 | #' @import SummarizedExperiment
13 | #' @importFrom GenomicRanges GRanges
14 | #' @importFrom IRanges IRanges
15 | #' @importFrom S4Vectors DataFrame
16 | #' @importFrom genefilter rowVars
17 | #' @importFrom heatmaply heatmaply
18 | #' @importFrom plotly renderPlotly plotlyOutput
19 | #' @importFrom scales brewer_pal hue_pal
20 | #' @importFrom NMF aheatmap
21 | #' @import plyr
22 | #' @importFrom limma goana topGO
23 | #' @importFrom AnnotationDbi select Term mapIds
24 | #' @importMethodsFrom GOstats hyperGTest summary
25 | #' @import GO.db
26 | #' @import shiny
27 | #' @import shinydashboard
28 | #' @importFrom shinyBS bsTooltip bsCollapse bsCollapsePanel
29 | #' @import ggplot2
30 | #' @importFrom ggrepel geom_label_repel
31 | #' @importFrom DT datatable
32 | #' @importFrom shinyAce aceAutocomplete aceEditor getAceModes getAceThemes
33 | #' updateAceEditor
34 | #' @import threejs
35 | #' @import biomaRt
36 | #' @importFrom pheatmap pheatmap
37 | #' @importFrom base64enc dataURI
38 | #' @importFrom tidyr gather
39 | #' @import knitr
40 | #' @import rmarkdown
41 | #' @importFrom grDevices dev.off pdf rainbow colorRamp rgb
42 | #' @import methods
43 | #'
44 | #' @author
45 | #' Federico Marini \email{marinif@@uni-mainz.de}, 2016
46 | #'
47 | #' Maintainer: Federico Marini \email{marinif@@uni-mainz.de}
48 | #' @name pcaExplorer-pkg
49 | #' @docType package
50 | "_PACKAGE"
51 | 
52 | .onAttach <- function(libname, pkgname) {
53 |   # Startup banner: the installed version first, then the citation request.
54 |   packageStartupMessage(paste0(
55 |     "Welcome to pcaExplorer v", packageDescription("pcaExplorer", fields = "Version"), "\n\n",
56 |     "If you use pcaExplorer in your work, please cite:\n\n",
57 |     "pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components\n",
58 |     "Federico Marini, Harald Binder\n",
59 |     "BMC Bioinformatics, 2019 - https://doi.org/10.1186/s12859-019-2879-1\n"))
60 | }
61 | 


--------------------------------------------------------------------------------
/R/pcaplot.R:
--------------------------------------------------------------------------------
  1 | #' Sample PCA plot for transformed data
  2 | #'
  3 | #' Plots the results of PCA on a 2-dimensional space
  4 | #'
  5 | #' @param x A [DESeq2::DESeqTransform()] object, with data in `assay(x)`,
  6 | #' produced for example by either [DESeq2::rlog()] or
  7 | #' [DESeq2::varianceStabilizingTransformation()]/[DESeq2::vst()]
  8 | #' @param intgroup Interesting groups: a character vector of
  9 | #' names in `colData(x)` to use for grouping. Defaults to NULL, which would then 
 10 | #' select the first column of the `colData` slot
 11 | #' @param ntop Number of top genes to use for principal components,
 12 | #' selected by highest row variance
 13 | #' @param returnData logical, if TRUE returns a data.frame for further use, containing the
 14 | #' selected principal components and intgroup covariates for custom plotting
 15 | #' @param title The plot title
 16 | #' @param pcX The principal component to display on the x axis
 17 | #' @param pcY The principal component to display on the y axis
 18 | #' @param text_labels Logical, whether to display the labels with the sample identifiers
 19 | #' @param point_size Integer, the size of the points for the samples
 20 | #' @param ellipse Logical, whether to display the confidence ellipse for the selected groups
 21 | #' @param ellipse.prob Numeric, a value in the interval [0;1)
 22 | #'
 23 | #' @return An object created by `ggplot`, which can be assigned and further customized.
 24 | #'
 25 | #' @examples
 26 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
 27 | #' rlt <- DESeq2::rlogTransformation(dds)
 28 | #' pcaplot(rlt, ntop = 200)
 29 | #'
 30 | #' @export
 31 | pcaplot <- function(x, 
 32 |                     intgroup = NULL, 
 33 |                     ntop = 500, 
 34 |                     returnData = FALSE,
 35 |                     title = NULL,
 36 |                     pcX = 1, 
 37 |                     pcY = 2, 
 38 |                     text_labels = TRUE, 
 39 |                     point_size = 3,
 40 |                     ellipse = TRUE, 
 41 |                     ellipse.prob = 0.95) # pcX/pcY allow choosing which principal components are displayed
 42 | {
 43 |   rv <- rowVars(assay(x)) # per-gene variance across the samples
 44 |   select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] # indices of the (at most ntop) most variable genes
 45 |   pca <- prcomp(t(assay(x)[select, ])) # samples are the observations
 46 | 
 47 |   percentVar <- pca$sdev^2/sum(pca$sdev^2) # fraction of the total variance carried by each component
 48 |   
 49 |   if (is.null(intgroup)) {
 50 |     # gently fall back to the first colData element if it is there
 51 |     if (length(names(colData(x))) > 0) {
 52 |       intgroup <- names(colData(x))[1]
 53 |       message("Defaulting to '", intgroup, "' as the `intgroup` parameter...")
 54 |     } else {
 55 |       stop("No colData has been provided, therefore `intgroup` cannot be selected properly")
 56 |     }
 57 |   }
 58 | 
 59 |   if (!all(intgroup %in% names(colData(x)))) {
 60 |     stop("the argument 'intgroup' should specify columns of colData(x)")
 61 |   }
 62 |   intgroup.df <- as.data.frame(colData(x)[, intgroup, drop = FALSE])
 63 |   group <- factor(apply(intgroup.df, 1, paste, collapse = " : ")) # one combined label per sample, joining its intgroup values with " : "
 64 |   d <- data.frame(PC1 = pca$x[, pcX], PC2 = pca$x[, pcY], group = group,
 65 |                   intgroup.df, names = colnames(x))
 66 |   colnames(d)[1] <- paste0("PC", pcX) # rename the first two columns after the components actually selected
 67 |   colnames(d)[2] <- paste0("PC", pcY)
 68 | 
 69 |   if (returnData) {
 70 |     attr(d, "percentVar") <- percentVar[c(pcX, pcY)] # variance fractions of the two returned components
 71 |     return(d)
 72 |   }
 73 | 
 74 |   # label justification based on the sign of the x coordinate; kept in the data, although the geom_label_repel call below does not map it
 75 |   d$hjust <- ifelse((sign(d[, paste0("PC", pcX)]) == 1), 0.9, 0.1)# (1 + varname.adjust * sign(PC1))/2)
 76 | 
 77 |   g <- ggplot(data = d, aes_string(x = paste0("PC", pcX), y = paste0("PC", pcY), color = "group")) +
 78 |     geom_point(size = point_size) +
 79 |     xlab(paste0("PC", pcX, ": ", round(percentVar[pcX] * 100, digits = 2), "% variance")) +
 80 |     ylab(paste0("PC", pcY ,": ", round(percentVar[pcY] * 100, digits = 2), "% variance"))
 81 | 
 82 |   ## plot confidence ellipse
 83 |   # credit to vince vu, author of ggbiplot
 84 |   if (ellipse) {
 85 |     theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) # angles along the unit circle, forth and back
 86 |     circle <- cbind(cos(theta), sin(theta))
 87 | 
 88 |     ell <- ddply(d, "group", function(x) {
 89 |       if (nrow(x) <= 2) { # no ellipse for groups with at most 2 samples
 90 |         return(NULL)
 91 |       }
 92 |       sigma <- var(cbind(x[[paste0("PC", pcX)]], x[[paste0("PC", pcY)]])) # 2 x 2 covariance of the group's coordinates
 93 |       mu <- c(mean(x[[paste0("PC", pcX)]]), mean(x[[paste0("PC", pcY)]])) # group centre
 94 |       ed <- sqrt(qchisq(ellipse.prob, df = 2)) # chi-square (df = 2) quantile sets the size of the ellipse for ellipse.prob
 95 |       data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'), # circle reshaped by the covariance, then shifted to the centre
 96 |                  groups = x$group[1])
 97 |     })
 98 |     # names(ell)[1:2] <- c('xvar', 'yvar')
 99 |     if (nrow(ell) > 0) { # ell has no rows when no group had more than 2 samples
100 |       g <- g + geom_path(data = ell, aes_string(x = "X1", y = "X2", color = "groups", group = "groups"))
101 |     }
102 |   }
103 | 
104 |   if (text_labels)
105 |     g <- g + geom_label_repel(mapping = aes_string(label = "names", fill = "group"),
106 |                               color = "white", show.legend = TRUE)
107 | 
108 |   plot_title <- paste0("PCA plot - top ", length(select), " variable genes") # default title, used when title is NULL
109 |   if (!is.null(title)) {
110 |     g <- g + ggtitle(title)
111 |   } else {
112 |     g <- g + ggtitle(plot_title)
113 |   }
114 |   g <- g + theme_bw()
115 |   # as in http://www.huber.embl.de/msmb/Chap-Graphics.html
116 |   # "well-made PCA plots usually have a width that’s larger than the height"
117 |   g <- g + coord_fixed() # same unit length on both axes
118 |   g
119 | }
120 | 
121 | 
122 | #' Scree plot of the PCA on the samples
123 | #'
124 | #' Produces a scree plot for investigating the proportion of explained variance, or
125 | #' alternatively the cumulative value
126 | #'
127 | #' @param obj A `prcomp` object
128 | #' @param type Display absolute proportions or cumulative proportion. Possible values:
129 | #' "pev" or "cev"
130 | #' @param pc_nr How many principal components to display max
131 | #' @param title Title of the plot
132 | #'
133 | #' @return An object created by `ggplot`, which can be assigned and further customized.
134 | #'
135 | #' @examples
136 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
137 | #' rlt <- DESeq2::rlogTransformation(dds)
138 | #' pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
139 | #' pcascree(pcaobj, type = "pev")
140 | #' pcascree(pcaobj, type = "cev", title = "Cumulative explained proportion of variance - Test dataset")
141 | #'
142 | #' @export
143 | pcascree <- function(obj, type = c("pev", "cev"),pc_nr=NULL,title=NULL)
144 | {
145 |   type <- match.arg(type)
146 |   # variance of each principal component
147 |   d <- obj$sdev^2
148 |   yvar <- switch(type, pev = d/sum(d), cev = cumsum(d)/sum(d))
149 |   yvar.lab <- switch(type, pev = "proportion of explained variance",
150 |                      cev = "cumulative proportion of explained variance")
151 | 
152 |   # show all components unless pc_nr asks for fewer; the value is capped at
153 |   # length(d), since indexing yvar beyond it would only append NA entries
154 |   if (!is.null(pc_nr)) {
155 |     colsize <- min(pc_nr, length(d))
156 |   } else {
157 |     colsize <- length(d)
158 |   }
159 |   yvar <- yvar[seq_len(colsize)]
160 |   pc_df <- data.frame(PC_count = seq_len(colsize), var = yvar)
161 | 
162 |   if(type=="pev"){
163 |     p <- ggplot(pc_df, aes_string(x = "PC_count", y = "var")) + geom_bar(stat = "identity")
164 |     p <- p + scale_x_continuous(breaks = 1:length(d))
165 |     p <- p + ylab(yvar.lab) + xlab("principal components")
166 |     # "pev": one bar per component
167 |   } else {
168 |     p <- ggplot(pc_df, aes_string(x = "PC_count", y = "var")) + geom_point() +
169 |       geom_path() + scale_x_continuous(breaks = 1:length(d))
170 |     p <- p + ylab(yvar.lab) + xlab("principal components") + ylim(0,max(pc_df$var))
171 |     # "cev": connected points, with the y axis starting at 0
172 |   }
173 |   if(!is.null(title)) p <- p + ggtitle(title)
174 |   p <- p + theme_bw()
175 |   p
176 | }
177 | 
178 | 
179 | #' Sample PCA plot for transformed data
180 | #'
181 | #' Plots the results of PCA on a 3-dimensional space, interactively
182 | #'
183 | #' @param x A [DESeq2::DESeqTransform()] object, with data in `assay(x)`,
184 | #' produced for example by either [DESeq2::rlog()] or
185 | #' [DESeq2::varianceStabilizingTransformation()]
186 | #' @param intgroup Interesting groups: a character vector of
187 | #' names in `colData(x)` to use for grouping
188 | #' @param ntop Number of top genes to use for principal components,
189 | #' selected by highest row variance
190 | #' @param returnData logical, if TRUE returns a data.frame for further use, containing the
191 | #' selected principal components and intgroup covariates for custom plotting
192 | #' @param title The plot title
193 | #' @param pcX The principal component to display on the x axis
194 | #' @param pcY The principal component to display on the y axis
195 | #' @param pcZ The principal component to display on the z axis
196 | #' @param text_labels Logical, whether to display the labels with the sample identifiers
197 | #' @param point_size Integer, the size of the points for the samples
198 | #'
199 | #' @return A html-based visualization of the 3d PCA plot
200 | #' @export
201 | #'
202 | #' @examples
203 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
204 | #' rlt <- DESeq2::rlogTransformation(dds)
205 | #' pcaplot3d(rlt, ntop = 200)
206 | pcaplot3d <- function (x, intgroup = "condition", ntop = 500, returnData = FALSE,title=NULL,
207 |                      pcX = 1, pcY = 2, pcZ = 3, text_labels=TRUE,point_size=3)
208 | {
209 |   # PCA on the (at most ntop) genes with the highest variance across the samples
210 |   rv <- rowVars(assay(x))
211 |   select <- order(rv, decreasing = TRUE)[seq_len(min(ntop,length(rv)))]
212 |   pca <- prcomp(t(assay(x)[select, ]))
213 |   percentVar <- pca$sdev^2/sum(pca$sdev^2)
214 | 
215 |   if (!all(intgroup %in% names(colData(x)))) {
216 |     stop("the argument 'intgroup' should specify columns of colData(x)")
217 |   }
218 |   intgroup.df <- as.data.frame(colData(x)[, intgroup, drop = FALSE])
219 |   group <- factor(apply(intgroup.df, 1, paste, collapse = " : "))
220 |   d <- data.frame(PC1 = pca$x[, pcX], PC2 = pca$x[, pcY], PC3 = pca$x[,pcZ],
221 |                   group = group,
222 |                   intgroup.df, names = colnames(x))
223 |   colnames(d)[1] <- paste0("PC", pcX, ": ", round(percentVar[pcX] * 100,digits = 2), "% variance")
224 |   colnames(d)[2] <- paste0("PC", pcY, ": ", round(percentVar[pcY] * 100,digits = 2), "% variance")
225 |   colnames(d)[3] <- paste0("PC", pcZ, ": ", round(percentVar[pcZ] * 100,digits = 2), "% variance")
226 |   # the attribute reports the components actually selected, not always the first three
227 |   if (returnData) {
228 |     attr(d, "percentVar") <- percentVar[c(pcX, pcY, pcZ)]
229 |     return(d)
230 |   }
231 |   # NOTE: title, text_labels and point_size are accepted, but not used by the code below
232 |   nrgroups <- length(levels(d$group))
233 |   cols <- hue_pal()(nrgroups)[d$group]
234 | 
235 |   scatterplot3js(as.matrix(d[, 1:3]),
236 |                  color = cols,
237 |                  # renderer = "canvas",
238 |                  size = 1.3,
239 |                  labels = rownames(d), label.margin ="50px 50px 50px 50px")
240 | }
241 | 


--------------------------------------------------------------------------------
/R/profile_explore.R:
--------------------------------------------------------------------------------
 1 | #' Extract and plot the expression profile of genes
 2 | #'
 3 | #' @param se A [DESeq2::DESeqDataSet()] object, or a
 4 | #' [DESeq2::DESeqTransform()] object.
 5 | #' @param genelist An array of characters, including the names of the genes of
 6 | #' interest of which the profile is to be plotted
 7 | #' @param intgroup A factor, needs to be in the `colnames` of `colData(se)`
 8 | #' @param plotZ Logical, whether to plot the scaled expression values. Defaults to
 9 | #' `FALSE`
10 | #'
11 | #' @return A plot of the expression profile for the genes
12 | #' @export
13 | #'
14 | #' @examples
15 | #' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
16 | #' rlt <- DESeq2::rlogTransformation(dds)
17 | #' geneprofiler(rlt, paste0("gene", sample(1:1000, 20)))
18 | #' geneprofiler(rlt, paste0("gene", sample(1:1000, 20)), plotZ = TRUE)
19 | geneprofiler <- function(se, genelist = NULL, intgroup = "condition", plotZ = FALSE) {
20 |   if (is.null(genelist))
21 |     stop("Provide at least one gene to the genelist parameter")
22 |   # check that at least one gene is found
23 |   genelist <- unique(genelist)
24 |   message("You provided ", length(genelist), " unique identifiers")
25 |   inthedata <- genelist %in% rownames(se)
26 |   if (sum(inthedata) == 0)
27 |     stop("None of the provided genes were found in the experiment data")
28 |   message(sum(inthedata), " out of ", length(genelist), " provided genes were found in the data")
29 |   # keep only the identifiers that are present, otherwise the subsetting below would fail
30 |   genelist <- genelist[inthedata]
31 |   # samples in rows, genes in columns; drop = FALSE keeps this layout for a single gene
32 |   mydata <- as.data.frame(t(assay(se)[genelist, , drop = FALSE]))
33 |   # resort the order of the rows according to the groups that are selected
34 |   mygroups <- interaction(as.data.frame(colData(se)[intgroup]))
35 |   mydata <- mydata[order(mygroups), , drop = FALSE]
36 |   if (plotZ) {
37 |     # remove 0 variance genes
38 |     rv <- rowVars(t(mydata))
39 |     mydata <- mydata[, rv > 0, drop = FALSE]
40 |     # z-score each gene (column) across the samples; was NMF:::scale_mat(mydata,"col")
41 |     mydata <- scale(mydata, center = TRUE, scale=TRUE)
42 |   }
43 |   mylabels <- colnames(se)[order(mygroups)]
44 |   mycols <- scales::hue_pal()(length(levels(mygroups)))[sort(mygroups)]
45 | 
46 |   # NOTE: the enlarged bottom margin (room for the rotated sample labels) is not reset afterwards
47 |   par(mar=c(7.1, 4.1, 2.1, 2.1))
48 |   # the first gene opens the plot; the y range spans all the selected genes
49 |   plot(mydata[, 1], type = "l", xaxt = "n", las = 2, ylim = range(mydata), xlab = "", ylab = ifelse(plotZ, "scaled expression value", "expression value"))
50 |   Map(function(x, y, z)
51 |     axis(1, at = x, col.axis = y, labels = z, lwd = 0, las = 2),
52 |     1:nrow(mydata),
53 |     mycols,
54 |     mylabels
55 |   )
56 |   axis(1, at = 1:nrow(mydata), labels = FALSE)
57 |   # add every remaining gene, the last column included (no iteration if only one gene is left)
58 |   for (i in seq_len(ncol(mydata))[-1]) {
59 |     lines(mydata[, i], type = "l", xaxt = "n", las = 2, col = i)
60 |   }
61 |   ## TODO: if desired, plot only the avg pro group -> maybe as boxplot?
62 | }
63 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | #' @importFrom shiny addResourcePath
 2 | 
 3 | .onLoad <- function(libname, pkgname) {
 4 |   # Register the "www" directory shipped with shinyBS under the "sbs" resource prefix.
 5 |   # Disabled logo link: shiny::addResourcePath("pcaExplorer", system.file("www", package="pcaExplorer"))
 6 |   shinybs_www <- system.file("www", package = "shinyBS")
 7 |   shiny::addResourcePath(prefix = "sbs", directoryPath = shinybs_www)
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="man/figures/pcaExplorer.png" align="right" alt="" width="120" />
  2 | 
  3 | # pcaExplorer - Interactive exploration of Principal Components of Samples and Genes in RNA-seq data 
  4 | 
  5 | <a href="https://doi.org/10.1186/s12859-019-2879-1"><img src="https://img.shields.io/badge/doi-pcaExplorer-blue.svg"></a>
  6 | <a href="https://doi.org/10.1002/cpz1.411"><img src="https://img.shields.io/badge/doi-pcaExplorer_protocol-blue.svg"></a>
  7 | 
  8 | ## Software status
  9 | 
 10 | [![R build status](https://github.com/federicomarini/pcaExplorer/workflows/R-CMD-check/badge.svg)](https://github.com/federicomarini/pcaExplorer/actions)
 11 | 
 12 | | Platforms |  OS  | R CMD check |
 13 | |:----------------:|:----------------:|:----------------:|
 14 | | Bioc ([_devel_](http://bioconductor.org/packages/devel/bioc/html/pcaExplorer.html)) | Multiple | [![Bioconductor-devel Build Status](http://bioconductor.org/shields/build/devel/bioc/pcaExplorer.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/pcaExplorer) |
 15 | | Bioc ([_release_](http://bioconductor.org/packages/release/bioc/html/pcaExplorer.html)) | Multiple | [![Bioconductor-release Build Status](http://bioconductor.org/shields/build/release/bioc/pcaExplorer.svg)](http://bioconductor.org/checkResults/release/bioc-LATEST/pcaExplorer) |
 16 | 
 17 | [![codecov.io](https://codecov.io/github/federicomarini/pcaExplorer/coverage.svg?branch=master)](https://codecov.io/github/federicomarini/pcaExplorer?branch=master)
 18 | 
 19 | `pcaExplorer` is a Bioconductor package containing a Shiny application for
 20 | analyzing expression data in different conditions and experimental factors. 
 21 | 
 22 | It is a general-purpose interactive companion tool for RNA-seq analysis, which 
 23 | guides the user in exploring the Principal Components of the data under inspection.
 24 | 
 25 | `pcaExplorer` provides tools and functionality to detect outlier samples, genes
 26 | that show particular patterns, and additionally provides a functional interpretation of 
 27 | the principal components for further quality assessment and hypothesis generation
 28 | on the input data. 
 29 | 
 30 | Moreover, a novel visualization approach is presented to simultaneously assess 
 31 | the effect of more than one experimental factor on the expression levels.
 32 | 
 33 | Thanks to its interactive/reactive design, it aims to become a practical
 34 | companion to any RNA-seq dataset analysis, making exploratory data analysis 
 35 | accessible also to the bench biologist, while providing additional insight also
 36 | for the experienced data analyst.
 37 | 
 38 | ## Installation
 39 | 
 40 | `pcaExplorer` can be easily installed using `BiocManager::install()`:
 41 | 
 42 | ``` r
 43 | if (!requireNamespace("BiocManager", quietly=TRUE))
 44 |     install.packages("BiocManager")
 45 | BiocManager::install("pcaExplorer")
 46 | ```
 47 | 
 48 | or, optionally, 
 49 | 
 50 | ``` r
 51 | BiocManager::install("federicomarini/pcaExplorer")
 52 | # or alternatively...
 53 | devtools::install_github("federicomarini/pcaExplorer")
 54 | ```
 55 | 
 56 | ## Quick start
 57 | 
 58 | This command loads the `pcaExplorer` package
 59 | 
 60 | ``` r
 61 | library("pcaExplorer")
 62 | ```
 63 | 
 64 | The `pcaExplorer` app can be launched in different modes:
 65 | 
 66 | - `pcaExplorer(dds = dds, dst = dst)`, where `dds` is a `DESeqDataSet` object and `dst` is a `DESeqTransform`
 67 | object, which were created during an existing session for the analysis of an RNA-seq
 68 | dataset with the `DESeq2` package
 69 | 
 70 | - `pcaExplorer(dds = dds)`, where `dds` is a `DESeqDataSet` object. The `dst` object is automatically 
 71 | computed upon launch.
 72 | 
 73 | - `pcaExplorer(countmatrix = countmatrix, coldata = coldata)`, where `countmatrix` is a count matrix, generated
 74 | after assigning reads to features such as genes via tools such as `HTSeq-count` or `featureCounts`, and `coldata`
 75 | is a data frame containing the experimental covariates of the experiments, such as condition, tissue, cell line,
 76 | run batch and so on.
 77 | 
 78 | - `pcaExplorer()`, and then subsequently uploading the count matrix and the covariates data frame through the 
 79 | user interface. These files need to be formatted as tab separated files, which is a common format for storing
 80 | such count values.
 81 | 
 82 | Additional parameters and objects that can be provided to the main `pcaExplorer` function are:
 83 | 
 84 | - `pca2go`, which is an object created by the `pca2go` function, which scans the genes with high loadings in 
 85 | each principal component and each direction, and looks for functions (such as GO Biological Processes) that 
 86 | are enriched above the background. The offline `pca2go` function is based on the routines and algorithms of 
 87 | the `topGO` package, but as an alternative, this object can be computed live during the execution of the app
 88 | exploiting the `goana` function, provided by the `limma` package. Although this likely provides more general
 89 | (and probably less informative) functions, it is a good compromise for obtaining a further data interpretation.
 90 | 
 91 | - `annotation`, a data frame object, with `row.names` as gene identifiers (e.g. ENSEMBL ids) identical to the 
 92 | row names of the count matrix or `dds` object, and an extra column `gene_name`, containing e.g. HGNC-based 
 93 | gene symbols. This can be used for making information extraction easier, as ENSEMBL ids (a usual choice when
 94 | assigning reads to features) do not provide an immediate readout for which gene they refer to. This can be
 95 | either passed as a parameter when launching the app, or also uploaded as a tab separated text file.
 96 | 
 97 | ## Contact
 98 | 
 99 | For additional details regarding the functions of **pcaExplorer**, please consult the documentation or 
100 | write an email to marinif@uni-mainz.de. 
101 | 
102 | ## Code of Conduct
103 | 
104 | Please note that the pcaExplorer project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.
105 | 
106 | ### Bug reports/Issues/New features
107 | 
108 | Please use https://github.com/federicomarini/pcaExplorer/issues for reporting bugs, issues or for 
109 | suggesting new features to be implemented. 
110 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | authors:  # per-author settings
 2 |   Federico Marini:
 3 |     href: https://federicomarini.github.io
 4 | 
 5 | template:  # site theme settings
 6 |   params:
 7 |     bootswatch: cosmo
 8 |     
 9 | navbar:
10 |   structure:
11 |     left:  # entries listed for the left side of the navbar
12 |     - home
13 |     - intro
14 |     - reference
15 |     - articles
16 |     - tutorials
17 |     - news
18 |     right: github
19 |   components:  # only home, reference, intro and github are defined here; articles, tutorials and news presumably rely on pkgdown defaults (confirm)
20 |     home:
21 |       icon: fa-home fa-lg
22 |       href: index.html
23 |     reference:
24 |       text: Reference
25 |       href: reference/index.html
26 |     intro:
27 |       text: User guide
28 |       href: articles/pcaExplorer.html
29 |     github:
30 |       icon: fa-github fa-lg
31 |       href: https://github.com/federicomarini/pcaExplorer
32 | 
33 | reference:  # sections of the reference index, each with the topics it lists
34 | - title: The pcaExplorer package
35 |   desc: Main functions and info on the `pcaExplorer` package
36 |   contents:
37 |   - '`pcaExplorer-pkg`'
38 |   - '`pcaExplorer`'
39 | - title: Working with Principal Components
40 |   desc: Functions for working efficiently with Principal Components
41 |   contents:
42 |   - '`pcaplot`'
43 |   - '`pcaplot3d`'
44 |   - '`pcascree`'
45 |   - '`correlatePCs`'
46 |   - '`genespca`'
47 |   - '`hi_loadings`'
48 |   - '`plotPCcorrs`'
49 | - title: Annotation and Functional interpretation
50 |   desc: Functions for annotating and performing functional interpretation
51 |   contents:
52 |   - '`get_annotation`'
53 |   - '`get_annotation_orgdb`'
54 |   - '`topGOtable`'
55 |   - '`pca2go`'
56 |   - '`limmaquickpca2go`'
57 | - title: Expression data exploration
58 |   desc: ~  # ~ is YAML null, i.e. no description for this section
59 |   contents:
60 |   - '`pair_corr`'
61 |   - '`distro_expr`'
62 |   - '`geneprofiler`'
63 |   - '`makeExampleDESeqDataSet_multifac`'
64 | - title: Deprecated functions in pcaExplorer
65 |   desc: ~  # no description for this section either
66 |   contents:
67 |   - '`deprecated`'
68 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("Please cite the articles below for the 'pcaExplorer' software itself, or its usage in combined workflows with the 'ideal' or 'GeneTonic' software packages:")
 2 | 
 3 | bibentry(  # article describing the pcaExplorer package itself
 4 |   bibtype      = "Article",
 5 |   title        = "pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components",
 6 |   journal      = "BMC Bioinformatics",
 7 |   author       = c(person("Federico", "Marini"),
 8 |                    person("Harald", "Binder")),
 9 |   volume       = "20",
10 |   number       = "1",
11 |   pages        = "331",
12 |   year         = "2019",
13 |   month        = "Jun",
14 |   day          = "13",
15 |   doi          = "10.1186/s12859-019-2879-1",
16 |   url          = "https://bioconductor.org/packages/pcaExplorer/",
17 |   # plain-text rendering of the same reference; keep in sync with the fields above
18 |   textVersion  =
19 |   paste("Federico Marini, Harald Binder (2019).",
20 |         "pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.",
21 |         "BMC Bioinformatics, 20 (1), 331,",
22 |         "<doi:10.1186/s12859-019-2879-1>, <doi:10.18129/B9.bioc.pcaExplorer>.")
23 | )
24 | 
25 | bibentry(  # protocol article covering the combined pcaExplorer/ideal/GeneTonic workflow
26 |   bibtype      = "Article",
27 |   title        = "Interactive and Reproducible Workflows for Exploring and Modeling RNA-seq Data with pcaExplorer, ideal, and GeneTonic",
28 |   journal      = "Current Protocols",
29 |   author       = c(
30 |                    person("Annekathrin", "Ludt"),
31 |                    person("Arsenij", "Ustjanzew"),
32 |                    person("Harald", "Binder"),
33 |                    person("Konstantin", "Strauch"),
34 |                    person("Federico", "Marini")
35 |                    ),
36 |   volume       = "2",
37 |   number       = "4",
38 |   pages        = "e411",
39 |   year         = "2022",
40 |   month        = "Apr",
41 |   doi          = "10.1002/cpz1.411",
42 |   # plain-text rendering of the same reference; keep in sync with the fields above
43 |   textVersion  =
44 |   paste("Annekathrin Ludt, Arsenij Ustjanzew, Harald Binder, Konstantin Strauch, Federico Marini (2022).",
45 |         "Interactive and Reproducible Workflows for Exploring and Modeling RNA-seq Data with pcaExplorer, ideal, and GeneTonic.",
46 |         "Current Protocols, 2 (4), e411,",
47 |         "<doi:10.1002/cpz1.411>.")
48 | )
49 | 


--------------------------------------------------------------------------------
/inst/extdata/about.md:
--------------------------------------------------------------------------------
 1 | # About pcaExplorer
 2 | 
 3 | `pcaExplorer` is a Bioconductor package containing a Shiny application for
 4 | analyzing expression data in different conditions and experimental factors. 
 5 | 
 6 | `pcaExplorer` guides the user in exploring the Principal Components of the data, 
 7 | providing tools and functionality to detect outlier samples, genes that show 
 8 | particular patterns, and additionally provides a functional interpretation of 
 9 | the principal components for further quality assessment and hypothesis generation
10 | on the input data. 
11 | 
12 | Thanks to its interactive/reactive design, it is designed to become a practical
13 | companion to any RNA-seq dataset analysis, making exploratory data analysis 
14 | accessible also to the bench biologist, while providing additional insight also
15 | for the experienced data analyst.
16 | 
17 | Moreover, `pcaExplorer` supports reproducible research with state saving and automated 
18 | report generation. 
19 | 
20 | `pcaExplorer` was developed in the Bioinformatics Division led by Harald Binder 
21 | at the IMBEI (Institut für Medizinische Biometrie, Epidemiologie und Informatik) 
22 | in the University Medical Center of the Johannes Gutenberg University Mainz.
23 | 
24 | ## Developers
25 | 
26 | <a href="mailto:marinif@uni-mainz.de?subject=[pcaExplorer_feedback]" class="btn btn-primary">Federico Marini</a>
27 | 
28 | ## Code
29 | 
30 | All code for `pcaExplorer` is available on 
31 | <a href="https://github.com/federicomarini/pcaExplorer" target="_blank">GitHub</a>.
32 | 
33 | # Citation info
34 | 
35 | If you use `pcaExplorer` for your analysis, please cite it as here below:
36 | 
37 | ```r
38 | citation("pcaExplorer")
39 | ```
40 | 
41 | ```
42 | To cite package ‘pcaExplorer’ in publications use:
43 | 
44 |   Federico Marini (2018). pcaExplorer: Interactive Visualization of RNA-seq Data Using
45 |   a Principal Components Approach. R package version 2.6.0.
46 |   https://github.com/federicomarini/pcaExplorer
47 | 
48 | A BibTeX entry for LaTeX users is
49 | 
50 |   @Manual{,
51 |     title = {pcaExplorer: Interactive Visualization of RNA-seq Data Using a Principal Components Approach},
52 |     author = {Federico Marini},
53 |     year = {2018},
54 |     note = {R package version 2.6.0},
55 |     url = {https://github.com/federicomarini/pcaExplorer},
56 |   }
57 | ```
58 | 


--------------------------------------------------------------------------------
/inst/extdata/datainput.md:
--------------------------------------------------------------------------------
1 | - `pcaExplorer` accepts only **text files, either comma, semicolon, or tab-separated**. This avoids proprietary formats such as Excel, which can also inadvertently convert your gene identifiers to dates or floating point numbers (SEPT2, see more [here](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1044-7))
2 | - by default, tab-separated values are expected, but you can change this for each input file with the related radio buttons
3 | - for **count data**: features are in the rows, samples are stored in the columns
4 | - for the **metadata**: each row stores the relevant experimental variables of each sample
5 | - for the **gene annotation**: each row relates to a feature (i.e. a gene), with its id in the row names, and at least a column called gene_name, containing a more readable format (e.g. HGNC gene symbols)
6 | - it is important to have **headers**, as they are used in constructing the `dds` and `dst` objects and checking their validity: for example, the column names of the count matrix have to be identical to the row names of the sample metadata. This small constraint guarantees a higher degree of robustness for all the subsequent steps
7 | - if your data is stored in Excel sheets, export them to csv format (specify the correct separator accordingly when loading the data - you can always open them in a text editor to check)
8 | - general reminder: if you launch `pcaExplorer` directly from the terminal/RStudio IDE, you can pre-compute the objects and pass them as parameters, which can speed up the start of the app
9 | 


--------------------------------------------------------------------------------
/inst/extdata/instructions.md:
--------------------------------------------------------------------------------
  1 | *This information is also contained in the `pcaExplorer` package vignette. For more
  2 | information on the functions of the `pcaExplorer` package, please refer to the
  3 | vignette and/or the documentation.*
  4 | 
  5 | ## Getting started
  6 | 
  7 | `pcaExplorer` is an R package distributed as part of the [Bioconductor](http://bioconductor.org)
  8 | project. To install the package, start R and enter:
  9 | 
 10 | ```r
 11 | if (!requireNamespace("BiocManager", quietly=TRUE))
 12 |     install.packages("BiocManager")
 13 | BiocManager::install("pcaExplorer")
 14 | ```
 15 | 
 16 | If you prefer, you can install and use the development version, which can be 
 17 | retrieved via Github (https://github.com/federicomarini/pcaExplorer). To do so, use
 18 | 
 19 | ```r
 20 | library("devtools")
 21 | install_github("federicomarini/pcaExplorer")
 22 | ```
 23 | 
 24 | Once `pcaExplorer` is installed, it can be loaded by the following command.
 25 | 
 26 | ```r
 27 | library("pcaExplorer")
 28 | ```
 29 | 
 30 | ## Introduction
 31 | 
 32 | `pcaExplorer` is a Bioconductor package containing a Shiny application for
 33 | analyzing expression data in different conditions and experimental factors. 
 34 | 
 35 | It is a general-purpose interactive companion tool for RNA-seq analysis, which 
 36 | guides the user in exploring the Principal Components of the data under inspection.
 37 | 
 38 | `pcaExplorer` provides tools and functionality to detect outlier samples, genes
 39 | that show particular patterns, and additionally provides a functional interpretation of 
 40 | the principal components for further quality assessment and hypothesis generation
 41 | on the input data. 
 42 | 
 43 | Moreover, a novel visualization approach is presented to simultaneously assess 
 44 | the effect of more than one experimental factor on the expression levels.
 45 | 
 46 | Thanks to its interactive/reactive design, it is designed to become a practical
 47 | companion to any RNA-seq dataset analysis, making exploratory data analysis 
 48 | accessible also to the bench biologist, while providing additional insight also
 49 | for the experienced data analyst.
 50 | 
 51 | Starting from development version 1.1.3, `pcaExplorer` supports reproducible 
 52 | research with state saving and automated report generation. 
 53 | 
 54 | ## Citation info
 55 | 
 56 | If you use `pcaExplorer` for your analysis, please cite it as here below:
 57 | 
 58 | ```r
 59 | citation("pcaExplorer")
 60 | ```
 61 | 
 62 | ```
 63 | ## 
 64 | ## To cite package 'pcaExplorer' in publications use:
 65 | ## 
 66 | ##   Federico Marini (2016). pcaExplorer: Interactive Visualization
 67 | ##   of RNA-seq Data Using a Principal Components Approach. R package
 68 | ##   version 1.1.3. https://github.com/federicomarini/pcaExplorer
 69 | ## 
 70 | ## A BibTeX entry for LaTeX users is
 71 | ## 
 72 | ##   @Manual{,
 73 | ##     title = {pcaExplorer: Interactive Visualization of RNA-seq Data Using a Principal Components Approach},
 74 | ##     author = {Federico Marini},
 75 | ##     year = {2016},
 76 | ##     note = {R package version 1.1.3},
 77 | ##     url = {https://github.com/federicomarini/pcaExplorer},
 78 | ##   }
 79 | ```
 80 | 
 81 | ## Launching the application
 82 | 
 83 | After loading the package, the `pcaExplorer` app can be launched in different modes:
 84 | 
 85 | - `pcaExplorer(dds = dds, rlt = rlt)`, where `dds` is a `DESeqDataSet` object and `rlt` is a `DESeqTransform`
 86 | object, which were created during an existing session for the analysis of an RNA-seq
 87 | dataset with the `DESeq2` package
 88 | 
 89 | - `pcaExplorer(dds = dds)`, where `dds` is a `DESeqDataSet` object. The `rlt` object is automatically 
 90 | computed upon launch.
 91 | 
 92 | - `pcaExplorer(countmatrix = countmatrix, coldata = coldata)`, where `countmatrix` is a count matrix, generated
 93 | after assigning reads to features such as genes via tools such as `HTSeq-count` or `featureCounts`, and `coldata`
 94 | is a data frame containing the experimental covariates of the experiments, such as condition, tissue, cell line,
 95 | run batch and so on.
 96 | 
 97 | - `pcaExplorer()`, and then subsequently uploading the count matrix and the covariates data frame through the 
 98 | user interface. These files need to be formatted as tab separated files, which is a common format for storing
 99 | such count values.
100 | 
101 | Additional parameters and objects that can be provided to the main `pcaExplorer` function are:
102 | 
103 | - `pca2go`, which is an object created by the `pca2go` function, which scans the genes with high loadings in 
104 | each principal component and each direction, and looks for functions (such as GO Biological Processes) that 
105 | are enriched above the background. The offline `pca2go` function is based on the routines and algorithms of 
106 | the `topGO` package, but as an alternative, this object can be computed live during the execution of the app
107 | exploiting the `goana` function, provided by the `limma` package. Although this likely provides more general
108 | (and probably less informative) functions, it is a good compromise for obtaining a further data interpretation.
109 | 
110 | - `annotation`, a data frame object, with `row.names` as gene identifiers (e.g. ENSEMBL ids) identical to the 
111 | row names of the count matrix or `dds` object, and an extra column `gene_name`, containing e.g. HGNC-based 
112 | gene symbols. This can be used for making information extraction easier, as ENSEMBL ids (a usual choice when
113 | assigning reads to features) do not provide an immediate readout for which gene they refer to. This can be
114 | either passed as a parameter when launching the app, or also uploaded as a tab separated text file. The package
115 | provides two functions, `get_annotation` and `get_annotation_orgdb`, as a convenient wrapper to obtain the updated
116 | annotation information, respectively from `biomaRt` or via the `org.XX.eg.db` packages.
117 | 
118 | ## The controls sidebar
119 | 
120 | Most of the input controls are located in the sidebar, some are as well in the individual tabs of the app.
121 | By changing one or more of the input parameters, the user can get a fine control on what is displayed.
122 | 
123 | ### App settings
124 | 
125 | Here are the parameters that set input values for most of the tabs. By hovering over with the mouse,
126 | the user can receive additional information on how to set the parameter, powered by the `shinyBS` package.
127 | 
128 | - **x-axis PC** - Select the principal component to display on the x axis
129 | - **y-axis PC** - Select the principal component to display on the y axis
130 | - **Group/color by** - Select the group of samples to stratify the analysis. Can also assume multiple values.
131 | - **Nr of (most variable) genes** - Number of genes to select for computing the principal components. The top n genes are
132 | selected ranked by their variance inter-samples
133 | - **Alpha** - Color transparency for the plots. Can assume values from 0 (transparent) to 1 (opaque)
134 | - **Labels size** - Size of the labels for the samples in the principal components plots
135 | - **Points size** - Size of the points to be plotted in the principal components plots
136 | - **Variable name size** - Size of the labels for the genes PCA - correspond to the samples names
137 | - **Scaling factor** - Scale value for resizing the arrow corresponding to the variables in the PCA for the genes. It
138 | should be used for mere visualization purposes
139 | - **Color palette** - Select the color palette to be used in the principal components plots. The number of colors 
140 | is selected automatically according to the number of samples and to the levels of the factors of interest
141 | and their interactions
142 | - **Plot style for gene counts** - Plot either boxplots or violin plots, with jittered points superimposed 
143 | 
144 | ### Plot export settings        
145 | 
146 | Width and height for the figures to export are input here in cm.
147 | 
148 | Additional controls available in the single tabs are also assisted by tooltips that show on hovering the mouse.
149 | Normally they are tightly related to the plot/output they are placed nearby.
150 | 
151 | ## The task menu
152 | 
153 | The task menu, accessible by clicking on the cog icon in the upper right part of the application, provides two 
154 | functionalities:
155 | 
156 | - `Exit pcaExplorer & save` will close the application and store the content of the `input` and `values` reactive 
157 | objects in two list objects made available in the global environment, called `pcaExplorer_inputs_YYYYMMDD_HHMMSS` and 
158 | `pcaExplorer_values_YYYYMMDD_HHMMSS`
159 | - `Save State as .RData` will similarly store `LiveInputs` and `r_data` in a binary file named
160 | `pcaExplorerState_YYYYMMDD_HHMMSS.Rdata`, without closing the application 
161 | 
162 | ## The app panels
163 | 
164 | The `pcaExplorer` app is structured in different panels, each focused on a different aspect of the 
165 | data exploration. 
166 | 
167 | Most of the panels work extensively with click-based and brush-based interactions, to gain additional
168 | depth in the explorations, for example by zooming, subsetting, selecting. This is possible thanks to the 
169 | recent developments in the `shiny` package/framework.
170 | 
171 | The available panels are described in the following subsections.
172 | 
173 | ### Data Upload
174 | 
175 | These **file input** controls are available when no `dds` or `countmatrix` + `coldata` are provided. Additionally,
176 | it is possible to upload the `annotation` data frame.
177 | 
178 | When the objects are already passed as parameters, a brief overview/summary for them is displayed.
179 | 
180 | ### Instructions
181 | 
182 | This is where you most likely are reading this text (otherwise in the package vignette). 
183 | 
184 | ### Counts Table
185 | 
186 | Interactive tables for the raw, normalized or (r)log-transformed counts are shown in this tab.
187 | The user can also generate a sample-to-sample correlation scatter plot with the selected data.
188 | 
189 | ### Data Overview
190 | 
191 | This panel displays information on the objects in use, either passed as parameters or 
192 | generated from the count matrix provided. The displayed information comprises the design metadata,
193 | a sample-to-sample distance heatmap, the number of millions of reads per sample and some
194 | basic summary for the counts.
195 | 
196 | ### Samples View
197 | 
198 | This panel displays the PCA projections of sample expression profiles onto any pair of components,
199 | a scree plot, a zoomed PCA plot, a plot of the genes with top and bottom loadings. Additionally, this section 
200 | presents a PCA plot where it is possible to remove samples deemed to be outliers in the analysis, which is 
201 | very useful to check the effect of excluding them. If needed, an interactive 3D visualization of the principal 
202 | components is also available.
203 | 
204 | ### Genes View
205 | 
206 | This panel displays the PCA projections of genes abundances onto any pair of components, with samples
207 | as biplot variables, to identify interesting groups of genes. Zooming is also possible, and clicking on single
208 | genes, a boxplot is returned, grouped by the factors of interest. A static and an interactive heatmap are 
209 | provided, including the subset of selected genes, also displayed as (standardized) expression profiles across the 
210 | samples. These are also reported in `datatable` objects, accessible in the bottom part of the tab.
211 | 
212 | ### GeneFinder
213 | 
214 | The user can search and display the expression values of a gene of interest, either by ID or gene
215 | name, as provided in the `annotation`. A handy panel for quick screening of shortlisted genes, again grouped by
216 | the factors of interest. The graphic can be readily exported as it is, and this can be iterated on a shortlisted
217 | set of genes. For each of them, the underlying data is displayed in an interactive table, also exportable with a 
218 | click.
219 | 
220 | ### PCA2GO
221 | 
222 | This panel shows the functional annotation of the principal components, with GO functions enriched in the 
223 | genes with high loadings on the selected principal components. It allows for the live computing of the object,
224 | that can otherwise be provided as a parameter when launching the app. The panel displays a PCA plot for the
225 | samples, surrounded on each side by the tables with the functions enriched in each component and direction.
226 | 
227 | ### Multifactor Exploration
228 | 
229 | This panel allows for the multifactor exploration of datasets with 2 or more experimental factors. The user has to select 
230 | first the two factors and the levels for each. Then, it is possible to combine samples from Factor1-Level1 in the selected
231 | order by clicking on each sample name, one for each level available in the selected Factor2. In order to build the matrix, 
232 | an equal number of samples for each level of Factor 1 is required, to keep the design somehow balanced.
233 | A typical case for choosing factors 1 and 2 is for example when different conditions and tissues are present.
234 | 
235 | Once constructed, a plot is returned that tries to represent simultaneously the effect of the two factors on the data.
236 | Each gene is represented by a dot-line-dot structure, with the color that is indicating the tissue (factor 2) where the gene 
237 | is mostly expressed. Each gene has two dots, one for each condition level (factor 1), and the position of the points is dictated
238 | by the scores of the principal components calculated on the matrix object. The line connecting the dots is darker when the 
239 | tissue where the gene is mostly expressed varies throughout the conditions. 
240 | 
241 | This representation is under active development, and it is promising for identifying interesting sets or clusters of genes
242 | according to their behavior on the Principal Components subspaces. Zooming and exporting of the underlying genes is also
243 | allowed by brushing on the main plot.
244 | 
245 | ### Report Editor
246 | 
247 | The report editor is the backbone for generating and editing the interactive report on the basis of the 
248 | uploaded data and the current state of the application. General `Markdown options` and `Editor options`
249 | are available, and the text editor, based on the `shinyAce` package, contains a comprehensive template 
250 | report, that can be edited to the best convenience of the user.
251 | 
252 | The editor supports R code autocompletion, making it easy to add new code chunks for additional sections.
253 | A preview is available in the tab itself, and the report can be generated, saved and subsequently shared 
254 | with simple mouse clicks.
255 | 
256 | ### About
257 | 
258 | Contains general information on `pcaExplorer`, including the developer's contact, the link to
259 | the development version in Github, as well as the output of `sessionInfo`, to use for reproducibility sake - 
260 | or bug reporting. Information for citing `pcaExplorer` is also reported.
261 | 
262 | ## Running `pcaExplorer` on published datasets
263 | 
264 | We can run `pcaExplorer` for demonstration purposes on published datasets that are available as SummarizedExperiment
265 | objects in Bioconductor experiment packages.
266 | 
267 | We will use the `airway` dataset, which can be installed with this command
268 | 
269 | ```
270 | if (!requireNamespace("BiocManager", quietly=TRUE))
271 |     install.packages("BiocManager")
272 | BiocManager::install("airway")
273 | ```
274 | 
275 | This package provides a RangedSummarizedExperiment object of read counts in genes for an RNA-Seq experiment 
276 | on four human airway smooth muscle cell lines treated with dexamethasone. More details such as gene models and 
277 | count quantifications can be found in the `airway` package vignette. 
278 | 
279 | To run `pcaExplorer` on this dataset, the following commands are required
280 | 
281 | ```
282 | library("airway")
283 | 
284 | data("airway", package = "airway")
285 | 
286 | dds_airway <- DESeqDataSet(airway,design=~dex+cell)
287 | dds_airway
288 | rld_airway <- rlogTransformation(dds_airway)
289 | rld_airway
290 | pcaExplorer(dds = dds_airway,
291 |             rlt = rld_airway)
292 | ```
293 | The `annotation` for this dataset can be built by exploiting the `org.Hs.eg.db` package
294 | 
295 | ```
296 | library("org.Hs.eg.db")
297 | genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL")
298 | annotation_airway <- data.frame(gene_name = genenames_airway,
299 |                                 row.names = rownames(dds_airway),
300 |                                 stringsAsFactors = FALSE)
301 | head(annotation_airway)                                
302 | ```
303 | 
304 | or alternatively, by using the `get_annotation` or `get_annotation_orgdb` wrappers.
305 | 
306 | ```
307 | anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway,
308 |                                       orgdb_species = "org.Hs.eg.db",
309 |                                       idtype = "ENSEMBL")
310 | 
311 | anno_df_biomart <- get_annotation(dds = dds_airway,
312 |                                   biomart_dataset = "hsapiens_gene_ensembl",
313 |                                   idtype = "ensembl_gene_id")
314 | ```
315 | 
316 | Then again, the app can be launched with 
317 | 
318 | ```
319 | pcaExplorer(dds = dds_airway,
320 |             rlt = rld_airway,
321 |             annotation = annotation_airway)
322 | ```
323 | 
324 | If desired, alternatives can be used. See the well written annotation workflow available at the Bioconductor site (https://bioconductor.org/help/workflows/annotation/annotation/).
325 | 
326 | ## Running `pcaExplorer` on synthetic datasets
327 | 
328 | For testing and demonstration purposes, a function is also available to generate synthetic datasets whose counts
329 | are generated based on two or more experimental factors.
330 | 
331 | This can be called with the command
332 | 
333 | ```
334 | dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1)
335 | ```
336 | 
337 | See all the available parameters by typing `?makeExampleDESeqDataSet_multifac`. Credits are given to the initial
338 | implementation by Mike Love in the `DESeq2` package.
339 | 
340 | The following steps run the app with the synthetic dataset
341 | 
342 | ```
343 | dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1,betaSD_tissue = 3)
344 | dds_multifac
345 | rld_multifac <- rlogTransformation(dds_multifac)
346 | rld_multifac
347 | ## checking how the samples cluster on the PCA plot
348 | pcaplot(rld_multifac,intgroup = c("condition","tissue"))
349 | ```
350 | 
351 | Launch the app for exploring this dataset with 
352 | 
353 | ```
354 | pcaExplorer(dds = dds_multifac,
355 |             rlt = rld_multifac)
356 | ```
357 | 
358 | When such a dataset is provided, the panel for multifactorial exploration is also usable at its best.
359 | 
360 | ## Functions exported by the package for standalone usage
361 | 
362 | The functions exported by the `pcaExplorer` package can be also used in a standalone scenario,
363 | provided the required objects are in the working environment. They are listed here for an overview,
364 | but please refer to the documentation for additional details.
365 | 
366 | - `pcaplot` plots the sample PCA for `DESeqTransform` objects, such as rlog-transformed data. This is 
367 | the workhorse of the Samples View tab
368 | - `pcaplot3d` - same as for `pcaplot`, but it uses the `threejs` package for the 3d interactive view.
369 | - `pcascree` produces a scree plot of the PC computed on the samples. A `prcomp` object needs to be 
370 | passed as main argument
371 | - `correlatePCs` and `plotPCcorrs` respectively compute and plot significance of the (cor)relation 
372 | of each covariate versus a principal component. The input for `correlatePCs` is a `prcomp` object
373 | - `hi_loadings` extracts and optionally plots the genes with the highest loadings
374 | - `genespca` computes and plots the principal components of the genes, eventually displaying 
375 | the samples as in a typical biplot visualization. This is the function in action for the Genes View tab
376 | - `topGOtable` is a convenient wrapper for extracting functional GO terms enriched in a subset of genes 
377 | (such as the differentially expressed genes), based on the algorithm and the implementation in the topGO package
378 | - `pca2go` provides a functional interpretation of the principal components, by extracting the genes
379 | with the highest loadings for each PC, and then runs internally `topGOtable` on them for efficient functional
380 | enrichment analysis. Needs a `DESeqTransform` object as main parameter
381 | - `limmaquickpca2go` is an alternative to `pca2go`, used in the live running app, thanks to its fast 
382 | implementation based on the `limma::goana` function.
383 | - `makeExampleDESeqDataSet_multifac` constructs a simulated `DESeqDataSet` of Negative Binomial dataset
384 | from different conditions. The fold changes between the conditions can be adjusted with the `betaSD_condition`
385 | and `betaSD_tissue` arguments
386 | - `distro_expr` plots the distribution of expression values, either with density lines, boxplots or 
387 | violin plots
388 | - `geneprofiler` plots the profile expression of a subset of genes, optionally as standardized values
389 | - `get_annotation` and `get_annotation_orgdb` retrieve the latest annotations for the `dds` object, to be
390 | used in the call to the `pcaExplorer` function. They use respectively the `biomaRt` package
391 | and the `org.XX.eg.db` packages
392 | - `pair_corr` plots the pairwise scatter plots and computes the correlation coefficient on the 
393 | expression matrix provided.
394 | 
395 | For more information on the functions of the `pcaExplorer` package, please refer to the
396 | vignette and/or the documentation.
397 | 
398 | ## Further development
399 | 
400 | Additional functionality for `pcaExplorer` will be added in the future, as it is tightly related to a topic
401 | under active research and development.
402 | 
403 | Improvements, suggestions, bugs, issues and feedback of any type can be sent to marinif@uni-mainz.de.
404 | 


--------------------------------------------------------------------------------
/inst/extdata/instructions_unr.md:
--------------------------------------------------------------------------------
  1 | Setup
  2 | =====
  3 | 
  4 | First things first: install
  5 | *[pcaExplorer](https://bioconductor.org/packages/3.9/pcaExplorer)* and
  6 | load it into your R session. You should receive a message notification
  7 | if this is completed without errors.
  8 | 
  9 | ``` r
 10 | BiocManager::install("pcaExplorer")
 11 | library("pcaExplorer")
 12 | ```
 13 | 
 14 | This document describes a use case for
 15 | *[pcaExplorer](https://bioconductor.org/packages/3.9/pcaExplorer)*,
 16 | based on the dataset in the
 17 | *[airway](https://bioconductor.org/packages/3.9/airway)* package. If
 18 | this package is not available on your machine, please install it by
 19 | executing:
 20 | 
 21 | ``` r
 22 | BiocManager::install("airway")
 23 | ```
 24 | 
 25 | This dataset consists of the gene-level expression measurements (as raw
 26 | read counts) for an experiment where four different human airway smooth
 27 | muscle cell lines are either treated with dexamethasone or left
 28 | untreated.
 29 | 
 30 | Start exploring - the beauty of interactivity
 31 | =============================================
 32 | 
 33 | To start the exploration, you just need the following lines:
 34 | 
 35 | ``` r
 36 | library("pcaExplorer")
 37 | pcaExplorer()
 38 | ```
 39 | 
 40 | The easiest way to explore the
 41 | *[airway](https://bioconductor.org/packages/3.9/airway)* dataset is by
 42 | clicking on the dedicated button in the **Data Upload** panel. This
 43 | action will:
 44 | 
 45 | -   load the *[airway](https://bioconductor.org/packages/3.9/airway)*
 46 |     package
 47 | -   load the count matrix and the experimental metadata
 48 | -   compose the `dds` object, normalize the expression values (using the
 49 |     robust method proposed by Anders and Huber in the original DESeq
 50 |     manuscript), and compute the variance stabilizing transformed
 51 |     expression values (stored in the `dst` object)
 52 | -   retrieve the gene annotation information via the
 53 |     *[org.Hs.eg.db](https://bioconductor.org/packages/3.9/org.Hs.eg.db)*,
 54 |     adding gene symbols to the ENSEMBL ids - this step is optional, but
 55 |     recommended for more human-readable identifiers to be used.
 56 | 
 57 | If you want to load your expression data, please refer to the [User
 58 | Guide](https://bioconductor.org/packages/3.9/pcaExplorer/vignettes/pcaExplorer.html),
 59 | which contains detailed information on the formats your data have to
 60 | respect.
 61 | 
 62 | Once the preprocessing of the input is done, you should get a
 63 | notification in the lower right corner that you’re all set. The whole
 64 | preprocessing should take around 5-6 seconds (tested on a MacBook Pro,
 65 | with i7 and 16 Gb RAM). You can check what each component looks like by
 66 | clicking on its respective button, once they have appeared in the lower half
 67 | of the panel.
 68 | 
 69 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_00_demo_loaded.png" alt="Overview of the Data Upload panel. After clicking on the 'Load the demo airway data' button, all widgets are automatically populated, and each data component (count matrix, experimental data, dds object, annotation) can be previewed in a modal window by clicking on its respective button." width="80%" />
 70 | <p class="caption">
 71 | Overview of the Data Upload panel. After clicking on the ‘Load the demo
 72 | airway data’ button, all widgets are automatically populated, and each
 73 | data component (count matrix, experimental data, dds object, annotation)
 74 | can be previewed in a modal window by clicking on its respective button.
 75 | </p>
 76 | 
 77 | You can proceed to explore the expression values of your dataset in the
 78 | **Counts Table** tab. You can change the data type you are displaying
 79 | between raw counts, normalized, or transformed, and plot their values in
 80 | a scatterplot matrix to explore their sample-to-sample correlations. To
 81 | try this, select for example “Normalized counts”, change the correlation
 82 | coefficient to “spearman”, and click on the `Run` action button. The
 83 | correlation values will also be displayed as a heatmap.
 84 | 
 85 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_01_splom.png" alt="Screenshot of the sample to sample scatter plot matrix. The user can select the correlation method to use, the option to plot values on log2 scales, and the possibility to use a subset of genes (to obtain a quicker overview if many samples are provided)." width="80%" />
 86 | <p class="caption">
 87 | Screenshot of the sample to sample scatter plot matrix. The user can
 88 | select the correlation method to use, the option to plot values on log2
 89 | scales, and the possibility to use a subset of genes (to obtain a
 90 | quicker overview if many samples are provided).
 91 | </p>
 92 | 
 93 | Additional features, both for samples and for features, are displayed in
 94 | the **Data overview** panel. A closer look at the metadata of the
 95 | `airway` set highlights how each combination of cell type (`cell`) and
 96 | dexamethasone treatment (`dex`) is represented by a single sequencing
 97 | experiment. The 8 samples in the demo dataset are themselves a subsample
 98 | of the [full GEO
 99 | record](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52778),
100 | namely the ones not treated with albuterol (`alb` column).
101 | 
102 | The relationship among samples can be seen in the sample-to-sample
103 | heatmap. For example, by selecting the Manhattan distance metric, it is
104 | evident how the samples cluster by dex treatment, yet they show a
105 | dendrogram structure that recalls the 4 different cell types used. The
106 | total sum of counts per sample is displayed as a bar plot.
107 | 
108 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_02_sts_heatmap.png" alt="Screenshot of the sample to sample heatmap. Selected is the Manhattan distance, but Euclidean and correlation-based distance are also provided as options. In this case, the user has also selected the dex and cell factors in the 'Group/color by' widget in the sidebar menu, and these covariates decorate the heatmap to facilitate identification of patterns." width="80%" />
109 | <p class="caption">
110 | Screenshot of the sample to sample heatmap. Selected is the Manhattan
111 | distance, but Euclidean and correlation-based distance are also provided
112 | as options. In this case, the user has also selected the dex and cell
113 | factors in the ‘Group/color by’ widget in the sidebar menu, and these
114 | covariates decorate the heatmap to facilitate identification of
115 | patterns.
116 | </p>
117 | 
118 | Patterns can become clearer after selecting, in the **App settings** on
119 | the left, an experimental factor to group and color by: try selecting
120 | `dex`, for example. If more than one covariate is selected, the
121 | interaction between these will be taken as a grouping factor. To remove
122 | one, simply click on it to highlight and press the del or backspace key
123 | to delete it. Try doing so by also clicking on `cell`, and then removing
124 | `dex` afterwards.
125 | 
126 | Basic summary information is also displayed for the genes. In the count
127 | matrix provided, one can check how many genes were detected, by
128 | selecting a “Threshold on the row sums of the counts” or on the row
129 | means of the normalized counts (more stringent). For example, selecting
130 | 5 in both cases, only 24345 genes have a total number of counts, summed
131 | by row, greater than 5, and 17745 genes have more than 5 counts (normalized) on average.
132 | 
133 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_03_summary_counts.png" alt="Screenshot of the Basic Summary of the counts in the Data Overview panel. General information is provided, together with an overview on detected genes according to different filtering criteria." width="80%" />
134 | <p class="caption">
135 | Screenshot of the Basic Summary of the counts in the Data Overview
136 | panel. General information is provided, together with an overview on
137 | detected genes according to different filtering criteria.
138 | </p>
139 | 
140 | The **Samples View** and the **Genes View** are the tabs where most
141 | results coming from Principal Component Analysis, either performed on
142 | the samples or on the genes, can be explored in depth. Assuming you
143 | selected `cell` in the “Group/color by” option on the left, the Samples
144 | PCA plot should clearly display how the cell type explains a considerable
145 | portion of the variability in the dataset (corresponding to the second
146 | PC). To check that `dex` treatment is the main source of variability,
147 | select that instead of `cell`.
148 | 
149 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_04a_samplespca.png" alt="The Samples View panel. Displayed are a PCA plot (left) and the corresponding scree plot (right), with the samples colored and labeled by cell type - separating on the second principal component." width="80%" />
150 | <p class="caption">
151 | The Samples View panel. Displayed are a PCA plot (left) and the
152 | corresponding scree plot (right), with the samples colored and labeled
153 | by cell type - separating on the second principal component.
154 | </p>
155 | 
156 | The scree plot on the right shows how many components should be retained
157 | for a satisfactory reduced dimension view of the original set, with
158 | their eigenvalues from largest to smallest. To explore the PCs other
159 | than the first and the second one, you can just select them in the
160 | x-axis PC and y-axis PC widgets in the left sidebar.
161 | 
162 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_04b_samples_dex.png" alt="PCA plot for the samples, colored by dexamethasone treatment. The dex factor is the main driver of the variability in the data, and samples separate nicely on the first principal component." width="80%" />
163 | <p class="caption">
164 | PCA plot for the samples, colored by dexamethasone treatment. The dex
165 | factor is the main driver of the variability in the data, and samples
166 | separate nicely on the first principal component.
167 | </p>
168 | 
169 | If you brush (left-click and hold) on the PCA plot, you can display a
170 | zoomed version of it in the frame below. If you suspect some samples
171 | might be outliers (this is not the case in the `airway` set, still), you
172 | can select them in the dedicated plot, and give a first check on what the
173 | remainder of the samples would look like. On the right side, you can
174 | quickly check which genes show the top and bottom loadings, split by
175 | principal component. First, change the value in the input widget to 20;
176 | then, select one of each list and try to check them in the **Gene
177 | Finder** tab; try for example with *DUSP1*, *PER1*, and *DDX3Y*.
178 | 
179 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_05_loadings.png" alt="Genes with highest loadings on the first and second principal components. The user can select how many top and bottom genes will be displayed, and the gene names are printed below each gene's contribution on each PC." width="80%" />
180 | <p class="caption">
181 | Genes with highest loadings on the first and second principal
182 | components. The user can select how many top and bottom genes will be
183 | displayed, and the gene names are printed below each gene’s contribution
184 | on each PC.
185 | </p>
186 | 
187 | While *DUSP1* and *PER1* clearly show a change in expression upon
188 | dexamethasone treatment (and indeed were reported among the well known
189 | glucocorticoid-responsive genes in the original publication of Himes et
190 | al., 2014), *DDX3Y* displays variability at the cell type level (select
191 | `cell` in the Group/color by widget): this gene is almost undetected in
192 | N061011 cells, and this high variance is what determines its high
193 | loading on the second principal component.
194 | 
195 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_06a_genefinder_dusp1.png" alt="Plot of the gene expression levels of DUSP1. Points are split according to dex treatment, and both graphics and table are displayed." width="80%" />
196 | <p class="caption">
197 | Plot of the gene expression levels of DUSP1. Points are split according
198 | to dex treatment, and both graphics and table are displayed.
199 | </p>
200 | 
201 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_06b_genefinder_per1.png" alt="Plot of the gene expression levels of PER1. Points are split according to dex treatment." width="80%" />
202 | <p class="caption">
203 | Plot of the gene expression levels of PER1. Points are split according
204 | to dex treatment.
205 | </p>
206 | 
207 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_06c_genefinder_ddx3y.png" alt="Plot of the gene expression levels of DDX3Y. Points are split according to cell type, as this gene was highly variable across this experimental factor - indeed, in one cell type it is barely detected." width="80%" />
208 | <p class="caption">
209 | Plot of the gene expression levels of DDX3Y. Points are split according
210 | to cell type, as this gene was highly variable across this experimental
211 | factor - indeed, in one cell type it is barely detected.
212 | </p>
213 | 
214 | You can see the single expression values in a table as well, and this
215 | information can be downloaded with a simple click.
216 | 
217 | Back to the **Samples View**, you can experiment with the number of top
218 | variable genes to see how the results of PCA are in this case robust to
219 | a wide range of this value - this might not be the case with other
220 | datasets, and the simplicity of interacting with these parameters makes
221 | it easy to iterate in the exploration steps.
222 | 
223 | Proceeding to the **Genes View**, you can see the dual of the Samples
224 | PCA: now the samples are displayed as arrows in the genes biplot, which
225 | can show which genes display a similar behaviour. You can capture this
226 | with a simple brushing action on the plot, and notice how their profiles
227 | throughout all samples are shown in the Profile explorer below;
228 | moreover, a static and an interactive heatmap, together with a table
229 | containing the underlying data, are generated in the rows below.
230 | 
231 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_07_genespca.png" alt="The Genes View panel. Upper panel: the genes biplot, and its zoomed plot, with gene names displayed. Lower panel: the profile explorer of the selected subset of genes (corresponding to the zoomed window), and the boxplot for the gene selected by clicking close to a location in the zoomed window." width="80%" />
232 | <p class="caption">
233 | The Genes View panel. Upper panel: the genes biplot, and its zoomed
234 | plot, with gene names displayed. Lower panel: the profile explorer of
235 | the selected subset of genes (corresponding to the zoomed window), and
236 | the boxplot for the gene selected by clicking close to a location in the
237 | zoomed window.
238 | </p>
239 | 
240 | Since we compute the gene annotation table as well, it’s nice to read
241 | the gene symbols in the zoomed window (instead of the ENSEMBL ids). By
242 | clicking close enough to any of these genes, the expression values are
243 | plotted, in a similar fashion as in the **Gene Finder**.
244 | 
245 | The tab **PCA2GO** helps you understand which are the biological
246 | common themes (default: the Gene Ontology Biological Process terms) in
247 | the genes showing up in the top and in the bottom loadings for each
248 | principal component. Since we launched the `pcaExplorer` app without
249 | additional parameters, this information is not available, but can be
250 | computed live (this might take a while).
251 | 
252 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_08_pca2go_topgo.png" alt="The PCA2GO panel. Four tables (2 per dimension, here only 3 are displayed) decorate the PCA plot in the middle, and display the top enriched functional categories in each subset of gene with high loadings." width="80%" />
253 | <p class="caption">
254 | The PCA2GO panel. Four tables (2 per dimension, here only 3 are
255 | displayed) decorate the PCA plot in the middle, and display the top
256 | enriched functional categories in each subset of gene with high
257 | loadings.
258 | </p>
259 | 
260 | Still, a previous call to `pca2go` is recommended, as it relies on the
261 | algorithm of the *[topGO](https://bioconductor.org/packages/3.9/topGO)*
262 | package: it will require some additional computing time, but it is
263 | likely to deliver more precise terms (i.e. in turn more relevant from
264 | the point of view of their biological relevance). To do so, you should
265 | exit the live session, compute this object, and provide it in the call
266 | to `pcaExplorer` (see more how to do so in [the main user
267 | guide](https://bioconductor.org/packages/3.9/pcaExplorer/vignettes/pcaExplorer.html)).
268 | 
269 | When you’re done - the power of reproducibility
270 | ===============================================
271 | 
272 | A typical session with `pcaExplorer` includes one or more iterations on
273 | each of these tabs. Once you are finished, you might want to store the
274 | results of your analysis in different formats.
275 | 
276 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_90_exitsave.png" alt="The pcaExplorer task menu. Buttons for saving the session to binary data or to a dedicated environment are displayed." width="50%" />
277 | <p class="caption">
278 | The pcaExplorer task menu. Buttons for saving the session to binary data
279 | or to a dedicated environment are displayed.
280 | </p>
281 | 
282 | With `pcaExplorer` you can do all of the following:
283 | 
284 | -   save every plot and table by simply clicking on the respective
285 |     button, below each element
286 | -   save the state of the entire app and its reactive elements as a
287 |     binary `.RData` file, as if it was a workspace (clicking on the cog
288 |     icon in the right side of the task menu)
289 | -   use the “Exit `pcaExplorer` and save” button, which saves the state in a
290 |     specific environment of your R session, which you can later access
291 |     by its name, which normally could look like
292 |     `pcaExplorerState_YYYYMMDD_HHMMSS` (also accessible from the cog)
293 | -   enjoy the beauty of reproducible research in the **Report Editor**:
294 |     `pcaExplorer` comes with a template analysis, that picks the latest
295 |     status of the app during your session, and combines these reactive
296 |     values together in an R Markdown document, which you can first
297 |     preview live in the app, and then download as standalone HTML file -
298 |     to store or share. This document stitches together narrative text,
299 |     code, and output objects, and constitutes a compendium where all
300 |     actions are recorded. If you are familiar with R, you can edit that
301 |     live, with support for autocompletion, in the “Edit report” tab.
302 | 
303 | <img src="https://github.com/federicomarini/pcaExplorer/raw/master/vignettes/unr_99_editreport.png" alt="The Report Editor tab. The collapsible elements control general markdown and editor options, which are regarded when the report is compiled. Its content is specified in the Ace editor, integrated in the Shiny app." width="80%" />
304 | <p class="caption">
305 | The Report Editor tab. The collapsible elements control general markdown
306 | and editor options, which are regarded when the report is compiled. Its
307 | content is specified in the Ace editor, integrated in the Shiny app.
308 | </p>
309 | 


--------------------------------------------------------------------------------
/inst/extdata/reportTemplate.Rmd:
--------------------------------------------------------------------------------
  1 | # About this report
  2 | 
  3 | This content has been loaded from the template report `.Rmd` file. Please edit it at your best convenience!
  4 | 
  5 | If you are viewing this report in the Preview, you might require the installation of the PhantomJS to render correctly some HTML widgets. 
  6 | This can be done by using the `r BiocStyle::CRANpkg("webshot")` package and calling `webshot::install_phantomjs()`.
  7 | Alternatively, the more recent `r BiocStyle::CRANpkg("webshot2")` package uses the headless Chrome browser (via the `r BiocStyle::CRANpkg("chromote")` package, requiring Google Chrome or other Chromium-based browser).
  8 | 
  9 | ```{r setup, include=FALSE, eval = TRUE, echo = FALSE}
 10 | opts_chunk$set(
 11 |   echo=input$report_echo,
 12 |   error=TRUE
 13 | )
 14 | ```
 15 | 
 16 | # Overview on the data
 17 | 
 18 | The data provided was used to construct the following objects
 19 | 
 20 | ```{r}
 21 | values$mydds
 22 | 
 23 | values$mydst
 24 | 
 25 | values$transformation_type
 26 | 
 27 | head(values$myannotation)
 28 | ```
 29 | 
 30 | The following design was used:
 31 | 
 32 | ```{r}
 33 | DT::datatable(as.data.frame(colData(values$mydds)))
 34 | ```
 35 | 
 36 | An overview of the table for the features is shown here, by displaying the `r input$countstable_unit`
 37 | 
 38 | ```{r}
 39 | # Pick the matrix matching the unit selected for the counts table; the four
 40 | # options are mutually exclusive, and an unknown unit leaves currentMat unset.
 41 | shownUnit <- input$countstable_unit
 42 | if (shownUnit == "raw_counts") currentMat <- counts(values$mydds, normalized = FALSE)
 43 | if (shownUnit == "normalized_counts") currentMat <- counts(values$mydds, normalized = TRUE)
 44 | if (shownUnit == "rlog_counts") currentMat <- assay(values$mydst)
 45 | if (shownUnit == "log10_counts") currentMat <- log10(1 + counts(values$mydds, normalized = TRUE))
 46 | 
 47 | ```
 48 | 
 49 | ```{r, warning=FALSE}
 50 | DT::datatable(currentMat)
 51 | ```
 52 | 
 53 | This is how the samples cluster if we use euclidean distance on the rlog transformed values
 54 | 
 55 | ```{r}
 56 | # Sample-to-sample distances (dist() defaults) on the transformed values
 57 | sampleDists <- as.matrix(dist(t(assay(values$mydst))))
 58 | if (is.null(input$color_by)) {
 59 |   pheatmap(sampleDists)
 60 | } else {
 61 |   # decorate the columns with the experimental factors selected in the app
 62 |   expgroups <- as.data.frame(colData(values$mydst)[,input$color_by])
 63 |   rownames(expgroups) <- colnames(values$mydst)
 64 |   colnames(expgroups) <- input$color_by
 65 |   pheatmap(sampleDists,annotation_col = expgroups)
 66 | }
 67 | ```
 68 | 
 69 | This is an overview of the number of available reads in each sample (normally these are only uniquely aligned reads)
 70 | 
 71 | ```{r}
 72 | # Library size per sample (in millions of reads), optionally filled by group
 73 | rr <- colSums(counts(values$mydds))/1e6
 74 | if(is.null(names(rr)))
 75 |   names(rr) <- paste0("sample_",seq_along(rr))
 76 | rrdf <- data.frame(Reads=rr,Sample=names(rr),stringsAsFactors = FALSE)
 77 | if (!is.null(input$color_by)) {
 78 |   selGroups <- as.data.frame(colData(values$mydds)[input$color_by])
 79 |   rrdf$Group <- interaction(selGroups)
 80 |   p <- ggplot(rrdf,aes(Sample,weight=Reads)) + geom_bar(aes(fill=Group)) + theme_bw()
 81 | } else {
 82 |   p <- ggplot(rrdf,aes(Sample,weight=Reads)) + geom_bar() + theme_bw()
 83 | }
 84 | p
 85 | 
 86 | print(colSums(counts(values$mydds)))
 87 | summary(colSums(counts(values$mydds))/1e6)
 88 | ```
 89 | 
 90 | This is a quick info on the number of detected genes
 91 | 
 92 | ```{r}
 93 | t1 <- rowSums(counts(values$mydds))
 94 | t2 <- rowMeans(counts(values$mydds,normalized=TRUE))
 95 | # detection thresholds, read from the app inputs
 96 | thresh_rowsums <- input$threshold_rowsums
 97 | thresh_rowmeans <- input$threshold_rowmeans
 98 | abs_t1 <- sum(t1 > thresh_rowsums)
 99 | rel_t1 <- 100 * mean(t1 > thresh_rowsums)
100 | abs_t2 <- sum(t2 > thresh_rowmeans)
101 | rel_t2 <- 100 * mean(t2 > thresh_rowmeans)
102 | 
103 | cat("Number of detected genes:\n")
104 | # t1-based lines report the row-sum threshold, t2-based lines the row-mean one
105 | cat(abs_t1,"genes have at least a sample with more than",thresh_rowsums,"counts\n")
106 | cat(paste0(round(rel_t1,3),"%"), "of the",nrow(values$mydds),"genes have at least a sample with more than",thresh_rowsums,"counts\n")
107 | cat(abs_t2,"genes have more than",thresh_rowmeans,"counts (normalized) on average\n")
108 | cat(paste0(round(rel_t2,3),"%"), "of the",nrow(values$mydds),"genes have more than",thresh_rowmeans,"counts (normalized) on average\n")
109 | cat("Counts are ranging from", min(counts(values$mydds)),"to",max(counts(values$mydds)))
110 | ```
111 | 
112 | # PCA on the samples
113 | 
114 | This plot shows how the samples are related to each other by plotting PC `r input$pc_x` vs PC `r input$pc_y`, using the top `r input$pca_nrgenes` most variable genes
115 | 
116 | ```{r}
117 | res <- pcaplot(values$mydst,intgroup = input$color_by,ntop = input$pca_nrgenes,  # samples PCA with the settings currently selected in the app
118 |                pcX = as.integer(input$pc_x),pcY = as.integer(input$pc_y),  # the PC selectors are converted to integer before use
119 |                text_labels = input$sample_labels,
120 |                point_size = input$pca_point_size, title="Samples PCA - zoom in",
121 |                ellipse = input$pca_ellipse, ellipse.prob = input$pca_cislider
122 | )
123 | res <- res + theme_bw()
124 | res
125 | ```
126 | 
127 | The scree plot helps determine the number of underlying principal components
128 | 
129 | ```{r}
130 | rv <- rowVars(assay(values$mydst))  # row-wise variances of the transformed values
131 | select <- order(rv, decreasing = TRUE)[seq_len(min(input$pca_nrgenes,length(rv)))]  # indices of the rows with the largest variance
132 | pca <- prcomp(t(assay(values$mydst)[select, ]))  # transposed, so that samples are the observations
133 | 
134 | res <- pcascree(pca,type = input$scree_type, pc_nr = input$scree_pcnr, title="Scree plot for the samples PCA")
135 | res <- res + theme_bw()
136 | res
137 | ```
138 | 
139 | The genes with the highest loadings in the selected principal components are the following
140 | 
141 | ```{r}
142 | rv <- rowVars(assay(values$mydst))  # same PCA as for the scree plot, recomputed so that this chunk stands alone
143 | select <- order(rv, decreasing = TRUE)[seq_len(min(input$pca_nrgenes,length(rv)))]
144 | pca <- prcomp(t(assay(values$mydst)[select, ]))
145 | 
146 | par(mfrow=c(2,1))  # two stacked panels: first the x-axis PC, then the y-axis PC
147 | hi_loadings(pca,whichpc = as.integer(input$pc_x),topN = input$ntophiload,annotation = values$myannotation)
148 | hi_loadings(pca,whichpc = as.integer(input$pc_y),topN = input$ntophiload,annotation = values$myannotation)
149 | ```
150 | 
151 | # PCA on the genes
152 | 
153 | This plot illustrates how the top `r input$pca_nrgenes` variant genes are distributed in PC `r input$pc_x` vs PC `r input$pc_y`
154 | 
155 | ```{r}
156 | if(!is.null(input$color_by)) {  # build the sample grouping passed to the genes biplot
157 |   expgroups <- as.data.frame(colData(values$mydst)[,input$color_by])
158 |   expgroups <- interaction(expgroups)  # one combined level per combination of the selected factors
159 |   expgroups <- factor(expgroups,levels=unique(expgroups))  # levels ordered by first appearance
160 |   
161 | } else {
162 |   expgroups <- colnames(values$mydst)  # no factor selected: the sample names are used as groups
163 | }
164 | colGroups <- colSel()[factor(expgroups)]  # colSel() is defined outside this template; it is indexed by the integer codes of the group factor
165 | 
166 | res <- genespca(values$mydst,
167 |                 ntop = input$pca_nrgenes,
168 |                 choices = c(as.integer(input$pc_x),as.integer(input$pc_y)),
169 |                 biplot = TRUE,
170 |                 arrowColors = factor(colGroups,levels=unique(colGroups)),
171 |                 groupNames = expgroups,
172 |                 alpha=input$pca_point_alpha,coordEqual=FALSE,useRownamesAsLabels=FALSE,labels.size=input$pca_label_size,
173 |                 point_size=input$pca_point_size,varname.size=input$pca_varname_size, scaleArrow = input$pca_scale_arrow,
174 |                 annotation=values$myannotation)
175 | res
176 | ```
177 | 
178 | 
179 | For the selected genes, this is the overall profile across all samples
180 | 
181 | ```{r}
182 | if(!is.null(input$pcagenes_brush) && length(input$color_by)>0)  # needs a brushed selection and at least one grouping factor
183 |   geneprofiler(values$mydst,
184 |                genelist = curData_brush()$ids,
185 |                intgroup = input$color_by,
186 |                plotZ = input$zprofile)
187 | ```
188 | 
189 | And here is an interactive heatmap for that subset
190 | 
191 | ```{r}
192 | # Interactive heatmap of the brushed genes; drawn only when more than one gene is selected
193 | if(!is.null(input$pcagenes_brush))
194 | {
195 |   brushedObject <- curData_brush()
196 |   if(nrow(brushedObject) > 1){
197 |     selectedGenes <- brushedObject$ids
198 |     toplot <- assay(values$mydst)[selectedGenes,]
199 |     rownames(toplot) <- values$myannotation$gene_name[match(rownames(toplot),rownames(values$myannotation))]  # relabel rows with gene_name, looked up by row id in the annotation
200 |     
201 |     mycolss <- c("#313695","#4575b4","#74add1","#abd9e9","#e0f3f8","#fee090","#fdae61","#f46d43","#d73027","#a50026") # to be consistent with red/blue usual coding
202 |     
203 |     heatmaply(toplot,Colv = as.logical(input$heatmap_colv),colors = mycolss)
204 |   }
205 | }
206 | 
207 | ```
208 | 
209 | # Shortlisted genes
210 | 
211 | This gene was selected in the interactive session.
212 | 
213 | ```{r}
214 | anno_id <- rownames(values$mydst)
215 | anno_gene <- values$myannotation$gene_name
216 | 
217 | # Plot the normalized counts of the gene typed in the Gene Finder.
218 | # NOTE: this chunk is evaluated at top level by knitr, so `return()` cannot be
219 | # used to bail out early; placeholder plots are produced via if/else instead.
220 | if(input$genefinder!="") {
221 |   selectedGene <- character(0)
222 |   selectedGeneSymbol <- character(0)
223 |   if (input$genefinder %in% anno_id) {
224 |     # the identifier matches a row name of the dataset
225 |     selectedGene <- input$genefinder
226 |     selectedGeneSymbol <- values$myannotation$gene_name[match(selectedGene,rownames(values$myannotation))]
227 |   }
228 |   if (input$genefinder %in% anno_gene) {
229 |     # the identifier matches a gene symbol, which may map to several row ids
230 |     symbolHits <- which(values$myannotation$gene_name==input$genefinder)
231 |     selectedGene <- rownames(values$myannotation)[symbolHits]
232 |     selectedGeneSymbol <- values$myannotation$gene_name[symbolHits]
233 |   }
234 | 
235 |   if (length(selectedGene) == 0) {
236 |     res <- ggplot() + annotate("text",label="Gene not found...",0,0) + theme_bw()
237 |   } else if (length(selectedGene) > 1) {
238 |     res <- ggplot() + annotate("text",label=paste0("Type in a gene name/id of the following:\n",paste(selectedGene,collapse=", ")),0,0) + theme_bw()
239 |   } else {
240 |     plot_style <- switch(input$plot_style,
241 |                          "boxplot" = "boxplot",
242 |                          "violin plot" = "violin",
243 |                          "auto")
244 | 
245 |     res <- mosdef::gene_plot(de_container = values$mydds,
246 |                              gene = selectedGene,
247 |                              intgroup = input$color_by,
248 |                              annotation_obj = values$myannotation,
249 |                              normalized = TRUE,
250 |                              labels_display = input$addsamplelabels,
251 |                              plot_type = plot_style)
252 | 
253 |     if (input$ylimZero) {
254 |       res <- res + scale_y_log10(name = "Normalized counts - log10 scale", limits = c(0.4, NA))
255 |     } else {
256 |       res <- res + scale_y_log10(name = "Normalized counts - log10 scale")
257 |     }
258 | 
259 |     res <- res +
260 |       labs(title = paste0("Normalized counts for ", selectedGeneSymbol, " - ", selectedGene)) +
261 |       scale_x_discrete(name = "") +
262 |       scale_fill_discrete(name = "Experimental\nconditions")
263 | 
264 |     # store the plot in exportPlots as well, as the original template did
265 |     exportPlots$genefinder_countsplot <- res
266 |   }
267 | 
268 |   # last value of the chunk: the gene plot, or the placeholder message
269 |   res
270 | }
271 | ```
272 | 
273 | Repeat the same chunk of code and change the identifier of the gene to obtain the similar plot for the other candidates.
274 | 
275 | # Functional interpretation of the principal components
276 | 
277 | These tables report the functional categories enriched in the genes with the top and bottom loadings in the selected principal components.
278 | 
279 | ```{r}
280 | if(!is.null(values$mypca2go))  # each table is produced only when a pca2go object is available
281 | {
282 |   goe <- values$mypca2go[[paste0("PC",input$pc_x)]][["posLoad"]]  # x-axis PC, positive loadings
283 |   kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_x, "- positive loadings"))
284 | }
285 | 
286 | if(!is.null(values$mypca2go))
287 | {
288 |   goe <- values$mypca2go[[paste0("PC",input$pc_x)]][["negLoad"]]  # x-axis PC, negative loadings
289 |   kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_x, "- negative loadings"))
290 | }
291 | 
292 | if(!is.null(values$mypca2go))
293 | {
294 |   goe <- values$mypca2go[[paste0("PC",input$pc_y)]][["posLoad"]]  # y-axis PC, positive loadings
295 |   kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_y, "- positive loadings"))
296 | }
297 | 
298 | if(!is.null(values$mypca2go))
299 | {
300 |   goe <- values$mypca2go[[paste0("PC",input$pc_y)]][["negLoad"]]  # y-axis PC, negative loadings
301 |   kable(goe, caption=paste0("Functional categories enriched in ","PC",input$pc_y, "- negative loadings"))
302 | }
303 | ```
304 | 
305 | # Multifactor exploration of the dataset
306 | 
307 | ```{r}
308 | # Multifactor PCA plot, drawn only when input$composemat is positive
309 | if(input$composemat > 0){
310 |   pcmat <- obj3()[[1]]  # obj3() is defined outside this template; its first element is the matrix fed to prcomp
311 |   tcol <- obj3()[[2]]  # colors used for the first set of points
312 |   tcol2 <- obj3()[[3]]  # colors used for the second set of points
313 |   pres <- prcomp(t(pcmat),scale=FALSE)
314 |   
315 |   plot.index <- c(as.integer(input$pc_x_multifac),as.integer(input$pc_y_multifac))
316 |   offset <- ncol(pcmat)/2  # the rows of pcx (columns of pcmat) are handled as two halves
317 |   gene.no <- offset
318 |   pcx <- pres$x
319 |   # set.seed(11)
320 |   # for (i in 1:ncol(pcx)) {
321 |   #   pcx[,i] <- pcx[,i] + rnorm(nrow(pcx),sd=diff(range(pcx[,i]))/100)
322 |   # }
323 |   plot(pcx[(offset+1):ncol(pcmat),plot.index[1]][1:gene.no],pcx[(offset+1):ncol(pcmat),plot.index[2]][1:gene.no],xlim=range(pcx[,plot.index[1]]),ylim=range(pcx[,plot.index[2]]),pch=20,col=tcol,cex=0.3)#,type="n")
324 |   #plot(0,type="n",xlim=range(pres$x[,plot.index]),ylim=range(pres$x[,plot.index]))
325 |   lcol <- ifelse(tcol != tcol2,"black","grey")  # black segment where the two colors differ, grey otherwise
326 |   for (i in 1:gene.no) {  # connect row i of the first half with row offset+i of the second half
327 |     lines(pcx[c(i,offset+i),plot.index[1]],pcx[c(i,offset+i),plot.index[2]],col=lcol[i])
328 |   }
329 |   points(pcx[1:offset,plot.index[1]][1:gene.no],pcx[1:offset,plot.index[2]][1:gene.no],pch=20,col=tcol,cex=0.3)
330 |   points(pcx[(offset+1):ncol(pcmat),plot.index[1]][1:gene.no],pcx[(offset+1):ncol(pcmat),plot.index[2]][1:gene.no],pch=20,col=tcol2,cex=0.3)}
331 | ```
332 | 
333 | # About pcaExplorer
334 | 
335 | `pcaExplorer` is a Bioconductor package containing a Shiny application for
336 | analyzing expression data in different conditions and experimental factors. 
337 | 
338 | `pcaExplorer` guides the user in exploring the Principal Components of the data, 
339 | providing tools and functionality to detect outlier samples, genes that show 
340 | particular patterns, and additionally provides a functional interpretation of 
341 | the principal components for further quality assessment and hypothesis generation
342 | on the input data. 
343 | 
344 | Thanks to its interactive/reactive design, it is designed to become a practical
345 | companion to any RNA-seq dataset analysis, making exploratory data analysis 
346 | accessible also to the bench biologist, while providing additional insight also
347 | for the experienced data analyst.
348 | 
349 | `pcaExplorer` was developed in the Bioinformatics Division led by Harald Binder 
350 | at the IMBEI (Institut für Medizinische Biometrie, Epidemiologie und Informatik) 
351 | in the University Medical Center of the Johannes Gutenberg University Mainz.
352 | 
353 | ## Developers
354 | 
355 | `pcaExplorer` is currently maintained by Federico Marini at the IMBEI (www.imbei.uni-mainz.de).
356 | You can contact him by clicking on the button below.
357 | 
358 | <a href="mailto:marinif@uni-mainz.de?subject=[pcaExplorer_feedback]" class="btn btn-primary">Federico Marini</a>
359 | 
360 | ## Code
361 | 
362 | `pcaExplorer` is a part of the Bioconductor project (www.bioconductor.org).
363 | All code for `pcaExplorer`, especially for the development version, is available
364 | on <a href="https://github.com/federicomarini/pcaExplorer" target="_blank">GitHub</a>.
365 | 
366 | # Citation info
367 | 
368 | If you use `pcaExplorer` for your analysis, please cite it as shown below:
369 | 
370 | ```{r}
371 | citation("pcaExplorer")
372 | ```
373 | 
374 | # Session Information
375 | 
376 | ```{r}
377 | sessionInfo()
378 | ```
379 | 
380 | ```{r, echo = FALSE}
381 | library(shiny)
382 | footertemplate <- function(){  # returns the centred footer <div> appended to the report; takes no arguments
383 |   tags$div(
384 |     class = "footer",
385 |     style = "text-align:center",
386 |     tags$div(
387 |       class = "foot-inner",
388 |       list(  # children in display order: rule, credit text and links, separated by line breaks
389 |         hr(),
390 |         "This report was generated with", tags$a(href="http://bioconductor.org/packages/pcaExplorer/", "pcaExplorer"), br(),
391 |         "pcaExplorer is a project developed by Federico Marini in the Bioinformatics division of the ",
392 |         tags$a(href="http://www.unimedizin-mainz.de/imbei","IMBEI"),br(),
393 |         "Development of the pcaExplorer package is on ",
394 |         tags$a(href="https://github.com/federicomarini/pcaExplorer", "GitHub")
395 |       )
396 |     )
397 |   )
398 | }
399 | ```
400 | 
401 | ```{r, echo = FALSE}
402 | footertemplate()
403 | ```
404 | 


--------------------------------------------------------------------------------
/inst/www/help_dataformats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/inst/www/help_dataformats.png


--------------------------------------------------------------------------------
/inst/www/pcaExplorer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/inst/www/pcaExplorer.png


--------------------------------------------------------------------------------
/man/correlatePCs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/correlatePCs.R
 3 | \name{correlatePCs}
 4 | \alias{correlatePCs}
 5 | \title{Principal components (cor)relation with experimental covariates}
 6 | \usage{
 7 | correlatePCs(pcaobj, coldata, pcs = 1:4)
 8 | }
 9 | \arguments{
10 | \item{pcaobj}{A \code{prcomp} object}
11 | 
12 | \item{coldata}{A \code{data.frame} object containing the experimental
13 | covariates}
14 | 
15 | \item{pcs}{A numeric vector, containing the corresponding PC number}
16 | }
17 | \value{
18 | A \code{data.frame} object with computed p values for each covariate
19 | and for each principal component
20 | }
21 | \description{
22 | Computes the significance of (cor)relations between PCA scores and the sample
 23 | experimental covariates, using Kruskal-Wallis test for categorical variables
24 | and the \code{cor.test} based on Spearman's correlation for continuous
25 | variables
26 | }
27 | \examples{
28 | library(DESeq2)
29 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
30 | rlt <- DESeq2::rlogTransformation(dds)
31 | pcaobj <- prcomp(t(assay(rlt)))
32 | correlatePCs(pcaobj, colData(dds))
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/deprecated.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deprecated.R
 3 | \name{deprecated}
 4 | \alias{deprecated}
 5 | \title{Deprecated functions in pcaExplorer}
 6 | \arguments{
 7 | \item{...}{Ignored arguments.}
 8 | }
 9 | \value{
 10 | Each function throws a warning, with a deprecation message pointing
 11 | towards its successor (if available).
12 | }
13 | \description{
14 | Functions that are on their way to the function afterlife.
15 | Their successors are also listed.
16 | }
17 | \details{
18 | The successors of these functions are likely coming after the rework that
 19 | led to the creation of the \code{mosdef} package. See its
 20 | documentation for more details.
21 | }
22 | \section{Transitioning to the mosdef framework}{
23 | 
24 | \itemize{
25 | \item \code{\link[=topGOtable]{topGOtable()}} is now being replaced by the more flexible
26 | \code{\link[mosdef:run_topGO]{mosdef::run_topGO()}} function
27 | }
28 | }
29 | 
30 | \examples{
31 | # try(topGOtable())
32 | 
33 | }
34 | \author{
35 | Federico Marini
36 | }
37 | 


--------------------------------------------------------------------------------
/man/distro_expr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/distro_expr.R
 3 | \name{distro_expr}
 4 | \alias{distro_expr}
 5 | \title{Plot distribution of expression values}
 6 | \usage{
 7 | distro_expr(rld, plot_type = "density")
 8 | }
 9 | \arguments{
10 | \item{rld}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object.}
11 | 
12 | \item{plot_type}{Character, choose one of \code{boxplot}, \code{violin} or
13 | \code{density}. Defaults to \code{density}}
14 | }
15 | \value{
16 | A plot with the distribution of the expression values
17 | }
18 | \description{
19 | Plot distribution of expression values
20 | }
21 | \examples{
22 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
23 | rlt <- DESeq2::rlogTransformation(dds)
24 | distro_expr(rlt)
25 | }
26 | 


--------------------------------------------------------------------------------
/man/figures/pcaExplorer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/man/figures/pcaExplorer.png


--------------------------------------------------------------------------------
/man/geneprofiler.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/profile_explore.R
 3 | \name{geneprofiler}
 4 | \alias{geneprofiler}
 5 | \title{Extract and plot the expression profile of genes}
 6 | \usage{
 7 | geneprofiler(se, genelist = NULL, intgroup = "condition", plotZ = FALSE)
 8 | }
 9 | \arguments{
10 | \item{se}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object, or a
11 | \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object.}
12 | 
13 | \item{genelist}{An array of characters, including the names of the genes of
14 | interest of which the profile is to be plotted}
15 | 
16 | \item{intgroup}{A factor, needs to be in the \code{colnames} of \code{colData(se)}}
17 | 
18 | \item{plotZ}{Logical, whether to plot the scaled expression values. Defaults to
19 | \code{FALSE}}
20 | }
21 | \value{
22 | A plot of the expression profile for the genes
23 | }
24 | \description{
25 | Extract and plot the expression profile of genes
26 | }
27 | \examples{
28 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
29 | rlt <- DESeq2::rlogTransformation(dds)
30 | geneprofiler(rlt, paste0("gene", sample(1:1000, 20)))
31 | geneprofiler(rlt, paste0("gene", sample(1:1000, 20)), plotZ = TRUE)
32 | }
33 | 


--------------------------------------------------------------------------------
/man/genespca.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/genespca.R
  3 | \name{genespca}
  4 | \alias{genespca}
  5 | \title{Principal components analysis on the genes}
  6 | \usage{
  7 | genespca(
  8 |   x,
  9 |   ntop,
 10 |   choices = c(1, 2),
 11 |   arrowColors = "steelblue",
 12 |   groupNames = "group",
 13 |   biplot = TRUE,
 14 |   scale = 1,
 15 |   pc.biplot = TRUE,
 16 |   obs.scale = 1 - scale,
 17 |   var.scale = scale,
 18 |   groups = NULL,
 19 |   ellipse = FALSE,
 20 |   ellipse.prob = 0.68,
 21 |   labels = NULL,
 22 |   labels.size = 3,
 23 |   alpha = 1,
 24 |   var.axes = TRUE,
 25 |   circle = FALSE,
 26 |   circle.prob = 0.69,
 27 |   varname.size = 4,
 28 |   varname.adjust = 1.5,
 29 |   varname.abbrev = FALSE,
 30 |   returnData = FALSE,
 31 |   coordEqual = FALSE,
 32 |   scaleArrow = 1,
 33 |   useRownamesAsLabels = TRUE,
 34 |   point_size = 2,
 35 |   annotation = NULL
 36 | )
 37 | }
 38 | \arguments{
 39 | \item{x}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(x)},
 40 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or
 41 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}}
 42 | 
 43 | \item{ntop}{Number of top genes to use for principal components,
 44 | selected by highest row variance}
 45 | 
 46 | \item{choices}{Vector of two numeric values, to select on which principal components to plot}
 47 | 
 48 | \item{arrowColors}{Vector of character, either as long as the number of the samples, or one single value}
 49 | 
 50 | \item{groupNames}{Factor containing the groupings for the input data. Is efficiently chosen
 51 | as the (interaction of more) factors in the colData for the object provided}
 52 | 
 53 | \item{biplot}{Logical, whether to additionally draw the samples labels as in a biplot representation}
 54 | 
 55 | \item{scale}{Covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1,
 56 | the inner product between the variables approximates the covariance and the
 57 | distance between the points approximates the Mahalanobis distance.}
 58 | 
 59 | \item{pc.biplot}{Logical, for compatibility with biplot.princomp()}
 60 | 
 61 | \item{obs.scale}{Scale factor to apply to observations}
 62 | 
 63 | \item{var.scale}{Scale factor to apply to variables}
 64 | 
 65 | \item{groups}{Optional factor variable indicating the groups that the observations
 66 | belong to. If provided the points will be colored according to groups}
 67 | 
 68 | \item{ellipse}{Logical, draw a normal data ellipse for each group}
 69 | 
 70 | \item{ellipse.prob}{Size of the ellipse in Normal probability}
 71 | 
 72 | \item{labels}{Optional vector of labels for the observations}
 73 | 
 74 | \item{labels.size}{Size of the text used for the labels}
 75 | 
 76 | \item{alpha}{Alpha transparency value for the points (0 = transparent, 1 = opaque)}
 77 | 
 78 | \item{var.axes}{Logical, draw arrows for the variables?}
 79 | 
 80 | \item{circle}{Logical, draw a correlation circle? (only applies when prcomp
 81 | was called with scale = TRUE and when var.scale = 1)}
 82 | 
 83 | \item{circle.prob}{Size of the correlation circle in Normal probability}
 84 | 
 85 | \item{varname.size}{Size of the text for variable names}
 86 | 
 87 | \item{varname.adjust}{Adjustment factor for the placement of the variable names,
 88 | '>= 1' means farther from the arrow}
 89 | 
 90 | \item{varname.abbrev}{Logical, whether or not to abbreviate the variable names}
 91 | 
 92 | \item{returnData}{Logical, if TRUE returns a data.frame for further use, containing the
 93 | selected principal components for custom plotting}
 94 | 
 95 | \item{coordEqual}{Logical, default FALSE, for allowing brushing. If TRUE, plot using
 96 | equal scale cartesian coordinates}
 97 | 
 98 | \item{scaleArrow}{Multiplicative factor, usually >=1, only for visualization purposes,
 99 | to allow for distinguishing where the variables are plotted}
100 | 
101 | \item{useRownamesAsLabels}{Logical, if TRUE uses the row names as labels for plotting}
102 | 
103 | \item{point_size}{Size of the points to be plotted for the observations (genes)}
104 | 
105 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids)
106 | and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols}
107 | }
108 | \value{
109 | An object created by \code{ggplot}, which can be assigned and further customized.
110 | }
111 | \description{
112 | Computes and plots the principal components of the genes, optionally displaying
113 | the samples as in a typical biplot visualization.
114 | }
115 | \details{
116 | The implementation of this function is based on the beautiful \code{ggbiplot}
117 | package developed by Vince Vu, available at https://github.com/vqv/ggbiplot.
118 | The adaptation and additional parameters are tailored to display typical genomics data
119 | such as the transformed counts of RNA-seq experiments
120 | }
121 | \examples{
122 | 
123 | library(DESeq2)
124 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
125 | rlt <- rlogTransformation(dds)
126 | groups <- colData(dds)$condition
127 | groups <- factor(groups, levels = unique(groups))
128 | cols <- scales::hue_pal()(2)[groups]
129 | genespca(rlt, ntop=100, arrowColors = cols, groupNames = groups)
130 | 
131 | groups_multi <- interaction(as.data.frame(colData(rlt)[, c("condition", "tissue")]))
132 | groups_multi <- factor(groups_multi, levels = unique(groups_multi))
133 | cols_multi <- scales::hue_pal()(length(levels(groups_multi)))[factor(groups_multi)]
134 | genespca(rlt, ntop = 100, arrowColors = cols_multi, groupNames = groups_multi)
135 | 
136 | }
137 | 


--------------------------------------------------------------------------------
/man/get_annotation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_annotation.R
 3 | \name{get_annotation}
 4 | \alias{get_annotation}
 5 | \title{Get an annotation data frame from biomaRt}
 6 | \usage{
 7 | get_annotation(dds, biomart_dataset, idtype)
 8 | }
 9 | \arguments{
10 | \item{dds}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object}
11 | 
12 | \item{biomart_dataset}{A biomaRt dataset to use. To see the list, type
13 | \code{mart = useMart('ensembl')}, followed by \code{listDatasets(mart)}.}
14 | 
15 | \item{idtype}{Character, the ID type of the genes as in the row names of
16 | \code{dds}, to be used for the call to \code{\link[biomaRt:getBM]{biomaRt::getBM()}}}
17 | }
18 | \value{
19 | A data frame for ready use in \code{pcaExplorer}, retrieved from biomaRt.
20 | }
21 | \description{
22 | Get an annotation data frame from biomaRt
23 | }
24 | \examples{
25 | library("airway")
26 | data("airway", package = "airway")
27 | airway
28 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
29 |                                              colData = colData(airway),
30 |                                              design = ~dex+cell)
31 | \dontrun{
32 | get_annotation(dds_airway, "hsapiens_gene_ensembl", "ensembl_gene_id")
33 | }
34 | }
35 | 


--------------------------------------------------------------------------------
/man/get_annotation_orgdb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_annotation.R
 3 | \name{get_annotation_orgdb}
 4 | \alias{get_annotation_orgdb}
 5 | \title{Get an annotation data frame from org db packages}
 6 | \usage{
 7 | get_annotation_orgdb(dds, orgdb_species, idtype, key_for_genenames = "SYMBOL")
 8 | }
 9 | \arguments{
10 | \item{dds}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object}
11 | 
12 | \item{orgdb_species}{Character string, named as the \code{org.XX.eg.db}
13 | package which should be available in Bioconductor}
14 | 
15 | \item{idtype}{Character, the ID type of the genes as in the row names of
16 | \code{dds}, to be used for the call to \code{\link[AnnotationDbi:AnnotationDb-class]{AnnotationDbi::mapIds()}}}
17 | 
18 | \item{key_for_genenames}{Character, corresponding to the column name for the
19 | key in the orgDb package containing the official gene name (often called
20 | gene symbol).
21 | This parameter defaults to "SYMBOL", but can be adjusted in case the key is not
22 | found in the annotation package (e.g. for \code{org.Sc.sgd.db}).}
23 | }
24 | \value{
25 | A data frame for ready use in \code{pcaExplorer}, retrieved from the
26 | org db packages
27 | }
28 | \description{
29 | Get an annotation data frame from org db packages
30 | }
31 | \examples{
32 | library("airway")
33 | data("airway", package = "airway")
34 | airway
35 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
36 |                                              colData = colData(airway),
37 |                                              design = ~dex+cell)
38 | anno_df <- get_annotation_orgdb(dds_airway, "org.Hs.eg.db", "ENSEMBL")
39 | head(anno_df)
40 | }
41 | 


--------------------------------------------------------------------------------
/man/hi_loadings.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hi_loadings.R
 3 | \name{hi_loadings}
 4 | \alias{hi_loadings}
 5 | \title{Extract genes with highest loadings}
 6 | \usage{
 7 | hi_loadings(
 8 |   pcaobj,
 9 |   whichpc = 1,
10 |   topN = 10,
11 |   exprTable = NULL,
12 |   annotation = NULL,
13 |   title = "Top/bottom loadings"
14 | )
15 | }
16 | \arguments{
17 | \item{pcaobj}{A \code{prcomp} object}
18 | 
19 | \item{whichpc}{An integer number, corresponding to the principal component of
20 | interest}
21 | 
22 | \item{topN}{Integer, number of genes with top and bottom loadings}
23 | 
24 | \item{exprTable}{A \code{matrix} object, e.g. the counts of a \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}}.
25 | If not NULL, returns the counts matrix for the selected genes}
26 | 
27 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids)
28 | and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols}
29 | 
30 | \item{title}{The title of the plot}
31 | }
32 | \value{
33 | A ggplot2 object, or a \code{matrix}, if \code{exprTable} is not null
34 | }
35 | \description{
36 | Extract genes with highest loadings
37 | }
38 | \examples{
39 | dds <- makeExampleDESeqDataSet_multifac(betaSD = 3, betaSD_tissue = 1)
40 | rlt <- DESeq2::rlogTransformation(dds)
41 | pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
42 | hi_loadings(pcaobj, topN = 20)
43 | hi_loadings(pcaobj, topN = 10, exprTable = dds)
44 | hi_loadings(pcaobj, topN = 10, exprTable = counts(dds))
45 | 
46 | }
47 | 


--------------------------------------------------------------------------------
/man/limmaquickpca2go.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pca2go.R
 3 | \name{limmaquickpca2go}
 4 | \alias{limmaquickpca2go}
 5 | \title{Functional interpretation of the principal components, based on simple
 6 | overrepresentation analysis}
 7 | \usage{
 8 | limmaquickpca2go(
 9 |   se,
10 |   pca_ngenes = 10000,
11 |   inputType = "ENSEMBL",
12 |   organism = "Mm",
13 |   loadings_ngenes = 500,
14 |   background_genes = NULL,
15 |   scale = FALSE,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{se}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(se)},
21 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or
22 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}}
23 | 
24 | \item{pca_ngenes}{Number of genes to use for the PCA}
25 | 
 26 | \item{inputType}{Input format type of the gene identifiers. Defaults to \code{ENSEMBL}, which will then
27 | be converted to ENTREZ ids. Can assume values such as \code{ENTREZID},\code{GENENAME} or \code{SYMBOL},
28 | like it is normally used with the \code{select} function of \code{AnnotationDbi}}
29 | 
30 | \item{organism}{Character abbreviation for the species, using \code{org.XX.eg.db} for annotation}
31 | 
32 | \item{loadings_ngenes}{Number of genes to extract the loadings (in each direction)}
33 | 
34 | \item{background_genes}{Which genes to consider as background.}
35 | 
36 | \item{scale}{Logical, defaults to FALSE, scale values for the PCA}
37 | 
38 | \item{...}{Further parameters to be passed to the goana routine}
39 | }
40 | \value{
41 | A nested list object containing for each principal component the terms enriched
 42 | in each direction. This object is meant to be used in combination with the display feature
43 | of the main \code{\link[=pcaExplorer]{pcaExplorer()}} function
44 | }
45 | \description{
46 | Extracts the genes with the highest loadings for each principal component, and
47 | performs functional enrichment analysis on them using the simple and quick routine
48 | provided by the \code{limma} package
49 | }
50 | \examples{
51 | library("airway")
52 | library("DESeq2")
53 | library("limma")
54 | data("airway", package = "airway")
55 | airway
56 | dds_airway <- DESeqDataSet(airway, design = ~ cell + dex)
57 | \dontrun{
58 | rld_airway <- rlogTransformation(dds_airway)
59 | goquick_airway <- limmaquickpca2go(rld_airway,
60 |                                    pca_ngenes = 10000,
61 |                                    inputType = "ENSEMBL",
62 |                                    organism = "Hs")
63 | }
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/man/makeExampleDESeqDataSet_multifac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/makeds.R
 3 | \name{makeExampleDESeqDataSet_multifac}
 4 | \alias{makeExampleDESeqDataSet_multifac}
 5 | \title{Make a simulated DESeqDataSet for two or more experimental factors}
 6 | \usage{
 7 | makeExampleDESeqDataSet_multifac(
 8 |   n = 1000,
 9 |   m = 12,
10 |   betaSD_condition = 1,
11 |   betaSD_tissue = 3,
12 |   interceptMean = 4,
13 |   interceptSD = 2,
14 |   dispMeanRel = function(x) 4/x + 0.1,
15 |   sizeFactors = rep(1, m)
16 | )
17 | }
18 | \arguments{
19 | \item{n}{number of rows (genes)}
20 | 
21 | \item{m}{number of columns (samples)}
22 | 
23 | \item{betaSD_condition}{the standard deviation for condition betas, i.e. beta ~ N(0,betaSD)}
24 | 
25 | \item{betaSD_tissue}{the standard deviation for tissue betas, i.e. beta ~ N(0,betaSD)}
26 | 
27 | \item{interceptMean}{the mean of the intercept betas (log2 scale)}
28 | 
29 | \item{interceptSD}{the standard deviation of the intercept betas (log2 scale)}
30 | 
31 | \item{dispMeanRel}{a function specifying the relationship of the dispersions on
32 | \code{2^trueIntercept}}
33 | 
34 | \item{sizeFactors}{multiplicative factors for each sample}
35 | }
36 | \value{
37 | a \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} with true dispersion,
38 | intercept for two factors (condition and tissue) and beta values in the
39 | metadata columns.  Note that the true betas are provided on the log2 scale.
40 | }
41 | \description{
42 | Constructs a simulated dataset of Negative Binomial data from different conditions.
43 | The fold changes between the conditions can be adjusted with the \code{betaSD_condition}
44 | and the \code{betaSD_tissue} arguments.
45 | }
46 | \details{
47 | This function is designed and inspired following the proposal of
48 | \code{\link[DESeq2:makeExampleDESeqDataSet]{DESeq2::makeExampleDESeqDataSet()}} from the \code{DESeq2} package. Credits are given
49 | to Mike Love for the nice initial implementation
50 | }
51 | \examples{
52 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
53 | dds
54 | dds2 <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1, betaSD_tissue = 4)
55 | dds2
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/man/pair_corr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pair_corr.R
 3 | \name{pair_corr}
 4 | \alias{pair_corr}
 5 | \title{Pairwise scatter and correlation plot of counts}
 6 | \usage{
 7 | pair_corr(df, log = FALSE, method = "pearson", use_subset = TRUE)
 8 | }
 9 | \arguments{
10 | \item{df}{A data frame, containing the (raw/normalized/transformed) counts}
11 | 
12 | \item{log}{Logical, whether to convert the input values to log2 (with addition
13 | of a pseudocount). Defaults to FALSE.}
14 | 
15 | \item{method}{Character string, one of \code{pearson} (default), \code{kendall}, or
16 | \code{spearman} as in \code{cor}}
17 | 
18 | \item{use_subset}{Logical value. If TRUE, only 1000 values per sample will be used
19 | to speed up the plotting operations.}
20 | }
21 | \value{
22 | A plot with pairwise scatter plots and correlation coefficients
23 | }
24 | \description{
25 | Pairwise scatter and correlation plot of counts
26 | }
27 | \examples{
28 | library("airway")
29 | data("airway", package = "airway")
30 | airway
31 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
32 |                                              colData = colData(airway),
33 |                                              design = ~dex+cell)
34 | pair_corr(counts(dds_airway)[1:100, ]) # use just a subset for the example
35 | }
36 | 


--------------------------------------------------------------------------------
/man/pca2go.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pca2go.R
 3 | \name{pca2go}
 4 | \alias{pca2go}
 5 | \title{Functional interpretation of the principal components}
 6 | \usage{
 7 | pca2go(
 8 |   se,
 9 |   pca_ngenes = 10000,
10 |   annotation = NULL,
11 |   inputType = "geneSymbol",
12 |   organism = "Mm",
13 |   ensToGeneSymbol = FALSE,
14 |   loadings_ngenes = 500,
15 |   background_genes = NULL,
16 |   scale = FALSE,
17 |   return_ranked_gene_loadings = FALSE,
18 |   annopkg = NULL,
19 |   ...
20 | )
21 | }
22 | \arguments{
23 | \item{se}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(se)},
24 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or
25 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}}
26 | 
27 | \item{pca_ngenes}{Number of genes to use for the PCA}
28 | 
29 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids)
30 | and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols}
31 | 
32 | \item{inputType}{Input format type of the gene identifiers. Will be used by the routines of \code{topGO}}
33 | 
34 | \item{organism}{Character abbreviation for the species, using \code{org.XX.eg.db} for annotation}
35 | 
36 | \item{ensToGeneSymbol}{Logical, whether to expect ENSEMBL gene identifiers, to convert to gene symbols
37 | with the \code{annotation} provided}
38 | 
39 | \item{loadings_ngenes}{Number of genes to extract the loadings (in each direction)}
40 | 
41 | \item{background_genes}{Which genes to consider as background.}
42 | 
43 | \item{scale}{Logical, defaults to FALSE, scale values for the PCA}
44 | 
45 | \item{return_ranked_gene_loadings}{Logical, defaults to FALSE. If TRUE, simply returns
46 | a list containing the top ranked genes with hi loadings in each PC and in each direction}
47 | 
48 | \item{annopkg}{String containing the name of the organism annotation package. Can be used to
49 | override the \code{organism} parameter, e.g. in case of alternative identifiers used
50 | in the annotation package (Arabidopsis with TAIR)}
51 | 
52 | \item{...}{Further parameters to be passed to the topGO routine}
53 | }
54 | \value{
55 | A nested list object containing for each principal component the terms enriched
 56 | in each direction. This object is meant to be used in combination with the display feature
57 | of the main \code{\link[=pcaExplorer]{pcaExplorer()}} function
58 | }
59 | \description{
60 | Extracts the genes with the highest loadings for each principal component, and
61 | performs functional enrichment analysis on them using routines and algorithms from
62 | the \code{topGO} package
63 | }
64 | \examples{
65 | library("airway")
66 | library("DESeq2")
67 | data("airway", package = "airway")
68 | airway
69 | dds_airway <- DESeqDataSet(airway, design= ~ cell + dex)
70 | \dontrun{
71 | rld_airway <- rlogTransformation(dds_airway)
72 | # constructing the annotation object
73 | anno_df <- data.frame(gene_id = rownames(dds_airway),
74 |                       stringsAsFactors = FALSE)
75 | library("AnnotationDbi")
76 | library("org.Hs.eg.db")
77 | anno_df$gene_name <- mapIds(org.Hs.eg.db,
78 |                             keys = anno_df$gene_id,
79 |                             column = "SYMBOL",
80 |                             keytype = "ENSEMBL",
81 |                             multiVals = "first")
82 | rownames(anno_df) <- anno_df$gene_id
83 | bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0]
84 | library(topGO)
85 | pca2go_airway <- pca2go(rld_airway,
86 |                         annotation = anno_df,
87 |                         organism = "Hs",
88 |                         ensToGeneSymbol = TRUE,
89 |                         background_genes = bg_ids)
90 | }
91 | 
92 | }
93 | 


--------------------------------------------------------------------------------
/man/pcaExplorer-pkg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pcaExplorer-pkg.R
 3 | \docType{package}
 4 | \name{pcaExplorer-pkg}
 5 | \alias{pcaExplorer-package}
 6 | \alias{pcaExplorer-pkg}
 7 | \title{pcaExplorer: interactive visualization of RNA-seq datasets based on Principal Components Analysis}
 8 | \description{
 9 | pcaExplorer provides functionality for interactive visualization of RNA-seq datasets
10 | based on Principal Components Analysis. The methods provided allow for quick information
11 | extraction and effective data exploration. A Shiny application encapsulates the whole analysis.
12 | }
13 | \details{
14 | pcaExplorer provides functionality for interactive visualization of RNA-seq datasets
15 | based on Principal Components Analysis. The methods provided allow for quick information
16 | extraction and effective data exploration. A Shiny application encapsulates the whole analysis.
17 | }
18 | \seealso{
19 | Useful links:
20 | \itemize{
21 |   \item \url{https://github.com/federicomarini/pcaExplorer}
22 |   \item \url{https://federicomarini.github.io/pcaExplorer/}
23 |   \item Report bugs at \url{https://github.com/federicomarini/pcaExplorer/issues}
24 | }
25 | 
26 | }
27 | \author{
28 | Federico Marini \email{marinif@uni-mainz.de}, 2016
29 | 
30 | Maintainer: Federico Marini \email{marinif@uni-mainz.de}
31 | }
32 | 


--------------------------------------------------------------------------------
/man/pcaExplorer.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pcaExplorer.R
 3 | \name{pcaExplorer}
 4 | \alias{pcaExplorer}
 5 | \title{Explore a dataset from a PCA perspective}
 6 | \usage{
 7 | pcaExplorer(
 8 |   dds = NULL,
 9 |   dst = NULL,
10 |   countmatrix = NULL,
11 |   coldata = NULL,
12 |   pca2go = NULL,
13 |   annotation = NULL,
14 |   runLocal = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{dds}{A \code{\link[DESeq2:DESeqDataSet]{DESeq2::DESeqDataSet()}} object. If not provided, then a \code{countmatrix}
19 | and a \code{coldata} need to be provided. If none of the above is provided, it is possible
20 | to upload the data during the execution of the Shiny App}
21 | 
22 | \item{dst}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object. Can be computed from the \code{dds} object
23 | if left NULL. If none is provided, then a \code{countmatrix}
24 | and a \code{coldata} need to be provided. If none of the above is provided, it is possible
25 | to upload the data during the execution of the Shiny App}
26 | 
27 | \item{countmatrix}{A count matrix, with genes as rows and samples as columns. If not provided, it is possible
28 | to upload the data during the execution of the Shiny App}
29 | 
30 | \item{coldata}{A data.frame containing the info on the covariates of each sample. If not provided, it is possible
31 | to upload the data during the execution of the Shiny App}
32 | 
33 | \item{pca2go}{An object generated by the \code{\link[=pca2go]{pca2go()}} function, which contains
34 | the information on enriched functional categories in the genes that show the top or bottom loadings
35 | in each principal component of interest. If not provided, it is possible
36 | to compute live during the execution of the Shiny App}
37 | 
38 | \item{annotation}{A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids)
39 | and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols}
40 | 
41 | \item{runLocal}{A logical indicating whether the app is to be run locally or remotely on a server, which determines how documentation will be accessed.}
42 | }
43 | \value{
44 | A Shiny App is launched for interactive data exploration
45 | }
46 | \description{
47 | Launch a Shiny App for interactive exploration of a dataset from the perspective
48 | of Principal Components Analysis
49 | }
50 | \examples{
51 | library("airway")
52 | data("airway", package = "airway")
53 | airway
54 | dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
55 |                                              colData = colData(airway),
56 |                                              design = ~dex+cell)
57 | \dontrun{
58 | rld_airway <- DESeq2::rlogTransformation(dds_airway)
59 | 
60 | pcaExplorer(dds_airway, rld_airway)
61 | 
62 | pcaExplorer(countmatrix = counts(dds_airway), coldata = colData(dds_airway))
63 | 
64 | pcaExplorer() # and then upload the count matrix, the covariate matrix (and, optionally, the annotation)
65 | }
66 | 
67 | }
68 | 


--------------------------------------------------------------------------------
/man/pcaplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pcaplot.R
 3 | \name{pcaplot}
 4 | \alias{pcaplot}
 5 | \title{Sample PCA plot for transformed data}
 6 | \usage{
 7 | pcaplot(
 8 |   x,
 9 |   intgroup = NULL,
10 |   ntop = 500,
11 |   returnData = FALSE,
12 |   title = NULL,
13 |   pcX = 1,
14 |   pcY = 2,
15 |   text_labels = TRUE,
16 |   point_size = 3,
17 |   ellipse = TRUE,
18 |   ellipse.prob = 0.95
19 | )
20 | }
21 | \arguments{
22 | \item{x}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(x)},
23 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or
24 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}/\code{\link[DESeq2:vst]{DESeq2::vst()}}}
25 | 
26 | \item{intgroup}{Interesting groups: a character vector of
27 | names in \code{colData(x)} to use for grouping. Defaults to NULL, which would then
28 | select the first column of the \code{colData} slot}
29 | 
30 | \item{ntop}{Number of top genes to use for principal components,
31 | selected by highest row variance}
32 | 
33 | \item{returnData}{logical, if TRUE returns a data.frame for further use, containing the
34 | selected principal components and intgroup covariates for custom plotting}
35 | 
36 | \item{title}{The plot title}
37 | 
38 | \item{pcX}{The principal component to display on the x axis}
39 | 
40 | \item{pcY}{The principal component to display on the y axis}
41 | 
42 | \item{text_labels}{Logical, whether to display the labels with the sample identifiers}
43 | 
44 | \item{point_size}{Integer, the size of the points for the samples}
45 | 
46 | \item{ellipse}{Logical, whether to display the confidence ellipse for the selected groups}
47 | 
48 | \item{ellipse.prob}{Numeric, a value in the interval [0;1), the probability level used for the confidence ellipse}
49 | }
50 | \value{
51 | An object created by \code{ggplot}, which can be assigned and further customized.
52 | }
53 | \description{
54 | Plots the results of PCA on a 2-dimensional space
55 | }
56 | \examples{
57 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
58 | rlt <- DESeq2::rlogTransformation(dds)
59 | pcaplot(rlt, ntop = 200)
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/man/pcaplot3d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pcaplot.R
 3 | \name{pcaplot3d}
 4 | \alias{pcaplot3d}
 5 | \title{Sample PCA plot for transformed data}
 6 | \usage{
 7 | pcaplot3d(
 8 |   x,
 9 |   intgroup = "condition",
10 |   ntop = 500,
11 |   returnData = FALSE,
12 |   title = NULL,
13 |   pcX = 1,
14 |   pcY = 2,
15 |   pcZ = 3,
16 |   text_labels = TRUE,
17 |   point_size = 3
18 | )
19 | }
20 | \arguments{
21 | \item{x}{A \code{\link[DESeq2:DESeqTransform]{DESeq2::DESeqTransform()}} object, with data in \code{assay(x)},
22 | produced for example by either \code{\link[DESeq2:rlog]{DESeq2::rlog()}} or
23 | \code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}}}
24 | 
25 | \item{intgroup}{Interesting groups: a character vector of
26 | names in \code{colData(x)} to use for grouping}
27 | 
28 | \item{ntop}{Number of top genes to use for principal components,
29 | selected by highest row variance}
30 | 
31 | \item{returnData}{logical, if TRUE returns a data.frame for further use, containing the
32 | selected principal components and intgroup covariates for custom plotting}
33 | 
34 | \item{title}{The plot title}
35 | 
36 | \item{pcX}{The principal component to display on the x axis}
37 | 
38 | \item{pcY}{The principal component to display on the y axis}
39 | 
40 | \item{pcZ}{The principal component to display on the z axis}
41 | 
42 | \item{text_labels}{Logical, whether to display the labels with the sample identifiers}
43 | 
44 | \item{point_size}{Integer, the size of the points for the samples}
45 | }
46 | \value{
47 | An HTML-based visualization of the 3d PCA plot
48 | }
49 | \description{
50 | Plots the results of PCA on a 3-dimensional space, interactively
51 | }
52 | \examples{
53 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
54 | rlt <- DESeq2::rlogTransformation(dds)
55 | pcaplot3d(rlt, ntop = 200)
56 | }
57 | 


--------------------------------------------------------------------------------
/man/pcascree.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pcaplot.R
 3 | \name{pcascree}
 4 | \alias{pcascree}
 5 | \title{Scree plot of the PCA on the samples}
 6 | \usage{
 7 | pcascree(obj, type = c("pev", "cev"), pc_nr = NULL, title = NULL)
 8 | }
 9 | \arguments{
10 | \item{obj}{A \code{prcomp} object}
11 | 
12 | \item{type}{Display absolute proportions or cumulative proportion. Possible values:
13 | "pev" or "cev"}
14 | 
15 | \item{pc_nr}{How many principal components to display max}
16 | 
17 | \item{title}{Title of the plot}
18 | }
19 | \value{
20 | An object created by \code{ggplot}, which can be assigned and further customized.
21 | }
22 | \description{
23 | Produces a scree plot for investigating the proportion of explained variance, or
24 | alternatively the cumulative value
25 | }
26 | \examples{
27 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
28 | rlt <- DESeq2::rlogTransformation(dds)
29 | pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
30 | pcascree(pcaobj, type = "pev")
31 | pcascree(pcaobj, type = "cev", title = "Cumulative explained proportion of variance - Test dataset")
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/man/plotPCcorrs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/correlatePCs.R
 3 | \name{plotPCcorrs}
 4 | \alias{plotPCcorrs}
 5 | \title{Plot significance of (cor)relations of covariates VS principal components}
 6 | \usage{
 7 | plotPCcorrs(pccorrs, pc = 1, logp = TRUE)
 8 | }
 9 | \arguments{
10 | \item{pccorrs}{A \code{data.frame} object generated by \link{correlatePCs}}
11 | 
12 | \item{pc}{An integer number, corresponding to the principal component of
13 | interest}
14 | 
15 | \item{logp}{Logical, defaults to \code{TRUE}, displays the -\code{log10} of
16 | the pvalue instead of the p value itself}
17 | }
18 | \value{
19 | A base plot object
20 | }
21 | \description{
22 | Plots the significance of the (cor)relation of each covariate vs a principal component
23 | }
24 | \examples{
25 | library(DESeq2)
26 | dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
27 | rlt <- rlogTransformation(dds)
28 | pcaobj <- prcomp(t(assay(rlt)))
29 | res <- correlatePCs(pcaobj, colData(dds))
30 | plotPCcorrs(res)
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/man/topGOtable.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/pca2go.R
  3 | \name{topGOtable}
  4 | \alias{topGOtable}
  5 | \title{Extract functional terms enriched in the DE genes, based on topGO}
  6 | \usage{
  7 | topGOtable(
  8 |   DEgenes,
  9 |   BGgenes,
 10 |   ontology = "BP",
 11 |   annot = annFUN.org,
 12 |   mapping = "org.Mm.eg.db",
 13 |   geneID = "symbol",
 14 |   topTablerows = 200,
 15 |   fullNamesInRows = TRUE,
 16 |   addGeneToTerms = TRUE,
 17 |   plotGraph = FALSE,
 18 |   plotNodes = 10,
 19 |   writeOutput = FALSE,
 20 |   outputFile = "",
 21 |   topGO_method2 = "elim",
 22 |   do_padj = FALSE
 23 | )
 24 | }
 25 | \arguments{
 26 | \item{DEgenes}{A vector of (differentially expressed) genes}
 27 | 
 28 | \item{BGgenes}{A vector of background genes, e.g. all (expressed) genes in the assays}
 29 | 
 30 | \item{ontology}{Which Gene Ontology domain to analyze: \code{BP} (Biological Process), \code{MF} (Molecular Function), or \code{CC} (Cellular Component)}
 31 | 
 32 | \item{annot}{Which function to use for annotating genes to GO terms. Defaults to \code{annFUN.org}}
 33 | 
 34 | \item{mapping}{Which \code{org.XX.eg.db} to use for annotation - select according to the species}
 35 | 
 36 | \item{geneID}{The format in which the genes are provided. Defaults to \code{symbol}, could also be
 37 | \code{entrez} or \code{ENSEMBL}}
 38 | 
 39 | \item{topTablerows}{How many rows to report before any filtering}
 40 | 
 41 | \item{fullNamesInRows}{Logical, whether to display or not the full names for the GO terms}
 42 | 
 43 | \item{addGeneToTerms}{Logical, whether to add a column with all genes annotated to each GO term}
 44 | 
 45 | \item{plotGraph}{Logical, if TRUE additionally plots a graph on the identified GO terms}
 46 | 
 47 | \item{plotNodes}{Number of nodes to plot}
 48 | 
 49 | \item{writeOutput}{Logical, if TRUE additionally writes out the result to a file}
 50 | 
 51 | \item{outputFile}{Name of the file the result should be written into}
 52 | 
 53 | \item{topGO_method2}{Character, specifying which of the methods implemented by \code{topGO} should be used, in addition to the \code{classic} algorithm. Defaults to \code{elim}}
 54 | 
 55 | \item{do_padj}{Logical, whether to perform the adjustment on the p-values from the specific
 56 | topGO method, based on the FDR correction. Defaults to FALSE, since the assumption of
 57 | independent hypotheses is somewhat violated by the intrinsic DAG-structure of the Gene
 58 | Ontology Terms}
 59 | }
 60 | \value{
 61 | A table containing the computed GO Terms and related enrichment scores
 62 | }
 63 | \description{
 64 | A wrapper for extracting functional GO terms enriched in the DE genes, based on
 65 | the algorithm and the implementation in the topGO package
 66 | }
 67 | \details{
 68 | Allowed values assumed by the \code{topGO_method2} parameter are one of the
 69 | following: \code{elim}, \code{weight}, \code{weight01}, \code{lea},
 70 | \code{parentchild}. For more details on this, please refer to the original
 71 | documentation of the \code{topGO} package itself
 72 | }
 73 | \examples{
 74 | library("airway")
 75 | library("DESeq2")
 76 | data("airway", package = "airway")
 77 | airway
 78 | dds_airway <- DESeqDataSet(airway, design= ~ cell + dex)
 79 | # Example, performing extraction of enriched functional categories in
 80 | # detected significantly expressed genes
 81 | \dontrun{
 82 | dds_airway <- DESeq(dds_airway)
 83 | res_airway <- results(dds_airway)
 84 | library("AnnotationDbi")
 85 | library("org.Hs.eg.db")
 86 | res_airway$symbol <- mapIds(org.Hs.eg.db,
 87 |                             keys = row.names(res_airway),
 88 |                             column = "SYMBOL",
 89 |                             keytype = "ENSEMBL",
 90 |                             multiVals = "first")
 91 | res_airway$entrez <- mapIds(org.Hs.eg.db,
 92 |                             keys = row.names(res_airway),
 93 |                             column = "ENTREZID",
 94 |                             keytype = "ENSEMBL",
 95 |                             multiVals = "first")
 96 | resOrdered <- as.data.frame(res_airway[order(res_airway$padj),])
 97 | de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj),]
 98 | de_symbols <- de_df$symbol
 99 | bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0]
100 | bg_symbols <- mapIds(org.Hs.eg.db,
101 |                      keys = bg_ids,
102 |                      column = "SYMBOL",
103 |                      keytype = "ENSEMBL",
104 |                      multiVals = "first")
105 | library(topGO)
106 | topgoDE_airway <- topGOtable(de_symbols, bg_symbols,
107 |                              ontology = "BP",
108 |                              mapping = "org.Hs.eg.db",
109 |                              geneID = "symbol")
110 | }
111 | 
112 | }
113 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(pcaExplorer)
3 | # run the test files of the pcaExplorer package
4 | test_check("pcaExplorer")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/setuptests_pcaExplorer.R:
--------------------------------------------------------------------------------
 1 | suppressPackageStartupMessages({  # attach the packages the tests rely on, silencing their startup messages
 2 |   library("DESeq2")
 3 |   library("SummarizedExperiment")
 4 |   library("airway")
 5 |   library("AnnotationDbi")
 6 |   library("org.Hs.eg.db")
 7 | })
 8 | 
 9 | # prepping the test datasets only once -----------------------------------------
10 | dds <- makeExampleDESeqDataSet(n = 1000, m = 8)  # simulated counts: 1000 rows (genes) x 8 columns (samples)
11 | rlt <- rlogTransformation(dds)
12 | cm <- counts(dds)  # count matrix, used as `countmatrix` in test_shiny.R
13 | cd <- colData(dds)  # sample covariates, used as `coldata` in test_shiny.R
14 | 
15 | # simulated dataset from the package's own generator, plus its rlog transform
16 | dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
17 | rlt_multifac <- rlogTransformation(dds_multifac)
18 | 
19 | # airway dataset: DESeq2 fit, results table and rlog transform
20 | data("airway", package = "airway")
21 | dds_airway <- DESeqDataSet(airway, design = ~ cell + dex)
22 | dds_airway <- DESeq(dds_airway)
23 | res_airway <- results(dds_airway)
24 | 
25 | rld_airway <- rlogTransformation(dds_airway)
26 | # annotate the results with gene symbols and Entrez ids, mapped from the ENSEMBL row names
27 | res_airway$symbol <- mapIds(org.Hs.eg.db,
28 |                             keys = row.names(res_airway),
29 |                             column = "SYMBOL",
30 |                             keytype = "ENSEMBL",
31 |                             multiVals = "first")
32 | res_airway$entrez <- mapIds(org.Hs.eg.db,
33 |                             keys = row.names(res_airway),
34 |                             column = "ENTREZID",
35 |                             keytype = "ENSEMBL",
36 |                             multiVals = "first")
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test_annotations.R:
--------------------------------------------------------------------------------
1 | test_that("Annotation data frame retrieval", {
2 |   anno_df <- get_annotation_orgdb(dds_airway, "org.Hs.eg.db", "ENSEMBL")
3 |   # the result must be a data.frame with the expected 63677 x 2 shape
4 |   expect_s3_class(anno_df, "data.frame")
5 |   expect_equal(dim(anno_df), c(63677, 2))  # reports actual vs expected dims on failure
6 | })


--------------------------------------------------------------------------------
/tests/testthat/test_correlatePCs.R:
--------------------------------------------------------------------------------
 1 | test_that("Correlation of the PCs", {
 2 |   # PCA with samples as observations, then relate the PCs to the sample covariates
 3 |   covariates <- colData(dds_multifac)
 4 |   sample_pca <- prcomp(t(assay(rlt_multifac)))
 5 |   pc_corrs <- correlatePCs(sample_pca, covariates)
 6 |   expect_equal(dim(pc_corrs), c(4, 2))
 7 |   expect_equal(colnames(pc_corrs), colnames(covariates))
 8 |   # both plotting variants only need to run without error
 9 |   plotPCcorrs(pc_corrs)
10 |   plotPCcorrs(pc_corrs, pc = 2, logp = FALSE)
11 | })
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/testthat/test_hiloadings.R:
--------------------------------------------------------------------------------
 1 | test_that("Check that genes with hi loadings are extracted", {
 2 |   pcaobj <- prcomp(t(SummarizedExperiment::assay(rlt)))
 3 |   anno <- data.frame(gene_id = rownames(dds),  # mock annotation: names are the upper-cased gene ids
 4 |                      gene_name = toupper(rownames(dds)),
 5 |                      stringsAsFactors = FALSE,
 6 |                      row.names = rownames(dds))
 7 |   # without an expression table the call only has to run without error
 8 |   hi_loadings(pcaobj, 1)
 9 |   # is() performs the class check of expect_is(), which testthat 3e deprecates
10 |   expect_true(is(hi_loadings(pcaobj, 1, exprTable = counts(dds)), "matrix"))
11 |   expect_true(
12 |     all(rownames(hi_loadings(pcaobj, 1, exprTable = counts(dds), annotation = NULL)) %in% rownames(dds))
13 |   )
14 |   expect_true(
15 |     all(rownames(hi_loadings(pcaobj, 1, exprTable = counts(dds), annotation = anno)) %in% anno$gene_name)
16 |   )
17 | })
18 | 
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test_makeDS.R:
--------------------------------------------------------------------------------
1 | test_that("Check that example dds is generated correctly", {
2 |   # the simulated colData must expose exactly these two covariates, in this order
3 |   simulated <- makeExampleDESeqDataSet_multifac(betaSD_tissue = 1, betaSD_condition = 3)
4 |   expect_equal(names(colData(simulated)), c("condition", "tissue"))
5 | })
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_moreplots.R:
--------------------------------------------------------------------------------
 1 | test_that("Gene profiler does its job properly", {
 2 |   # profile 20 randomly drawn genes of the simulated dataset; the call must run cleanly
 3 |   random_genes <- paste0("gene", sample(1:1000, 20))
 4 |   geneprofiler(rlt, random_genes)
 5 |   # an unknown gene id is expected to raise an error
 6 |   expect_error(
 7 |     expect_message(geneprofiler(rlt, "gene_fake"))
 8 |   )
 9 | })
10 | 
11 | test_that("Distribution of expression", {
12 |   # the returned plot must be a ggplot object
13 |   expect_true(is(distro_expr(rlt), "gg"))
14 | })
15 | 
16 | test_that("Correlation scatter plot matrix works", {
17 |   pair_corr(counts(dds)[1:100, ])  # a count matrix (first 100 rows) must be plotted without error
18 |   expect_error(pair_corr(dds))  # passing the DESeqDataSet itself is expected to fail
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pca2go.R:
--------------------------------------------------------------------------------
 1 | test_that("Checks on the functional enrichment of subset of genes/genes with hi loadings",{
 2 |   resOrdered <- as.data.frame(res_airway[order(res_airway$padj), ])
 3 |   de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj), ]  # adjusted p-value below 0.05
 4 |   de_symbols <- de_df$symbol
 5 |   bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0]  # background: rows with at least one count
 6 |   bg_symbols <- mapIds(org.Hs.eg.db,
 7 |                        keys = bg_ids,
 8 |                        column = "SYMBOL",
 9 |                        keytype = "ENSEMBL",
10 |                        multiVals = "first")
11 |   library(topGO)
12 |   # expect_type() replaces the expect_is() calls deprecated in testthat 3e
13 |   expect_type(de_symbols, "character")
14 |   expect_type(bg_symbols, "character")
15 |   
16 |   # topgoDE_airway <- topGOtable(de_symbols, bg_symbols,
17 |   #                              ontology = "BP",
18 |   #                              mapping = "org.Hs.eg.db",
19 |   #                              geneID = "symbol")
20 |   #
21 |   # expect_is(topgoDE_airway,"data.frame")
22 |   ngenes_pca <- 500
23 |   
24 |   goquick_airway <- limmaquickpca2go(rld_airway,
25 |                                      pca_ngenes = ngenes_pca,
26 |                                      inputType = "ENSEMBL",
27 |                                      organism = "Hs")
28 |   # the result must be a list of 4 elements carrying the requested gene number as attribute
29 |   expect_type(goquick_airway, "list")
30 |   expect_equal(length(goquick_airway), 4)
31 |   sapply(goquick_airway, names)
32 |   expect_equal(attr(goquick_airway, "n_genesforpca"), ngenes_pca)
33 |   # an unsupported organism code must make the call fail
34 |   expect_error(
35 |     expect_warning(
36 |       limmaquickpca2go(rld_airway,
37 |                        pca_ngenes = ngenes_pca,
38 |                        inputType = "ENSEMBL",
39 |                        organism = "foo")
40 |     )
41 |   ) # additionally throws a warning
42 | })
43 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pcagenes.R:
--------------------------------------------------------------------------------
 1 | test_that("Checks on the pca on the genes", {
 2 |   # colours are looked up by condition level, giving one entry per sample;
 3 |   # the same factor is passed on as the group names
 4 |   condition <- colData(dds_multifac)$condition
 5 |   condition_cols <- scales::hue_pal()(2)[condition]
 6 |   condition_plot <- genespca(rlt_multifac, ntop = 100, arrowColors = condition_cols, groupNames = condition)
 7 |   expect_true(is(condition_plot, "gg"))
 8 |   # the next variants only need to run: data output, defaults, a single colour
 9 |   condition_data <- genespca(rlt_multifac, ntop = 100, arrowColors = condition_cols, groupNames = condition, returnData = TRUE)
10 |   default_plot <- genespca(rlt_multifac, ntop = 100)
11 |   green_plot <- genespca(rlt_multifac, ntop = 100, arrowColors = "green")
12 |   # two colours, i.e. neither a single one nor the per-sample vector, must fail
13 |   expect_error(genespca(rlt_multifac, ntop = 100, arrowColors = c("green", "red")))
14 |   # groups built from the condition/tissue combinations, coloured the same way
15 |   combined <- interaction(as.data.frame(colData(rlt_multifac)[, c("condition", "tissue")]))
16 |   n_combined <- nlevels(combined)
17 |   combined_cols <- scales::hue_pal()(n_combined)[factor(combined)]
18 |   combined_plot <- genespca(rlt_multifac, ntop = 100, arrowColors = combined_cols, groupNames = combined)
19 |   expect_true(is(combined_plot, "gg"))
20 | })
21 | 
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pcasamples.R:
--------------------------------------------------------------------------------
 1 | test_that("Checks on the pca on the samples", {
 2 |   # PCA with the samples as observations, reused by the scree plot checks below
 3 |   sample_pca <- prcomp(t(assay(rlt_multifac)))
 4 |   # evaluating the covariates table is only a smoke check, nothing is asserted
 5 |   colData(dds_multifac)
 6 |   # default call, once as a plot and once returning the underlying data
 7 |   pcaplot(rlt_multifac)
 8 |   default_data <- pcaplot(rlt_multifac, returnData = TRUE)
 9 |   # grouping on both covariates must yield a ggplot object
10 |   both_covariates <- c("condition", "tissue")
11 |   grouped_plot <- pcaplot(rlt_multifac, intgroup = both_covariates)
12 |   expect_true(is(grouped_plot, "gg"))
13 |   # without intgroup, a message announces the fallback to 'condition'
14 |   expect_message({
15 |     fallback_plot <- pcaplot(rlt_multifac)
16 |     expect_true(is(fallback_plot, "gg"))
17 |   }, "Defaulting to 'condition'")
18 |   # with condition, tissue and sizeFactor removed from colData, pcaplot() must fail
19 |   expect_error({
20 |     stripped <- rlt_multifac
21 |     colData(stripped)$condition <- NULL
22 |     colData(stripped)$tissue <- NULL
23 |     colData(stripped)$sizeFactor <- NULL
24 |     pcaplot(stripped)
25 |   }, "No colData has been provided")
26 |   # data output with both covariates, then an unknown covariate name
27 |   grouped_data <- pcaplot(rlt_multifac, intgroup = both_covariates, returnData = TRUE)
28 |   expect_error(pcaplot(rlt_multifac, intgroup = "foo"))
29 |   # scree plots: the default and the cumulative ("cev") type are ggplot objects
30 |   default_scree <- pcascree(sample_pca)
31 |   expect_true(is(default_scree, "gg"))
32 |   cumulative_scree <- pcascree(sample_pca, type = "cev")
33 |   expect_true(is(cumulative_scree, "gg"))
34 |   # any other type value is rejected
35 |   expect_error(pcascree(sample_pca, type = "foo"))
36 | })
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test_shiny.R:
--------------------------------------------------------------------------------
 1 | test_that("Shiny app is generated", {
 2 |   expect_s3_class(pcaExplorer(), "shiny.appobj")  # no input at all
 3 |   # expect_s3_class() is used throughout: expect_is() is deprecated in testthat 3e
 4 |   expect_s3_class(pcaExplorer(dds, rlt), "shiny.appobj")
 5 |   # raw count matrix plus sample covariates
 6 |   expect_s3_class(pcaExplorer(countmatrix = cm, coldata = cd), "shiny.appobj")
 7 |   # dds only, without a transformed object
 8 |   expect_s3_class(pcaExplorer(dds = dds), "shiny.appobj")
 9 | })
10 | 


--------------------------------------------------------------------------------
/vignettes/newsnap_01_upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_01_upload.png


--------------------------------------------------------------------------------
/vignettes/newsnap_02_instructions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_02_instructions.png


--------------------------------------------------------------------------------
/vignettes/newsnap_03_countstable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_03_countstable.png


--------------------------------------------------------------------------------
/vignettes/newsnap_04_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_04_overview.png


--------------------------------------------------------------------------------
/vignettes/newsnap_05_samples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_05_samples.png


--------------------------------------------------------------------------------
/vignettes/newsnap_06_genes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_06_genes.png


--------------------------------------------------------------------------------
/vignettes/newsnap_07_finder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_07_finder.png


--------------------------------------------------------------------------------
/vignettes/newsnap_08_pca2go.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_08_pca2go.png


--------------------------------------------------------------------------------
/vignettes/newsnap_09_multifac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_09_multifac.png


--------------------------------------------------------------------------------
/vignettes/newsnap_10_editor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_10_editor.png


--------------------------------------------------------------------------------
/vignettes/newsnap_11_about.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/newsnap_11_about.png


--------------------------------------------------------------------------------
/vignettes/unr_00_demo_loaded.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_00_demo_loaded.png


--------------------------------------------------------------------------------
/vignettes/unr_01_splom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_01_splom.png


--------------------------------------------------------------------------------
/vignettes/unr_02_sts_heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_02_sts_heatmap.png


--------------------------------------------------------------------------------
/vignettes/unr_03_summary_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_03_summary_counts.png


--------------------------------------------------------------------------------
/vignettes/unr_04a_samplespca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_04a_samplespca.png


--------------------------------------------------------------------------------
/vignettes/unr_04b_samples_dex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_04b_samples_dex.png


--------------------------------------------------------------------------------
/vignettes/unr_05_loadings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_05_loadings.png


--------------------------------------------------------------------------------
/vignettes/unr_06a_genefinder_dusp1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06a_genefinder_dusp1.png


--------------------------------------------------------------------------------
/vignettes/unr_06b_genefinder_per1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06b_genefinder_per1.png


--------------------------------------------------------------------------------
/vignettes/unr_06c_genefinder_ddx3y.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06c_genefinder_ddx3y.png


--------------------------------------------------------------------------------
/vignettes/unr_06c_genefinder_ddx3y_dex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_06c_genefinder_ddx3y_dex.png


--------------------------------------------------------------------------------
/vignettes/unr_07_genespca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_07_genespca.png


--------------------------------------------------------------------------------
/vignettes/unr_08_pca2go_topgo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_08_pca2go_topgo.png


--------------------------------------------------------------------------------
/vignettes/unr_90_exitsave.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_90_exitsave.png


--------------------------------------------------------------------------------
/vignettes/unr_99_editreport.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/federicomarini/pcaExplorer/a78352d5f51fda5d39ef5e990ed7a37be8d1c0a5/vignettes/unr_99_editreport.png


--------------------------------------------------------------------------------
/vignettes/upandrunning.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: >
  3 |   Up and running with `r Biocpkg("pcaExplorer")`
  4 | author: 
  5 | - name: Federico Marini
  6 |   affiliation: 
  7 |   - &id1 Institute of Medical Biostatistics, Epidemiology and Informatics (IMBEI), Mainz
  8 |   - Center for Thrombosis and Hemostasis (CTH), Mainz
  9 |   email: marinif@uni-mainz.de
 10 | - name: Harald Binder
 11 |   affiliation: *id1
 12 | date: "`r BiocStyle::doc_date()`"
 13 | package: "`r BiocStyle::pkg_ver('pcaExplorer')`"
 14 | output: 
 15 |   BiocStyle::html_document:
 16 |     toc_float: true
 17 |     fig_caption: TRUE
 18 | vignette: >
 19 |   %\VignetteIndexEntry{Up and running with pcaExplorer}
 20 |   %\VignetteEngine{knitr::rmarkdown}
 21 |   %\VignetteEncoding{UTF-8}  
 22 |   %\VignettePackage{pcaExplorer}
 23 |   %\VignetteKeywords{Visualization, RNASeq, DimensionReduction, PrincipalComponent, QualityControl, GUI, ReportWriting}
 24 | ---
 25 | 
 26 | <!-- For rendering in-app: -->
 27 | <!-- - change output format to -->
 28 | <!-- --- -->
 29 | <!-- output:  -->
 30 | <!--   md_document: -->
 31 | <!--     variant: markdown_github -->
 32 | <!-- --- -->
 33 | <!-- - replace links to imgs with github hosted to provide permanent link -->
 34 | <!-- - enforce width="80%" for all imgs (50% for the smaller screenshot) -->
 35 | <!-- - copy content to inst/extdata/instructions_unr.md -->
 36 | 
 37 | ```{r}
 38 | knitr::opts_chunk$set(crop = NULL)
 39 | ```
 40 | 
 41 | # Setup
 42 | 
 43 | First things first: install `r Biocpkg("pcaExplorer")` and load it into your R session.
 44 | You should receive a message notification if this is completed without errors.
 45 | 
 46 | ```{r eval=FALSE}
 47 | BiocManager::install("pcaExplorer")
 48 | library("pcaExplorer")
 49 | ```
 50 | 
 51 | This document describes a use case for `r Biocpkg("pcaExplorer")`, based on the dataset in the `r Biocpkg("airway")` package.
 52 | If this package is not available on your machine, please install it by executing:
 53 | 
 54 | ```{r eval=FALSE}
 55 | BiocManager::install("airway")
 56 | ```
 57 | 
 58 | This dataset consists of the gene-level expression measurements (as raw read counts) for an experiment where four different human airway smooth muscle cell lines are either treated with dexamethasone or left untreated.
 59 | 
 60 | # Start exploring - the beauty of interactivity
 61 | 
 62 | To start the exploration, you just need the following lines:
 63 | 
 64 | ```{r, eval=FALSE}
 65 | library("pcaExplorer")
 66 | pcaExplorer()
 67 | ```
 68 | 
 69 | The easiest way to explore the `r Biocpkg("airway")` dataset is by clicking on the dedicated button in the **Data Upload** panel.
 70 | This action will:
 71 | 
 72 | - load the `r Biocpkg("airway")` package
 73 | - load the count matrix and the experimental metadata
 74 | - compose the `dds` object, normalize the expression values (using the robust method proposed by Anders and Huber in the original DESeq manuscript), and compute the variance stabilizing transformed expression values (stored in the `dst` object)
 75 | - retrieve the gene annotation information via the `r Biocpkg("org.Hs.eg.db")` package, adding gene symbols to the ENSEMBL ids - this step is optional, but recommended for more human-readable identifiers to be used.
 76 | 
 77 | If you want to load your expression data, please refer to the `r Biocpkg("pcaExplorer", vignette="pcaExplorer.html", label="User Guide")`, which contains detailed information on the formats your data have to respect.
 78 | 
 79 | Once the preprocessing of the input is done, you should get a notification in the lower right corner that you're all set.
 80 | The whole preprocessing should take around 5-6 seconds (tested on a MacBook Pro, with i7 and 16 Gb RAM).
 81 | You can check what each component looks like by clicking on its respective button, once they have appeared in the lower half of the panel.
 82 | 
 83 | ```{r ss00, echo=FALSE, fig.cap="Overview of the Data Upload panel. After clicking on the 'Load the demo airway data' button, all widgets are automatically populated, and each data component (count matrix, experimental data, dds object, annotation) can be previewed in a modal window by clicking on its respective button."}
 84 | knitr::include_graphics("unr_00_demo_loaded.png")
 85 | ```
 86 | 
 87 | You can proceed to explore the expression values of your dataset in the **Counts Table** tab.
 88 | You can change the data type you are displaying between raw counts, normalized, or transformed, and plot their values in a scatterplot matrix to explore their sample-to-sample correlations.
 89 | To try this, select for example "Normalized counts", change the correlation coefficient to "spearman", and click on the `Run` action button. 
 90 | The correlation values will also be displayed as a heatmap.
 91 | 
 92 | ```{r ss01, echo=FALSE, fig.cap="Screenshot of the sample to sample scatter plot matrix. The user can select the correlation method to use, the option to plot values on log2 scales, and the possibility to use a subset of genes (to obtain a quicker overview if many samples are provided)."}
 93 | knitr::include_graphics("unr_01_splom.png")
 94 | ```
 95 | 
 96 | Additional features, both for samples and for features, are displayed in the **Data overview** panel.
 97 | A closer look at the metadata of the `airway` set highlights how each combination of cell type (`cell`) and dexamethasone treatment (`dex`) is represented by a single sequencing experiment.
 98 | The 8 samples in the demo dataset are themselves a subsample of the [full GEO record](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52778), namely the ones not treated with albuterol (`alb` column).
 99 | 
100 | The relationship among samples can be seen in the sample-to-sample heatmap.
101 | For example, by selecting the Manhattan distance metric, it is evident how the samples cluster by dex treatment, yet they show a dendrogram structure that recalls the 4 different cell types used.
102 | The total sum of counts per sample is displayed as a bar plot.
103 | 
104 | ```{r ss02, echo=FALSE, fig.cap="Screenshot of the sample to sample heatmap. Selected is the Manhattan distance, but Euclidean and correlation-based distance are also provided as options. In this case, the user has also selected the dex and cell factors in the 'Group/color by' widget in the sidebar menu, and these covariates decorate the heatmap to facilitate identification of patterns."}
105 | knitr::include_graphics("unr_02_sts_heatmap.png")
106 | ```
107 | 
108 | Patterns can become clearer after selecting, in the **App settings** on the left, an experimental factor to group and color by: try selecting `dex`, for example. 
109 | If more than one covariate is selected, the interaction between these will be taken as a grouping factor.
110 | To remove one, simply click on it to highlight and press the del or backspace key to delete it.
111 | Try doing so by also clicking on `cell`, and then removing `dex` afterwards.
112 | 
113 | Basic summary information is also displayed for the genes.
114 | In the count matrix provided, one can check how many genes were detected, by selecting a "Threshold on the row sums of the counts" or on the row means of the normalized counts (more stringent). 
115 | For example, selecting 5 in both cases, only 24345 genes have a total number of counts, summed by row, of more than 5, and 17745 genes have more than 5 counts (normalized) on average.
116 | 
117 | ```{r ss03, echo=FALSE, fig.cap="Screenshot of the Basic Summary of the counts in the Data Overview panel. General information is provided, together with an overview on detected genes according to different filtering criteria."}
118 | knitr::include_graphics("unr_03_summary_counts.png")
119 | ```
120 | 
121 | The **Samples View** and the **Genes View** are the tabs where most results coming from Principal Component Analysis, either performed on the samples or on the genes, can be explored in depth.
122 | Assuming you selected `cell` in the "Group/color by" option on the left, the Samples PCA plot should clearly display how the cell type explains a considerable portion of the variability in the dataset (corresponding to the second PC).
123 | To check that `dex` treatment is the main source of variability, select that instead of `cell`.
124 | 
125 | ```{r ss04a, echo=FALSE, fig.cap="The Samples View panel. Displayed are a PCA plot (left) and the corresponding scree plot (right), with the samples colored and labeled by cell type - separating on the second principal component."}
126 | knitr::include_graphics("unr_04a_samplespca.png")
127 | ```
128 | 
129 | The scree plot on the right shows how many components should be retained for a satisfactory reduced dimension view of the original set, with their eigenvalues from largest to smallest.
130 | To explore the PCs other than the first and the second one, you can just select them in the x-axis PC and y-axis PC widgets in the left sidebar.
131 | 
132 | ```{r ss04b, echo=FALSE, fig.cap="PCA plot for the samples, colored by dexamethasone treatment. The dex factor is the main driver of the variability in the data, and samples separate nicely on the first principal component."}
133 | knitr::include_graphics("unr_04b_samples_dex.png")
134 | ```
135 | 
136 | If you brush (left-click and hold) on the PCA plot, you can display a zoomed version of it in the frame below.
137 | If you suspect some samples might be outliers (although this is not the case in the `airway` set), you can select them in the dedicated plot, and get a first impression of what the remainder of the samples would look like.
138 | On the right side, you can quickly check which genes show the top and bottom loadings, split by principal component. 
139 | First, change the value in the input widget to 20; then, select one of each list and try to check them in the **Gene Finder** tab; try for example with *DUSP1*, *PER1*, and *DDX3Y*.
140 | 
141 | ```{r ss05, echo=FALSE, fig.cap="Genes with highest loadings on the first and second principal components. The user can select how many top and bottom genes will be displayed, and the gene names are printed below each gene's contribution on each PC."}
142 | knitr::include_graphics("unr_05_loadings.png")
143 | ```
144 | 
145 | While *DUSP1* and *PER1* clearly show a change in expression upon dexamethasone treatment (and indeed were reported among the well known glucocorticoid-responsive genes in the original publication of Himes et al., 2014), *DDX3Y* displays variability at the cell type level (select `cell` in the Group/color by widget): this gene is almost undetected in N061011 cells, and this high variance is what determines its high loading on the second principal component.
146 | 
147 | ```{r ss06a, echo=FALSE, fig.cap="Plot of the gene expression levels of DUSP1. Points are split according to dex treatment, and both graphics and table are displayed."}
148 | knitr::include_graphics("unr_06a_genefinder_dusp1.png")
149 | ```
150 | 
151 | ```{r ss06b, echo=FALSE, fig.cap="Plot of the gene expression levels of PER1. Points are split according to dex treatment."}
152 | knitr::include_graphics("unr_06b_genefinder_per1.png")
153 | ```
154 | 
155 | ```{r ss06c, echo=FALSE, fig.cap="Plot of the gene expression levels of DDX3Y. Points are split according to cell type, as this gene was highly variable across this experimental factor - indeed, in one cell type it is barely detected."}
156 | knitr::include_graphics("unr_06c_genefinder_ddx3y.png")
157 | ```
158 | 
159 | You can see the single expression values in a table as well, and this information can be downloaded with a simple click.
160 | 
161 | Back to the **Samples View**, you can experiment with the number of top variable genes to see how the results of PCA are in this case robust to a wide range of this value - this might not be the case with other datasets, and the simplicity of interacting with these parameters makes it easy to iterate in the exploration steps.
162 | 
163 | Proceeding to the **Genes View**, you can see the dual of the Samples PCA: now the samples are displayed as arrows in the genes biplot, which can show which genes display a similar behaviour.
164 | You can capture this with a simple brushing action on the plot, and notice how their profiles throughout all samples are shown in the Profile explorer below; moreover, a static and an interactive heatmap, together with a table containing the underlying data, are generated in the rows below.
165 | 
166 | ```{r ss07, echo=FALSE, fig.cap="The Genes View panel. Upper panel: the genes biplot, and its zoomed plot, with gene names displayed. Lower panel: the profile explorer of the selected subset of genes (corresponding to the zoomed window), and the boxplot for the gene selected by clicking close to a location in the zoomed window."}
167 | knitr::include_graphics("unr_07_genespca.png")
168 | ```
169 | 
170 | Since we compute the gene annotation table as well, it's nice to read the gene symbols in the zoomed window (instead of the ENSEMBL ids).
171 | By clicking close enough to any of these genes, the expression values are plotted, in a similar fashion as in the **Gene Finder**.
172 | 
173 | The tab **PCA2GO** helps you understand which are the common biological themes (default: the Gene Ontology Biological Process terms) in the genes showing up in the top and in the bottom loadings for each principal component.
174 | Since we launched the `pcaExplorer` app without additional parameters, this information is not available, but can be computed live (this might take a while).
175 | 
176 | ```{r ss08, echo=FALSE, fig.cap="The PCA2GO panel. Four tables (2 per dimension, here only 3 are displayed) decorate the PCA plot in the middle, and display the top enriched functional categories in each subset of genes with high loadings."}
177 | knitr::include_graphics("unr_08_pca2go_topgo.png")
178 | ```
179 | 
180 | Still, a previous call to `pca2go` is recommended, as it relies on the algorithm of the `r Biocpkg("topGO")` package: it will require some additional computing time, but it is likely to deliver more precise terms (i.e. in turn more relevant from a biological point of view). To do so, you should exit the live session, compute this object, and provide it in the call to `pcaExplorer` (see more on how to do so in `r Biocpkg("pcaExplorer", vignette="pcaExplorer.html", label="the main user guide")`).
181 | 
182 | # When you're done - the power of reproducibility
183 | 
184 | A typical session with `pcaExplorer` includes one or more iterations on each of these tabs.
185 | Once you are finished, you might want to store the results of your analysis in different formats.
186 | 
187 | ```{r ss90, echo=FALSE, fig.cap="The pcaExplorer task menu. Buttons for saving the session to binary data or to a dedicated environment are displayed.",out.width="80%"}
188 | knitr::include_graphics("unr_90_exitsave.png")
189 | ```
190 | 
191 | With `pcaExplorer` you can do all of the following:
192 | 
193 | - save every plot and table by simply clicking on the respective button, below each element
194 | - save the state of the entire app and its reactive elements as a binary `.RData` file, as if it was a workspace (clicking on the cog icon in the right side of the task menu)
195 | - use the "Exit `pcaExplorer` and save" button (also accessible from the cog), which saves the state in a specific environment of your R session; you can later access it by its name, which normally looks like `pcaExplorerState_YYYYMMDD_HHMMSS`
196 | - enjoy the beauty of reproducible research in the **Report Editor**: `pcaExplorer` comes with a template analysis, that picks the latest status of the app during your session, and combines these reactive values together in an R Markdown document, which you can first preview live in the app, and then download as a standalone HTML file - to store or share.
197 |   This document stitches together narrative text, code, and output objects, and constitutes a compendium where all actions are recorded. 
198 |   If you are familiar with R, you can edit that live, with support for autocompletion, in the "Edit report" tab.
199 | 
200 | ```{r ss99, echo=FALSE, fig.cap="The Report Editor tab. The collapsible elements control general markdown and editor options, which are regarded when the report is compiled. Its content is specified in the Ace editor, integrated in the Shiny app."}
201 | knitr::include_graphics("unr_99_editreport.png")
202 | ```
203 | 
204 | The functionality to display the report preview is based on `knit2html`, and some elements such as `DataTable` objects might not render correctly. 
205 | To render them correctly, please install the PhantomJS executable before launching the app. 
206 | This can be done by using the `r CRANpkg("webshot")` package and calling `webshot::install_phantomjs()` - HTML widgets will be rendered automatically as screenshots.
207 | Alternatively, the more recent `r BiocStyle::CRANpkg("webshot2")` package uses the headless Chrome browser (via the `r BiocStyle::CRANpkg("chromote")` package, requiring Google Chrome or other Chromium-based browser).
208 | Keep in mind that the fully rendered report (the one you can obtain with the "Generate & Save" button) is not affected by this, since it uses `rmarkdown::render()`.
209 | 
210 | # Session Info {.unnumbered}
211 | 
212 | ```{r sessioninfo}
213 | sessionInfo()
214 | ```
215 | 
216 | 


--------------------------------------------------------------------------------