├── _pkgdown.yml
├── .github
    ├── .gitignore
    └── workflows
    │   ├── test-coverage.yaml
    │   ├── pkgdown.yaml
    │   ├── R-CMD-check.yaml
    │   └── format-code.yml
├── vignettes
    ├── .gitignore
    └── figures
    │   ├── interaction_2hwg.png
    │   ├── peptide_map_1zmr.png
    │   ├── peptide_map_2hwg.png
    │   ├── peptide_map_1zmr_score.png
    │   └── peptide_map_2hwg_score.png
├── revdep
    ├── failures.md
    ├── problems.md
    ├── .gitignore
    ├── cran.md
    └── README.md
├── data
    ├── metal_list.rda
    ├── ptsi_pgk.rda
    ├── mako_colours.rda
    ├── protti_colours.rda
    ├── rapamycin_10uM.rda
    ├── viridis_colours.rda
    ├── metal_chebi_uniprot.rda
    ├── metal_go_slim_subset.rda
    └── rapamycin_dose_response.rda
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_import.csv
    │   └── test-queue_functions.R
├── LICENSE
├── man
    ├── figures
    │   ├── logo.png
    │   ├── README-volcano-1.png
    │   ├── lifecycle-stable.svg
    │   ├── lifecycle-defunct.svg
    │   ├── lifecycle-archived.svg
    │   ├── lifecycle-maturing.svg
    │   ├── lifecycle-deprecated.svg
    │   ├── lifecycle-superseded.svg
    │   ├── lifecycle-experimental.svg
    │   └── lifecycle-questioning.svg
    ├── protti_colours.Rd
    ├── mako_colours.Rd
    ├── viridis_colours.Rd
    ├── metal_list.Rd
    ├── plot_peptide_profiles.Rd
    ├── peptide_type.Rd
    ├── plot_pval_distribution.Rd
    ├── sequence_coverage.Rd
    ├── split_metal_name.Rd
    ├── volcano_protti.Rd
    ├── plot_drc_4p.Rd
    ├── median_normalisation.Rd
    ├── kegg_enrichment.Rd
    ├── network_analysis.Rd
    ├── fetch_go.Rd
    ├── metal_chebi_uniprot.Rd
    ├── replace_identified_by_x.Rd
    ├── metal_go_slim_subset.Rd
    ├── fetch_kegg.Rd
    ├── treatment_enrichment.Rd
    ├── read_protti.Rd
    ├── go_enrichment.Rd
    ├── rapamycin_10uM.Rd
    ├── scale_protti.Rd
    ├── rapamycin_dose_response.Rd
    ├── fetch_chebi.Rd
    ├── calculate_sequence_coverage.Rd
    ├── find_chebis.Rd
    ├── find_peptide.Rd
    ├── drc_4p.Rd
    ├── ttest_protti.Rd
    ├── normalise.Rd
    ├── pval_distribution_plot.Rd
    ├── anova_protti.Rd
    ├── fetch_uniprot_proteome.Rd
    ├── fetch_mobidb.Rd
    ├── assign_peptide_type.Rd
    ├── ptsi_pgk.Rd
    ├── try_query.Rd
    ├── find_all_subs.Rd
    ├── qc_sequence_coverage.Rd
    ├── qc_median_intensities.Rd
    ├── fetch_uniprot.Rd
    ├── qc_contaminants.Rd
    ├── qc_intensity_distribution.Rd
    ├── qc_proteome_coverage.Rd
    ├── calculate_imputation.Rd
    ├── qc_data_completeness.Rd
    ├── fetch_alphafold_aligned_error.Rd
    ├── qc_sample_correlation.Rd
    ├── randomise_queue.Rd
    ├── qc_cvs.Rd
    ├── fetch_eco.Rd
    ├── filter_cv.Rd
    ├── qc_ranked_intensities.Rd
    ├── calculate_aa_scores.Rd
    ├── qc_peak_width.Rd
    ├── qc_ids.Rd
    ├── qc_pca.Rd
    ├── qc_peptide_type.Rd
    ├── qc_charge_states.Rd
    ├── predict_alphafold_domain.Rd
    ├── diff_abundance.Rd
    ├── fetch_quickgo.Rd
    ├── fetch_pdb.Rd
    ├── qc_missed_cleavages.Rd
    ├── barcode_plot.Rd
    └── calculate_kegg_enrichment.Rd
├── pkgdown
    └── favicon
    │   ├── favicon.ico
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── apple-touch-icon.png
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   ├── apple-touch-icon-180x180.png
    │   ├── apple-touch-icon-60x60.png
    │   └── apple-touch-icon-76x76.png
├── .Rbuildignore
├── codecov.yml
├── protti.Rproj
├── R
    ├── zzz.R
    ├── read_protti.R
    ├── scale_protti.R
    ├── replace_identified_by_x.R
    ├── ttest_protti.R
    ├── fetch_go.R
    ├── find_chebis.R
    ├── find_peptide.R
    ├── drc_4p.R
    ├── anova_protti.R
    ├── fetch_kegg.R
    ├── find_all_subs.R
    ├── normalise.R
    ├── fetch_uniprot_proteome.R
    ├── calculate_sequence_coverage.R
    ├── pval_distribution_plot.R
    ├── assign_peptide_type.R
    ├── qc_median_intensities.R
    ├── calculate_imputation.R
    └── calculate_aa_scores.R
├── inst
    └── CITATION
├── cran-comments.md
├── .gitignore
├── LICENSE.md
├── data-raw
    ├── rapamycin_10uM.R
    ├── rapamycin_dose_response.R
    ├── ptsi_pgk.R
    └── protti_colours.R
└── DESCRIPTION


/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/data/metal_list.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/metal_list.rda


--------------------------------------------------------------------------------
/data/ptsi_pgk.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/ptsi_pgk.rda


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(protti)
3 | 
4 | test_check("protti") # run the testthat test suite of the protti package
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2021
2 | COPYRIGHT HOLDER: ETH Zurich, Jan-Philipp Quast, Dina Schuster
3 | 


--------------------------------------------------------------------------------
/data/mako_colours.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/mako_colours.rda


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/man/figures/logo.png


--------------------------------------------------------------------------------
/data/protti_colours.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/protti_colours.rda


--------------------------------------------------------------------------------
/data/rapamycin_10uM.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/rapamycin_10uM.rda


--------------------------------------------------------------------------------
/data/viridis_colours.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/viridis_colours.rda


--------------------------------------------------------------------------------
/data/metal_chebi_uniprot.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/metal_chebi_uniprot.rda


--------------------------------------------------------------------------------
/data/metal_go_slim_subset.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/metal_go_slim_subset.rda


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/data/rapamycin_dose_response.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/data/rapamycin_dose_response.rda


--------------------------------------------------------------------------------
/man/figures/README-volcano-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/man/figures/README-volcano-1.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/tests/testthat/test_import.csv:
--------------------------------------------------------------------------------
1 | Test.column,TestColumn,Test_column
2 | 10.1,_ABC_,1
3 | 11.3,_ABC_,2
4 | 14.1,_ABC_,3


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/vignettes/figures/interaction_2hwg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/vignettes/figures/interaction_2hwg.png


--------------------------------------------------------------------------------
/vignettes/figures/peptide_map_1zmr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/vignettes/figures/peptide_map_1zmr.png


--------------------------------------------------------------------------------
/vignettes/figures/peptide_map_2hwg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/vignettes/figures/peptide_map_2hwg.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/vignettes/figures/peptide_map_1zmr_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/vignettes/figures/peptide_map_1zmr_score.png


--------------------------------------------------------------------------------
/vignettes/figures/peptide_map_2hwg_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpquast/protti/HEAD/vignettes/figures/peptide_map_2hwg_score.png


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | data.sqlite
6 | *.html
7 | download
8 | lib
9 | cloud.noindex


--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 | 
3 | We checked 1 reverse dependency, comparing R CMD check results across CRAN and dev versions of this package.
4 | 
5 |  * We saw 0 new problems
6 |  * We failed to check 0 packages
7 | 
8 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^protti\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^README\.Rmd$
 5 | ^data-raw$
 6 | ^\.travis\.yml$
 7 | ^\.github$
 8 | ^codecov\.yml$
 9 | ^doc$
10 | ^Meta$
11 | ^_pkgdown\.yml$
12 | ^docs$
13 | ^pkgdown$
14 | ^cran-comments\.md$
15 | ^CRAN-RELEASE$
16 | ^CRAN-SUBMISSION$
17 | ^revdep$


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/man/protti_colours.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{protti_colours}
 5 | \alias{protti_colours}
 6 | \title{Colour scheme for protti}
 7 | \format{
 8 | A vector containing 100 colours
 9 | }
10 | \source{
11 | Dina's imagination.
12 | }
13 | \usage{
14 | protti_colours
15 | }
16 | \description{
17 | A colour scheme for protti that contains 100 colours.
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/protti.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/man/mako_colours.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{mako_colours}
 5 | \alias{mako_colours}
 6 | \title{Mako colour scheme}
 7 | \format{
 8 | A vector containing 256 colours
 9 | }
10 | \source{
11 | created for the Seaborn statistical data visualization package for Python
12 | }
13 | \usage{
14 | mako_colours
15 | }
16 | \description{
17 | A perceptually uniform colour scheme originally created for the Seaborn python package.
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/viridis_colours.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{viridis_colours}
 5 | \alias{viridis_colours}
 6 | \title{Viridis colour scheme}
 7 | \format{
 8 | A vector containing 256 colours
 9 | }
10 | \source{
11 | viridis R package, created by Stéfan van der Walt (stefanv) and Nathaniel Smith (njsmith)
12 | }
13 | \usage{
14 | viridis_colours
15 | }
16 | \description{
17 | The viridis colour scheme, taken from the viridis R package.
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/metal_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{metal_list}
 5 | \alias{metal_list}
 6 | \title{List of metals}
 7 | \format{
 8 | A data.frame containing the columns \code{atomic_number}, \code{symbol}, \code{name},
 9 | \code{type}, \code{chebi_id}.
10 | }
11 | \source{
12 | https://en.wikipedia.org/wiki/Metal and https://en.wikipedia.org/wiki/Metalloid
13 | }
14 | \usage{
15 | metal_list
16 | }
17 | \description{
18 | A list of all metals and metalloids in the periodic table.
19 | }
20 | \keyword{datasets}
21 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | #' Show the protti welcome message when the package is attached
 2 | #'
 3 | #' Uses `packageStartupMessage()`, so the greeting can be silenced with
 4 | #' `suppressPackageStartupMessages()`. An emoji-decorated greeting is shown when
 5 | #' `.Platform$OS.type` is "unix" and a plain-text greeting when it is "windows".
 6 | #'
 7 | #' @param libname,pkgname Arguments supplied by R to the `.onAttach()` hook;
 8 | #'   neither is used here.
 9 | #' @return Called for its side effect; nothing useful is returned.
10 | #' @noRd
11 | .onAttach <- function(libname, pkgname) {
12 |   version <- utils::packageVersion("protti")
13 | 
14 |   # Line breaks are spelled as "\n" escapes. A string literal that continues on
15 |   # the next source line would copy that line's indentation into the message.
16 |   switch(.Platform$OS.type,
17 |     unix = packageStartupMessage(
18 |       "\U1F469\U1F3FD\U200D\U1F52C Welcome to protti version ",
19 |       version,
20 |       "! \U1F468\U1F3FC\U200D\U1F4BB",
21 |       "\n\n\U1F52C Have fun analysing your data! \U1F4BB"
22 |     ),
23 |     windows = packageStartupMessage(
24 |       "Welcome to protti version ",
25 |       version,
26 |       "!",
27 |       "\n\nHave fun analysing your data!"
28 |     )
29 |   )
30 | }
31 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("To cite protti in publications, please use:")
 2 | 
 3 | # Bibliographic entry for the protti paper (Bioinformatics Advances, 2022).
 4 | bibentry(
 5 |   bibtype = "article",
 6 |   textVersion = "Quast, J.P., Schuster, D., Picotti, P. (2022). protti: an R package for comprehensive data analysis of peptide- and protein-centric bottom-up proteomics data. Bioinformatics Advances, 2(1).",
 7 |   author = c(
 8 |     person("Jan-Philipp", "Quast"),
 9 |     person("Dina", "Schuster"),
10 |     person("Paola", "Picotti")
11 |   ),
12 |   title = "protti: an R package for comprehensive data analysis of peptide- and protein-centric bottom-up proteomics data",
13 |   journal = "Bioinformatics Advances",
14 |   year = "2022",
15 |   volume = "2",
16 |   number = "1"
17 | )
18 | 


--------------------------------------------------------------------------------
/man/plot_peptide_profiles.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/peptide_profile_plot.R
 3 | \name{plot_peptide_profiles}
 4 | \alias{plot_peptide_profiles}
 5 | \title{Peptide abundance profile plot}
 6 | \usage{
 7 | plot_peptide_profiles(...)
 8 | }
 9 | \value{
10 | A list of peptide profile plots.
11 | }
12 | \description{
13 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
14 | This function was deprecated due to its name changing to \code{peptide_profile_plot()}.
15 | }
16 | \keyword{internal}
17 | 


--------------------------------------------------------------------------------
/man/peptide_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assign_peptide_type.R
 3 | \name{peptide_type}
 4 | \alias{peptide_type}
 5 | \title{Assign peptide type}
 6 | \usage{
 7 | peptide_type(...)
 8 | }
 9 | \value{
10 | A data frame that contains the input data and an additional column with the peptide
11 | type information.
12 | }
13 | \description{
14 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
15 | This function was deprecated due to its name changing to \code{assign_peptide_type()}.
16 | }
17 | \keyword{internal}
18 | 


--------------------------------------------------------------------------------
/man/plot_pval_distribution.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pval_distribution_plot.R
 3 | \name{plot_pval_distribution}
 4 | \alias{plot_pval_distribution}
 5 | \title{Plot histogram of p-value distribution}
 6 | \usage{
 7 | plot_pval_distribution(...)
 8 | }
 9 | \value{
10 | A histogram plot that shows the p-value distribution.
11 | }
12 | \description{
13 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
14 | This function was deprecated due to its name changing to \code{pval_distribution_plot()}.
15 | }
16 | \keyword{internal}
17 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Submission 
 2 | 
 3 | * We specifically addressed and fixed the issue raised by Prof. Brian Ripley:
 4 |   * We updated `try_query()` so that it also successfully handles errors that are unrelated to the request.
 5 | 
 6 | ## Test environments
 7 | * macOS-latest (on GitHub actions), R 4.4.1
 8 | * windows-latest (on GitHub actions), R 4.4.1
 9 | * ubuntu-20.04 (on GitHub actions), R 4.4.1
10 | * ubuntu-20.04 (on GitHub actions), r-devel
11 | * windows-ix86+x86_64 (win-builder), r-devel
12 | * fedora-clang-devel (R-hub), r-devel
13 | * windows-x86_64-devel (R-hub), r-devel
14 | * Ubuntu Linux 20.04.1 LTS (R-hub), r-release
15 | 
16 | ## R CMD check results
17 | 
18 | 0 errors ✓ | 0 warnings ✓ | 0 notes ✓
19 | 
20 | 


--------------------------------------------------------------------------------
/man/sequence_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_sequence_coverage.R
 3 | \name{sequence_coverage}
 4 | \alias{sequence_coverage}
 5 | \title{Protein sequence coverage}
 6 | \usage{
 7 | sequence_coverage(...)
 8 | }
 9 | \value{
10 | A new column in the \code{data} data frame containing the calculated sequence coverage
11 | for each identified protein
12 | }
13 | \description{
14 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
15 | This function was deprecated due to its name changing to \code{calculate_sequence_coverage()}.
16 | }
17 | \keyword{internal}
18 | 


--------------------------------------------------------------------------------
/man/split_metal_name.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split_metal_name.R
 3 | \name{split_metal_name}
 4 | \alias{split_metal_name}
 5 | \title{Convert metal names to search pattern}
 6 | \usage{
 7 | split_metal_name(metal_names)
 8 | }
 9 | \arguments{
10 | \item{metal_names}{a character vector containing names of metals and metal containing molecules.}
11 | }
12 | \value{
13 | A character vector with metal name search patterns.
14 | }
15 | \description{
16 | Converts a vector of metal names extracted from the \code{ft_metal} column
17 | obtained with \code{fetch_uniprot} to a pattern that can be used to search for corresponding
18 | ChEBI IDs. This is used as a helper function for other functions.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/volcano_protti.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/volcano_plot.R
 3 | \name{volcano_protti}
 4 | \alias{volcano_protti}
 5 | \title{Volcano plot}
 6 | \usage{
 7 | volcano_protti(...)
 8 | }
 9 | \value{
10 | Depending on the method used a volcano plot with either highlighted targets
11 | (\code{method = "target"}) or highlighted significant proteins (\code{method = "significant"})
12 | is returned.
13 | }
14 | \description{
15 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
16 | This function was deprecated due to its name changing to \code{volcano_plot()}.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/plot_drc_4p.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/drc_4p_plot.R
 3 | \name{plot_drc_4p}
 4 | \alias{plot_drc_4p}
 5 | \title{Plotting of four-parameter dose response curves}
 6 | \usage{
 7 | plot_drc_4p(...)
 8 | }
 9 | \value{
10 | If \code{targets = "all"} a list containing plots for every unique identifier in the
11 | \code{grouping} variable is created. Otherwise a plot for the specified targets is created with
12 | maximally 20 facets.
13 | }
14 | \description{
15 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
16 | This function was deprecated due to its name changing to \code{drc_4p_plot()}.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/median_normalisation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalise.R
 3 | \name{median_normalisation}
 4 | \alias{median_normalisation}
 5 | \title{Intensity normalisation}
 6 | \usage{
 7 | median_normalisation(...)
 8 | }
 9 | \value{
10 | A data frame with a column called \code{normalised_intensity_log2} containing the
11 | normalised intensity values.
12 | }
13 | \description{
14 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
15 | This function was deprecated due to its name changing to \code{normalise()}.
16 | The normalisation method in the new function needs to be provided as an argument.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/kegg_enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_kegg_enrichment.R
 3 | \name{kegg_enrichment}
 4 | \alias{kegg_enrichment}
 5 | \title{Perform KEGG pathway enrichment analysis}
 6 | \usage{
 7 | kegg_enrichment(...)
 8 | }
 9 | \value{
10 | A bar plot displaying negative log10 adjusted p-values for the top 10 enriched pathways.
11 | Bars are coloured according to the direction of the enrichment. If \code{plot = FALSE}, a data
12 | frame is returned.
13 | }
14 | \description{
15 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
16 | This function was deprecated due to its name changing to \code{calculate_kegg_enrichment()}.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # User-specific files
 9 | .Ruserdata
10 | 
11 | # Example code in package build process
12 | *-Ex.R
13 | 
14 | # Output files from R CMD build
15 | /*.tar.gz
16 | 
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 | 
20 | # RStudio files
21 | .Rproj.user/
22 | 
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.pdf
26 | 
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 | 
30 | # knitr and R markdown default cache directories
31 | *_cache/
32 | /cache/
33 | 
34 | # Temporary files created by R markdown
35 | *.utf8.md
36 | *.knit.md
37 | 
38 | # R Environment Variables
39 | .Renviron
40 | 
41 | .DS_Store
42 | inst/doc
43 | doc
44 | Meta
45 | docs
46 | /doc/
47 | /Meta/
48 | 


--------------------------------------------------------------------------------
/man/network_analysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/analyse_functional_network.R
 3 | \name{network_analysis}
 4 | \alias{network_analysis}
 5 | \title{Analyse protein interaction network for significant hits}
 6 | \usage{
 7 | network_analysis(...)
 8 | }
 9 | \value{
10 | A network plot displaying interactions of the provided proteins. If
11 | \code{binds_treatment} was provided halos around the proteins show which proteins interact with
12 | the treatment. If \code{plot = FALSE} a data frame with interaction information is returned.
13 | }
14 | \description{
15 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
16 | This function was deprecated due to its name changing to \code{analyse_functional_network()}.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/fetch_go.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_go.R
 3 | \name{fetch_go}
 4 | \alias{fetch_go}
 5 | \title{Fetch gene ontology information from geneontology.org}
 6 | \usage{
 7 | fetch_go(organism_id)
 8 | }
 9 | \arguments{
10 | \item{organism_id}{a character value NCBI taxonomy identifier of an organism (TaxId).
11 | Possible inputs include only: "9606" (Human), "559292" (Yeast) and "83333" (E. coli).}
12 | }
13 | \value{
14 | A data frame that contains gene ontology mappings to UniProt or SGD IDs. The original
15 | file is a .GAF file. A detailed description of all columns can be found here:
16 | http://geneontology.org/docs/go-annotation-file-gaf-format-2.1/
17 | }
18 | \description{
19 | Fetches gene ontology data from geneontology.org for the provided organism ID.
20 | }
21 | \examples{
22 | \donttest{
23 | go <- fetch_go("9606")
24 | 
25 | head(go)
26 | }
27 | }
28 | 


--------------------------------------------------------------------------------
/man/metal_chebi_uniprot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{metal_chebi_uniprot}
 5 | \alias{metal_chebi_uniprot}
 6 | \title{List of metal-related ChEBI IDs in UniProt}
 7 | \format{
 8 | A data.frame containing information retrieved from ChEBI using \code{fetch_chebi(stars = c(2, 3))},
 9 | filtered using symbols in the \code{metal_list} and manual annotation of metal related ChEBI IDs that do not
10 | contain a formula.
11 | }
12 | \source{
13 | UniProt (cc_cofactor, cc_catalytic_activity, ft_binding) and ChEBI
14 | }
15 | \usage{
16 | metal_chebi_uniprot
17 | }
18 | \description{
19 | A list that contains all ChEBI IDs that appear in UniProt and that contain either a metal atom
20 | in their formula or that do not have a formula but the ChEBI term is related to metals.
21 | This was last updated on the 19/02/24.
22 | }
23 | \keyword{datasets}
24 | 


--------------------------------------------------------------------------------
/man/replace_identified_by_x.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/replace_identified_by_x.R
 3 | \name{replace_identified_by_x}
 4 | \alias{replace_identified_by_x}
 5 | \title{Replace identified positions in protein sequence by "x"}
 6 | \usage{
 7 | replace_identified_by_x(sequence, positions_start, positions_end)
 8 | }
 9 | \arguments{
10 | \item{sequence}{a character value that contains the protein sequence.}
11 | 
12 | \item{positions_start}{a numeric vector of start positions of the identified peptides.}
13 | 
14 | \item{positions_end}{a numeric vector of end positions of the identified peptides.}
15 | }
16 | \value{
17 | A character vector that contains the modified protein sequence with each identified
18 | position replaced by "x".
19 | }
20 | \description{
21 | Helper function for the calculation of sequence coverage, replaces identified positions with an
22 | "x" within the protein sequence.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="96" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="96" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#4c1" d="M53 0h43v20H53z"/><path fill="url(#b)" d="M0 0h96v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="735" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">stable</text><text x="735" y="140" transform="scale(.1)" textLength="330">stable</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-defunct.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="104" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="104" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#e05d44" d="M53 0h51v20H53z"/><path fill="url(#b)" d="M0 0h104v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="775" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">defunct</text><text x="775" y="140" transform="scale(.1)" textLength="410">defunct</text></g> </svg>


--------------------------------------------------------------------------------
/man/metal_go_slim_subset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{metal_go_slim_subset}
 5 | \alias{metal_go_slim_subset}
 6 | \title{Molecular function gene ontology metal subset}
 7 | \format{
 8 | A data.frame containing a slim subset of molecular function gene ontology terms
 9 | that are related to metal binding. The \code{slims_from_id} column contains all IDs relevant
10 | in this subset while the \code{slims_to_ids} column contains the starting IDs. If ChEBI IDs
11 | have been annotated manually this is indicated in the \code{database} column.
12 | }
13 | \source{
14 | QuickGO and ChEBI
15 | }
16 | \usage{
17 | metal_go_slim_subset
18 | }
19 | \description{
20 | A subset of molecular function gene ontology terms related to metals that was created
21 | using the slimming process provided by the QuickGO EBI database.
22 | This was last updated on the 19/02/24.
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-archived.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="110" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="110" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#e05d44" d="M53 0h57v20H53z"/><path fill="url(#b)" d="M0 0h110v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="805" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">archived</text><text x="805" y="140" transform="scale(.1)" textLength="470">archived</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-maturing.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="114" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="114" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#007ec6" d="M53 0h61v20H53z"/><path fill="url(#b)" d="M0 0h114v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="825" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">maturing</text><text x="825" y="140" transform="scale(.1)" textLength="510">maturing</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="124" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="124" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#fe7d37" d="M53 0h71v20H53z"/><path fill="url(#b)" d="M0 0h124v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="875" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">deprecated</text><text x="875" y="140" transform="scale(.1)" textLength="610">deprecated</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="128" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h55v20H0z"/><path fill="#007ec6" d="M55 0h73v20H55z"/><path fill="url(#b)" d="M0 0h128v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text><text x="285" y="140" transform="scale(.1)" textLength="450">lifecycle</text><text x="905" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">superseded</text><text x="905" y="140" transform="scale(.1)" textLength="630">superseded</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="136" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="136" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#fe7d37" d="M53 0h83v20H53z"/><path fill="url(#b)" d="M0 0h136v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="935" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="730">experimental</text><text x="935" y="140" transform="scale(.1)" textLength="730">experimental</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-questioning.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="126" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="126" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#007ec6" d="M53 0h73v20H53z"/><path fill="url(#b)" d="M0 0h126v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="885" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">questioning</text><text x="885" y="140" transform="scale(.1)" textLength="630">questioning</text></g> </svg>


--------------------------------------------------------------------------------
/man/fetch_kegg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_kegg.R
 3 | \name{fetch_kegg}
 4 | \alias{fetch_kegg}
 5 | \title{Fetch KEGG pathway data from KEGG}
 6 | \usage{
 7 | fetch_kegg(species)
 8 | }
 9 | \arguments{
10 | \item{species}{a character value providing an abbreviated species name. "hsa" for human, "eco"
11 | for E. coli and "sce" for S. cerevisiae. Additional possible names can be found for
12 | \href{https://www.genome.jp/kegg-bin/show_organism?category=Eukaryotes}{eukaryotes} and for
13 | \href{https://www.genome.jp/kegg-bin/show_organism?category=Prokaryotes}{prokaryotes}.}
14 | }
15 | \value{
16 | A data frame that contains gene IDs with corresponding pathway IDs and names for a
17 | selected organism.
18 | }
19 | \description{
20 | Fetches gene IDs and corresponding pathway IDs and names for the provided organism.
21 | }
22 | \examples{
23 | \donttest{
24 | kegg <- fetch_kegg(species = "hsa")
25 | 
26 | head(kegg)
27 | }
28 | }
29 | 


--------------------------------------------------------------------------------
/man/treatment_enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_treatment_enrichment.R
 3 | \name{treatment_enrichment}
 4 | \alias{treatment_enrichment}
 5 | \title{Check treatment enrichment}
 6 | \usage{
 7 | treatment_enrichment(...)
 8 | }
 9 | \value{
10 | A bar plot displaying the percentage of all detected proteins and all significant proteins
11 | that bind to the treatment. A Fisher's exact test is performed to calculate the significance of
12 | the enrichment in significant proteins compared to all proteins. The result is reported as a
13 | p-value. If \code{plot = FALSE} a contingency table in long format is returned.
14 | }
15 | \description{
16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
17 | This function was deprecated due to its name changing to \code{calculate_treatment_enrichment()}.
18 | }
19 | \keyword{internal}
20 | 


--------------------------------------------------------------------------------
/man/read_protti.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read_protti.R
 3 | \name{read_protti}
 4 | \alias{read_protti}
 5 | \title{Read, clean and convert}
 6 | \usage{
 7 | read_protti(filename, ...)
 8 | }
 9 | \arguments{
10 | \item{filename}{a character value that specifies the path to the file.}
11 | 
12 | \item{...}{additional arguments for the fread function.}
13 | }
14 | \value{
15 | A data frame (with class tibble) that contains the content of the specified file.
16 | }
17 | \description{
18 | The function uses the very fast \code{fread} function from the \code{data.table} package. The
19 | column names of the resulting data table are made more r-friendly using \code{clean_names} from
20 | the \code{janitor} package. It replaces "." and " " with "_" and converts names to lower case
21 | which is also known as snake_case. In the end the data table is converted to a tibble.
22 | }
23 | \examples{
24 | \dontrun{
25 | read_protti("folder\\\\filename")
26 | }
27 | }
28 | 


--------------------------------------------------------------------------------
/man/go_enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_go_enrichment.R
 3 | \name{go_enrichment}
 4 | \alias{go_enrichment}
 5 | \title{Perform gene ontology enrichment analysis}
 6 | \usage{
 7 | go_enrichment(...)
 8 | }
 9 | \value{
10 | A bar plot displaying negative log10 adjusted p-values for the top 10 enriched or
11 | depleted gene ontology terms. Alternatively, plot cutoffs can be chosen individually with the
12 | \code{plot_cutoff} argument. Bars are colored according to the direction of the enrichment
13 | (enriched or deenriched). If \code{plot = FALSE}, a data frame is returned. P-values are
14 | adjusted with Benjamini-Hochberg.
15 | }
16 | \description{
17 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
18 | This function was deprecated due to its name changing to \code{calculate_go_enrichment()}.
19 | }
20 | \keyword{internal}
21 | 


--------------------------------------------------------------------------------
/man/rapamycin_10uM.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{rapamycin_10uM}
 5 | \alias{rapamycin_10uM}
 6 | \title{Rapamycin 10 uM example data}
 7 | \format{
 8 | A data frame containing peptide level data from a Spectronaut report.
 9 | }
10 | \source{
11 | Piazza, I., Beaton, N., Bruderer, R. et al. A machine learning-based chemoproteomic
12 | approach to identify drug targets and binding sites in complex proteomes. Nat Commun 11, 4200
13 | (2020). \doi{10.1038/s41467-020-18071-x}
14 | }
15 | \usage{
16 | rapamycin_10uM
17 | }
18 | \description{
19 | Rapamycin example data used for the vignette about binary control/treated data. The data was
20 | obtained from \href{https://www.nature.com/articles/s41467-020-18071-x}{Piazza 2020}
21 | and corresponds to experiment 18. FKBP1A the rapamycin binding protein and 49 other randomly
22 | sampled proteins were used for this example dataset. Furthermore, only the DMSO control and the
23 | 10 uM condition were used.
24 | }
25 | \keyword{datasets}
26 | 


--------------------------------------------------------------------------------
/R/read_protti.R:
--------------------------------------------------------------------------------
 1 | #' Read, clean and convert
 2 | #'
 3 | #' The function uses the very fast \code{fread} function from the \code{data.table} package. The
 4 | #' column names of the resulting data table are made more r-friendly using \code{clean_names} from
 5 | #' the \code{janitor} package. It replaces "." and " " with "_" and converts names to lower case
 6 | #' which is also known as snake_case. In the end the data table is converted to a tibble.
 7 | #'
 8 | #' @param filename a character value that specifies the path to the file.
 9 | #' @param ... additional arguments for the fread function.
10 | #'
11 | #' @importFrom data.table fread
12 | #' @importFrom janitor clean_names
13 | #' @importFrom magrittr %>%
14 | #'
15 | #' @return A data frame (with class tibble) that contains the content of the specified file.
16 | #' @export
17 | #'
18 | #' @examples
19 | #' \dontrun{
20 | #' read_protti("folder\\filename")
21 | #' }
22 | read_protti <- function(filename, ...) {
23 |   # Import the file, then snake_case its column names and return a tibble.
24 |   imported <- data.table::fread(filename, ...)
25 |   cleaned <- janitor::clean_names(imported)
26 |   tibble::as_tibble(cleaned)
27 | }
28 | 


--------------------------------------------------------------------------------
/man/scale_protti.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/scale_protti.R
 3 | \name{scale_protti}
 4 | \alias{scale_protti}
 5 | \title{Scaling a vector}
 6 | \usage{
 7 | scale_protti(x, method)
 8 | }
 9 | \arguments{
10 | \item{x}{a numeric vector}
11 | 
12 | \item{method}{a character value that specifies the method to be used for scaling. "01" scales
13 | the vector between 0 and 1. "center" scales the vector equal to \code{base::scale} around a
14 | center. This is done by subtracting the mean from every value and then dividing them by the
15 | standard deviation.}
16 | }
17 | \value{
18 | A scaled numeric vector.
19 | }
20 | \description{
21 | \code{scale_protti} is used to scale a numeric vector either between 0 and 1 or around a
22 | centered value using the standard deviation. If a vector containing only one value or
23 | repeatedly the same value is provided, 1 is returned as the scaled value for \code{method = "01"}
24 | and 0 is returned for \code{method = "center"}.
25 | }
26 | \examples{
27 | scale_protti(c(1, 2, 1, 4, 6, 8), method = "01")
28 | }
29 | 


--------------------------------------------------------------------------------
/man/rapamycin_dose_response.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{rapamycin_dose_response}
 5 | \alias{rapamycin_dose_response}
 6 | \title{Rapamycin dose response example data}
 7 | \format{
 8 | A data frame containing peptide level data from a Spectronaut report.
 9 | }
10 | \source{
11 | Piazza, I., Beaton, N., Bruderer, R. et al. A machine learning-based chemoproteomic
12 | approach to identify drug targets and binding sites in complex proteomes. Nat Commun 11, 4200
13 | (2020). \doi{10.1038/s41467-020-18071-x}
14 | }
15 | \usage{
16 | rapamycin_dose_response
17 | }
18 | \description{
19 | Rapamycin example data used for the vignette about dose response data. The data was obtained
20 | from \href{https://www.nature.com/articles/s41467-020-18071-x}{Piazza 2020} and corresponds
21 | to experiment 18. FKBP1A the rapamycin binding protein and 39 other randomly sampled proteins
22 | were used for this example dataset. The concentration range includes the following points:
23 | 0 (DMSO control), 10 pM, 100 pM, 1 nM, 10 nM, 100 nM, 1 uM, 10 uM and 100 uM.
24 | }
25 | \keyword{datasets}
26 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2021 ETH Zurich, Jan-Philipp Quast, Dina Schuster
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/man/fetch_chebi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_chebi.R
 3 | \name{fetch_chebi}
 4 | \alias{fetch_chebi}
 5 | \title{Fetch ChEBI database information}
 6 | \usage{
 7 | fetch_chebi(relation = FALSE, stars = c(3), timeout = 60)
 8 | }
 9 | \arguments{
10 | \item{relation}{a logical value that indicates if ChEBI Ontology data will be returned instead
11 | of the main compound data. This data can be used to check the relations of ChEBI IDs to each other.
12 | Default is FALSE.}
13 | 
14 | \item{stars}{a numeric vector indicating the "star" level (confidence) for which entries should
15 | be retrieved (Possible levels are 1, 2 and 3). Default is \code{c(3)} retrieving only "3-star"
16 | entries, which are manually annotated by the ChEBI curator team.}
17 | 
18 | \item{timeout}{a numeric value specifying the time in seconds until the download of an organism
19 | archive times out. The default is 60 seconds.}
20 | }
21 | \value{
22 | A data frame that contains information about each molecule in the ChEBI database.
23 | }
24 | \description{
25 | Fetches information from the ChEBI database.
26 | }
27 | \examples{
28 | \donttest{
29 | chebi <- fetch_chebi()
30 | 
31 | head(chebi)
32 | }
33 | }
34 | 


--------------------------------------------------------------------------------
/data-raw/rapamycin_10uM.R:
--------------------------------------------------------------------------------
 1 | # library(tidyverse)
 2 | # library(protti)
 3 | #
 4 | # set.seed(1234)
 5 | #
 6 | # # Source: Piazza, I., Beaton, N., Bruderer, R. et al. A machine learning-based chemoproteomic approach to identify drug targets and binding sites in complex proteomes. Nat Commun 11, 4200 (2020). https://doi.org/10.1038/s41467-020-18071-x
 7 | #
 8 | # rapa <- read_protti("rapamycin_dose_response.csv")
 9 | #
10 | # # filter to only retain DMSO control and 10 uM concentration
11 | #
12 | # rapa_filtered <- rapa %>%
13 | #   distinct(r_file_name, r_condition, pep_stripped_sequence, eg_precursor_id, pg_protein_accessions, fg_quantity, pep_is_proteotypic, eg_is_decoy) %>%
14 | #   filter(r_condition == 0 | r_condition == 7) %>%
15 | #   mutate(r_condition = ifelse(r_condition == 0, "control", "rapamycin")) %>%
16 | #   mutate(r_file_name = paste0(r_condition, "_", str_sub(r_file_name, start = 35, end = 36)))
17 | #
18 | # all_proteins <- unique(rapa_filtered$pg_protein_accessions)
19 | #
20 | # all_proteins_wo_FKBP1A <- all_proteins[all_proteins != "P62942"]
21 | #
22 | # sampled_bg <- sample(all_proteins_wo_FKBP1A, size = 49)
23 | #
24 | # rapamycin_10uM <- rapa_filtered %>%
25 | #   filter(pg_protein_accessions %in% c(sampled_bg, "P62942"))
26 | #
27 | # usethis::use_data(rapamycin_10uM, overwrite = TRUE)
28 | 


--------------------------------------------------------------------------------
/man/calculate_sequence_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_sequence_coverage.R
 3 | \name{calculate_sequence_coverage}
 4 | \alias{calculate_sequence_coverage}
 5 | \title{Protein sequence coverage}
 6 | \usage{
 7 | calculate_sequence_coverage(data, protein_sequence, peptides)
 8 | }
 9 | \arguments{
10 | \item{data}{a data frame containing at least the protein sequence and the identified peptides
11 | as columns.}
12 | 
13 | \item{protein_sequence}{a character column in the \code{data} data frame that contains protein
14 | sequences. Can be obtained by using the function \code{fetch_uniprot()}}
15 | 
16 | \item{peptides}{a character column in the \code{data} data frame that contains the identified
17 | peptides.}
18 | }
19 | \value{
20 | A new column in the \code{data} data frame containing the calculated sequence coverage
21 | for each identified protein
22 | }
23 | \description{
24 | Calculate sequence coverage for each identified protein.
25 | }
26 | \examples{
27 | data <- data.frame(
28 |   protein_sequence = c("abcdefghijklmnop", "abcdefghijklmnop"),
29 |   pep_stripped_sequence = c("abc", "jklmn")
30 | )
31 | 
32 | calculate_sequence_coverage(
33 |   data,
34 |   protein_sequence = protein_sequence,
35 |   peptides = pep_stripped_sequence
36 | )
37 | }
38 | 


--------------------------------------------------------------------------------
/man/find_chebis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_chebis.R
 3 | \name{find_chebis}
 4 | \alias{find_chebis}
 5 | \title{Find ChEBI IDs for name patterns}
 6 | \usage{
 7 | find_chebis(chebi_data, pattern)
 8 | }
 9 | \arguments{
10 | \item{chebi_data}{a data frame that contains at least information on ChEBI IDs (id) and their
11 | names (name). This data frame can be obtained by calling \code{fetch_chebi()}. Ideally this
12 | should be subsetted to only contain molecules of a specific type e.g. metals. This can be
13 | achieved by calling \code{find_all_subs} with a general ID such as "25213" (Metal cation) and
14 | then subset the complete ChEBI database to only include the returned sub-IDs. Using a subsetted
15 | database ensures better search results. This is a helper function for other functions.}
16 | 
17 | \item{pattern}{a character vector that contains names or name patterns of molecules. Name
18 | patterns can be for example obtained with the \code{split_metal_name} function.}
19 | }
20 | \value{
21 | A list of character vectors containing ChEBI IDs that have a name matching the supplied
22 | pattern. It contains one element per pattern.
23 | }
24 | \description{
25 | Search for ChEBI IDs that match a specific name pattern. A list of corresponding ChEBI IDs is
26 | returned.
27 | }
28 | 


--------------------------------------------------------------------------------
/man/find_peptide.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_peptide.R
 3 | \name{find_peptide}
 4 | \alias{find_peptide}
 5 | \title{Find peptide location}
 6 | \usage{
 7 | find_peptide(data, protein_sequence, peptide_sequence)
 8 | }
 9 | \arguments{
10 | \item{data}{a data frame that contains at least the protein and peptide sequence.}
11 | 
12 | \item{protein_sequence}{a character column in the \code{data} data frame that contains the
13 | protein sequence.}
14 | 
15 | \item{peptide_sequence}{a character column in the \code{data} data frame that contains the
16 | peptide sequence.}
17 | }
18 | \value{
19 | A data frame that contains the input data and four additional columns with peptide
20 | start and end position, the last amino acid and the amino acid before the peptide.
21 | }
22 | \description{
23 | The position of the given peptide sequence is searched within the given protein sequence. In
24 | addition the last amino acid of the peptide and the amino acid right before are reported.
25 | }
26 | \examples{
27 | # Create example data
28 | data <- data.frame(
29 |   protein_sequence = c("abcdefg"),
30 |   peptide_sequence = c("cde")
31 | )
32 | 
33 | # Find peptide
34 | find_peptide(
35 |   data = data,
36 |   protein_sequence = protein_sequence,
37 |   peptide_sequence = peptide_sequence
38 | )
39 | }
40 | 


--------------------------------------------------------------------------------
/man/drc_4p.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/drc_4p.R
 3 | \name{drc_4p}
 4 | \alias{drc_4p}
 5 | \title{Dose response curve helper function}
 6 | \usage{
 7 | drc_4p(data, response, dose, log_logarithmic = TRUE, pb = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{a data frame that contains at least the dose and response column the model should
11 | be fitted to.}
12 | 
13 | \item{response}{a numeric column that contains the response values.}
14 | 
15 | \item{dose}{a numeric column that contains the dose values.}
16 | 
17 | \item{log_logarithmic}{a logical value indicating if a logarithmic or log-logarithmic model is
18 | fitted. If response values form a symmetric curve for non-log transformed dose values, a
19 | logarithmic model instead of a log-logarithmic model should be used. Usually biological dose
20 | response data has a log-logarithmic distribution, which is the reason this is the default.
21 | Log-logarithmic models are symmetric if dose values are log transformed.}
22 | 
23 | \item{pb}{progress bar object. This is only necessary if the function is used in an iteration.}
24 | }
25 | \value{
26 | An object of class \code{drc}. If no fit was performed a character vector with content
27 | "no_fit".
28 | }
29 | \description{
30 | This function performs the four-parameter dose response curve fit. It is the helper function
31 | for the fit in the \code{fit_drc_4p} function.
32 | }
33 | 


--------------------------------------------------------------------------------
/man/ttest_protti.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ttest_protti.R
 3 | \name{ttest_protti}
 4 | \alias{ttest_protti}
 5 | \title{Perform Welch's t-test}
 6 | \usage{
 7 | ttest_protti(mean1, mean2, sd1, sd2, n1, n2, log_values = TRUE)
 8 | }
 9 | \arguments{
10 | \item{mean1}{a numeric vector that contains the means of group1.}
11 | 
12 | \item{mean2}{a numeric vector that contains the means of group2.}
13 | 
14 | \item{sd1}{a numeric vector that contains the standard deviations of group1.}
15 | 
16 | \item{sd2}{a numeric vector that contains the standard deviations of group2.}
17 | 
18 | \item{n1}{a numeric vector that contains the number of replicates used for the calculation of
19 | each mean and standard deviation of group1.}
20 | 
21 | \item{n2}{a numeric vector that contains the number of replicates used for the calculation of
22 | each mean and standard deviation of group2.}
23 | 
24 | \item{log_values}{a logical value that indicates if values are log transformed. This determines
25 | how fold changes are calculated. Default is \code{log_values = TRUE}.}
26 | }
27 | \value{
28 | A data frame that contains the calculated differences of means, standard error, t
29 | statistic and p-values.
30 | }
31 | \description{
32 | Performs a Welch's t-test and calculates p-values between two groups.
33 | }
34 | \examples{
35 | ttest_protti(
36 |   mean1 = 10,
37 |   mean2 = 15.5,
38 |   sd1 = 1,
39 |   sd2 = 0.5,
40 |   n1 = 3,
41 |   n2 = 3
42 | )
43 | }
44 | 


--------------------------------------------------------------------------------
/man/normalise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalise.R
 3 | \name{normalise}
 4 | \alias{normalise}
 5 | \title{Intensity normalisation}
 6 | \usage{
 7 | normalise(data, sample, intensity_log2, method = "median")
 8 | }
 9 | \arguments{
10 | \item{data}{a data frame containing at least sample names and intensity values. Please note that if the
11 | data frame is grouped, the normalisation will be computed by group.}
12 | 
13 | \item{sample}{a character column in the \code{data} data frame that contains the sample names.}
14 | 
15 | \item{intensity_log2}{a numeric column in the \code{data} data frame that contains the log2 transformed
16 | intensity values to be normalised.}
17 | 
18 | \item{method}{a character value specifying the method to be used for normalisation. Default
19 | is "median".}
20 | }
21 | \value{
22 | A data frame with a column called \code{normalised_intensity_log2} containing the
23 | normalised intensity values.
24 | }
25 | \description{
26 | Performs normalisation on intensities. For median normalisation the normalised intensity is the
27 | original intensity minus the run median plus the global median. This is also the way it is
28 | implemented in the Spectronaut search engine.
29 | }
30 | \examples{
31 | data <- data.frame(
32 |   r_file_name = c("s1", "s2", "s3", "s1", "s2", "s3"),
33 |   intensity_log2 = c(18, 19, 17, 20, 21, 19)
34 | )
35 | 
36 | normalise(data,
37 |   sample = r_file_name,
38 |   intensity_log2 = intensity_log2,
39 |   method = "median"
40 | )
41 | }
42 | 


--------------------------------------------------------------------------------
/data-raw/rapamycin_dose_response.R:
--------------------------------------------------------------------------------
 1 | # library(tidyverse)
 2 | # library(protti)
 3 | #
 4 | # set.seed(123)
 5 | #
 6 | # # Source: Piazza, I., Beaton, N., Bruderer, R. et al. A machine learning-based chemoproteomic approach to identify drug targets and binding sites in complex proteomes. Nat Commun 11, 4200 (2020). https://doi.org/10.1038/s41467-020-18071-x
 7 | #
 8 | rapa <- read_protti("rapamycin_dose_response.csv")
 9 | 
10 | # Filter to only contain necessary columns. Simplify file names. Annotate conditions with concentrations in pM.
11 | 
12 | rapa_filtered <- rapa %>%
13 |   distinct(r_file_name, r_condition, eg_precursor_id, pg_protein_accessions, fg_quantity, pep_is_proteotypic, eg_is_decoy) %>%
14 |   mutate(r_file_name = paste0("sample_", str_sub(r_file_name, start = 35, end = 36))) %>%
15 |   mutate(r_condition = case_when(
16 |     r_condition == 0 ~ 0,
17 |     r_condition == 1 ~ 10,
18 |     r_condition == 2 ~ 100,
19 |     r_condition == 3 ~ 1000,
20 |     r_condition == 4 ~ 10000,
21 |     r_condition == 5 ~ 100000,
22 |     r_condition == 6 ~ 1000000,
23 |     r_condition == 7 ~ 10000000,
24 |     r_condition == 8 ~ 100000000,
25 |   ))
26 | 
27 | all_proteins <- unique(rapa_filtered$pg_protein_accessions)
28 | 
29 | all_proteins_wo_FKBP1A <- all_proteins[all_proteins != "P62942"]
30 | 
31 | sampled_bg <- sample(all_proteins_wo_FKBP1A, size = 39)
32 | 
33 | rapamycin_dose_response <- rapa_filtered %>%
34 |   filter(pg_protein_accessions %in% c(sampled_bg, "P62942"))
35 | 
36 | usethis::use_data(rapamycin_dose_response, overwrite = TRUE)
37 | 


--------------------------------------------------------------------------------
/R/scale_protti.R:
--------------------------------------------------------------------------------
 1 | #' Scaling a vector
 2 | #'
 3 | #' \code{scale_protti} is used to scale a numeric vector either between 0 and 1 or around a
 4 | #' centered value using the standard deviation. If a vector containing only one value or
 5 | #' repeatedly the same value is provided, 1 is returned as the scaled value for \code{method = "01"}
 6 | #' and 0 is returned for \code{metod = "center"}.
 7 | #'
 8 | #' @param x a numeric vector
 9 | #' @param method a character value that specifies the method to be used for scaling. "01" scales
10 | #' the vector between 0 and 1. "center" scales the vector equal to \code{base::scale} around a
11 | #' center. This is done by subtracting the mean from every value and then deviding them by the
12 | #' standard deviation.
13 | #'
14 | #' @return A scaled numeric vector.
15 | #' @export
16 | #'
17 | #' @examples
18 | #' scale_protti(c(1, 2, 1, 4, 6, 8), method = "01")
19 | scale_protti <- function(x, method) {
20 |   if (is.numeric(x) == FALSE) {
21 |     stop("x is a ", typeof(x), " vector but needs to be a numeric vector!")
22 |   }
23 |   if (method == "01") {
24 |     result <- (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
25 | 
26 |     if ((max(x, na.rm = TRUE) - min(x, na.rm = TRUE)) == 0) {
27 |       result <- rep(1, length(x))
28 |     }
29 |   }
30 |   if (method == "center") {
31 |     result <- (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
32 | 
33 |     if (stats::sd(x, na.rm = TRUE) == 0) {
34 |       result <- rep(0, length(x))
35 |     }
36 |   }
37 |   result
38 | }
39 | 


--------------------------------------------------------------------------------
/R/replace_identified_by_x.R:
--------------------------------------------------------------------------------
 1 | #' Replace identified positions in protein sequence by "x"
 2 | #'
 3 | #' Helper function for the calculation of sequence coverage, replaces identified positions with an
 4 | #' "x" within the protein sequence.
 5 | #'
 6 | #' @param sequence a character value that contains the protein sequence.
 7 | #' @param positions_start a numeric vector of start positions of the identified peptides.
 8 | #' @param positions_end a numeric vector of end positions of the identified peptides.
 9 | #'
10 | #' @return A character vector that contains the modified protein sequence with each identified
11 | #' position replaced by "x".
12 | #' @importFrom purrr map2
13 | #' @importFrom stringr str_sub
14 | replace_identified_by_x <-
15 |   function(sequence, positions_start, positions_end) {
16 |     sequence <- unique(sequence)
17 |     if (sequence == "" | is.na(sequence)) {
18 |       return(NA)
19 |     }
20 |     remove_na <- !is.na(positions_start) & !is.na(positions_end)
21 |     positions_start <- positions_start[remove_na]
22 |     positions_end <- positions_end[remove_na]
23 |     result <- purrr::map2(
24 |       .x = positions_start, .y = positions_end,
25 |       function(x, y) {
26 |         times <- y - x + 1
27 |         stringr::str_sub(sequence, start = x, end = y) <- paste(rep("x", times = times), collapse = "")
28 |         # this does not modify the global environment but only the
29 |         # environment of the parent function (replace_identified_by_x).
30 |         sequence <<- sequence
31 |       }
32 |     )
33 |     result[[length(result)]]
34 |   }
35 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - main
 9 |       - master
10 | 
11 | name: test-coverage
12 | 
13 | jobs:
14 |   test-coverage:
15 |     runs-on: macOS-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     steps:
19 |       - uses: actions/checkout@v2
20 | 
21 |       - uses: r-lib/actions/setup-r@v2
22 | 
23 |       - uses: r-lib/actions/setup-pandoc@v2
24 | 
25 |       - name: Query dependencies
26 |         run: |
27 |           install.packages('remotes')
28 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
29 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
30 |         shell: Rscript {0}
31 | 
32 |       - name: Cache R packages
33 |         uses: actions/cache@v2
34 |         with:
35 |           path: ${{ env.R_LIBS_USER }}
36 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
37 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
38 | 
39 |       - name: Install dependencies
40 |         run: |
41 |           install.packages(c("remotes"))
42 |           remotes::install_deps(dependencies = TRUE)
43 |           remotes::install_cran("covr")
44 |         shell: Rscript {0}
45 | 
46 |       - name: Test coverage
47 |         env:
48 |            TEST_PROTTI: true
49 |            BUILD_VIGNETTE: true
50 |         run: covr::codecov()
51 |         shell: Rscript {0}
52 | 


--------------------------------------------------------------------------------
/man/pval_distribution_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pval_distribution_plot.R
 3 | \name{pval_distribution_plot}
 4 | \alias{pval_distribution_plot}
 5 | \title{Plot histogram of p-value distribution}
 6 | \usage{
 7 | pval_distribution_plot(data, grouping, pval, facet_by = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{a data frame that contains at least grouping identifiers (precursor, peptide or
11 | protein) and p-values derived from any statistical test.}
12 | 
13 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor,
14 | peptide or protein identifiers. For each entry in this column there should be one unique p-value.
15 | That means the statistical test that created the p-value should have been performed on the
16 | level of the content of this column.}
17 | 
18 | \item{pval}{a numeric column in the \code{data} data frame that contains p-values.}
19 | 
20 | \item{facet_by}{optional, a character column that contains information by which the data should
21 | be faceted into multiple plots.}
22 | }
23 | \value{
24 | A histogram plot that shows the p-value distribution.
25 | }
26 | \description{
27 | Plots the distribution of p-values derived from any statistical test as a histogram.
28 | }
29 | \examples{
30 | set.seed(123) # Makes example reproducible
31 | 
32 | # Create example data
33 | data <- data.frame(
34 |   peptide = paste0("peptide", 1:1000),
35 |   pval = runif(n = 1000)
36 | )
37 | 
38 | # Plot p-values
39 | pval_distribution_plot(
40 |   data = data,
41 |   grouping = peptide,
42 |   pval = pval
43 | )
44 | }
45 | 


--------------------------------------------------------------------------------
/tests/testthat/test-queue_functions.R:
--------------------------------------------------------------------------------
 1 | context("test-queue_functions")
 2 | 
 3 | queue <- create_queue(
 4 |   date = c("200722"),
 5 |   instrument = c("EX1"),
 6 |   user = c("username"),
 7 |   measurement_type = c("DIA"),
 8 |   experiment_name = c("N01"),
 9 |   digestion = c("LiP", "tryptic control"),
10 |   treatment_type_1 = c("EDTA", "H2O"),
11 |   treatment_type_2 = c("Zeba", "unfiltered"),
12 |   treatment_dose_1 = c(10, 30, 60),
13 |   treatment_unit_1 = c("min"),
14 |   n_replicates = 4,
15 |   number_runs = FALSE,
16 |   organism = c("E. coli"),
17 |   exclude_combinations = list(list(
18 |     treatment_type_1 = c("H2O"),
19 |     treatment_type_2 = c("Zeba", "unfiltered"),
20 |     treatment_dose_1 = c(10, 30)
21 |   )),
22 |   inj_vol = c(2),
23 |   data_path = "D:\\2007_Data",
24 |   method_path = "C:\\Xcalibur\\methods\\username\\DIA_120min_41var_AGC200",
25 |   position_row = c("A", "B", "C", "D", "E", "F"),
26 |   position_column = 8,
27 |   blank_every_n = 4,
28 |   blank_position = "1-V1",
29 |   blank_method_path = "C:\\Xcalibur\\methods\\blank",
30 |   export = FALSE
31 | )
32 | 
33 | test_that("create_queue works", {
34 |   expect_is(queue, "data.frame")
35 |   expect_equal(ncol(queue), 21)
36 |   expect_equal(nrow(queue), 80)
37 | })
38 | 
39 | test_that("randomise_queue works", {
40 |   set.seed(123)
41 |   randomised_queue <- randomise_queue(data = queue, rows = 71:80)
42 |   expect_is(randomised_queue, "data.frame")
43 |   expect_equal(ncol(randomised_queue), 21)
44 |   expect_equal(nrow(randomised_queue), 80)
45 |   expect_equal(randomised_queue$Position[71:80], c("1-V1", "B8", "B5", "B3", "B6", "1-V1", "B7", "B4", "B1", "B2"))
46 | })
47 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 | 
 7 | name: pkgdown
 8 | 
 9 | jobs:
10 |   pkgdown:
11 |     runs-on: macOS-latest
12 |     env:
13 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
14 |       TEST_PROTTI: true
15 |       BUILD_VIGNETTE: true
16 |     steps:
17 |       - uses: actions/checkout@v4
18 | 
19 |       - uses: r-lib/actions/setup-r@v2
20 | 
21 |       - uses: r-lib/actions/setup-pandoc@v2
22 | 
23 |       - name: Query dependencies
24 |         run: |
25 |           install.packages('remotes')
26 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
27 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
28 |         shell: Rscript {0}
29 | 
30 |       - name: Cache R packages
31 |         uses: actions/cache@v2
32 |         with:
33 |           path: ${{ env.R_LIBS_USER }}
34 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
35 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
36 | 
37 |       - name: Install dependencies
38 |         run: |
39 |           remotes::install_deps(dependencies = TRUE)
40 |           install.packages("pkgdown", type = "binary")
41 |         shell: Rscript {0}
42 | 
43 |       - name: Install package
44 |         run: R CMD INSTALL .
45 | 
46 |       - name: Deploy package
47 |         run: |
48 |           git config --local user.email "actions@github.com"
49 |           git config --local user.name "GitHub Actions"
50 |           Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
51 | 


--------------------------------------------------------------------------------
/man/anova_protti.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/anova_protti.R
 3 | \name{anova_protti}
 4 | \alias{anova_protti}
 5 | \title{Perform ANOVA}
 6 | \usage{
 7 | anova_protti(data, grouping, condition, mean_ratio, sd, n)
 8 | }
 9 | \arguments{
10 | \item{data}{a data frame containing at least the input variables.}
11 | 
12 | \item{grouping}{a character column in the \code{data} data frame that contains precursor or
13 | peptide identifiers.}
14 | 
15 | \item{condition}{a character or numeric column in the \code{data} data frame that contains the
16 | conditions.}
17 | 
18 | \item{mean_ratio}{a numeric column in the \code{data} data frame that contains mean intensities
19 | or mean intensity ratios.}
20 | 
21 | \item{sd}{a numeric column in the \code{data} data frame that contains the standard deviation
22 | corresponding to the mean.}
23 | 
24 | \item{n}{a numeric column in the \code{data} data frame that contains the number of replicates
25 | for which the corresponding mean was calculated.}
26 | }
27 | \value{
28 | a data frame that contains the within group error (\code{ms_group}) and the between
29 | group error (\code{ms_error}), f statistic and p-values.
30 | }
31 | \description{
32 | Performs an ANOVA statistical test
33 | }
34 | \examples{
35 | data <- data.frame(
36 |   precursor = c("A", "A", "A", "B", "B", "B"),
37 |   condition = c("C1", "C2", "C3", "C1", "C2", "C3"),
38 |   mean = c(10, 12, 20, 11, 12, 8),
39 |   sd = c(2, 1, 1.5, 1, 2, 4),
40 |   n = c(4, 4, 4, 4, 4, 4)
41 | )
42 | 
43 | anova_protti(
44 |   data,
45 |   grouping = precursor,
46 |   condition = condition,
47 |   mean = mean,
48 |   sd = sd,
49 |   n = n
50 | )
51 | }
52 | 


--------------------------------------------------------------------------------
/man/fetch_uniprot_proteome.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_uniprot_proteome.R
 3 | \name{fetch_uniprot_proteome}
 4 | \alias{fetch_uniprot_proteome}
 5 | \title{Fetch proteome data from UniProt}
 6 | \usage{
 7 | fetch_uniprot_proteome(
 8 |   organism_id,
 9 |   columns = c("accession"),
10 |   reviewed = TRUE,
11 |   timeout = 120,
12 |   max_tries = 5
13 | )
14 | }
15 | \arguments{
16 | \item{organism_id}{a numeric value that specifies the NCBI taxonomy identifier (TaxId) for an
17 | organism.}
18 | 
19 | \item{columns}{a character vector of metadata columns that should be imported from UniProt (all
20 | possible columns can be found \href{https://www.uniprot.org/help/return_fields}{here}. For
21 | cross-referenced database provide the database name with the prefix "xref_", e.g. \code{"xref_pdb"}).
22 | Note: Not more than one or two columns should be selected otherwise the function will not be
23 | able to efficiently retrieve the information. If more information is needed, \code{fetch_uniprot()}
24 | can be used with the IDs retrieved by this function.}
25 | 
26 | \item{reviewed}{a logical value that determines if only reviewed protein entries will be retrieved.}
27 | 
28 | \item{timeout}{a numeric value specifying the time in seconds until the download times out.
29 | The default is 120 seconds.}
30 | 
31 | \item{max_tries}{a numeric value that specifies the number of times the function tries to download
32 | the data in case an error occurs. The default is 5.}
33 | }
34 | \value{
35 | A data frame that contains all protein metadata specified in \code{columns} for the
36 | organism of choice.
37 | }
38 | \description{
39 | Fetches proteome data from UniProt for the provided organism ID.
40 | }
41 | \examples{
42 | \donttest{
43 | head(fetch_uniprot_proteome(9606))
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/man/fetch_mobidb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_mobidb.R
 3 | \name{fetch_mobidb}
 4 | \alias{fetch_mobidb}
 5 | \title{Fetch protein disorder and mobility information from MobiDB}
 6 | \usage{
 7 | fetch_mobidb(
 8 |   uniprot_ids = NULL,
 9 |   organism_id = NULL,
10 |   show_progress = TRUE,
11 |   timeout = 60,
12 |   max_tries = 2
13 | )
14 | }
15 | \arguments{
16 | \item{uniprot_ids}{optional, a character vector of UniProt identifiers for which information
17 | should be fetched. This argument is mutually exclusive to the \code{organism_id} argument.}
18 | 
19 | \item{organism_id}{optional, a character value providing the NCBI taxonomy identifier
20 | (TaxId) of an organism for which all available information should be retrieved. This
21 | argument is mutually exclusive to the \code{uniprot_ids} argument.}
22 | 
23 | \item{show_progress}{a logical value; if \code{TRUE} a progress bar will be shown.
24 | Default is \code{TRUE}.}
25 | 
26 | \item{timeout}{a numeric value specifying the time in seconds until the download of an organism
27 | archive times out. The default is 60 seconds.}
28 | 
29 | \item{max_tries}{a numeric value that specifies the number of times the function tries to download
30 | the data in case an error occurs. The default is 2.}
31 | }
32 | \value{
33 | A data frame that contains start and end positions for disordered and flexible protein
34 | regions. The \code{feature} column contains information on the source of this
35 | annotation. More information on the source can be found
36 | \href{https://mobidb.org/about/mobidb}{here}.
37 | }
38 | \description{
39 | Fetches information about disordered and flexible protein regions from MobiDB.
40 | }
41 | \examples{
42 | \donttest{
43 | fetch_mobidb(
44 |   uniprot_ids = c("P0A799", "P62707")
45 | )
46 | }
47 | }
48 | 


--------------------------------------------------------------------------------
/man/assign_peptide_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assign_peptide_type.R
 3 | \name{assign_peptide_type}
 4 | \alias{assign_peptide_type}
 5 | \title{Assign peptide type}
 6 | \usage{
 7 | assign_peptide_type(
 8 |   data,
 9 |   aa_before = aa_before,
10 |   last_aa = last_aa,
11 |   aa_after = aa_after
12 | )
13 | }
14 | \arguments{
15 | \item{data}{a data frame containing at least information about the preceding and C-terminal
16 | amino acids of peptides.}
17 | 
18 | \item{aa_before}{a character column in the \code{data} data frame that contains the preceding amino
19 | acid as one letter code.}
20 | 
21 | \item{last_aa}{a character column in the \code{data} data frame that contains the C-terminal amino
22 | acid as one letter code.}
23 | 
24 | \item{aa_after}{a character column in the \code{data} data frame that contains the following amino
25 | acid as one letter code.}
26 | }
27 | \value{
28 | A data frame that contains the input data and an additional column with the peptide
29 | type information.
30 | }
31 | \description{
32 | Based on preceding and C-terminal amino acid, the peptide type of a given peptide is assigned.
33 | Peptides with preceding and C-terminal lysine or arginine are considered fully-tryptic. If a
34 | peptide is located at the N- or C-terminus of a protein and fulfills the criterion to be
35 | fully-tryptic otherwise, it is also considered as fully-tryptic. Peptides that only fulfill the
36 | criterion on one terminus are semi-tryptic peptides. Lastly, peptides that are not fulfilling
37 | the criteria for both termini are non-tryptic peptides.
38 | }
39 | \examples{
40 | data <- data.frame(
41 |   aa_before = c("K", "S", "T"),
42 |   last_aa = c("R", "K", "Y"),
43 |   aa_after = c("T", "R", "T")
44 | )
45 | 
46 | assign_peptide_type(data, aa_before, last_aa, aa_after)
47 | }
48 | 


--------------------------------------------------------------------------------
/man/ptsi_pgk.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{ptsi_pgk}
 5 | \alias{ptsi_pgk}
 6 | \title{Structural analysis example data}
 7 | \format{
 8 | A data frame containing differential abundances and adjusted p-values for
 9 | peptides/precursors of two proteins.
10 | }
11 | \source{
12 | Cappelletti V, Hauser T, Piazza I, Pepelnjak M, Malinovska L, Fuhrer T, Li Y, Dörig C,
13 | Boersema P, Gillet L, Grossbach J, Dugourd A, Saez-Rodriguez J, Beyer A, Zamboni N, Caflisch A,
14 | de Souza N, Picotti P. Dynamic 3D proteomes reveal protein functional alterations at high
15 | resolution in situ. Cell. 2021 Jan 21;184(2):545-559.e22. \doi{10.1016/j.cell.2020.12.021}.
16 | Epub 2020 Dec 23. PMID: 33357446; PMCID: PMC7836100.
17 | }
18 | \usage{
19 | ptsi_pgk
20 | }
21 | \description{
22 | Example data used for the vignette about structural analysis. The data was obtained from
23 | Cappelletti et al. 2021 (\doi{10.1016/j.cell.2020.12.021})
24 | and corresponds to two separate experiments. Both experiments were limited proteolysis coupled to
25 | mass spectrometry (LiP-MS) experiments conducted on purified proteins. The first protein is
26 | phosphoglycerate kinase 1 (pgk) and it was treated with 25mM 3-phosphoglyceric acid (3PG).
27 | The second protein is phosphoenolpyruvate-protein phosphotransferase (ptsI) and it was treated
28 | with 25mM fructose 1,6-bisphosphate (FBP). From both experiments only peptides belonging to
29 | either protein were used for this data set. The ptsI data set contains precursor level data
30 | while the pgk data set contains peptide level data. The pgk data can be obtained from
31 | supplementary table 3 from the tab named "pgk+3PG". The ptsI data is only included as raw data
32 | and was analysed using the functions of this package.
33 | }
34 | \keyword{datasets}
35 | 


--------------------------------------------------------------------------------
/man/try_query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/try_query.R
 3 | \name{try_query}
 4 | \alias{try_query}
 5 | \title{Query from URL}
 6 | \usage{
 7 | try_query(
 8 |   url,
 9 |   max_tries = 5,
10 |   silent = TRUE,
11 |   type = "text/tab-separated-values",
12 |   timeout = 60,
13 |   accept = NULL,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{url}{a character value of a URL to the website that contains the table that should be
19 | downloaded.}
20 | 
21 | \item{max_tries}{a numeric value that specifies the number of times the function tries to download
22 | the data in case an error occurs. Default is 5.}
23 | 
24 | \item{silent}{a logical value that specifies if individual messages are printed after each try
25 | that failed.}
26 | 
27 | \item{type}{a character value that specifies the type of data at the target URL. Options are
28 | all options that can be supplied to httr::content, these include e.g.
29 | "text/tab-separated-values", "application/json" and "text/csv". Default is "text/tab-separated-values".}
30 | 
31 | \item{timeout}{a numeric value that specifies the maximum request time. Default is 60 seconds.}
32 | 
33 | \item{accept}{a character value that specifies the type of data that should be sent by the API if
34 | it uses content negotiation. The default is NULL and it should only be set for APIs that use
35 | content negotiation.}
36 | 
37 | \item{...}{other parameters supplied to the parsing function used by httr::content.}
38 | }
39 | \value{
40 | A data frame that contains the table from the url.
41 | }
42 | \description{
43 | Downloads data table from URL. If an error occurs during the query (for example due to no
44 | connection) the function waits 3 seconds and tries again. If no result could be obtained
45 | after the given number of tries a message indicating the problem is returned.
46 | }
47 | 


--------------------------------------------------------------------------------
/man/find_all_subs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_all_subs.R
 3 | \name{find_all_subs}
 4 | \alias{find_all_subs}
 5 | \title{Find all sub IDs of an ID in a network}
 6 | \usage{
 7 | find_all_subs(
 8 |   data,
 9 |   ids,
10 |   main_id = id,
11 |   type = type,
12 |   accepted_types = "is_a",
13 |   exclude_parent_id = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{data}{a data frame that contains relational information on IDs (main_id) their sub
18 | IDs (sub_id) and their relationship (type). For ChEBI this data frame can be obtained by calling
19 | \code{fetch_chebi(relation = TRUE)}. For ECO data it can be obtained by calling \code{fetch_eco(relation = TRUE)}.}
20 | 
21 | \item{ids}{a character vector of IDs for which sub IDs should be searched.}
22 | 
23 | \item{main_id}{a character or integer column containing IDs. Default is \code{id} for ChEBI IDs.}
24 | 
25 | \item{type}{a character column that contains the type of interactions. Default is \code{type} for ChEBI IDs.}
26 | 
27 | \item{accepted_types}{a character vector containing the accepted types of relationships that should be considered
28 | for the search. It is possible to use "all" relationships. The default type is "is_a". A list of
29 | possible relationships for e.g. ChEBI IDs can be found
30 | \href{https://docs.google.com/document/d/1_w-DwBdCCOh1gMeeP6yqGzcnkpbHYOa3AGSODe5epcg/edit#heading=h.hnsqoqu978s5}{here}.}
31 | 
32 | \item{exclude_parent_id}{a logical value that specifies if the parent ID should be excluded from
33 | the returned list. Default is \code{FALSE}.}
34 | }
35 | \value{
36 | A list of character vectors containing the provided ID and all of its sub IDs. It
37 | contains one element per input ID.
38 | }
39 | \description{
40 | For a given ID, find all sub IDs and their sub IDs etc. The type of
41 | relationship can be selected too. This is a helper function for other functions.
42 | }
43 | 


--------------------------------------------------------------------------------
/man/qc_sequence_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_sequence_coverage.R
 3 | \name{qc_sequence_coverage}
 4 | \alias{qc_sequence_coverage}
 5 | \title{Protein coverage distribution}
 6 | \usage{
 7 | qc_sequence_coverage(
 8 |   data,
 9 |   protein_identifier,
10 |   coverage,
11 |   sample = NULL,
12 |   interactive = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{data}{a data frame that contains at least the input variables.}
17 | 
18 | \item{protein_identifier}{a character column in the \code{data} data frame that contains protein
19 | identifiers.}
20 | 
21 | \item{coverage}{a numeric column in the \code{data} data frame that contains protein coverage
22 | in percent. This information can be obtained using the \code{\link{sequence_coverage}} function.}
23 | 
24 | \item{sample}{optional, a character or factor column in the \code{data} data frame that contains sample names.
25 | Please only provide this argument if you want to facet the distribution plot by sample
26 | otherwise do not provide this argument.}
27 | 
28 | \item{interactive}{a logical value that specifies whether the plot should be interactive
29 | (default is FALSE).}
30 | }
31 | \value{
32 | A protein coverage histogram with 5 percent binwidth. The vertical dotted line
33 | indicates the median.
34 | }
35 | \description{
36 | Plots the distribution of protein coverages in a histogram.
37 | }
38 | \examples{
39 | set.seed(123) # Makes example reproducible
40 | 
41 | # Create example data
42 | data <- create_synthetic_data(
43 |   n_proteins = 100,
44 |   frac_change = 0.05,
45 |   n_replicates = 3,
46 |   n_conditions = 2,
47 |   method = "effect_random"
48 | )
49 | 
50 | # Plot sequence coverage
51 | qc_sequence_coverage(
52 |   data = data,
53 |   protein_identifier = protein,
54 |   coverage = coverage
55 | )
56 | }
57 | \seealso{
58 | \code{\link{sequence_coverage}}
59 | }
60 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches:
 8 |       - '*'
 9 | 
10 | name: R-CMD-check
11 | 
12 | jobs:
13 |   R-CMD-check:
14 |     runs-on: ${{ matrix.config.os }}
15 | 
16 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
17 | 
18 |     strategy:
19 |       fail-fast: false # keep the remaining matrix jobs running if one of them fails
20 |       matrix:
21 |         config:
22 |           - {os: macos-latest,   r: 'release'}
23 |           - {os: windows-latest, r: 'release'}
24 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
25 |           - {os: ubuntu-latest,   r: 'release'}
26 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
27 | 
28 |     env:
29 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
30 |       R_KEEP_PKG_SOURCE: yes
31 |       TEST_PROTTI: true
32 |       BUILD_VIGNETTE: true
33 | 
34 |     steps:
35 |       - uses: actions/checkout@v4 # same major version as used in format-code.yml
36 | 
37 |       - uses: r-lib/actions/setup-pandoc@v2
38 | 
39 |       - uses: r-lib/actions/setup-r@v2
40 |         with:
41 |           r-version: ${{ matrix.config.r }}
42 |           http-user-agent: ${{ matrix.config.http-user-agent }}
43 |           use-public-rspm: true
44 | 
45 |       - uses: r-lib/actions/setup-r-dependencies@v2
46 |         with:
47 |           extra-packages:
48 |               any::rcmdcheck
49 |           needs: check
50 | 
51 |       # - name: Install remotes and lme4 package (disabled step, kept for reference)
52 |       #   run: |
53 |       #     Rscript -e 'install.packages("remotes", lib=Sys.getenv("R_LIB_FOR_PAK"))'
54 |       #     Rscript -e 'remotes::install_cran("lme4", dependencies = TRUE, upgrade = "always")'
55 | 
56 |       - uses: r-lib/actions/check-r-package@v2
57 |         with:
58 |           upload-snapshots: true
59 | 


--------------------------------------------------------------------------------
/R/ttest_protti.R:
--------------------------------------------------------------------------------
 1 | #' Perform Welch's t-test
 2 | #'
 3 | #' Runs a Welch's t-test on summary statistics (means, standard deviations and replicate
 4 | #' numbers) of two groups and calculates the corresponding two-sided p-values.
 5 | #'
 6 | #' @param mean1 a numeric vector that contains the means of group1.
 7 | #' @param mean2 a numeric vector that contains the means of group2.
 8 | #' @param sd1 a numeric vector that contains the standard deviations of group1.
 9 | #' @param sd2 a numeric vector that contains the standard deviations of group2.
10 | #' @param n1 a numeric vector that contains the number of replicates used for the calculation of
11 | #' each mean and standard deviation of group1.
12 | #' @param n2 a numeric vector that contains the number of replicates used for the calculation of
13 | #' each mean and standard deviation of group2.
14 | #' @param log_values a logical value that indicates if values are log transformed. If
15 | #' \code{TRUE}, \code{diff} is the difference \code{mean1 - mean2}, otherwise it is the ratio
16 | #' \code{mean1 / mean2}. Default is \code{log_values = TRUE}.
17 | #'
18 | #' @return A data frame with the columns \code{diff}, \code{std_error}, \code{t_statistic} and
19 | #' \code{pval}.
20 | #' @importFrom stats pt
21 | #' @export
22 | #'
23 | #' @examples
24 | #' ttest_protti(
25 | #'   mean1 = 10,
26 | #'   mean2 = 15.5,
27 | #'   sd1 = 1,
28 | #'   sd2 = 0.5,
29 | #'   n1 = 3,
30 | #'   n2 = 3
31 | #' )
32 | ttest_protti <- function(mean1, mean2, sd1, sd2, n1, n2, log_values = TRUE) {
33 |   # standard error of the difference between both group means
34 |   std_error <- sqrt((sd1^2 / n1) + (sd2^2 / n2))
35 |   # Welch-Satterthwaite equation to estimate the degrees of freedom
36 |   df_numerator <- ((sd1^2 / n1) + (sd2^2 / n2))^2
37 |   df_denominator <- sd1^4 / (n1^2 * (n1 - 1)) + sd2^4 / (n2^2 * (n2 - 1))
38 |   deg_freedom <- df_numerator / df_denominator
39 |   # fold change calculation: difference for log values, ratio otherwise
40 |   diff <- if (log_values == TRUE) mean1 - mean2 else mean1 / mean2
41 |   # t statistic and two-sided p-value
42 |   t_stat <- diff / std_error
43 |   pval <- 2 * pt(-abs(t_stat), deg_freedom)
44 |   result <- data.frame(cbind(diff, std_error, t_stat, pval))
45 |   colnames(result) <- c("diff", "std_error", "t_statistic", "pval")
46 |   result
47 | }
48 | 


--------------------------------------------------------------------------------
/data-raw/ptsi_pgk.R:
--------------------------------------------------------------------------------
 1 | # library(tidyverse)
 2 | # library(protti)
 3 | #
 4 | # # Source: Cappelletti V, Hauser T, Piazza I, Pepelnjak M, Malinovska L, Fuhrer T, Li Y, Dörig C, Boersema P, Gillet L, Grossbach J, Dugourd A, Saez-Rodriguez J, Beyer A, Zamboni N, Caflisch A, de Souza N, Picotti P. Dynamic 3D proteomes reveal protein functional alterations at high resolution in situ. Cell. 2021 Jan 21;184(2):545-559.e22. doi: 10.1016/j.cell.2020.12.021. Epub 2020 Dec 23. PMID: 33357446; PMCID: PMC7836100.
 5 | #
 6 | # # The pgk data set is from supplementary table 3, the tab is called "pgk+3PG". The data does not contain precursor level data since charge states are
 7 | # # missing from peptides.
 8 | # pgk <- read_protti("pgk.csv")
 9 | #
10 | # # The ptsI data set is not part of the supplementary tables. The raw data is included in the PRIDE repository. We exported the Spectronaut report
11 | # # and analysed that data using protti's standard pipeline.
12 | # ptsi <- read_protti("ptsi.csv")
13 | #
14 | # # pgk data tidying
15 | #
16 | # pgk_tidy <- pgk %>%
17 | #   filter(concentration == "25mM") %>% # filter to only retain the 25 mM concentration
18 | #   rename(eg_precursor_id = peptide_sequence,
19 | #          pg_protein_accessions = uniprot_id,
20 | #          diff = log2fc,
21 | #          adj_pval = qvalue) %>%
22 | #   distinct(eg_precursor_id,
23 | #            diff,
24 | #            adj_pval,
25 | #            pg_protein_accessions) %>%
26 | #   mutate(pep_stripped_sequence = str_remove_all(eg_precursor_id, pattern = "(?<=\\[)[\\w\\(\\)\\s\\-]+(?=\\])")) %>% # removes "[Carbamidomethyl]" from peptides.
27 | #   mutate(pep_stripped_sequence = str_remove_all(pep_stripped_sequence, pattern = "[\\[\\]]"))
28 | #
29 | # # ptsi data tidying
30 | #
31 | # ptsi_tidy <- ptsi %>%
32 | #   rename(eg_precursor_id = precursor_id)
33 | #
34 | # # combining data
35 | #
36 | # ptsi_pgk <- pgk_tidy %>%
37 | #   bind_rows(ptsi_tidy)
38 | #
39 | # usethis::use_data(ptsi_pgk, overwrite = TRUE)
40 | 


--------------------------------------------------------------------------------
/data-raw/protti_colours.R:
--------------------------------------------------------------------------------
  1 | protti_colours <- c( # character vector of the 100 hex colour codes listed below
  2 |   "#5680C1",
  3 |   "#B96DAD",
  4 |   "#64CACA",
  5 |   "#81ABE9",
  6 |   "#F6B8D1",
  7 |   "#99F1E4",
  8 |   "#9AD1FF",
  9 |   "#548BDF",
 10 |   "#A55098",
 11 |   "#3EB6B6",
 12 |   "#87AEE8",
 13 |   "#CA91C1",
 14 |   "#A4E0E0",
 15 |   "#1D4F9A",
 16 |   "#D7ACD2",
 17 |   "#49C1C1",
 18 |   "#00A2D9",
 19 |   "#6B77BF",
 20 |   "#00C2D4",
 21 |   "#816DB8",
 22 |   "#00DCB5",
 23 |   "#9561AD",
 24 |   "#95EF8C",
 25 |   "#A6549C",
 26 |   "#F9F871",
 27 |   "#B44688",
 28 |   "#65D8C2",
 29 |   "#40B4D5",
 30 |   "#7AE4B2",
 31 |   "#529AD4",
 32 |   "#9DEE9C",
 33 |   "#7B7BC0",
 34 |   "#C8F585",
 35 |   "#995997",
 36 |   "#7368B8",
 37 |   "#A03960",
 38 |   "#DA5D8C",
 39 |   "#077AC1",
 40 |   "#C793BD",
 41 |   "#0086B3",
 42 |   "#FFE6FF",
 43 |   "#00C897",
 44 |   "#B8A6B4",
 45 |   "#8292B3",
 46 |   "#B38DAC",
 47 |   "#9CCDCD",
 48 |   "#A7B6D2",
 49 |   "#E4CBD4",
 50 |   "#C8EDE7",
 51 |   "#C1D5E9",
 52 |   "#899BC4",
 53 |   "#A6739D",
 54 |   "#76BFBF",
 55 |   "#ABB9D3",
 56 |   "#C3A9BE",
 57 |   "#C7E0E0",
 58 |   "#4667AC",
 59 |   "#D0BECE",
 60 |   "#87C7C7",
 61 |   "#3BB1E7",
 62 |   "#888CAF",
 63 |   "#12CEE1",
 64 |   "#8F87AB",
 65 |   "#12E6BD",
 66 |   "#9980A7",
 67 |   "#C2EABF",
 68 |   "#A5779F",
 69 |   "#F8F7BB",
 70 |   "#AF7092",
 71 |   "#A2D8CC",
 72 |   "#85BCD1",
 73 |   "#B4E1C9",
 74 |   "#8BA6C5",
 75 |   "#C7E9C7",
 76 |   "#9191B1",
 77 |   "#DBF2C1",
 78 |   "#9E789C",
 79 |   "#8682A9",
 80 |   "#AA5C76",
 81 |   "#C4899B",
 82 |   "#428DD1",
 83 |   "#C1AABC",
 84 |   "#039ACD",
 85 |   "#F7EDF7",
 86 |   "#02D5A1",
 87 |   "#BDB5BB",
 88 |   "#516C9A",
 89 |   "#9B5C91",
 90 |   "#4BAAAA",
 91 |   "#6F8FC0",
 92 |   "#D397B0",
 93 |   "#7BCABE",
 94 |   "#7EAFD7",
 95 |   "#4C75B8",
 96 |   "#844A7B",
 97 |   "#3E9898",
 98 |   "#7492C0",
 99 |   "#A97AA1",
100 |   "#87BBBB",
101 |   "#1E4381"
102 | )
103 | 
104 | usethis::use_data(protti_colours, overwrite = TRUE) # presumably (re)writes data/protti_colours.rda, replacing an existing file
105 | 


--------------------------------------------------------------------------------
/R/fetch_go.R:
--------------------------------------------------------------------------------
 1 | #' Fetch gene ontology information from geneontology.org
 2 | #'
 3 | #' Fetches gene ontology data from geneontology.org for the provided organism ID. If no
 4 | #' internet connection is available, the download fails or the file does not have the expected
 5 | #' number of columns, a message is printed and \code{NULL} is returned invisibly.
 6 | #'
 7 | #' @param organism_id a character value NCBI taxonomy identifier of an organism (TaxId).
 8 | #' Possible inputs include only: "9606" (Human), "559292" (Yeast) and "83333" (E. coli).
 9 | #'
10 | #' @return A data frame that contains gene ontology mappings to UniProt or SGD IDs. The original
11 | #' file is a .GAF file. A detailed description of all columns can be found here:
12 | #' http://geneontology.org/docs/go-annotation-file-gaf-format-2.1/
13 | #' @export
14 | #'
15 | #' @examples
16 | #' \donttest{
17 | #' go <- fetch_go("9606")
18 | #'
19 | #' head(go)
20 | #' }
21 | fetch_go <- function(organism_id) {
22 |   if (!curl::has_internet()) {
23 |     message("No internet connection.")
24 |     return(invisible(NULL))
25 |   }
26 | 
27 |   organism_id <- match.arg(organism_id, c("9606", "559292", "83333"))
28 | 
29 |   organism_url <- switch(organism_id,
30 |     "9606" = "http://current.geneontology.org/annotations/goa_human.gaf.gz",
31 |     "559292" = "http://current.geneontology.org/annotations/sgd.gaf.gz",
32 |     "83333" = "http://current.geneontology.org/annotations/ecocyc.gaf.gz"
33 |   )
34 | 
35 |   # The connection object is not released by readLines(), so it is closed explicitly when the
36 |   # function exits (also if the download fails).
37 |   go_connection <- gzcon(url(organism_url))
38 |   on.exit(close(go_connection), add = TRUE)
39 | 
40 |   # Errors and warnings during the download are reported as a message instead of failing.
41 |   go_download <- tryCatch(readLines(go_connection),
42 |     error = function(e) {
43 |       message(conditionMessage(e))
44 |       NULL
45 |     },
46 |     warning = function(w) {
47 |       message(conditionMessage(w))
48 |       NULL
49 |     }
50 |   )
51 |   if (is.null(go_download)) {
52 |     return(invisible(NULL))
53 |   }
54 | 
55 |   # Parse the downloaded lines; lines starting with "!" are header comments of the file.
56 |   go_text <- textConnection(go_download)
57 |   on.exit(close(go_text), add = TRUE)
58 |   go <- utils::read.delim(go_text,
59 |     quote = "",
60 |     stringsAsFactors = FALSE,
61 |     comment.char = "!",
62 |     header = FALSE
63 |   )
64 | 
65 |   go_columns <- c(
66 |     "db", "db_id", "symbol", "qualifier", "go_id", "db_reference",
67 |     "evidence", "with_from", "ontology", "name", "synonyme",
68 |     "type", "taxon", "date", "assigned_by", "annotation_extension",
69 |     "gene_product_form_id"
70 |   )
71 |   # Anything that does not have the expected number of columns cannot be named below.
72 |   if (ncol(go) != length(go_columns)) {
73 |     message("The downloaded file does not have the expected number of columns.")
74 |     return(invisible(NULL))
75 |   }
76 |   colnames(go) <- go_columns
77 |   go
78 | }
79 | 


--------------------------------------------------------------------------------
/R/find_chebis.R:
--------------------------------------------------------------------------------
 1 | #' Find ChEBI IDs for name patterns
 2 | #'
 3 | #' Search for chebi IDs that match a specific name pattern. A list of corresponding ChEBI IDs is
 4 | #' returned. This is a helper function for other functions. It requires the \code{stringi}
 5 | #' package; if it is not installed a message is printed and \code{NULL} is returned invisibly.
 6 | #'
 7 | #' @param chebi_data a data frame that contains at least information on ChEBI IDs (id) and their
 8 | #' names (name). This data frame can be obtained by calling \code{fetch_chebi()}. Ideally this
 9 | #' should be subsetted to only contain molecules of a specific type e.g. metals. This can be
10 | #' achieved by calling \code{find_all_subs} with a general ID such as "25213" (Metal cation) and
11 | #' then subset the complete ChEBI database to only include the returned sub-IDs. Using a subsetted
12 | #' database ensures better search results.
13 | #' @param pattern a character vector that contains names or name patterns of molecules. Name
14 | #' patterns can be for example obtained with the \code{split_metal_name} function. Patterns are
15 | #' matched as case-insensitive regular expressions.
16 | #'
17 | #' @return A list of character vectors containing ChEBI IDs that have a name matching the supplied
18 | #' pattern. It contains one element per pattern.
19 | #' @importFrom dplyr distinct
20 | #' @importFrom magrittr %>%
21 | #' @importFrom purrr map
22 | #' @importFrom stringr str_detect regex
23 | #' @importFrom rlang .data
24 | #' @importFrom stats na.omit
25 | find_chebis <- function(chebi_data, pattern) {
26 |   if (!requireNamespace("stringi", quietly = TRUE)) {
27 |     # message() has no `call.` argument; passing one would append "FALSE" to the printed text.
28 |     message("Package \"stringi\" is needed for this function to work. Please install it.")
29 |     return(invisible(NULL))
30 |   }
31 |   # Only unique ID and name combinations are searched
32 |   data <- chebi_data %>%
33 |     dplyr::distinct(.data$id, .data$name)
34 | 
35 |   purrr::map(pattern, function(x) {
36 |     # IDs of matching names are kept, non-matching names become "" and are removed afterwards
37 |     # together with missing values and duplicates.
38 |     stringi::stri_remove_empty(stats::na.omit(unique(
39 |       ifelse(
40 |         stringr::str_detect(data$name,
41 |           pattern = stringr::regex(
42 |             x,
43 |             ignore_case = TRUE
44 |           )
45 |         ),
46 |         data$id,
47 |         ""
48 |       )
49 |     )))
50 |   })
51 | }
52 | 


--------------------------------------------------------------------------------
/man/qc_median_intensities.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_median_intensities.R
 3 | \name{qc_median_intensities}
 4 | \alias{qc_median_intensities}
 5 | \title{Median run intensities}
 6 | \usage{
 7 | qc_median_intensities(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   intensity,
12 |   plot = TRUE,
13 |   interactive = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{data}{a data frame that contains at least the input variables.}
18 | 
19 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample name.}
20 | 
21 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor or
22 | peptide identifiers.}
23 | 
24 | \item{intensity}{a numeric column in the \code{data} data frame that contains intensity values.
25 | The intensity should be ideally log2 transformed, but also non-transformed values can be used.}
26 | 
27 | \item{plot}{a logical value that indicates whether the result should be plotted.}
28 | 
29 | \item{interactive}{a logical value that specifies whether the plot should be interactive
30 | (default is FALSE).}
31 | }
32 | \value{
33 | A plot that displays median intensity over all samples. If \code{plot = FALSE} a data
34 | frame containing median intensities is returned.
35 | }
36 | \description{
37 | Median intensities per run are returned either as a plot or a table.
38 | }
39 | \examples{
40 | set.seed(123) # Makes example reproducible
41 | 
42 | # Create example data
43 | data <- create_synthetic_data(
44 |   n_proteins = 100,
45 |   frac_change = 0.05,
46 |   n_replicates = 3,
47 |   n_conditions = 2,
48 |   method = "effect_random"
49 | )
50 | 
51 | # Calculate median intensities
52 | qc_median_intensities(
53 |   data = data,
54 |   sample = sample,
55 |   grouping = peptide,
56 |   intensity = peptide_intensity_missing,
57 |   plot = FALSE
58 | )
59 | 
60 | # Plot median intensities
61 | qc_median_intensities(
62 |   data = data,
63 |   sample = sample,
64 |   grouping = peptide,
65 |   intensity = peptide_intensity_missing,
66 |   plot = TRUE
67 | )
68 | }
69 | 


--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
 1 | # Platform
 2 | 
 3 | |field    |value                                      |
 4 | |:--------|:------------------------------------------|
 5 | |version  |R version 4.3.1 (2023-06-16)               |
 6 | |os       |macOS Sonoma 14.2.1                        |
 7 | |system   |aarch64, darwin20                          |
 8 | |ui       |RStudio                                    |
 9 | |language |(EN)                                       |
10 | |collate  |en_US.UTF-8                                |
11 | |ctype    |en_US.UTF-8                                |
12 | |tz       |Europe/Zurich                              |
13 | |date     |2024-03-27                                 |
14 | |rstudio  |2023.06.1+524 Mountain Hydrangea (desktop) |
15 | |pandoc   |NA                                         |
16 | 
17 | # Dependencies
18 | 
19 | |package     |old   |new    |Δ  |
20 | |:-----------|:-----|:------|:--|
21 | |protti      |0.7.0 |0.8.0  |*  |
22 | |bslib       |NA    |0.6.2  |*  |
23 | |crosstalk   |NA    |1.2.1  |*  |
24 | |curl        |NA    |5.2.1  |*  |
25 | |data.table  |NA    |1.15.2 |*  |
26 | |digest      |NA    |0.6.35 |*  |
27 | |dplyr       |NA    |1.1.4  |*  |
28 | |fontawesome |NA    |0.5.2  |*  |
29 | |ggplot2     |NA    |3.5.0  |*  |
30 | |ggrepel     |NA    |0.9.5  |*  |
31 | |gtable      |NA    |0.3.4  |*  |
32 | |htmltools   |NA    |0.5.8  |*  |
33 | |htmlwidgets |NA    |1.6.4  |*  |
34 | |labeling    |NA    |0.4.3  |*  |
35 | |later       |NA    |1.3.2  |*  |
36 | |lubridate   |NA    |1.9.3  |*  |
37 | |plotly      |NA    |4.10.4 |*  |
38 | |R.oo        |NA    |1.26.0 |*  |
39 | |R.utils     |NA    |2.12.3 |*  |
40 | |Rcpp        |NA    |1.0.12 |*  |
41 | |readr       |NA    |2.1.5  |*  |
42 | |rmarkdown   |NA    |2.26   |*  |
43 | |sass        |NA    |0.4.9  |*  |
44 | |scales      |NA    |1.3.0  |*  |
45 | |snakecase   |NA    |0.11.1 |*  |
46 | |stringi     |NA    |1.8.3  |*  |
47 | |stringr     |NA    |1.5.1  |*  |
48 | |tidyr       |NA    |1.3.1  |*  |
49 | |tidyselect  |NA    |1.2.1  |*  |
50 | |timechange  |NA    |0.3.0  |*  |
51 | |tinytex     |NA    |0.50   |*  |
52 | |vroom       |NA    |1.6.5  |*  |
53 | |xfun        |NA    |0.43   |*  |
54 | 
55 | # Revdeps
56 | 
57 | 


--------------------------------------------------------------------------------
/man/fetch_uniprot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_uniprot.R
 3 | \name{fetch_uniprot}
 4 | \alias{fetch_uniprot}
 5 | \title{Fetch protein data from UniProt}
 6 | \usage{
 7 | fetch_uniprot(
 8 |   uniprot_ids,
 9 |   columns = c("protein_name", "length", "sequence", "gene_names", "xref_geneid",
10 |     "xref_string", "go_f", "go_p", "go_c", "cc_interaction", "ft_act_site", "ft_binding",
11 |     "cc_cofactor", "cc_catalytic_activity", "xref_pdb"),
12 |   batchsize = 200,
13 |   max_tries = 10,
14 |   timeout = 20,
15 |   show_progress = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{uniprot_ids}{a character vector of UniProt accession numbers.}
20 | 
21 | \item{columns}{a character vector of metadata columns that should be imported from UniProt (all
22 | possible columns can be found \href{https://www.uniprot.org/help/return_fields}{here}. For
23 | cross-referenced database provide the database name with the prefix "xref_", e.g. \code{"xref_pdb"})}
24 | 
25 | \item{batchsize}{a numeric value that specifies the number of proteins processed in a
26 | single query. Default and max value is 200.}
27 | 
28 | \item{max_tries}{a numeric value that specifies the number of times the function tries to download
29 | the data in case an error occurs.}
30 | 
31 | \item{timeout}{a numeric value that specifies the maximum request time per try. Default is 20 seconds.}
32 | 
33 | \item{show_progress}{a logical value that determines if a progress bar will be shown. Default
34 | is TRUE.}
35 | }
36 | \value{
37 | A data frame that contains all protein metadata specified in \code{columns} for the
38 | proteins provided. The \code{input_id} column contains the provided UniProt IDs. If an invalid ID
39 | was provided that contains a valid UniProt ID, the valid portion of the ID is still fetched and
40 | present in the \code{accession} column, while the \code{input_id} column contains the original not completely
41 | valid ID.
42 | }
43 | \description{
44 | Fetches protein metadata from UniProt.
45 | }
46 | \examples{
47 | \donttest{
48 | fetch_uniprot(c("P36578", "O43324", "Q00796"))
49 | 
50 | # Not completely valid ID
51 | fetch_uniprot(c("P02545", "P02545;P20700"))
52 | }
53 | }
54 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: protti
 2 | Title: Bottom-Up Proteomics and LiP-MS Quality Control and Data Analysis Tools
 3 | Version: 0.9.1
 4 | Authors@R: 
 5 |     c(person(given = "Jan-Philipp",
 6 |            family = "Quast",
 7 |            role = c("aut", "cre"),
 8 |            email = "quast@imsb.biol.ethz.ch",
 9 |            comment = c(ORCID = "0000-0003-2713-778X")),
10 | 	person(given = "Dina",
11 |            family = "Schuster",
12 |            role = c("aut"),
13 |            email = "dschuster@ethz.ch",
14 |            comment = c(ORCID = "0000-0001-6611-8237")),
15 | 	person(given = "ETH Zurich",
16 |            role = c("cph", "fnd")))
17 | Description: Useful functions and workflows for proteomics quality control and data analysis of both limited proteolysis-coupled mass spectrometry (LiP-MS) (Feng et al. (2014) <doi:10.1038/nbt.2999>) and regular bottom-up proteomics experiments. Data generated with search tools such as 'Spectronaut', 'MaxQuant' and 'Proteome Discoverer' can be easily used due to flexibility of functions.
18 | License: MIT + file LICENSE
19 | Encoding: UTF-8
20 | LazyData: true
21 | biocViews: 
22 | Imports: 
23 |     rlang, 
24 |     dplyr, 
25 |     stringr, 
26 |     magrittr, 
27 |     data.table, 
28 |     janitor, 
29 |     progress, 
30 |     purrr, 
31 |     tidyr, 
32 |     ggplot2, 
33 |     forcats, 
34 |     tibble, 
35 |     plotly,   
36 |     ggrepel, 
37 |     utils,
38 |     grDevices,
39 |     curl,
40 |     readr,
41 |     lifecycle,
42 |     httr,
43 |     methods,
44 |     R.utils,
45 |     stats
46 | RoxygenNote: 7.3.2
47 | Suggests: 
48 |     testthat,
49 |     covr,
50 |     knitr,
51 |     rmarkdown,
52 |     shiny,
53 |     r3dmol,
54 |     proDA,
55 |     limma,
56 |     dendextend,
57 |     pheatmap, 
58 |     heatmaply,
59 |     furrr, 
60 |     future, 
61 |     parallel,
62 |     seriation,
63 |     drc,
64 |     igraph,
65 |     stringi, 
66 |     STRINGdb,
67 |     iq,
68 |     scales,
69 |     farver,
70 |     ggforce,
71 |     xml2,
72 |     jsonlite
73 | Depends: 
74 |     R (>= 4.0)
75 | URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/
76 | BugReports: https://github.com/jpquast/protti/issues
77 | VignetteBuilder: knitr
78 | Roxygen: list(markdown = TRUE)
79 | 


--------------------------------------------------------------------------------
/man/qc_contaminants.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_contaminants.R
 3 | \name{qc_contaminants}
 4 | \alias{qc_contaminants}
 5 | \title{Percentage of contaminants per sample}
 6 | \usage{
 7 | qc_contaminants(
 8 |   data,
 9 |   sample,
10 |   protein,
11 |   is_contaminant,
12 |   intensity,
13 |   n_contaminants = 5,
14 |   plot = TRUE,
15 |   interactive = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{data}{a data frame that contains at least the input variables.}
20 | 
21 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample names.}
22 | 
23 | \item{protein}{a character column in the \code{data} data frame that contains protein IDs or
24 | protein names.}
25 | 
26 | \item{is_contaminant}{a logical column that indicates if the protein is a contaminant.}
27 | 
28 | \item{intensity}{a numeric column in the \code{data} data frame that contains the corresponding
29 | raw or normalised intensity values (not log2).}
30 | 
31 | \item{n_contaminants}{a numeric value that indicates how many contaminants should be displayed
32 | individually. The rest is combined to a group called "other". The default is 5.}
33 | 
34 | \item{plot}{a logical value that indicates if a plot is returned. If FALSE a table is returned.}
35 | 
36 | \item{interactive}{a logical value that indicates if the plot is made interactive using the R
37 | package \code{plotly}.}
38 | }
39 | \value{
40 | A bar plot that displays the percentage of contaminating proteins over all samples.
41 | If \code{plot = FALSE} a data frame is returned.
42 | }
43 | \description{
44 | Calculates the percentage of contaminating proteins as the share of total intensity.
45 | }
46 | \examples{
47 | data <- data.frame(
48 |   sample = c(rep("sample_1", 10), rep("sample_2", 10)),
49 |   leading_razor_protein = c(rep(c("P1", "P1", "P1", "P2", "P2", "P2", "P2", "P3", "P3", "P3"), 2)),
50 |   potential_contaminant = c(rep(c(rep(TRUE, 7), rep(FALSE, 3)), 2)),
51 |   intensity = c(rep(1, 2), rep(4, 4), rep(6, 4), rep(2, 3), rep(3, 5), rep(4, 2))
52 | )
53 | 
54 | qc_contaminants(
55 |   data,
56 |   sample = sample,
57 |   protein = leading_razor_protein,
58 |   is_contaminant = potential_contaminant,
59 |   intensity = intensity
60 | )
61 | }
62 | 


--------------------------------------------------------------------------------
/man/qc_intensity_distribution.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_intensity_distribution.R
 3 | \name{qc_intensity_distribution}
 4 | \alias{qc_intensity_distribution}
 5 | \title{Check intensity distribution per sample and overall}
 6 | \usage{
 7 | qc_intensity_distribution(
 8 |   data,
 9 |   sample = NULL,
10 |   grouping,
11 |   intensity_log2,
12 |   plot_style
13 | )
14 | }
15 | \arguments{
16 | \item{data}{a data frame that contains at least sample names, grouping identifiers (precursor,
17 | peptide or protein) and log2 transformed intensities for each grouping identifier.}
18 | 
19 | \item{sample}{an optional character or factor column in the \code{data} data frame that contains the
20 | sample name. If the sample column is of type factor, the ordering is based on the factor
21 | levels. NOTE: If the overall distribution should be returned please do not provide the name of the
22 | sample column.}
23 | 
24 | \item{grouping}{a character column in the \code{data} data frame that contains the grouping
25 | variables (e.g. peptides, precursors or proteins).}
26 | 
27 | \item{intensity_log2}{a numeric column in the \code{data} data frame that contains the log2
28 | transformed intensities of each grouping identifier sample combination.}
29 | 
30 | \item{plot_style}{a character value that indicates the plot type. This can be either
31 | "histogram", "boxplot" or "violin". Plot style "boxplot" and "violin" can only be used if a
32 | sample column is provided.}
33 | }
34 | \value{
35 | A histogram, boxplot or violin plot that shows the intensity distribution over all samples or by
36 | sample.
37 | }
38 | \description{
39 | Plots the overall or sample-wise distribution of all peptide intensities as a histogram,
40 | boxplot or violin plot.
41 | }
42 | \examples{
43 | set.seed(123) # Makes example reproducible
44 | 
45 | # Create example data
46 | data <- create_synthetic_data(
47 |   n_proteins = 100,
48 |   frac_change = 0.05,
49 |   n_replicates = 3,
50 |   n_conditions = 2,
51 |   method = "effect_random"
52 | )
53 | 
54 | # Plot intensity distribution
55 | # The plot style can be changed
56 | qc_intensity_distribution(
57 |   data = data,
58 |   sample = sample,
59 |   grouping = peptide,
60 |   intensity_log2 = peptide_intensity_missing,
61 |   plot_style = "boxplot"
62 | )
63 | }
64 | 


--------------------------------------------------------------------------------
/R/find_peptide.R:
--------------------------------------------------------------------------------
 1 | #' Find peptide location
 2 | #'
 3 | #' The position of the given peptide sequence is searched within the given protein sequence. In
 4 | #' addition the last amino acid of the peptide as well as the amino acids right before and right
 5 | #' after the peptide are reported.
 6 | #'
 7 | #' @param data a data frame that contains at least the protein and peptide sequence.
 8 | #' @param protein_sequence a character column in the \code{data} data frame that contains the
 9 | #' protein sequence.
10 | #' @param peptide_sequence a character column in the \code{data} data frame that contains the
11 | #' peptide sequence.
12 | #'
13 | #' @return A data frame that contains the input data and five additional columns: the peptide
14 | #' start and end position (\code{start}, \code{end}), the amino acid before the peptide
15 | #' (\code{aa_before}), the last amino acid of the peptide (\code{last_aa}) and the amino acid
16 | #' after the peptide (\code{aa_after}).
17 | #' @import dplyr
18 | #' @import stringr
19 | #' @importFrom magrittr %>%
20 | #' @importFrom rlang .data
21 | #' @export
22 | #'
23 | #' @examples
24 | #' # Create example data
25 | #' data <- data.frame(
26 | #'   protein_sequence = c("abcdefg"),
27 | #'   peptide_sequence = c("cde")
28 | #' )
29 | #'
30 | #' # Find peptide
31 | #' find_peptide(
32 | #'   data = data,
33 | #'   protein_sequence = protein_sequence,
34 | #'   peptide_sequence = peptide_sequence
35 | #' )
36 | find_peptide <-
37 |   function(data, protein_sequence, peptide_sequence) {
38 |     # Column names as strings; used to extract the columns and to join back to the input
39 |     protein_column <- rlang::as_name(rlang::enquo(protein_sequence))
40 |     peptide_column <- rlang::as_name(rlang::enquo(peptide_sequence))
41 | 
42 |     # Each unique protein and peptide combination only needs to be searched once
43 |     result <- data %>%
44 |       dplyr::ungroup() %>%
45 |       dplyr::distinct({{ protein_sequence }}, {{ peptide_sequence }})
46 | 
47 |     # Locate every peptide a single time; column 1 holds the start and column 2 the end position
48 |     positions <- stringr::str_locate(result[[protein_column]], result[[peptide_column]])
49 |     result$start <- positions[, 1]
50 |     result$end <- positions[, 2]
51 | 
52 |     result <- result %>%
53 |       dplyr::mutate(aa_before = stringr::str_sub({{ protein_sequence }},
54 |         start = .data$start - 1,
55 |         end = .data$start - 1
56 |       )) %>%
57 |       dplyr::mutate(last_aa = stringr::str_sub({{ protein_sequence }},
58 |         start = .data$end,
59 |         end = .data$end
60 |       )) %>%
61 |       dplyr::mutate(aa_after = stringr::str_sub({{ protein_sequence }},
62 |         start = .data$end + 1,
63 |         end = .data$end + 1
64 |       ))
65 | 
66 |     # Add the positions and amino acids to every row of the original data
67 |     data %>% dplyr::left_join(result, c(protein_column, peptide_column))
68 |   }
69 | 


--------------------------------------------------------------------------------
/.github/workflows/format-code.yml:
--------------------------------------------------------------------------------
 1 | # Restyles the package with styler on push and commits the restyled files back to the branch.
 2 | on:
 3 |   push:
 4 |     # Only run when files with one of these extensions are part of the push.
 5 |     paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"]
 6 | 
 7 | name: Style
 8 | env:
 9 |   # Used below as the git user name and e-mail prefix of the style commit.
10 |   GITHUB_ACTOR: "actions-user"
11 | 
12 | jobs:
13 |   style:
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       # Write access is required to push the style commit.
17 |       contents: write
18 |     env:
19 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 |     steps:
21 |       - name: Checkout repo
22 |         uses: actions/checkout@v4
23 |         with:
24 |           fetch-depth: 0
25 | 
26 |       - name: Setup R
27 |         uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - name: Install dependencies
32 |         uses: r-lib/actions/setup-r-dependencies@v2
33 |         with:
34 |           extra-packages: any::styler, any::roxygen2
35 |           needs: styler
36 | 
37 |       - name: Enable styler cache
38 |         run: styler::cache_activate()
39 |         shell: Rscript {0}
40 | 
41 |       # Writes the styler cache directory to the step outputs so the cache step can use it.
42 |       - name: Determine cache location
43 |         id: styler-location
44 |         run: |
45 |           cat(
46 |             "location=",
47 |             styler::cache_info(format = "tabular")$location,
48 |             "\n",
49 |             file = Sys.getenv("GITHUB_OUTPUT"),
50 |             append = TRUE,
51 |             sep = ""
52 |           )
53 |         shell: Rscript {0}
54 | 
55 |       - name: Cache styler
56 |         uses: actions/cache@v4
57 |         with:
58 |           path: ${{ steps.styler-location.outputs.location }}
59 |           key: ${{ runner.os }}-styler-${{ github.sha }}
60 |           restore-keys: |
61 |             ${{ runner.os }}-styler-
62 |             ${{ runner.os }}-
63 | 
64 |       - name: Style
65 |         run: styler::style_pkg()
66 |         shell: Rscript {0}
67 | 
68 |       - name: Commit and push changes
69 |         run: |
70 |           # grep exits non-zero when no changed file matches, which selects the else branch.
71 |           # `grep -E` replaces `egrep`, which is obsolescent and warns in current GNU grep.
72 |           if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \
73 |               | grep -E --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$'))
74 |           then
75 |             git config --local user.name "$GITHUB_ACTOR"
76 |             git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
77 |             git commit -m "Style code (GHA)" -- "${FILES_TO_COMMIT[@]}"
78 |             git pull --ff-only
79 |             git push origin
80 |           else
81 |             echo "No changes to commit."
82 |           fi
83 | 


--------------------------------------------------------------------------------
/man/qc_proteome_coverage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_proteome_coverage.R
 3 | \name{qc_proteome_coverage}
 4 | \alias{qc_proteome_coverage}
 5 | \title{Proteome coverage per sample and total}
 6 | \usage{
 7 | qc_proteome_coverage(
 8 |   data,
 9 |   sample,
10 |   protein_id,
11 |   organism_id,
12 |   reviewed = TRUE,
13 |   plot = TRUE,
14 |   interactive = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{data}{a data frame that contains at least sample names and protein ID's.}
19 | 
20 | \item{sample}{a character column in the \code{data} data frame that contains the sample name.}
21 | 
22 | \item{protein_id}{a character or numeric column in the \code{data} data frame that contains
23 | protein identifiers such as UniProt accessions.}
24 | 
25 | \item{organism_id}{a numeric value that specifies a NCBI taxonomy identifier (TaxId) of the
26 | organism used. Human: 9606, S. cerevisiae: 559292, E. coli: 83333.}
27 | 
28 | \item{reviewed}{a logical value that determines if only reviewed protein entries will be considered
29 | as the full proteome. Default is TRUE.}
30 | 
31 | \item{plot}{a logical value that specifies whether the result should be plotted.}
32 | 
33 | \item{interactive}{a logical value that indicates whether the plot should be interactive
34 | (default is FALSE).}
35 | }
36 | \value{
37 | A bar plot showing the percentage of the proteome detected and undetected in total
38 | and for each sample. If \code{plot = FALSE} a data frame containing the numbers is returned.
39 | }
40 | \description{
41 | Calculates the proteome coverage for each sample and for all samples combined. In other words
42 | the fraction of detected proteins to all proteins in the proteome is calculated.
43 | }
44 | \examples{
45 | \donttest{
46 | # Create example data
47 | proteome <- data.frame(id = 1:4518)
48 | data <- data.frame(
49 |   sample = c(rep("A", 101), rep("B", 1000), rep("C", 1000)),
50 |   protein_id = c(proteome$id[1:100], proteome$id[1:1000], proteome$id[1000:2000])
51 | )
52 | 
53 | # Calculate proteome coverage
54 | qc_proteome_coverage(
55 |   data = data,
56 |   sample = sample,
57 |   protein_id = protein_id,
58 |   organism_id = 83333,
59 |   plot = FALSE
60 | )
61 | 
62 | # Plot proteome coverage
63 | qc_proteome_coverage(
64 |   data = data,
65 |   sample = sample,
66 |   protein_id = protein_id,
67 |   organism_id = 83333,
68 |   plot = TRUE
69 | )
70 | }
71 | }
72 | 


--------------------------------------------------------------------------------
/man/calculate_imputation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_imputation.R
 3 | \name{calculate_imputation}
 4 | \alias{calculate_imputation}
 5 | \title{Sampling of values for imputation}
 6 | \usage{
 7 | calculate_imputation(
 8 |   min = NULL,
 9 |   noise = NULL,
10 |   mean = NULL,
11 |   sd,
12 |   missingness = c("MNAR", "MAR"),
13 |   method = c("ludovic", "noise"),
14 |   skip_log2_transform_error = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{min}{a numeric value specifying the minimal intensity value of the precursor/peptide.
19 | Is only required if \code{method = "ludovic"} and \code{missingness = "MNAR"}.}
20 | 
21 | \item{noise}{a numeric value specifying a noise value for the precursor/peptide. Is only
22 | required if \code{method = "noise"} and \code{missingness = "MNAR"}.}
23 | 
24 | \item{mean}{a numeric value specifying the mean intensity value of the condition with missing
25 | values for a given precursor/peptide. Is only required if \code{missingness = "MAR"}.}
26 | 
27 | \item{sd}{a numeric value specifying the mean of the standard deviation of all conditions for
28 | a given precursor/peptide.}
29 | 
30 | \item{missingness}{a character value specifying the missingness type of the data, which determines
31 | how values for imputation are sampled. This can be \code{"MAR"} or \code{"MNAR"}.}
32 | 
33 | \item{method}{a character value specifying the method to be used for imputation. For
34 | \code{method = "ludovic"}, MNAR missingness is sampled around a value that is three lower
35 | (log2) than the lowest intensity value recorded for the precursor/peptide. For
36 | \code{method = "noise"}, MNAR missingness is sampled around the noise value for the
37 | precursor/peptide.}
38 | 
39 | \item{skip_log2_transform_error}{a logical value, if FALSE a check is performed to validate that
40 | input values are log2 transformed. If input values are > 40 the test is failed and an error is
41 | returned.}
42 | }
43 | \value{
44 | A value sampled from a normal distribution with the input parameters. Method specifics
45 | are applied to input parameters prior to sampling.
46 | }
47 | \description{
48 | \code{calculate_imputation} is a helper function that is used in the \code{impute} function.
49 | Depending on the type of missingness and method, it samples values from a normal distribution
50 | that can be used for the imputation. Note: The input intensities should be log2 transformed.
51 | }
52 | 


--------------------------------------------------------------------------------
/man/qc_data_completeness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_data_completeness.R
 3 | \name{qc_data_completeness}
 4 | \alias{qc_data_completeness}
 5 | \title{Data completeness}
 6 | \usage{
 7 | qc_data_completeness(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   intensity,
12 |   digestion = NULL,
13 |   plot = TRUE,
14 |   interactive = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{data}{a data frame containing at least the input variables.}
19 | 
20 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample names.}
21 | 
22 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor
23 | or peptide identifiers.}
24 | 
25 | \item{intensity}{a numeric column in the \code{data} data frame that contains any
26 | intensity values that missingness should be determined for.}
27 | 
28 | \item{digestion}{optional, a character column in the \code{data} data frame that indicates the
29 | mode of digestion (limited proteolysis or tryptic digest). Alternatively, any other variable
30 | by which the data should be split can be provided.}
31 | 
32 | \item{plot}{a logical value that indicates whether the result should be plotted.}
33 | 
34 | \item{interactive}{a logical value that specifies whether the plot should be interactive
35 | (default is FALSE).}
36 | }
37 | \value{
38 | A bar plot that displays the percentage of data completeness over all samples.
39 | If \code{plot = FALSE} a data frame is returned. If \code{interactive = TRUE}, the plot is
40 | interactive.
41 | }
42 | \description{
43 | Calculates the percentage of data completeness. That means, what percentage of all detected
44 | precursors is present in each sample.
45 | }
46 | \examples{
47 | set.seed(123) # Makes example reproducible
48 | 
49 | # Create example data
50 | data <- create_synthetic_data(
51 |   n_proteins = 100,
52 |   frac_change = 0.05,
53 |   n_replicates = 3,
54 |   n_conditions = 2,
55 |   method = "effect_random"
56 | )
57 | 
58 | # Determine data completeness
59 | qc_data_completeness(
60 |   data = data,
61 |   sample = sample,
62 |   grouping = peptide,
63 |   intensity = peptide_intensity_missing,
64 |   plot = FALSE
65 | )
66 | 
67 | # Plot data completeness
68 | qc_data_completeness(
69 |   data = data,
70 |   sample = sample,
71 |   grouping = peptide,
72 |   intensity = peptide_intensity_missing,
73 |   plot = TRUE
74 | )
75 | }
76 | 


--------------------------------------------------------------------------------
/R/drc_4p.R:
--------------------------------------------------------------------------------
 1 | #' Dose response curve helper function
 2 | #'
 3 | #' This function performs the four-parameter dose response curve fit. It is the helper function
 4 | #' for the fit in the \code{fit_drc_4p} function.
 5 | #'
 6 | #' @param data a data frame that contains at least the dose and response column the model should
 7 | #' be fitted to.
 8 | #' @param response a numeric column that contains the response values.
 9 | #' @param dose a numeric column that contains the dose values.
10 | #' @param log_logarithmic a logical value indicating if a logarithmic or log-logarithmic model is
11 | #' fitted. If response values form a symmetric curve for non-log transformed dose values, a
12 | #' logarithmic model instead of a log-logarithmic model should be used. Usually biological dose
13 | #' response data has a log-logarithmic distribution, which is the reason this is the default.
14 | #' Log-logarithmic models are symmetric if dose values are log transformed.
15 | #' @param pb progress bar object. This is only necessary if the function is used in an iteration.
16 | #'
17 | #' @return An object of class \code{drc}. If no fit was performed a character vector with content
18 | #' "no_fit". If the \code{drc} package is not installed, a message is shown and \code{NULL} is
19 | #' returned invisibly.
20 | drc_4p <- function(data, response, dose, log_logarithmic = TRUE, pb = NULL) {
21 |   if (!requireNamespace("drc", quietly = TRUE)) {
22 |     # message() has no `call.` argument; passing one would paste "FALSE" onto the text.
23 |     message("Package \"drc\" is needed for this function to work. Please install it.")
24 |     return(invisible(NULL))
25 |   }
26 |   # Advance the progress bar once per fit when the function is used in an iteration.
27 |   if (!is.null(pb)) pb$tick()
28 |   if (log_logarithmic == TRUE) {
29 |     # Log-logarithmic model (drc::LL.4). A failing fit is reported as "no_fit" instead of
30 |     # raising an error.
31 |     result <- tryCatch(
32 |       {
33 |         suppressWarnings(drc::drm(
34 |           stats::as.formula(paste(ensym(response), "~", ensym(dose))),
35 |           data = data,
36 |           fct = drc::LL.4(names = c("hill", "min_value", "max_value", "ec_50")),
37 |           control = drc::drmc(otrace = TRUE)
38 |         ))
39 |       },
40 |       error = function(error) {
41 |         c("no_fit")
42 |       }
43 |     )
44 |     return(result)
45 |   }
46 |   if (log_logarithmic == FALSE) {
47 |     # Logarithmic model (drc::L.4) with the same parameter names and error handling.
48 |     result <- tryCatch(
49 |       {
50 |         suppressWarnings(drc::drm(
51 |           stats::as.formula(paste(ensym(response), "~", ensym(dose))),
52 |           data = data,
53 |           fct = drc::L.4(names = c("hill", "min_value", "max_value", "ec_50")),
54 |           control = drc::drmc(otrace = TRUE)
55 |         ))
56 |       },
57 |       error = function(error) {
58 |         c("no_fit")
59 |       }
60 |     )
61 |     return(result)
62 |   }
63 | }
64 | 


--------------------------------------------------------------------------------
/R/anova_protti.R:
--------------------------------------------------------------------------------
 1 | #' Perform ANOVA
 2 | #'
 3 | #' Performs a one-way ANOVA statistical test based on the mean, standard deviation and number
 4 | #' of replicates of each condition.
 5 | #'
 6 | #' @param data a data frame containing at least the input variables.
 7 | #' @param grouping a character column in the \code{data} data frame that contains precursor or
 8 | #' peptide identifiers.
 9 | #' @param condition a character or numeric column in the \code{data} data frame that contains the
10 | #' conditions.
11 | #' @param mean_ratio a numeric column in the \code{data} data frame that contains mean intensities
12 | #' or mean intensity ratios.
13 | #' @param sd a numeric column in the \code{data} data frame that contains the standard deviation
14 | #' corresponding to the mean.
15 | #' @param n a numeric column in the \code{data} data frame that contains the number of replicates
16 | #' for which the corresponding mean was calculated.
17 | #'
18 | #' @return a data frame that contains the between group mean square (\code{ms_group}), the
19 | #' within group mean square (\code{ms_error}), the F statistic (\code{f}) and the p-value
20 | #' (\code{pval}) for each element of the \code{grouping} column.
21 | #' @import dplyr
22 | #' @export
23 | #'
24 | #' @examples
25 | #' data <- data.frame(
26 | #'   precursor = c("A", "A", "A", "B", "B", "B"),
27 | #'   condition = c("C1", "C2", "C3", "C1", "C2", "C3"),
28 | #'   mean = c(10, 12, 20, 11, 12, 8),
29 | #'   sd = c(2, 1, 1.5, 1, 2, 4),
30 | #'   n = c(4, 4, 4, 4, 4, 4)
31 | #' )
32 | #'
33 | #' anova_protti(
34 | #'   data,
35 | #'   grouping = precursor,
36 | #'   condition = condition,
37 | #'   mean_ratio = mean,
38 | #'   sd = sd,
39 | #'   n = n
40 | #' )
41 | anova_protti <- function(data, grouping, condition, mean_ratio, sd, n) {
42 |   result <- data %>%
43 |     dplyr::distinct({{ grouping }}, {{ condition }}, {{ mean_ratio }}, {{ sd }}, {{ n }}) %>%
44 |     dplyr::group_by({{ grouping }}) %>%
45 |     # Conditions without any replicate do not take part in the test.
46 |     dplyr::filter({{ n }} != 0) %>%
47 |     dplyr::mutate(n_groups = dplyr::n_distinct({{ condition }})) %>%
48 |     dplyr::mutate(total_n = sum({{ n }})) %>%
49 |     # The grand mean is the mean over all observations, so each condition mean is weighted by
50 |     # its number of replicates. An unweighted mean is only correct for balanced designs.
51 |     dplyr::mutate(grand_mean = sum({{ mean_ratio }} * {{ n }}) / .data$total_n) %>%
52 |     # Between group mean square with n_groups - 1 degrees of freedom.
53 |     dplyr::mutate(ms_group = sum(({{ mean_ratio }} - .data$grand_mean)^2 * {{ n }}) / (.data$n_groups - 1)) %>%
54 |     # Within group mean square with total_n - n_groups degrees of freedom.
55 |     dplyr::mutate(ms_error = sum({{ sd }}^2 * ({{ n }} - 1)) / (.data$total_n - .data$n_groups)) %>%
56 |     dplyr::mutate(f = .data$ms_group / .data$ms_error) %>%
57 |     dplyr::mutate(pval = stats::pf(.data$f, .data$n_groups - 1, .data$total_n - .data$n_groups, lower.tail = FALSE)) %>%
58 |     # Reduce to one row per group.
59 |     dplyr::distinct({{ grouping }}, .data$ms_group, .data$ms_error, .data$f, .data$pval) %>%
60 |     dplyr::ungroup()
61 | 
62 |   result
63 | }
64 | 


--------------------------------------------------------------------------------
/man/fetch_alphafold_aligned_error.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_alphafold_aligned_error.R
 3 | \name{fetch_alphafold_aligned_error}
 4 | \alias{fetch_alphafold_aligned_error}
 5 | \title{Fetch AlphaFold aligned error}
 6 | \usage{
 7 | fetch_alphafold_aligned_error(
 8 |   uniprot_ids = NULL,
 9 |   error_cutoff = 20,
10 |   timeout = 30,
11 |   max_tries = 1,
12 |   return_data_frame = FALSE,
13 |   show_progress = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{uniprot_ids}{a character vector of UniProt identifiers for which predictions
18 | should be fetched.}
19 | 
20 | \item{error_cutoff}{a numeric value specifying the maximum position error (in Angstroms) that should be retained.
21 | Setting this value to a low number reduces the size of the retrieved data. Default is 20.}
22 | 
23 | \item{timeout}{a numeric value specifying the time in seconds until the download times out.
24 | The default is 30 seconds.}
25 | 
26 | \item{max_tries}{a numeric value that specifies the number of times the function tries to download
27 | the data in case an error occurs. The default is 1.}
28 | 
29 | \item{return_data_frame}{a logical value; if \code{TRUE} a data frame instead of a list
30 | is returned. It is recommended to only use this if information for few proteins is retrieved.
31 | Default is \code{FALSE}.}
32 | 
33 | \item{show_progress}{a logical value; if \code{TRUE} a progress bar will be shown.
34 | Default is \code{TRUE}.}
35 | }
36 | \value{
37 | A list that contains aligned errors for AlphaFold predictions. If return_data_frame is
38 | TRUE, a data frame with this information is returned instead. The data frame contains the
39 | following columns:
40 | \itemize{
41 | \item scored_residue: The error for this position is calculated based on the alignment to the
42 | aligned residue.
43 | \item aligned_residue: The residue that is aligned for the calculation of the error of the scored
44 | residue
45 | \item error: The predicted aligned error computed by AlphaFold.
46 | \item accession: The UniProt protein identifier.
47 | }
48 | }
49 | \description{
50 | Fetches the aligned error for AlphaFold predictions for provided proteins.
51 | The aligned error is useful for assessing inter-domain accuracy. In detail it
52 | represents the expected position error at residue x (scored residue), when
53 | the predicted and true structures are aligned on residue y (aligned residue).
54 | }
55 | \examples{
56 | \donttest{
57 | aligned_error <- fetch_alphafold_aligned_error(
58 |   uniprot_ids = c("F4HVG8", "O15552"),
59 |   error_cutoff = 5,
60 |   return_data_frame = TRUE
61 | )
62 | 
63 | head(aligned_error, n = 10)
64 | }
65 | }
66 | 


--------------------------------------------------------------------------------
/man/qc_sample_correlation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_sample_correlation.R
 3 | \name{qc_sample_correlation}
 4 | \alias{qc_sample_correlation}
 5 | \title{Correlation based hierarchical clustering of samples}
 6 | \usage{
 7 | qc_sample_correlation(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   intensity_log2,
12 |   condition,
13 |   digestion = NULL,
14 |   run_order = NULL,
15 |   method = "spearman",
16 |   interactive = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{a data frame that contains at least the input variables.}
21 | 
22 | \item{sample}{a character column in the \code{data} data frame that contains the sample names.}
23 | 
24 | \item{grouping}{a character column in the \code{data} data frame that contains precursor or
25 | peptide identifiers.}
26 | 
27 | \item{intensity_log2}{a numeric column in the \code{data} data frame that contains log2
28 | intensity values.}
29 | 
30 | \item{condition}{a character or numeric column in the \code{data} data frame that contains the
31 | conditions.}
32 | 
33 | \item{digestion}{optional, a character column in the \code{data} data frame that contains
34 | information about the digestion method used. e.g. "LiP" or "tryptic control".}
35 | 
36 | \item{run_order}{optional, a character or numeric column in the \code{data} data frame that
37 | contains the order in which samples were measured. Useful to investigate batch effects due to
38 | run order.}
39 | 
40 | \item{method}{a character value that specifies the method to be used for correlation.
41 | \code{"spearman"} is the default but can be changed to \code{"pearson"} or \code{"kendall"}.}
42 | 
43 | \item{interactive}{a logical value that specifies whether the plot should be interactive.
44 | Determines if an interactive or static heatmap should be created using \code{heatmaply} or
45 | \code{pheatmap}, respectively.}
46 | }
47 | \value{
48 | A correlation heatmap that compares each sample. The dendrogram is sorted by optimal
49 | leaf ordering.
50 | }
51 | \description{
52 | A correlation heatmap is created that uses hierarchical clustering to determine sample similarity.
53 | }
54 | \examples{
55 | \donttest{
56 | set.seed(123) # Makes example reproducible
57 | 
58 | # Create example data
59 | data <- create_synthetic_data(
60 |   n_proteins = 100,
61 |   frac_change = 0.05,
62 |   n_replicates = 3,
63 |   n_conditions = 2,
64 |   method = "effect_random"
65 | )
66 | 
67 | # Create sample correlation heatmap
68 | qc_sample_correlation(
69 |   data = data,
70 |   sample = sample,
71 |   grouping = peptide,
72 |   intensity_log2 = peptide_intensity_missing,
73 |   condition = condition
74 | )
75 | }
76 | }
77 | 


--------------------------------------------------------------------------------
/man/randomise_queue.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/randomise_queue.R
 3 | \name{randomise_queue}
 4 | \alias{randomise_queue}
 5 | \title{Randomise samples in MS queue}
 6 | \usage{
 7 | randomise_queue(data = NULL, rows = NULL, export = FALSE)
 8 | }
 9 | \arguments{
10 | \item{data}{optional, a data frame that contains a queue. If not provided a queue file can be
11 | chosen interactively.}
12 | 
13 | \item{rows}{optional, a numeric vector that specifies a range of rows for which samples
14 | should be randomized.}
15 | 
16 | \item{export}{a logical value that determines if a \code{"randomised_queue.csv"} file will be
17 | saved in the working directory. If FALSE a data frame will be returned.}
18 | }
19 | \value{
20 | If \code{export = TRUE} a \code{"randomised_queue.csv"} file will be saved in the
21 | working directory. If \code{export = FALSE} a data frame that contains the randomised queue
22 | is returned.
23 | }
24 | \description{
25 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
26 | This function randomises the order of samples in an MS queue. QC and Blank samples are left in
27 | place. It is also possible to randomise only parts of the queue. Before running this make sure
28 | to set a specific seed with the \code{set.seed()} function. This ensures that the randomisation
29 | of the result is consistent if the function is run again.
30 | }
31 | \examples{
32 | queue <- create_queue(
33 |   date = c("200722"),
34 |   instrument = c("EX1"),
35 |   user = c("jquast"),
36 |   measurement_type = c("DIA"),
37 |   experiment_name = c("JPQ031"),
38 |   digestion = c("LiP", "tryptic control"),
39 |   treatment_type_1 = c("EDTA", "H2O"),
40 |   treatment_type_2 = c("Zeba", "unfiltered"),
41 |   treatment_dose_1 = c(10, 30, 60),
42 |   treatment_unit_1 = c("min"),
43 |   n_replicates = 4,
44 |   number_runs = FALSE,
45 |   organism = c("E. coli"),
46 |   exclude_combinations = list(list(
47 |     treatment_type_1 = c("H2O"),
48 |     treatment_type_2 = c("Zeba", "unfiltered"),
49 |     treatment_dose_1 = c(10, 30)
50 |   )),
51 |   inj_vol = c(2),
52 |   data_path = "D:\\\\2007_Data",
53 |   method_path = "C:\\\\Xcalibur\\\\methods\\\\DIA_120min",
54 |   position_row = c("A", "B", "C", "D", "E", "F"),
55 |   position_column = 8,
56 |   blank_every_n = 4,
57 |   blank_position = "1-V1",
58 |   blank_method_path = "C:\\\\Xcalibur\\\\methods\\\\blank"
59 | )
60 | 
61 | head(queue, n = 20)
62 | 
63 | randomised_queue <- randomise_queue(
64 |   data = queue,
65 |   export = FALSE
66 | )
67 | 
68 | head(randomised_queue, n = 20)
69 | }
70 | 


--------------------------------------------------------------------------------
/man/qc_cvs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_cvs.R
 3 | \name{qc_cvs}
 4 | \alias{qc_cvs}
 5 | \title{Check CV distribution}
 6 | \usage{
 7 | qc_cvs(
 8 |   data,
 9 |   grouping,
10 |   condition,
11 |   intensity,
12 |   plot = TRUE,
13 |   plot_style = "density",
14 |   max_cv = 200
15 | )
16 | }
17 | \arguments{
18 | \item{data}{a data frame containing at least peptide, precursor or protein identifiers,
19 | information on conditions and intensity values for each peptide, precursor or protein.}
20 | 
21 | \item{grouping}{a character column in the \code{data} data frame that contains the grouping
22 | variables (e.g. peptides, precursors or proteins).}
23 | 
24 | \item{condition}{a character or factor column in the \code{data} data frame that contains condition information
25 | (e.g. "treated" and "control").}
26 | 
27 | \item{intensity}{a numeric column in the \code{data} data frame that contains the corresponding
28 | raw or untransformed normalised intensity values for each peptide or precursor.}
29 | 
30 | \item{plot}{a logical value that indicates whether the result should be plotted.}
31 | 
32 | \item{plot_style}{a character value that indicates the plotting style. \code{plot_style = "boxplot"}
33 | plots a boxplot, whereas \code{plot_style = "density"} plots the CV density distribution.
34 | \code{plot_style = "violin"} returns a violin plot. Default is \code{plot_style = "density"}.}
35 | 
36 | \item{max_cv}{a numeric value that specifies the maximum percentage of CVs that should be included
37 | in the returned plot. The default value is \code{max_cv = 200}.}
38 | }
39 | \value{
40 | Either a data frame with the median CVs in \% or a plot showing the distribution of the CVs
41 | is returned.
42 | }
43 | \description{
44 | Calculates and plots the coefficients of variation for the selected grouping.
45 | }
46 | \examples{
47 | # Load libraries
48 | library(dplyr)
49 | 
50 | set.seed(123) # Makes example reproducible
51 | 
52 | # Create example data
53 | data <- create_synthetic_data(
54 |   n_proteins = 100,
55 |   frac_change = 0.05,
56 |   n_replicates = 3,
57 |   n_conditions = 2,
58 |   method = "effect_random"
59 | ) \%>\%
60 |   mutate(intensity_non_log2 = 2^peptide_intensity_missing)
61 | 
62 | # Calculate coefficients of variation
63 | qc_cvs(
64 |   data = data,
65 |   grouping = peptide,
66 |   condition = condition,
67 |   intensity = intensity_non_log2,
68 |   plot = FALSE
69 | )
70 | 
71 | # Plot coefficients of variation
72 | # Different plot styles are available
73 | qc_cvs(
74 |   data = data,
75 |   grouping = peptide,
76 |   condition = condition,
77 |   intensity = intensity_non_log2,
78 |   plot = TRUE,
79 |   plot_style = "violin"
80 | )
81 | }
82 | 


--------------------------------------------------------------------------------
/man/fetch_eco.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_eco.R
 3 | \name{fetch_eco}
 4 | \alias{fetch_eco}
 5 | \title{Fetch evidence & conclusion ontology}
 6 | \usage{
 7 | fetch_eco(
 8 |   return_relation = FALSE,
 9 |   return_history = FALSE,
10 |   show_progress = TRUE
11 | )
12 | }
13 | \arguments{
14 | \item{return_relation}{a logical value that indicates if relational information should be returned instead
15 | of the main descriptive information. This data can be used to check the relations of ECO terms to each other.
16 | Default is FALSE.}
17 | 
18 | \item{return_history}{a logical value that indicates if the entry history of an ECO term should be
19 | returned instead of the main descriptive information.
20 | Default is FALSE.}
21 | 
22 | \item{show_progress}{a logical value that indicates if a progress bar will be shown.
23 | Default is TRUE.}
24 | }
25 | \value{
26 | A data frame that contains descriptive information about each ECO term in the EBI database.
27 | If either \code{return_relation} or \code{return_history} is set to \code{TRUE}, the respective information is
28 | returned instead of the usual output.
29 | }
30 | \description{
31 | Fetches all evidence & conclusion ontology (ECO) information from the QuickGO EBI database. The ECO project is
32 | maintained through a public \href{https://github.com/evidenceontology/evidenceontology}{GitHub repository}.
33 | }
34 | \details{
35 | According to the GitHub repository ECO is defined as follows:
36 | 
37 | "The Evidence & Conclusion Ontology (ECO) describes types of scientific evidence within the
38 | biological research domain that arise from laboratory experiments, computational methods,
39 | literature curation, or other means. Researchers use evidence to support conclusions
40 | that arise out of scientific research. Documenting evidence during scientific research
41 | is essential, because evidence gives us a sense of why we believe what we think we know.
42 | Conclusions are asserted as statements about things that are believed to be true, for
43 | example that a protein has a particular function (i.e. a protein functional annotation) or
44 | that a disease is associated with a particular gene variant (i.e. a phenotype-gene association).
45 | A systematic and structured (i.e. ontological) classification of evidence allows us to store,
46 | retreive, share, and compare data associated with that evidence using computers, which are
47 | essential to navigating the ever-growing (in size and complexity) corpus of scientific
48 | information."
49 | 
50 | More information can be found in their publication (\doi{10.1093/nar/gky1036}).
51 | }
52 | \examples{
53 | \donttest{
54 | eco <- fetch_eco()
55 | 
56 | head(eco)
57 | }
58 | }
59 | 


--------------------------------------------------------------------------------
/man/filter_cv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/filter_cv.R
 3 | \name{filter_cv}
 4 | \alias{filter_cv}
 5 | \title{Data filtering based on coefficients of variation (CV)}
 6 | \usage{
 7 | filter_cv(
 8 |   data,
 9 |   grouping,
10 |   condition,
11 |   log2_intensity,
12 |   cv_limit = 0.25,
13 |   min_conditions,
14 |   silent = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{data}{a data frame that contains at least the input variables.}
19 | 
20 | \item{grouping}{a character column in the \code{data} data frame that contains the grouping
21 | variable that can be either precursors, peptides or proteins.}
22 | 
23 | \item{condition}{a character or numeric column in the \code{data} data frame that contains
24 | information on the sample condition.}
25 | 
26 | \item{log2_intensity}{a numeric column in the \code{data} data frame that contains log2
27 | transformed intensities.}
28 | 
29 | \item{cv_limit}{optional, a numeric value that specifies the CV cutoff that will be applied.
30 | Default is 0.25.}
31 | 
32 | \item{min_conditions}{a numeric value that specifies the minimum number of conditions for
33 | which grouping CVs should be below the cutoff.}
34 | 
35 | \item{silent}{a logical value that specifies if a message with the number of filtered out
36 | conditions should be returned. Default is FALSE.}
37 | }
38 | \value{
39 | The CV filtered data frame.
40 | }
41 | \description{
42 | Filters the input data based on precursor, peptide or protein intensity coefficients of variation.
43 | The function should be used to ensure that only robust measurements and quantifications are used for
44 | data analysis. It is advised to use the function after inspection of raw values (quality control)
45 | and median normalisation. Generally, the function calculates CVs of each peptide, precursor or
46 | protein for each condition and removes peptides, precursors or proteins that have a CV below
47 | the cutoff in fewer than the (user-defined) required number of conditions. Since the user-defined
48 | cutoff is fixed and does not depend on the number of conditions that have detected values, the
49 | function might bias for data completeness.
50 | }
51 | \examples{
52 | set.seed(123) # Makes example reproducible
53 | 
54 | # Create synthetic data
55 | data <- create_synthetic_data(
56 |   n_proteins = 50,
57 |   frac_change = 0.05,
58 |   n_replicates = 3,
59 |   n_conditions = 2,
60 |   method = "effect_random",
61 |   additional_metadata = FALSE
62 | )
63 | 
64 | # Filter coefficients of variation
65 | data_filtered <- filter_cv(
66 |   data = data,
67 |   grouping = peptide,
68 |   condition = condition,
69 |   log2_intensity = peptide_intensity_missing,
70 |   cv_limit = 0.25,
71 |   min_conditions = 2
72 | )
73 | }
74 | 


--------------------------------------------------------------------------------
/R/fetch_kegg.R:
--------------------------------------------------------------------------------
 1 | #' Fetch KEGG pathway data from KEGG
 2 | #'
 3 | #' Fetches gene IDs and corresponding pathway IDs and names for the provided organism.
 4 | #'
 5 | #' @param species a character value providing an abreviated species name. "hsa" for human, "eco"
 6 | #' for E. coli and "sce" for S. cerevisiae. Additional possible names can be found for
 7 | #' \href{https://www.genome.jp/kegg-bin/show_organism?category=Eukaryotes}{eukaryotes} and for
 8 | #' \href{https://www.genome.jp/kegg-bin/show_organism?category=Prokaryotes}{prokaryotes}.
 9 | #'
10 | #' @return A data frame that contains gene IDs with corresponding pathway IDs and names for a
11 | #' selected organism.
12 | #' @importFrom dplyr left_join
13 | #' @importFrom stringr str_replace_all
14 | #' @importFrom magrittr %>%
15 | #' @importFrom curl has_internet
16 | #' @export
17 | #'
18 | #' @examples
19 | #' \donttest{
20 | #' kegg <- fetch_kegg(species = "hsa")
21 | #'
22 | #' head(kegg)
23 | #' }
24 | fetch_kegg <- function(species) {
25 |   if (!curl::has_internet()) {
26 |     message("No internet connection.")
27 |     return(invisible(NULL))
28 |   }
29 |   # download kegg_id pathway link
30 |   url_link <- paste("https://rest.kegg.jp/link/pathway", species, sep = "/")
31 |   result_link <- try_query(url_link, col_names = FALSE, progress = FALSE, show_col_types = FALSE)
32 |   if (methods::is(result_link, "character")) {
33 |     message(result_link)
34 |     return(invisible(NULL))
35 |   }
36 |   colnames(result_link) <- c("kegg_id", "pathway_id")
37 |   result_link$pathway_id <- stringr::str_replace_all(result_link$pathway_id,
38 |     pattern = "path:",
39 |     replacement = ""
40 |   )
41 |   # download pathway_id names
42 |   url_name <- paste("https://rest.kegg.jp/list/pathway", species, sep = "/")
43 |   result_name <- try_query(url_name, col_names = FALSE, progress = FALSE, show_col_types = FALSE)
44 |   if (methods::is(result_name, "character")) {
45 |     message(result_name)
46 |     return(invisible(NULL))
47 |   }
48 |   colnames(result_name) <- c("pathway_id", "pathway_name")
49 | 
50 |   # download kegg_id to uniprot_id conversion
51 |   url_conv <- paste("https://rest.kegg.jp/conv/uniprot", species, sep = "/")
52 |   result_conv <- try_query(url_conv, col_names = FALSE, progress = FALSE, show_col_types = FALSE)
53 |   if (methods::is(result_conv, "character")) {
54 |     message(result_conv)
55 |     return(invisible(NULL))
56 |   }
57 |   colnames(result_conv) <- c("kegg_id", "uniprot_id")
58 |   result_conv$uniprot_id <- stringr::str_replace_all(result_conv$uniprot_id,
59 |     pattern = "up:",
60 |     replacement = ""
61 |   )
62 |   # combine datasets
63 |   result <- result_link %>%
64 |     dplyr::left_join(result_name, by = "pathway_id") %>%
65 |     dplyr::left_join(result_conv, by = "kegg_id", relationship = "many-to-many")
66 |   result
67 | }
68 | 


--------------------------------------------------------------------------------
/man/qc_ranked_intensities.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_ranked_intensities.R
 3 | \name{qc_ranked_intensities}
 4 | \alias{qc_ranked_intensities}
 5 | \title{Check ranked intensities}
 6 | \usage{
 7 | qc_ranked_intensities(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   intensity_log2,
12 |   facet = FALSE,
13 |   plot = FALSE,
14 |   y_axis_transformation = "log10",
15 |   interactive = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{data}{a data frame that contains at least sample names, grouping identifiers (precursor,
20 | peptide or protein) and log2 transformed intensities for each grouping identifier.}
21 | 
22 | \item{sample}{a character column in the \code{data} data frame that contains the sample names.}
23 | 
24 | \item{grouping}{a character column in the \code{data} data frame that contains protein, precursor,
25 | or peptide identifiers.}
26 | 
27 | \item{intensity_log2}{a numeric column in the \code{data} data frame that contains the log2
28 | transformed intensities of the selected grouping variable.}
29 | 
30 | \item{facet}{a logical value that specifies whether the calculation should be done group wise by
31 | sample and if the resulting plot should be faceted by sample. (default is \code{FALSE}).
32 | If \code{facet = FALSE} the median of each protein intensity will be returned.}
33 | 
34 | \item{plot}{a logical value that specifies whether the result should be plotted (default is \code{FALSE}).}
35 | 
36 | \item{y_axis_transformation}{a character value that determines the y-axis transformation. The
37 | value is either "log2" or "log10" (default is "log10").}
38 | 
39 | \item{interactive}{a logical value that specifies whether the plot should be interactive
40 | (default is \code{FALSE}).}
41 | }
42 | \value{
43 | A data frame containing the ranked intensities is returned. If \code{plot = TRUE} a plot
44 | is returned. The intensities are log10 transformed for the plot.
45 | }
46 | \description{
47 | Calculates and plots ranked intensities for proteins, peptides or precursors.
48 | }
49 | \examples{
50 | set.seed(123) # Makes example reproducible
51 | 
52 | # Create synthetic data
53 | data <- create_synthetic_data(
54 |   n_proteins = 50,
55 |   frac_change = 0.05,
56 |   n_replicates = 4,
57 |   n_conditions = 3,
58 |   method = "effect_random",
59 |   additional_metadata = FALSE
60 | )
61 | 
62 | # Plot ranked intensities for all samples combined
63 | qc_ranked_intensities(
64 |   data = data,
65 |   sample = sample,
66 |   grouping = peptide,
67 |   intensity_log2 = peptide_intensity,
68 |   plot = TRUE,
69 | )
70 | 
71 | # Plot ranked intensities for each sample separately
72 | qc_ranked_intensities(
73 |   data = data,
74 |   sample = sample,
75 |   grouping = peptide,
76 |   intensity_log2 = peptide_intensity,
77 |   plot = TRUE,
78 |   facet = TRUE
79 | )
80 | 
81 | }
82 | 


--------------------------------------------------------------------------------
/R/find_all_subs.R:
--------------------------------------------------------------------------------
 1 | #' Find all sub IDs of an ID in a network
 2 | #'
 3 | #' For a given ID, find all sub IDs and their sub IDs etc. The type of
 4 | #' relationship can be selected too. This is a helper function for other functions.
 5 | #'
 6 | #' @param data a data frame that contains relational information on IDs (main_id) their sub
 7 | #' IDs (sub_id) and their relationship (type). For ChEBI this data frame can be obtained by calling
 8 | #' \code{fetch_chebi(relation = TRUE)}. For ECO data it can be obtained by calling fetch_eco(relation = TRUE).
 9 | #' @param ids a character vector of IDs for which sub IDs should be searched.
10 | #' @param main_id a character or integer column containing IDs. Default is \code{id} for ChEBI IDs.
11 | #' @param type a character column that contains the type of interactions. Default is \code{type} for ChEBI IDs.
12 | #' @param accepted_types a character vector containing the accepted_types of relationships that should be considered
13 | #' for the search. It is possible to use "all" relationships. The default type is "is_a". A list of
14 | #' possible relationships for e.g. ChEBI IDs can be found
15 | #' \href{https://docs.google.com/document/d/1_w-DwBdCCOh1gMeeP6yqGzcnkpbHYOa3AGSODe5epcg/edit#heading=h.hnsqoqu978s5}{here}.
16 | #' @param exclude_parent_id a logical value that specifies if the parent ID should be included in
17 | #' the returned list.
18 | #'
19 | #' @return A list of character vectors containing the provided ID and all of its sub IDs. It
20 | #' contains one element per input ID.
21 | #' @importFrom dplyr select filter pull
22 | #' @importFrom magrittr %>%
23 | #' @importFrom purrr map
24 | #' @importFrom rlang .data
25 | find_all_subs <- function(data,
26 |                           ids,
27 |                           main_id = id,
28 |                           type = type,
29 |                           accepted_types = "is_a",
30 |                           exclude_parent_id = FALSE) {
31 |   if (!requireNamespace("igraph", quietly = TRUE)) {
32 |     message("Package \"igraph\" is needed for this function to work. Please install it.", call. = FALSE)
33 |     return(invisible(NULL))
34 |   }
35 |   if (ifelse(length(accepted_types) == 1, accepted_types == "all", FALSE)) {
36 |     data <- data %>%
37 |       dplyr::select(-{{ type }})
38 |   } else {
39 |     data <- data %>%
40 |       dplyr::filter({{ type }} %in% accepted_types) %>%
41 |       dplyr::select(-{{ type }})
42 |   }
43 |   # Generate graph
44 |   g <- igraph::graph_from_data_frame(data, directed = TRUE)
45 | 
46 |   result <- purrr::map(ids, function(x) {
47 |     if (!(x %in% dplyr::pull(data, {{ main_id }}))) {
48 |       return(NULL)
49 |     }
50 |     r <- igraph::subcomponent(g, match(x, igraph::V(g)$name), "out")$name
51 |     if (exclude_parent_id) {
52 |       r <- r[r != x]
53 |     }
54 | 
55 |     r
56 |   })
57 |   result
58 | }
59 | 


--------------------------------------------------------------------------------
/man/calculate_aa_scores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_aa_scores.R
 3 | \name{calculate_aa_scores}
 4 | \alias{calculate_aa_scores}
 5 | \title{Calculate scores for each amino acid position in a protein sequence}
 6 | \usage{
 7 | calculate_aa_scores(
 8 |   data,
 9 |   protein,
10 |   diff = diff,
11 |   adj_pval = adj_pval,
12 |   start_position,
13 |   end_position,
14 |   retain_columns = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{data}{a data frame containing at least the input columns.}
19 | 
20 | \item{protein}{a character column in the data frame containing the protein identifier or name.}
21 | 
22 | \item{diff}{a numeric column in the \code{data} data frame containing the log2 fold change.}
23 | 
24 | \item{adj_pval}{a numeric column in the \code{data} data frame containing the adjusted p-value.}
25 | 
26 | \item{start_position}{a numeric column in the \code{data} data frame containing the start position
27 | of a peptide or precursor.}
28 | 
29 | \item{end_position}{a numeric column in the data frame containing the end position of a peptide or
30 | precursor.}
31 | 
32 | \item{retain_columns}{a vector indicating if certain columns should be retained from the input
33 | data frame. Default is not retaining additional columns \code{retain_columns = NULL}. Specific
34 | columns can be retained by providing their names (not in quotations marks, just like other
35 | column names, but in a vector).}
36 | }
37 | \value{
38 | A data frame that contains the aggregated scores per amino acid position, enabling to
39 | draw fingerprints for each individual protein.
40 | }
41 | \description{
42 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
43 | Calculate a score for each amino acid position in a protein sequence based on the product of the
44 | -log10(adjusted p-value) and the absolute log2(fold change) per peptide covering this amino acid. In detail, all the
45 | peptides are aligned along the sequence of the corresponding protein, and the average score per
46 | amino acid position is computed. In a limited proteolysis coupled to mass spectrometry (LiP-MS)
47 | experiment, the score allows to prioritize and narrow down structurally affected regions.
48 | }
49 | \examples{
50 | 
51 | data <- data.frame(
52 |   pg_protein_accessions = c(rep("protein_1", 10)),
53 |   diff = c(2, -3, 1, 2, 3, -3, 5, 1, -0.5, 2),
54 |   adj_pval = c(0.001, 0.01, 0.2, 0.05, 0.002, 0.5, 0.4, 0.7, 0.001, 0.02),
55 |   start = c(1, 3, 5, 10, 15, 25, 28, 30, 41, 51),
56 |   end = c(6, 8, 10, 16, 23, 35, 35, 35, 48, 55)
57 | )
58 | calculate_aa_scores(
59 |   data,
60 |   protein = pg_protein_accessions,
61 |   diff = diff,
62 |   adj_pval = adj_pval,
63 |   start_position = start,
64 |   end_position = end
65 | )
66 | }
67 | \author{
68 | Patrick Stalder
69 | }
70 | 


--------------------------------------------------------------------------------
/man/qc_peak_width.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_peak_width.R
 3 | \name{qc_peak_width}
 4 | \alias{qc_peak_width}
 5 | \title{Peak width over retention time}
 6 | \usage{
 7 | qc_peak_width(
 8 |   data,
 9 |   sample,
10 |   intensity,
11 |   retention_time,
12 |   peak_width = NULL,
13 |   retention_time_start = NULL,
14 |   retention_time_end = NULL,
15 |   remove_na_intensities = TRUE,
16 |   interactive = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{a data frame containing at least sample names and protein IDs.}
21 | 
22 | \item{sample}{a character column in the \code{data} data frame that contains the sample names.}
23 | 
24 | \item{intensity}{a numeric column in the \code{data} data frame that contains intensities. If
25 | \code{remove_na_intensities = FALSE}, this argument is not required.}
26 | 
27 | \item{retention_time}{a numeric column in the \code{data} data frame that contains retention
28 | times of precursors.}
29 | 
30 | \item{peak_width}{a numeric column in the \code{data} data frame that contains peak width
31 | information. It is not required if \code{retention_time_start} and \code{retention_time_end}
32 | columns are provided.}
33 | 
34 | \item{retention_time_start}{a numeric column in the \code{data} data frame that contains the
35 | start time of the precursor elution peak. It is not required if the \code{peak_width} column
36 | is provided.}
37 | 
38 | \item{retention_time_end}{a numeric column in the \code{data} data frame that contains the end
39 | time of the precursor elution peak. It is not required if the \code{peak_width} column is
40 | provided.}
41 | 
42 | \item{remove_na_intensities}{a logical value that specifies if sample/grouping combinations
43 | with intensities that are NA (not quantified IDs) should be dropped from the data frame.
44 | Default is TRUE since we are usually interested in the peak width of quantifiable data.}
45 | 
46 | \item{interactive}{a logical value that specifies whether the plot should be interactive
47 | (default is FALSE).}
48 | }
49 | \value{
50 | A line plot displaying one minute binned median precursor elution peak width over
51 | retention time for each sample.
52 | }
53 | \description{
54 | Plots one minute binned median precursor elution peak width over retention time for each sample.
55 | }
56 | \examples{
57 | 
58 | data <- data.frame(
59 |   r_file_name = c(rep("sample_1", 10), rep("sample2", 10)),
60 |   fg_quantity = c(rep(2000, 20)),
61 |   eg_mean_apex_rt = c(rep(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 2)),
62 |   eg_start_rt = c(0.5, 1, 3, 4, 5, 6, 7, 7.5, 8, 9, 1, 2, 2, 3, 4, 5, 5, 8, 9, 9),
63 |   eg_end_rt = c(
64 |     1.5, 2, 3.1, 4.5, 5.8, 6.6, 8, 8, 8.4,
65 |     9.1, 3, 2.2, 4, 3.4, 4.5, 5.5, 5.6, 8.3, 10, 12
66 |   )
67 | )
68 | qc_peak_width(
69 |   data,
70 |   sample = r_file_name,
71 |   intensity = fg_quantity,
72 |   retention_time = eg_mean_apex_rt,
73 |   retention_time_start = eg_start_rt,
74 |   retention_time_end = eg_end_rt
75 | )
76 | }
77 | 


--------------------------------------------------------------------------------
/man/qc_ids.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_ids.R
 3 | \name{qc_ids}
 4 | \alias{qc_ids}
 5 | \title{Check number of precursor, peptide or protein IDs}
 6 | \usage{
 7 | qc_ids(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   intensity,
12 |   remove_na_intensities = TRUE,
13 |   condition = NULL,
14 |   title = "ID count per sample",
15 |   plot = TRUE,
16 |   interactive = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{a data frame containing at least sample names and precursor/peptide/protein IDs.}
21 | 
22 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample name.}
23 | 
24 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor or
25 | peptide identifiers.}
26 | 
27 | \item{intensity}{a numeric column in the \code{data} data frame that contains raw or log2
28 | transformed intensities. If \code{remove_na_intensities = FALSE}, this argument is optional.}
29 | 
30 | \item{remove_na_intensities}{a logical value that specifies if sample/grouping combinations with
31 | intensities that are NA (not quantified IDs) should be dropped from the data frame. Default is
32 | TRUE since we are usually interested in the number of quantifiable IDs.}
33 | 
34 | \item{condition}{optional, a column in the \code{data} data frame that contains condition information
35 | (e.g. "treated" and "control"). If this column is provided, the bars in the plot will be coloured
36 | according to the condition.}
37 | 
38 | \item{title}{optional, a character value that specifies the plot title (default is "ID count
39 | per sample").}
40 | 
41 | \item{plot}{a logical value that indicates whether the result should be plotted.}
42 | 
43 | \item{interactive}{a logical value that specifies whether the plot should be interactive
44 | (default is FALSE).}
45 | }
46 | \value{
47 | A bar plot with the height corresponding to the number of IDs, each bar represents one
48 | sample (if \code{plot = TRUE}). If \code{plot = FALSE} a table with ID counts is returned.
49 | }
50 | \description{
51 | Returns a plot or table of the number of IDs for each sample. The default settings remove
52 | grouping variables without quantitative information (intensity is NA). These will not be
53 | counted as IDs.
54 | }
55 | \examples{
56 | set.seed(123) # Makes example reproducible
57 | 
58 | # Create example data
59 | data <- create_synthetic_data(
60 |   n_proteins = 100,
61 |   frac_change = 0.05,
62 |   n_replicates = 3,
63 |   n_conditions = 2,
64 |   method = "effect_random"
65 | )
66 | 
67 | # Calculate number of identifications
68 | qc_ids(
69 |   data = data,
70 |   sample = sample,
71 |   grouping = peptide,
72 |   intensity = peptide_intensity_missing,
73 |   condition = condition,
74 |   plot = FALSE
75 | )
76 | 
77 | # Plot number of identifications
78 | qc_ids(
79 |   data = data,
80 |   sample = sample,
81 |   grouping = peptide,
82 |   intensity = peptide_intensity_missing,
83 |   condition = condition,
84 |   plot = TRUE
85 | )
86 | }
87 | 


--------------------------------------------------------------------------------
/man/qc_pca.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_pca.R
 3 | \name{qc_pca}
 4 | \alias{qc_pca}
 5 | \title{Plot principal component analysis}
 6 | \usage{
 7 | qc_pca(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   intensity,
12 |   condition,
13 |   components = c("PC1", "PC2"),
14 |   digestion = NULL,
15 |   plot_style = "pca"
16 | )
17 | }
18 | \arguments{
19 | \item{data}{a data frame that contains sample names, peptide or precursor identifiers,
20 | corresponding intensities and a condition column indicating e.g. the treatment.}
21 | 
22 | \item{sample}{a character column in the \code{data} data frame that contains the sample name.}
23 | 
24 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor
25 | or peptide identifiers.}
26 | 
27 | \item{intensity}{a numeric column in the \code{data} data frame that contains the corresponding
28 | intensity values for each peptide or precursor.}
29 | 
30 | \item{condition}{a numeric or character column in the \code{data} data frame that contains condition information
31 | (e.g. "treated" and "control").}
32 | 
33 | \item{components}{a character vector indicating the two components that should be displayed in
34 | the plot. By default these are PC1 and PC2. You can provide these using a character vector of
35 | the form c("PC1", "PC2").}
36 | 
37 | \item{digestion}{optional, a character column in the \code{data} data frame that indicates the
38 | mode of digestion (limited proteolysis or tryptic digest). Alternatively, any other variable
39 | by which the data should be split can be provided.}
40 | 
41 | \item{plot_style}{a character value that specifies what plot should be returned. If
42 | \code{plot_style = "pca"} is selected the two PCA components supplied with the \code{components} argument
43 | are plotted against each other. This is the default. \code{plot_style = "scree"} returns a scree
44 | plot that displays the variance explained by each principal component in percent. The scree is
45 | useful for checking if any other than the default first two components should be plotted.}
46 | }
47 | \value{
48 | A principal component analysis plot showing PC1 and PC2. If \code{plot_style = "scree"}, a
49 | scree plot for all dimensions is returned.
50 | }
51 | \description{
52 | Plots a principal component analysis based on peptide or precursor intensities.
53 | }
54 | \examples{
55 | set.seed(123) # Makes example reproducible
56 | 
57 | # Create example data
58 | data <- create_synthetic_data(
59 |   n_proteins = 100,
60 |   frac_change = 0.05,
61 |   n_replicates = 3,
62 |   n_conditions = 2,
63 | )
64 | 
65 | # Plot scree plot
66 | qc_pca(
67 |   data = data,
68 |   sample = sample,
69 |   grouping = peptide,
70 |   intensity = peptide_intensity_missing,
71 |   condition = condition,
72 |   plot_style = "scree"
73 | )
74 | 
75 | # Plot principal components
76 | qc_pca(
77 |   data = data,
78 |   sample = sample,
79 |   grouping = peptide,
80 |   intensity = peptide_intensity_missing,
81 |   condition = condition
82 | )
83 | }
84 | 


--------------------------------------------------------------------------------
/R/normalise.R:
--------------------------------------------------------------------------------
 1 | #' Intensity normalisation
 2 | #'
 3 | #' `r lifecycle::badge('deprecated')`
 4 | #' This function was deprecated due to its name changing to `normalise()`.
 5 | #' The normalisation method in the new function needs to be provided as an argument.
 6 | #'
 7 | #' @return A data frame with a column called \code{normalised_intensity_log2} containing the
 8 | #' normalised intensity values.
 9 | #' @keywords internal
10 | #' @export
11 | median_normalisation <- function(...) {
12 |   # This function has been renamed and is therefore deprecated.
13 |   lifecycle::deprecate_warn("0.2.0",
14 |     "median_normalisation()",
15 |     "normalise()",
16 |     details = "This function has been renamed."
17 |   )
18 | 
19 |   normalise(...)
20 | }
#' Intensity normalisation
#'
#' Performs normalisation on intensities. For median normalisation the normalised intensity is the
#' original intensity minus the run median plus the global median. This is also the way it is
#' implemented in the Spectronaut search engine.
#'
#' @param data a data frame containing at least sample names and intensity values. Please note that if the
#' data frame is grouped, the normalisation will be computed by group. Duplicated rows are removed
#' before the medians are calculated.
#' @param sample a character column in the \code{data} data frame that contains the sample names.
#' @param intensity_log2 a numeric column in the \code{data} data frame that contains the log2 transformed
#' intensity values to be normalised.
#' @param method a character value specifying the method to be used for normalisation. Default
#' is "median", which is currently the only available method. Any other value raises an error.
#'
#' @return A data frame with a column called \code{normalised_intensity_log2} containing the
#' normalised intensity values.
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats median
#' @export
#'
#' @examples
#' data <- data.frame(
#'   r_file_name = c("s1", "s2", "s3", "s1", "s2", "s3"),
#'   intensity_log2 = c(18, 19, 17, 20, 21, 19)
#' )
#'
#' normalise(data,
#'   sample = r_file_name,
#'   intensity_log2 = intensity_log2,
#'   method = "median"
#' )
normalise <-
  function(data,
           sample,
           intensity_log2,
           method = "median") {
    # Ensure method is a single valid value; the length check prevents the unrelated
    # "condition has length > 1" error for vector input
    if (length(method) != 1 || !(method %in% c("median"))) {
      stop("Invalid method. Available methods: median")
    }

    if (method == "median") {
      median_normalised <- data %>%
        dplyr::distinct() %>%
        # global median over all samples (within existing groups if data is grouped)
        dplyr::mutate(global_median = stats::median({{ intensity_log2 }}, na.rm = TRUE)) %>%
        # run median per sample; .add = TRUE keeps groups the user already defined
        dplyr::group_by({{ sample }}, .add = TRUE) %>%
        dplyr::mutate(run_median = stats::median({{ intensity_log2 }}, na.rm = TRUE)) %>%
        # only the sample grouping is removed, pre-existing groups stay in place
        dplyr::ungroup({{ sample }}) %>%
        dplyr::mutate(normalised_intensity_log2 = {{ intensity_log2 }} - .data$run_median + .data$global_median) %>%
        # helper columns are dropped by name; the .data pronoun is deprecated in
        # selection contexts and would raise a warning here
        dplyr::select(-c("run_median", "global_median"))

      return(median_normalised)
    }
  }
77 | 


--------------------------------------------------------------------------------
/R/fetch_uniprot_proteome.R:
--------------------------------------------------------------------------------
 1 | #' Fetch proteome data from UniProt
 2 | #'
 3 | #' Fetches proteome data from UniProt for the provided organism ID.
 4 | #'
 5 | #' @param organism_id a numeric value that specifies the NCBI taxonomy identifier (TaxId) for an
 6 | #' organism.
 7 | #' @param columns a character vector of metadata columns that should be imported from UniProt (all
 8 | #' possible columns can be found \href{https://www.uniprot.org/help/return_fields}{here}. For
 9 | #' cross-referenced database provide the database name with the prefix "xref_", e.g. \code{"xref_pdb"}).
10 | #' Note: Not more than one or two columns should be selected otherwise the function will not be
11 | #' able to efficiently retrieve the information. If more information is needed, \code{fetch_uniprot()}
12 | #' can be used with the IDs retrieved by this function.
13 | #' @param reviewed a logical value that determines if only reviewed protein entries will be retrieved.
14 | #' @param timeout a numeric value specifying the time in seconds until the download times out.
15 | #' The default is 60 seconds.
16 | #' @param max_tries a numeric value that specifies the number of times the function tries to download
17 | #' the data in case an error occurs. The default is 2.
18 | #'
19 | #' @return A data frame that contains all protein metadata specified in \code{columns} for the
20 | #' organism of choice.
21 | #' @importFrom janitor make_clean_names
22 | #' @export
23 | #'
24 | #' @examples
25 | #' \donttest{
26 | #' head(fetch_uniprot_proteome(9606))
27 | #' }
fetch_uniprot_proteome <-
  function(organism_id,
           columns = c("accession"),
           reviewed = TRUE,
           timeout = 120,
           max_tries = 5) {
    # Fail softly when offline: emit a message and return NULL invisibly
    # instead of raising an error.
    if (!curl::has_internet()) {
      message("No internet connection.")
      return(invisible(NULL))
    }

    if (length(organism_id) == 0) {
      stop("No valid organism ID found.")
    }
    if (length(columns) > 4) {
      warning(strwrap("We suggest to use the fetch_uniprot function to fetch more than four columns.",
        prefix = "\n", initial = ""
      ))
    }

    # Use the TLS endpoint so the query and the response are not sent in clear text.
    url <- "https://rest.uniprot.org/uniprotkb/stream?query="

    # Column names of the returned data frame are the cleaned versions of the
    # requested UniProt field names.
    column_names <- janitor::make_clean_names(columns)
    collapsed_columns <- paste(columns, collapse = ",")

    # `reviewed` is a single flag, so a scalar `if` is used instead of the
    # vectorised `ifelse()`. Anything that does not coerce to TRUE is treated
    # as "false".
    reviewed_filter <- paste0(
      "reviewed:",
      if (isTRUE(as.logical(reviewed))) "true" else "false"
    )
    organism_filter <- paste0("organism_id:", organism_id)

    query_url <-
      utils::URLencode(paste0(
        url,
        reviewed_filter,
        "+AND+",
        organism_filter,
        "&format=tsv&fields=",
        collapsed_columns
      ))

    result <- try_query(
      query_url,
      timeout = timeout,
      max_tries = max_tries,
      silent = FALSE,
      progress = FALSE,
      show_col_types = FALSE
    )

    # result can either be a data.frame or it is a character string with the error message
    if (!methods::is(result, "data.frame")) {
      # Only inspect the message when there actually is one; this avoids a
      # "argument is of length zero" error in `if` for NULL or empty results.
      timed_out <- is.character(result) &&
        any(stringr::str_detect(result, pattern = "Timeout"), na.rm = TRUE)
      if (timed_out) {
        message('The data retrieval timed out. Consider increasing the "timeout" or "max_tries" argument. \n')
      }
      return(invisible(result))
    }

    colnames(result) <- column_names
    result
  }
72 | 


--------------------------------------------------------------------------------
/R/calculate_sequence_coverage.R:
--------------------------------------------------------------------------------
 1 | #' Protein sequence coverage
 2 | #'
 3 | #' `r lifecycle::badge('deprecated')`
 4 | #' This function was deprecated due to its name changing to `calculate_sequence_coverage()`.
 5 | #'
 6 | #' @return A new column in the \code{data} data frame containing the calculated sequence coverage
 7 | #' for each identified protein
 8 | #' @keywords internal
 9 | #' @export
sequence_coverage <- function(...) {
  # Deprecated alias kept for backwards compatibility: emit the deprecation
  # warning, then forward every argument to the renamed function.
  lifecycle::deprecate_warn(
    when = "0.2.0",
    what = "sequence_coverage()",
    with = "calculate_sequence_coverage()",
    details = "This function has been renamed."
  )

  calculate_sequence_coverage(...)
}
#' Protein sequence coverage
#'
#' Calculate sequence coverage for each identified protein. If \code{data} is grouped, the
#' coverage is calculated separately for each protein sequence within each group.
#'
#' @param data a data frame containing at least the protein sequence and the identified peptides
#' as columns.
#' @param protein_sequence a character column in the \code{data} data frame that contains protein
#' sequences. Can be obtained by using the function \code{fetch_uniprot()}
#' @param peptides a character column in the \code{data} data frame that contains the identified
#' peptides.
#'
#' @return A new column in the \code{data} data frame called \code{coverage} containing the
#' calculated sequence coverage (in percent) for each identified protein.
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom stringr str_count
#' @importFrom rlang .data as_name enquo
#' @importFrom tidyr drop_na
#' @export
#'
#' @examples
#' data <- data.frame(
#'   protein_sequence = c("abcdefghijklmnop", "abcdefghijklmnop"),
#'   pep_stripped_sequence = c("abc", "jklmn")
#' )
#'
#' calculate_sequence_coverage(
#'   data,
#'   protein_sequence = protein_sequence,
#'   peptides = pep_stripped_sequence
#' )
calculate_sequence_coverage <-
  function(data, protein_sequence, peptides) {
    # Remember the grouping columns of the input so that the per-protein result
    # can be joined back onto the original data by protein sequence and groups.
    groups <- dplyr::group_vars(data)

    result <- data %>%
      # drop_na prevents function from failing if a protein group contains only NA peptide sequences.
      tidyr::drop_na({{ peptides }}) %>%
      dplyr::distinct({{ protein_sequence }}, {{ peptides }}) %>%
      dplyr::group_by({{ protein_sequence }}, .add = TRUE) %>%
      find_peptide({{ protein_sequence }}, {{ peptides }}) %>%
      dplyr::mutate(
        sequence_length = nchar({{ protein_sequence }}),
        # Identified positions are replaced by "x" and then counted.
        modified_sequence = replace_identified_by_x(
          {{ protein_sequence }},
          .data$start,
          .data$end
        ),
        covered = stringr::str_count(.data$modified_sequence, "x"),
        coverage = .data$covered / .data$sequence_length * 100
      ) %>%
      # Helper columns are removed by name. Strings are used here because
      # `.data$col` inside select() is deprecated as of tidyselect 1.2.0.
      dplyr::select(
        -c(
          "sequence_length",
          "modified_sequence",
          "covered",
          "start",
          "end",
          "aa_before",
          "last_aa",
          "aa_after"
        ),
        -{{ peptides }}
      ) %>%
      dplyr::distinct() %>%
      dplyr::ungroup()

    data %>%
      dplyr::left_join(result, by = c(rlang::as_name(rlang::enquo(protein_sequence)), groups))
  }
81 | 


--------------------------------------------------------------------------------
/R/pval_distribution_plot.R:
--------------------------------------------------------------------------------
 1 | #' Plot histogram of p-value distribution
 2 | #'
 3 | #' `r lifecycle::badge('deprecated')`
 4 | #' This function was deprecated due to its name changing to `pval_distribution_plot()`.
 5 | #'
 6 | #' @return A histogram plot that shows the p-value distribution.
 7 | #' @keywords internal
 8 | #' @export
 9 | plot_pval_distribution <- function(...) {
10 |   # This function has been renamed and is therefore deprecated.
11 |   lifecycle::deprecate_warn("0.2.0",
12 |     "plot_pval_distribution()",
13 |     "pval_distribution_plot()",
14 |     details = "This function has been renamed."
15 |   )
16 | 
17 |   pval_distribution_plot(...)
18 | }
#' Plot histogram of p-value distribution
#'
#' Plots the distribution of p-values derived from any statistical test as a histogram.
#'
#' @param data a data frame that contains at least grouping identifiers (precursor, peptide or
#' protein) and p-values derived from any statistical test.
#' @param grouping a character column in the \code{data} data frame that contains either precursor,
#' peptide or protein identifiers. For each entry in this column there should be one unique p-value.
#' That means the statistical test that created the p-value should have been performed on the
#' level of the content of this column.
#' @param pval a numeric column in the \code{data} data frame that contains p-values.
#' @param facet_by optional, a character column that contains information by which the data should
#' be faceted into multiple plots. If not provided or \code{NULL}, a single plot is created.
#'
#' @return A histogram plot that shows the p-value distribution.
#' @import ggplot2
#' @importFrom magrittr %>%
#' @importFrom dplyr distinct
#' @importFrom tidyr drop_na
#' @export
#'
#' @examples
#' set.seed(123) # Makes example reproducible
#'
#' # Create example data
#' data <- data.frame(
#'   peptide = paste0("peptide", 1:1000),
#'   pval = runif(n = 1000)
#' )
#'
#' # Plot p-values
#' pval_distribution_plot(
#'   data = data,
#'   grouping = peptide,
#'   pval = pval
#' )
pval_distribution_plot <- function(data, grouping, pval, facet_by = NULL) {
  # Capture the facet column once. Checking the quosure for NULL covers both
  # an omitted argument and an explicit `facet_by = NULL`, whereas `missing()`
  # would treat the latter as a facet request.
  facet_quo <- rlang::enquo(facet_by)
  has_facet <- !rlang::quo_is_null(facet_quo)

  # One row per grouping/p-value (and facet) combination; rows with NA in any
  # of the retained columns are removed.
  input <- data %>%
    dplyr::distinct({{ grouping }}, {{ pval }}, {{ facet_by }}) %>%
    tidyr::drop_na()

  plot <- input %>%
    ggplot2::ggplot(ggplot2::aes(x = {{ pval }})) +
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line widths in
    # favour of `linewidth`; switch once the minimum ggplot2 version allows it.
    ggplot2::geom_histogram(
      binwidth = 0.05,
      boundary = 0,
      color = "black",
      fill = "#5680C1",
      size = 1
    ) +
    ggplot2::labs(title = "P-Value Distribution", x = "P-Value", y = "Frequency")

  if (has_facet) {
    plot <- plot +
      ggplot2::facet_wrap(rlang::new_formula(NULL, facet_quo),
        scales = "fixed"
      )
  }

  plot +
    ggplot2::theme_bw() +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 20),
      axis.title.x = ggplot2::element_text(size = 15),
      axis.text.y = ggplot2::element_text(size = 15),
      axis.text.x = ggplot2::element_text(size = 15),
      axis.title.y = ggplot2::element_text(size = 15),
      strip.text = ggplot2::element_text(size = 15),
      strip.background = ggplot2::element_blank()
    )
}
89 | 


--------------------------------------------------------------------------------
/man/qc_peptide_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_peptide_type.R
 3 | \name{qc_peptide_type}
 4 | \alias{qc_peptide_type}
 5 | \title{Check peptide type percentage share}
 6 | \usage{
 7 | qc_peptide_type(
 8 |   data,
 9 |   sample,
10 |   peptide,
11 |   pep_type,
12 |   intensity,
13 |   remove_na_intensities = TRUE,
14 |   method = "count",
15 |   plot = FALSE,
16 |   interactive = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{a data frame that contains at least the input columns.}
21 | 
22 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample names.}
23 | 
24 | \item{peptide}{a character column in the \code{data} data frame that contains the peptide
25 | sequence.}
26 | 
27 | \item{pep_type}{a character column in the \code{data} data frame that contains the peptide
28 | type. Can be obtained using the \code{find_peptide} and \code{assign_peptide_type} function
29 | together.}
30 | 
31 | \item{intensity}{a numeric column in the \code{data} data frame that contains the corresponding
32 | raw or normalised intensity values (not log2) for each peptide or precursor. Required when
33 | "intensity" is chosen as the method.}
34 | 
35 | \item{remove_na_intensities}{a logical value that specifies if sample/peptide combinations with
36 | intensities that are NA (not quantified IDs) should be dropped from the data frame for analysis
37 | of peptide type distributions. Default is TRUE since we are usually interested in the peptide
38 | type distribution of quantifiable IDs. This is only relevant for method = "count".}
39 | 
40 | \item{method}{a character value that indicates the method used for evaluation.
41 | \code{method = "intensity"} calculates the peptide type percentage by intensity, whereas
42 | \code{method = "count"} calculates the percentage by peptide ID count. Default is
43 | \code{method = "count"}.}
44 | 
45 | \item{plot}{a logical value that indicates whether the result should be plotted.}
46 | 
47 | \item{interactive}{a logical value that indicates whether the plot should be interactive.}
48 | }
49 | \value{
50 | A data frame that contains the calculated percentage shares of each peptide type per
51 | sample. The \code{count} column contains the number of peptides with a specific type. The
52 | \code{peptide_type_percent} column contains the percentage share of a specific peptide type.
53 | }
54 | \description{
55 | Calculates the percentage share of each peptide type (fully-tryptic, semi-tryptic,
56 | non-tryptic) for each sample.
57 | }
58 | \examples{
59 | # Load libraries
60 | library(dplyr)
61 | 
62 | set.seed(123) # Makes example reproducible
63 | 
64 | # Create example data
65 | data <- create_synthetic_data(
66 |   n_proteins = 100,
67 |   frac_change = 0.05,
68 |   n_replicates = 3,
69 |   n_conditions = 2,
70 |   method = "effect_random"
71 | ) \%>\%
72 |   mutate(intensity_non_log2 = 2^peptide_intensity_missing)
73 | 
74 | # Determine peptide type percentages
75 | qc_peptide_type(
76 |   data = data,
77 |   sample = sample,
78 |   peptide = peptide,
79 |   pep_type = pep_type,
80 |   intensity = intensity_non_log2,
81 |   method = "intensity",
82 |   plot = FALSE
83 | )
84 | 
85 | # Plot peptide type
86 | qc_peptide_type(
87 |   data = data,
88 |   sample = sample,
89 |   peptide = peptide,
90 |   pep_type = pep_type,
91 |   intensity = intensity_non_log2,
92 |   method = "intensity",
93 |   plot = TRUE
94 | )
95 | }
96 | 


--------------------------------------------------------------------------------
/man/qc_charge_states.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_charge_states.R
 3 | \name{qc_charge_states}
 4 | \alias{qc_charge_states}
 5 | \title{Check charge state distribution}
 6 | \usage{
 7 | qc_charge_states(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   charge_states,
12 |   intensity = NULL,
13 |   remove_na_intensities = TRUE,
14 |   method = "count",
15 |   plot = FALSE,
16 |   interactive = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{a data frame that contains at least sample names, peptide or precursor identifiers
21 | and charge states for each peptide or precursor.}
22 | 
23 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample name.}
24 | 
25 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor or
26 | peptide identifiers.}
27 | 
28 | \item{charge_states}{a character or numeric column in the \code{data} data frame that contains
29 | the different charge states assigned to the precursor or peptide.}
30 | 
31 | \item{intensity}{a numeric column in the \code{data} data frame that contains the corresponding
32 | raw or normalised intensity values (not log2) for each peptide or precursor. Required when
33 | "intensity" is chosen as the method.}
34 | 
35 | \item{remove_na_intensities}{a logical value that specifies if sample/grouping combinations with
36 | intensities that are NA (not quantified IDs) should be dropped from the data frame for analysis
37 | of charge states. Default is TRUE since we are usually interested in quantifiable peptides.
38 | This is only relevant for method = "count".}
39 | 
40 | \item{method}{a character value that indicates the method used for evaluation. "count"
41 | calculates the charge state distribution based on counts of the corresponding peptides or
42 | precursors in the charge state group, "intensity" calculates the percentage of precursors or
43 | peptides in each charge state group based on the corresponding intensity values.}
44 | 
45 | \item{plot}{a logical value that indicates whether the result should be plotted.}
46 | 
47 | \item{interactive}{a logical value that specifies whether the plot should be interactive
48 | (default is FALSE).}
49 | }
50 | \value{
51 | A data frame that contains the calculated percentage made up by the sum of either
52 | all counts or intensities of peptides or precursors of the corresponding charge state
53 | (depending on which method is chosen).
54 | }
55 | \description{
56 | Calculates the charge state distribution for each sample (by count or intensity).
57 | }
58 | \examples{
59 | # Load libraries
60 | library(dplyr)
61 | 
62 | set.seed(123) # Makes example reproducible
63 | 
64 | # Create example data
65 | data <- create_synthetic_data(
66 |   n_proteins = 100,
67 |   frac_change = 0.05,
68 |   n_replicates = 3,
69 |   n_conditions = 2,
70 |   method = "effect_random"
71 | ) \%>\%
72 |   mutate(intensity_non_log2 = 2^peptide_intensity_missing)
73 | 
74 | # Calculate charge percentages
75 | qc_charge_states(
76 |   data = data,
77 |   sample = sample,
78 |   grouping = peptide,
79 |   charge_states = charge,
80 |   intensity = intensity_non_log2,
81 |   method = "intensity",
82 |   plot = FALSE
83 | )
84 | 
85 | # Plot charge states
86 | qc_charge_states(
87 |   data = data,
88 |   sample = sample,
89 |   grouping = peptide,
90 |   charge_states = charge,
91 |   intensity = intensity_non_log2,
92 |   method = "intensity",
93 |   plot = TRUE
94 | )
95 | }
96 | 


--------------------------------------------------------------------------------
/man/predict_alphafold_domain.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/predict_alphafold_domain.R
 3 | \name{predict_alphafold_domain}
 4 | \alias{predict_alphafold_domain}
 5 | \title{Predict protein domains of AlphaFold predictions}
 6 | \usage{
 7 | predict_alphafold_domain(
 8 |   pae_list,
 9 |   pae_power = 1,
10 |   pae_cutoff = 5,
11 |   graph_resolution = 1,
12 |   return_data_frame = FALSE,
13 |   show_progress = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{pae_list}{a list of proteins that contains aligned errors for their AlphaFold predictions.
18 | This list can be retrieved with the \code{fetch_alphafold_aligned_error()} function. It should contain a
19 | column containing the scored residue (\code{scored_residue}), the aligned residue (\code{aligned_residue}) and
20 | the predicted aligned error (\code{error}).}
21 | 
22 | \item{pae_power}{a numeric value, each edge in the graph will be weighted proportional to (\code{1 / pae^pae_power}).
23 | Default is \code{1}.}
24 | 
25 | \item{pae_cutoff}{a numeric value, graph edges will only be created for residue pairs with \code{pae < pae_cutoff}.
26 | Default is \code{5}.}
27 | 
28 | \item{graph_resolution}{a numeric value that regulates how aggressive the clustering algorithm is. Smaller values
29 | lead to larger clusters. Value should be larger than zero, and values larger than 5 are unlikely to be useful.
30 | Higher values lead to stricter (i.e. smaller) clusters. The value is provided to the Leiden clustering algorithm
31 | of the \code{igraph} package as \code{graph_resolution / 100}. Default is \code{1}.}
32 | 
33 | \item{return_data_frame}{a logical value; if \code{TRUE} a data frame instead of a list
34 | is returned. It is recommended to only use this if information for few proteins is retrieved.
35 | Default is \code{FALSE}.}
36 | 
37 | \item{show_progress}{a logical value that specifies if a progress bar will be shown. Default
38 | is \code{TRUE}.}
39 | }
40 | \value{
41 | A list of the provided proteins that contains domain assignments for each residue. If \code{return_data_frame} is
42 | \code{TRUE}, a data frame with this information is returned instead. The data frame contains the
43 | following columns:
44 | \itemize{
45 | \item residue: The protein residue number.
46 | \item domain: A numeric value representing a distinct predicted domain in the protein.
47 | \item accession: The UniProt protein identifier.
48 | }
49 | }
50 | \description{
51 | Uses the predicted aligned error (PAE) of AlphaFold predictions to find possible protein domains.
52 | A graph-based community clustering algorithm (Leiden clustering) is used on the predicted error
53 | (distance) between residues of a protein in order to infer pseudo-rigid groups in the protein. This is
54 | for example useful in order to know which parts of protein predictions are likely in a fixed relative
55 | position towards each other and which might have varying distances.
56 | This function is based on python code written by Tristan Croll. The original code can be found on his
57 | \href{https://github.com/tristanic/pae_to_domains}{GitHub page}.
58 | }
59 | \examples{
60 | \donttest{
61 | # Fetch aligned errors
62 | aligned_error <- fetch_alphafold_aligned_error(
63 |   uniprot_ids = c("F4HVG8", "O15552"),
64 |   error_cutoff = 4
65 | )
66 | 
67 | # Predict protein domains
68 | af_domains <- predict_alphafold_domain(
69 |   pae_list = aligned_error,
70 |   return_data_frame = TRUE
71 | )
72 | 
73 | head(af_domains, n = 10)
74 | }
75 | }
76 | 


--------------------------------------------------------------------------------
/R/assign_peptide_type.R:
--------------------------------------------------------------------------------
 1 | #' Assign peptide type
 2 | #'
 3 | #' `r lifecycle::badge('deprecated')`
 4 | #' This function was deprecated due to its name changing to `assign_peptide_type()`.
 5 | #'
 6 | #' @return A data frame that contains the input data and an additional column with the peptide
 7 | #' type information.
 8 | #' @keywords internal
 9 | #' @export
peptide_type <- function(...) {
  # Deprecated alias kept for backwards compatibility: emit the deprecation
  # warning, then forward every argument to the renamed function.
  lifecycle::deprecate_warn(
    when = "0.2.0",
    what = "peptide_type()",
    with = "assign_peptide_type()",
    details = "This function has been renamed."
  )

  assign_peptide_type(...)
}
#' Assign peptide type
#'
#' Based on preceding and C-terminal amino acid, the peptide type of a given peptide is assigned.
#' Peptides with preceding and C-terminal lysine or arginine are considered fully-tryptic. If a
#' peptide is located at the N- or C-terminus of a protein and fulfills the criterion to be
#' fully-tryptic otherwise, it is also considered as fully-tryptic. Peptides that only fulfill the
#' criterion on one terminus are semi-tryptic peptides. Lastly, peptides that are not fulfilling
#' the criteria for both termini are non-tryptic peptides.
#'
#' @param data a data frame containing at least information about the preceding and C-terminal
#' amino acids of peptides.
#' @param aa_before a character column in the \code{data} data frame that contains the preceding amino
#' acid as one letter code.
#' @param last_aa a character column in the \code{data} data frame that contains the C-terminal amino
#' acid as one letter code.
#' @param aa_after a character column in the \code{data} data frame that contains the following amino
#' acid as one letter code.
#'
#' @return A data frame that contains the input data and an additional column with the peptide
#' type information.
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' data <- data.frame(
#'   aa_before = c("K", "S", "T"),
#'   last_aa = c("R", "K", "Y"),
#'   aa_after = c("T", "R", "T")
#' )
#'
#' assign_peptide_type(data, aa_before, last_aa, aa_after)
assign_peptide_type <- function(data,
                                aa_before = aa_before,
                                last_aa = last_aa,
                                aa_after = aa_after) {
  data %>%
    # Classify each unique combination of flanking residues only once.
    dplyr::distinct({{ aa_before }}, {{ last_aa }}, {{ aa_after }}) %>%
    dplyr::mutate(
      # An empty preceding residue is treated like a tryptic N-terminal side.
      N_term_tryp = {{ aa_before }} == "" |
        {{ aa_before }} == "K" |
        {{ aa_before }} == "R",
      # An empty following residue is treated like a tryptic C-terminal side.
      C_term_tryp = {{ last_aa }} == "K" |
        {{ last_aa }} == "R" |
        {{ aa_after }} == ""
    ) %>%
    # The number of tryptic termini (0, 1 or 2) determines the peptide type.
    dplyr::mutate(pep_type = dplyr::case_when(
      .data$N_term_tryp + .data$C_term_tryp == 2 ~ "fully-tryptic",
      .data$N_term_tryp + .data$C_term_tryp == 1 ~ "semi-tryptic",
      .data$N_term_tryp + .data$C_term_tryp == 0 ~ "non-tryptic"
    )) %>%
    # Helper columns are removed by name. Strings are used here because
    # `.data$col` inside select() is deprecated as of tidyselect 1.2.0.
    dplyr::select(-c("N_term_tryp", "C_term_tryp")) %>%
    dplyr::right_join(data, by = c(
      rlang::as_name(rlang::enquo(aa_before)),
      rlang::as_name(rlang::enquo(last_aa)),
      rlang::as_name(rlang::enquo(aa_after))
    ))
}
83 | 


--------------------------------------------------------------------------------
/man/diff_abundance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_diff_abundance.R
 3 | \name{diff_abundance}
 4 | \alias{diff_abundance}
 5 | \title{Calculate differential abundance between conditions}
 6 | \usage{
 7 | diff_abundance(...)
 8 | }
 9 | \value{
10 | A data frame that contains differential abundances (\code{diff}), p-values (\code{pval})
11 | and adjusted p-values (\code{adj_pval}) for each protein, peptide or precursor (depending on
12 | the \code{grouping} variable) and the associated treatment/reference pair. Depending on the
13 | method the data frame contains additional columns:
14 | \itemize{
15 | \item "t-test": The \code{std_error} column contains the standard error of the differential
16 | abundances. \code{n_obs} contains the number of observations for the specific protein, peptide
17 | or precursor (depending on the \code{grouping} variable) and the associated treatment/reference pair.
18 | \item "t-test_mean_sd": Columns labeled as control refer to the second condition of the
19 | comparison pairs. Treated refers to the first condition. \code{mean_control} and \code{mean_treated}
20 | columns contain the means for the reference and treatment condition, respectively. \code{sd_control}
21 | and \code{sd_treated} columns contain the standard deviations for the reference and treatment
22 | condition, respectively. \code{n_control} and \code{n_treated} columns contain the numbers of
23 | samples for the reference and treatment condition, respectively. The \code{std_error} column
24 | contains the standard error of the differential abundances. \code{t_statistic} contains the
25 | t_statistic for the t-test.
26 | \item "moderated_t-test": \code{CI_2.5} and \code{CI_97.5} contain the 2.5\% and 97.5\%
27 | confidence interval borders for differential abundances. \code{avg_abundance} contains average
28 | abundances for treatment/reference pairs (mean of the two group means). \code{t_statistic}
29 | contains the t_statistic for the t-test. \code{B} The B-statistic is the log-odds that the
30 | protein, peptide or precursor (depending on \code{grouping}) has a differential abundance
31 | between the two groups. Suppose B=1.5. The odds of differential abundance is exp(1.5)=4.48, i.e,
32 | about four and a half to one. The probability that there is a differential abundance is
33 | 4.48/(1+4.48)=0.82, i.e., the probability is about 82\% that this group is differentially
34 | abundant. A B-statistic of zero corresponds to a 50-50 chance that the group is differentially
35 | abundant. \code{n_obs} contains the number of observations for the specific protein, peptide or
36 | precursor (depending on the \code{grouping} variable) and the associated treatment/reference pair.
37 | \item "proDA": The \code{std_error} column contains the standard error of the differential
38 | abundances. \code{avg_abundance} contains average abundances for treatment/reference pairs
39 | (mean of the two group means). \code{t_statistic} contains the t_statistic for the t-test.
40 | \code{n_obs} contains the number of observations for the specific protein, peptide or precursor
41 | (depending on the \code{grouping} variable) and the associated treatment/reference pair.
42 | }
43 | }
44 | \description{
45 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
46 | This function was deprecated due to its name changing to \code{calculate_diff_abundance()}.
47 | }
48 | \keyword{internal}
49 | 


--------------------------------------------------------------------------------
/man/fetch_quickgo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_quickgo.R
 3 | \name{fetch_quickgo}
 4 | \alias{fetch_quickgo}
 5 | \title{Fetch information from the QuickGO API}
 6 | \usage{
 7 | fetch_quickgo(
 8 |   type = "annotations",
 9 |   id_annotations = NULL,
10 |   taxon_id_annotations = NULL,
11 |   ontology_annotations = "all",
12 |   go_id_slims = NULL,
13 |   relations_slims = c("is_a", "part_of", "regulates", "occurs_in"),
14 |   timeout = 1200,
15 |   max_tries = 2,
16 |   show_progress = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{type}{a character value that indicates if gene ontology terms, annotations or slims
21 | should be retrieved. The possible values therefore include "annotations", "terms" and "slims".
22 | If annotations are retrieved, the maximum number of results is 2,000,000.}
23 | 
24 | \item{id_annotations}{an optional character vector that specifies UniProt IDs for which GO annotations
25 | should be retrieved. This argument should only be provided if annotations are retrieved.}
26 | 
27 | \item{taxon_id_annotations}{an optional character value that specifies the NCBI taxonomy identifier (TaxId)
28 | for an organism for which GO annotations should be retrieved.
29 | This argument should only be provided if annotations are retrieved.}
30 | 
31 | \item{ontology_annotations}{an optional character value that specifies the ontology that should be retrieved.
32 | This can either have the values "all", "molecular_function", "biological_process" or
33 | "cellular_component". This argument should only be provided if annotations are retrieved.}
34 | 
35 | \item{go_id_slims}{an optional character vector that specifies gene ontology IDs (e.g. GO:0046872) for which
36 | a slim go set should be generated. This argument should only be provided if slims are retrieved.}
37 | 
38 | \item{relations_slims}{an optional character vector that specifies the relations of GO IDs that should be
39 | considered for the generation of the slim dataset. This argument should only be provided if slims are retrieved.}
40 | 
41 | \item{timeout}{a numeric value specifying the time in seconds until the download times out.
42 | The default is 1200 seconds.}
43 | 
44 | \item{max_tries}{a numeric value that specifies the number of times the function tries to download
45 | the data in case an error occurs. The default is 2.}
46 | 
47 | \item{show_progress}{a logical value that indicates if a progress bar will be shown.
48 | Default is TRUE.}
49 | }
50 | \value{
51 | A data frame that contains descriptive information about gene ontology annotations, terms or slims
52 | depending on what the input "type" was.
53 | }
54 | \description{
55 | Fetches gene ontology (GO) annotations, terms or slims from the QuickGO EBI database.
56 | Annotations can be retrieved for specific UniProt IDs or NCBI taxonomy identifiers. When
57 | terms are retrieved, a complete list of all GO terms is returned. For the generation of
58 | a slim dataset you can provide GO IDs that should be considered. A slim dataset is a subset
59 | GO dataset that considers all child terms of the supplied IDs.
60 | }
61 | \examples{
62 | \donttest{
63 | # Annotations
64 | annotations <- fetch_quickgo(
65 |   type = "annotations",
66 |   id_annotations = c("P63328", "Q4FFP4"),
67 |   ontology_annotations = "molecular_function"
68 | )
69 | 
70 | head(annotations)
71 | 
72 | # Terms
73 | terms <- fetch_quickgo(type = "terms")
74 | 
75 | head(terms)
76 | 
77 | # Slims
78 | slims <- fetch_quickgo(
79 |   type = "slims",
80 |   go_id_slims = c("GO:0046872", "GO:0051540")
81 | )
82 | 
83 | head(slims)
84 | }
85 | }
86 | 


--------------------------------------------------------------------------------
/man/fetch_pdb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_pdb.R
 3 | \name{fetch_pdb}
 4 | \alias{fetch_pdb}
 5 | \title{Fetch structure information from RCSB}
 6 | \usage{
 7 | fetch_pdb(pdb_ids, batchsize = 100, show_progress = TRUE)
 8 | }
 9 | \arguments{
10 | \item{pdb_ids}{a character vector of PDB identifiers.}
11 | 
12 | \item{batchsize}{a numeric value that specifies the number of structures to be processed in a
13 | single query. Default is 100.}
14 | 
15 | \item{show_progress}{a logical value that indicates if a progress bar will be shown. Default is
16 | TRUE.}
17 | }
18 | \value{
19 | A data frame that contains structure metadata for the PDB IDs provided. The data frame
20 | contains some columns that might not be self explanatory.
21 | \itemize{
22 | \item auth_asym_id: Chain identifier provided by the author of the structure in order to
23 | match the identification used in the publication that describes the structure.
24 | \item label_asym_id: Chain identifier following the standardised convention for mmCIF files.
25 | \item entity_beg_seq_id, ref_beg_seq_id, length, pdb_sequence: \code{entity_beg_seq_id} is a
26 | position in the structure sequence (\code{pdb_sequence}) that matches the position given in
27 | \code{ref_beg_seq_id}, which is a position within the protein sequence (not included in the
28 | data frame). \code{length} identifies the stretch of sequence for which positions match
29 | accordingly between structure and protein sequence. \code{entity_beg_seq_id} is a residue ID
30 | based on the standardised convention for mmCIF files.
31 | \item auth_seq_id: Residue identifier provided by the author of the structure in order to
32 | match the identification used in the publication that describes the structure. This character
33 | vector has the same length as the \code{pdb_sequence} and each position is the identifier for
34 | the matching amino acid position in \code{pdb_sequence}. The contained values are not
35 | necessarily numbers and the values do not have to be positive.
36 | \item modified_monomer: Is composed of first the composition ID of the modification, followed
 37 | by the \code{label_seq_id} position. In parentheses are the parent monomer identifiers as
38 | they appear in the sequence.
39 | \item ligand_*: Any column starting with the \code{ligand_*} prefix contains information about
40 | the position, identity and donors for ligand binding sites. If there are multiple entities of
41 | ligands they are separated by "|". Specific donor level information is separated by ";".
 42 | \item secondary_structure: Contains information about helix and sheet secondary structure elements.
43 | Individual regions are separated by ";".
44 | \item unmodeled_structure: Contains information about unmodeled or partially modeled regions in
45 | the model. Individual regions are separated by ";".
46 | \item auth_seq_id_original: In some cases the sequence positions do not match the number of residues
47 | in the sequence either because positions are missing or duplicated. This always coincides with modified
48 | residues, however does not always occur when there is a modified residue in the sequence. This column
49 | contains the original \code{auth_seq_id} information that does not have these positions corrected.
50 | }
51 | }
52 | \description{
53 | Fetches structure metadata from RCSB. If you want to retrieve atom data such as positions, use
54 | the function \code{fetch_pdb_structure()}.
55 | }
56 | \examples{
57 | \donttest{
58 | pdb <- fetch_pdb(pdb_ids = c("6HG1", "1E9I", "6D3Q", "4JHW"))
59 | 
60 | head(pdb)
61 | }
62 | }
63 | 


--------------------------------------------------------------------------------
/man/qc_missed_cleavages.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qc_missed_cleavages.R
 3 | \name{qc_missed_cleavages}
 4 | \alias{qc_missed_cleavages}
 5 | \title{Check missed cleavages}
 6 | \usage{
 7 | qc_missed_cleavages(
 8 |   data,
 9 |   sample,
10 |   grouping,
11 |   missed_cleavages,
12 |   intensity,
13 |   remove_na_intensities = TRUE,
14 |   method = "count",
15 |   plot = FALSE,
16 |   interactive = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{data}{a data frame containing at least sample names, peptide or precursor identifiers
21 | and missed cleavage counts for each peptide or precursor.}
22 | 
23 | \item{sample}{a character or factor column in the \code{data} data frame that contains the sample name.}
24 | 
25 | \item{grouping}{a character column in the \code{data} data frame that contains either precursor or
26 | peptide identifiers.}
27 | 
28 | \item{missed_cleavages}{a numeric column in the \code{data} data frame that contains the counts
29 | of missed cleavages per peptide or precursor.}
30 | 
31 | \item{intensity}{a numeric column in the \code{data} data frame that contains the corresponding
32 | raw or normalised intensity values (not log2) for each peptide or precursor. Required when
33 | "intensity" is chosen as the method.}
34 | 
35 | \item{remove_na_intensities}{a logical value that specifies if sample/grouping combinations with
36 | intensities that are NA (not quantified IDs) should be dropped from the data frame for analysis
37 | of missed cleavages. Default is TRUE since we are usually interested in quantifiable peptides.
38 | This is only relevant for method = "count".}
39 | 
40 | \item{method}{a character value that indicates the method used for evaluation. "count"
41 | calculates the percentage of missed cleavages based on counts of the corresponding peptide or
42 | precursor, "intensity" calculates the percentage of missed cleavages by intensity of the
43 | corresponding peptide or precursor.}
44 | 
45 | \item{plot}{a logical value that indicates whether the result should be plotted.}
46 | 
47 | \item{interactive}{a logical value that specifies whether the plot should be interactive
48 | (default is FALSE).}
49 | }
50 | \value{
51 | A data frame that contains the calculated percentage made up by the sum of all peptides
52 | or precursors containing the corresponding amount of missed cleavages.
53 | }
54 | \description{
55 | Calculates the percentage of missed cleavages for each sample (by count or intensity). The
56 | default settings remove grouping variables without quantitative information (intensity is NA).
57 | These will not be used for the calculation of missed cleavage percentages.
58 | }
59 | \examples{
60 | library(dplyr)
61 | 
62 | set.seed(123) # Makes example reproducible
63 | 
64 | # Create example data
65 | data <- create_synthetic_data(
66 |   n_proteins = 100,
67 |   frac_change = 0.05,
68 |   n_replicates = 3,
69 |   n_conditions = 2,
70 |   method = "effect_random"
71 | ) \%>\%
72 |   mutate(intensity_non_log2 = 2^peptide_intensity_missing)
73 | 
74 | # Calculate missed cleavage percentages
75 | qc_missed_cleavages(
76 |   data = data,
77 |   sample = sample,
78 |   grouping = peptide,
79 |   missed_cleavages = n_missed_cleavage,
80 |   intensity = intensity_non_log2,
81 |   method = "intensity",
82 |   plot = FALSE
83 | )
84 | 
85 | # Plot missed cleavages
86 | qc_missed_cleavages(
87 |   data = data,
88 |   sample = sample,
89 |   grouping = peptide,
90 |   missed_cleavages = n_missed_cleavage,
91 |   intensity = intensity_non_log2,
92 |   method = "intensity",
93 |   plot = TRUE
94 | )
95 | }
96 | 


--------------------------------------------------------------------------------
/R/qc_median_intensities.R:
--------------------------------------------------------------------------------
 1 | #' Median run intensities
 2 | #'
 3 | #' Median intensities per run are returned either as a plot or a table.
 4 | #'
 5 | #' @param data a data frame that contains at least the input variables.
 6 | #' @param sample a character or factor column in the \code{data} data frame that contains the sample name.
 7 | #' @param grouping a character column in the \code{data} data frame that contains either precursor or
 8 | #' peptide identifiers.
 9 | #' @param intensity a numeric column in the \code{data} data frame that contains intensity values.
10 | #' The intensity should be ideally log2 transformed, but also non-transformed values can be used.
11 | #' @param plot a logical value that indicates whether the result should be plotted.
12 | #' @param interactive a logical value that specifies whether the plot should be interactive
13 | #' (default is FALSE).
14 | #'
15 | #' @return A plot that displays median intensity over all samples. If \code{plot = FALSE} a data
16 | #' frame containing median intensities is returned.
17 | #' @import dplyr
18 | #' @import ggplot2
19 | #' @importFrom plotly ggplotly
20 | #' @importFrom magrittr %>%
21 | #' @importFrom rlang .data
22 | #' @importFrom stringr str_sort
23 | #' @export
24 | #'
25 | #' @examples
26 | #' set.seed(123) # Makes example reproducible
27 | #'
28 | #' # Create example data
29 | #' data <- create_synthetic_data(
30 | #'   n_proteins = 100,
31 | #'   frac_change = 0.05,
32 | #'   n_replicates = 3,
33 | #'   n_conditions = 2,
34 | #'   method = "effect_random"
35 | #' )
36 | #'
37 | #' # Calculate median intensities
38 | #' qc_median_intensities(
39 | #'   data = data,
40 | #'   sample = sample,
41 | #'   grouping = peptide,
42 | #'   intensity = peptide_intensity_missing,
43 | #'   plot = FALSE
44 | #' )
45 | #'
46 | #' # Plot median intensities
47 | #' qc_median_intensities(
48 | #'   data = data,
49 | #'   sample = sample,
50 | #'   grouping = peptide,
51 | #'   intensity = peptide_intensity_missing,
52 | #'   plot = TRUE
53 | #' )
54 | qc_median_intensities <- function(data,
55 |                                   sample,
56 |                                   grouping,
57 |                                   intensity,
58 |                                   plot = TRUE,
59 |                                   interactive = FALSE) {
60 |   table <- data %>%
61 |     dplyr::distinct({{ sample }}, {{ grouping }}, {{ intensity }}) %>%
62 |     dplyr::group_by({{ sample }}) %>%
63 |     dplyr::summarize(
64 |       median_intensity = stats::median({{ intensity }}, na.rm = TRUE),
65 |       .groups = "drop"
66 |     )
67 | 
68 |   if (plot == FALSE) {
69 |     return(table)
70 |   }
71 | 
72 |   if (is(dplyr::pull(table, {{ sample }}), "character")) {
73 |     table <- table %>%
74 |       mutate({{ sample }} := factor({{ sample }},
75 |         levels = unique(stringr::str_sort({{ sample }}, numeric = TRUE))
76 |       ))
77 |   }
78 | 
79 |   plot <- table %>%
80 |     ggplot2::ggplot(ggplot2::aes({{ sample }}, .data$median_intensity, group = 1)) +
81 |     ggplot2::geom_line(size = 1) +
82 |     ggplot2::labs(title = "Medians of run intensities", x = "", y = "Intensity") +
83 |     ggplot2::theme_bw() +
84 |     ggplot2::theme(
85 |       plot.title = ggplot2::element_text(size = 20),
86 |       axis.title.x = ggplot2::element_text(size = 15),
87 |       axis.text.y = ggplot2::element_text(size = 15),
88 |       axis.text.x = ggplot2::element_text(size = 12, angle = 75, hjust = 1),
89 |       axis.title.y = ggplot2::element_text(size = 15)
90 |     )
91 | 
92 |   if (interactive == FALSE) {
93 |     return(plot)
94 |   }
95 | 
96 |   suppressWarnings(plotly::ggplotly(plot))
97 | }
98 | 


--------------------------------------------------------------------------------
/man/barcode_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/barcode_plot.R
 3 | \name{barcode_plot}
 4 | \alias{barcode_plot}
 5 | \title{Barcode plot}
 6 | \usage{
 7 | barcode_plot(
 8 |   data,
 9 |   start_position,
10 |   end_position,
11 |   protein_length,
12 |   coverage = NULL,
13 |   colouring = NULL,
14 |   fill_colour_gradient = protti::mako_colours,
15 |   fill_colour_discrete = c("#999999", protti::protti_colours),
16 |   protein_id = NULL,
17 |   facet = NULL,
18 |   facet_n_col = 4,
19 |   cutoffs = NULL
20 | )
21 | }
22 | \arguments{
23 | \item{data}{a data frame containing differential abundance, start and end peptide or precursor positions and protein length.}
24 | 
25 | \item{start_position}{a numeric column in the data frame containing the start positions for each peptide or precursor.}
26 | 
27 | \item{end_position}{a numeric column in the data frame containing the end positions for each peptide or precursor.}
28 | 
29 | \item{protein_length}{a numeric column in the data frame containing the length of the protein.}
30 | 
31 | \item{coverage}{optional, numeric column in the data frame containing coverage in percent. Will appear in the title of the barcode if provided.}
32 | 
33 | \item{colouring}{optional, column in the data frame containing information by which peptide or precursors should
34 | be colored.}
35 | 
36 | \item{fill_colour_gradient}{a vector that contains colours that should be used to create a colour gradient
37 | for the barcode plot bars if the \code{colouring} argument is continuous. Default is \code{mako_colours}.}
38 | 
 39 | \item{fill_colour_discrete}{a vector that contains colours that should be used to fill the barcode plot bars
 40 | if the \code{colouring} argument is discrete. Default is grey (\code{"#999999"}) followed by \code{protti_colours}.}
41 | 
42 | \item{protein_id}{optional, column in the data frame containing protein identifiers. Required if only one protein
43 | should be plotted and the data frame contains only information for this protein.}
44 | 
45 | \item{facet}{optional, column in the data frame containing information by which data should be faceted. This can be
46 | protein identifiers. Only 20 proteins are plotted at a time, the rest is ignored. If more should be plotted, a mapper over a
47 | subsetted data frame should be created.}
48 | 
 49 | \item{facet_n_col}{a numeric value that specifies the number of columns the faceted plot should have
 50 | if a column name is provided to \code{facet}. The default is 4.}
51 | 
52 | \item{cutoffs}{optional argument specifying the log2 fold change and significance cutoffs used for highlighting peptides.
53 | If this argument is provided colouring information will be overwritten with peptides that fulfill this condition.
54 | The cutoff should be provided in a vector of the form c(diff = 2, pval = 0.05). The name of the cutoff should reflect the
55 | column name that contains this information (log2 fold changes, p-values or adjusted p-values).}
56 | }
57 | \value{
58 | A barcode plot is returned.
59 | }
60 | \description{
61 | Plots a "barcode plot" - a vertical line for each identified peptide. Peptides can be colored based on an additional variable. Also differential
62 | abundance can be displayed.
63 | }
64 | \examples{
65 | 
66 | data <- data.frame(
67 |   start = c(5, 40, 55, 130, 181, 195),
68 |   end = c(11, 51, 60, 145, 187, 200),
69 |   length = rep(200, 6),
70 |   pg_protein_accessions = rep("Protein 1", 6),
71 |   diff = c(1, 2, 5, 2, 1, 1),
72 |   pval = c(0.1, 0.01, 0.01, 0.2, 0.2, 0.01)
73 | )
74 | 
75 | barcode_plot(
76 |   data,
77 |   start_position = start,
78 |   end_position = end,
79 |   protein_length = length,
80 |   facet = pg_protein_accessions,
81 |   cutoffs = c(diff = 2, pval = 0.05)
82 | )
83 | }
84 | 


--------------------------------------------------------------------------------
/man/calculate_kegg_enrichment.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/calculate_kegg_enrichment.R
  3 | \name{calculate_kegg_enrichment}
  4 | \alias{calculate_kegg_enrichment}
  5 | \title{Perform KEGG pathway enrichment analysis}
  6 | \usage{
  7 | calculate_kegg_enrichment(
  8 |   data,
  9 |   protein_id,
 10 |   is_significant,
 11 |   pathway_id = pathway_id,
 12 |   pathway_name = pathway_name,
 13 |   plot = TRUE,
 14 |   plot_cutoff = "adj_pval top10"
 15 | )
 16 | }
 17 | \arguments{
 18 | \item{data}{a data frame that contains at least the input variables.}
 19 | 
 20 | \item{protein_id}{a character column in the \code{data} data frame that contains the protein
 21 | accession numbers.}
 22 | 
 23 | \item{is_significant}{a logical column in the \code{data} data frame that indicates if the
 24 | corresponding protein has a significantly changing peptide. The input data frame may contain
 25 | peptide level information with significance information. The function is able to extract
 26 | protein level information from this.}
 27 | 
 28 | \item{pathway_id}{a character column in the \code{data} data frame that contains KEGG pathway
 29 | identifiers. These can be obtained from KEGG using \code{fetch_kegg}.}
 30 | 
 31 | \item{pathway_name}{a character column in the \code{data} data frame that contains KEGG pathway
 32 | names. These can be obtained from KEGG using \code{fetch_kegg}.}
 33 | 
 34 | \item{plot}{a logical value indicating whether the result should be plotted or returned as a
 35 | table.}
 36 | 
  37 | \item{plot_cutoff}{a character value indicating if the plot should contain the top 10 most
  38 | significant pathways (p-value or adjusted p-value), or if a significance cutoff should be used
  39 | to determine the number of pathways in the plot. This information should be provided with the
  40 | type first followed by the threshold separated by a space. Examples are
 41 | \code{plot_cutoff = "adj_pval top10"}, \code{plot_cutoff = "pval 0.05"} or
 42 | \code{plot_cutoff = "adj_pval 0.01"}. The threshold can be chosen freely.}
 43 | }
 44 | \value{
 45 | A bar plot displaying negative log10 adjusted p-values for the top 10 enriched pathways.
 46 | Bars are coloured according to the direction of the enrichment. If \code{plot = FALSE}, a data
 47 | frame is returned.
 48 | }
 49 | \description{
 50 | Analyses enrichment of KEGG pathways associated with proteins in the fraction of significant
 51 | proteins compared to all detected proteins. A Fisher's exact test is performed to test
 52 | significance of enrichment.
 53 | }
 54 | \examples{
 55 | \donttest{
 56 | # Load libraries
 57 | library(dplyr)
 58 | 
 59 | set.seed(123) # Makes example reproducible
 60 | 
 61 | # Create example data
 62 | kegg_data <- fetch_kegg(species = "eco")
 63 | 
 64 | if (!is.null(kegg_data)) { # only proceed if information was retrieved
 65 |   data <- kegg_data \%>\%
 66 |     group_by(uniprot_id) \%>\%
 67 |     mutate(significant = rep(
 68 |       sample(
 69 |         x = c(TRUE, FALSE),
 70 |         size = 1,
 71 |         replace = TRUE,
 72 |         prob = c(0.2, 0.8)
 73 |       ),
 74 |       n = n()
 75 |     ))
 76 | 
 77 |   # Plot KEGG enrichment
 78 |   calculate_kegg_enrichment(
 79 |     data,
 80 |     protein_id = uniprot_id,
 81 |     is_significant = significant,
 82 |     pathway_id = pathway_id,
 83 |     pathway_name = pathway_name,
 84 |     plot = TRUE,
 85 |     plot_cutoff = "pval 0.05"
 86 |   )
 87 | 
 88 |   # Calculate KEGG enrichment
 89 |   kegg <- calculate_kegg_enrichment(
 90 |     data,
 91 |     protein_id = uniprot_id,
 92 |     is_significant = significant,
 93 |     pathway_id = pathway_id,
 94 |     pathway_name = pathway_name,
 95 |     plot = FALSE
 96 |   )
 97 | 
 98 |   head(kegg, n = 10)
 99 | }
100 | }
101 | }
102 | 


--------------------------------------------------------------------------------
/R/calculate_imputation.R:
--------------------------------------------------------------------------------
 1 | #' Sampling of values for imputation
 2 | #'
 3 | #' \code{calculate_imputation} is a helper function that is used in the \code{impute} function.
 4 | #' Depending on the type of missingness and method, it samples values from a normal distribution
 5 | #' that can be used for the imputation. Note: The input intensities should be log2 transformed.
 6 | #'
 7 | #' @param min a numeric value specifying the minimal intensity value of the precursor/peptide.
 8 | #' Is only required if \code{method = "ludovic"} and \code{missingness = "MNAR"}.
 9 | #' @param noise a numeric value specifying a noise value for the precursor/peptide. Is only
10 | #' required if \code{method = "noise"} and \code{missingness = "MNAR"}.
11 | #' @param mean a numeric value specifying the mean intensity value of the condition with missing
12 | #' values for a given precursor/peptide. Is only required if \code{missingness = "MAR"}.
13 | #' @param sd a numeric value specifying the mean of the standard deviation of all conditions for
14 | #' a given precursor/peptide.
15 | #' @param missingness a character value specifying the missingness type of the data determines
16 | #' how values for imputation are sampled. This can be \code{"MAR"} or \code{"MNAR"}.
17 | #' @param method a character value specifying the method to be used for imputation. For
18 | #' \code{method = "ludovic"}, MNAR missingness is sampled around a value that is three lower
19 | #' (log2) than the lowest intensity value recorded for the precursor/peptide. For
20 | #' \code{method = "noise"}, MNAR missingness is sampled around the noise value for the
21 | #' precursor/peptide.
22 | #' @param skip_log2_transform_error a logical value, if FALSE a check is performed to validate that
23 | #' input values are log2 transformed. If input values are > 40 the test is failed and an error is
24 | #' returned.
25 | #'
26 | #' @return A value sampled from a normal distribution with the input parameters. Method specifics
27 | #' are applied to input parameters prior to sampling.
28 | calculate_imputation <-
29 |   function(min = NULL,
30 |            noise = NULL,
31 |            mean = NULL,
32 |            sd,
33 |            missingness = c("MNAR", "MAR"),
34 |            method = c("ludovic", "noise"),
35 |            skip_log2_transform_error = FALSE) {
36 |     if ((ifelse(is.na(ifelse(is.null(min), 0, min) > 40),
37 |       FALSE,
38 |       ifelse(is.null(min), 0, min) > 40
39 |     ) |
40 |       ifelse(is.na(ifelse(is.null(mean), 0, mean) > 40),
41 |         FALSE,
42 |         ifelse(is.null(mean), 0, mean) > 40
43 |       ) |
44 |       ifelse(is.na(ifelse(is.null(noise), 0, noise) > 40),
45 |         FALSE,
46 |         ifelse(is.null(noise), 0, noise) > 40
47 |       )) &
48 |       skip_log2_transform_error == FALSE) {
49 |       stop(strwrap("Input intensities seem not to be log2 transformed. If they are and you want
50 |                    to proceed set the skip_log2_transform_error argument to TRUE. Notice that
51 |                    this function does not give correct results for non-log2 transformed data.",
52 |         prefix = "\n", initial = ""
53 |       ))
54 |     }
55 |     if (!(missingness %in% c("MNAR", "MAR"))) {
56 |       return(NA)
57 |     }
58 |     if (method == "ludovic") {
59 |       if (missingness == "MNAR") {
60 |         result <- suppressWarnings(stats::rnorm(1, mean = min - 3, sd = sd))
61 |       }
62 |       if (missingness == "MAR") {
63 |         result <- suppressWarnings(stats::rnorm(1, mean = mean, sd = sd))
64 |       }
65 |     }
66 |     if (method == "noise") {
67 |       if (missingness == "MNAR") {
68 |         result <- suppressWarnings(stats::rnorm(1, mean = noise, sd = sd))
69 |       }
70 |       if (missingness == "MAR") {
71 |         result <- suppressWarnings(stats::rnorm(1, mean = mean, sd = sd))
72 |       }
73 |     }
74 |     result
75 |   }
76 | 


--------------------------------------------------------------------------------
/R/calculate_aa_scores.R:
--------------------------------------------------------------------------------
 1 | #' Calculate scores for each amino acid position in a protein sequence
 2 | #'
 3 | #' `r lifecycle::badge("experimental")`
 4 | #' Calculate a score for each amino acid position in a protein sequence based on the product of the
 5 | #' -log10(adjusted p-value) and the absolute log2(fold change) per peptide covering this amino acid. In detail, all the
 6 | #' peptides are aligned along the sequence of the corresponding protein, and the average score per
 7 | #' amino acid position is computed. In a limited proteolysis coupled to mass spectrometry (LiP-MS)
 8 | #' experiment, the score allows to prioritize and narrow down structurally affected regions.
 9 | #'
10 | #' @param data a data frame containing at least the input columns.
11 | #' @param adj_pval a numeric column in the \code{data} data frame containing the adjusted p-value.
12 | #' @param diff a numeric column in the \code{data} data frame containing the log2 fold change.
13 | #' @param start_position a numeric column \code{data} in the data frame containing the start position
14 | #' of a peptide or precursor.
15 | #' @param end_position a numeric column in the data frame containing the end position of a peptide or
16 | #' precursor.
17 | #' @param protein a character column in the data frame containing the protein identifier or name.
18 | #' @param retain_columns a vector indicating if certain columns should be retained from the input
19 | #' data frame. Default is not retaining additional columns \code{retain_columns = NULL}. Specific
20 | #' columns can be retained by providing their names (not in quotations marks, just like other
21 | #' column names, but in a vector).
22 | #'
23 | #' @return A data frame that contains the aggregated scores per amino acid position, enabling to
24 | #' draw fingerprints for each individual protein.
25 | #'
26 | #' @author Patrick Stalder
27 | #' @import dplyr
28 | #' @import tidyr
29 | #' @export
30 | #'
31 | #' @examples
32 | #'
33 | #' data <- data.frame(
34 | #'   pg_protein_accessions = c(rep("protein_1", 10)),
35 | #'   diff = c(2, -3, 1, 2, 3, -3, 5, 1, -0.5, 2),
36 | #'   adj_pval = c(0.001, 0.01, 0.2, 0.05, 0.002, 0.5, 0.4, 0.7, 0.001, 0.02),
37 | #'   start = c(1, 3, 5, 10, 15, 25, 28, 30, 41, 51),
38 | #'   end = c(6, 8, 10, 16, 23, 35, 35, 35, 48, 55)
39 | #' )
40 | #' calculate_aa_scores(
41 | #'   data,
42 | #'   protein = pg_protein_accessions,
43 | #'   diff = diff,
44 | #'   adj_pval = adj_pval,
45 | #'   start_position = start,
46 | #'   end_position = end
47 | #' )
48 | calculate_aa_scores <- function(data,
49 |                                 protein,
50 |                                 diff = diff,
51 |                                 adj_pval = adj_pval,
52 |                                 start_position,
53 |                                 end_position,
54 |                                 retain_columns = NULL) {
55 |   output <- data %>%
56 |     dplyr::ungroup() %>%
57 |     dplyr::distinct({{ protein }}, {{ diff }}, {{ adj_pval }}, {{ start_position }}, {{ end_position }}) %>%
58 |     tidyr::drop_na({{ diff }}, {{ adj_pval }}) %>%
59 |     dplyr::mutate(score = -log10({{ adj_pval }}) * abs({{ diff }})) %>%
60 |     dplyr::rowwise() %>%
61 |     dplyr::mutate(residue = list(seq({{ start_position }}, {{ end_position }}))) %>%
62 |     tidyr::unnest("residue") %>%
63 |     dplyr::group_by({{ protein }}, .data$residue) %>%
64 |     dplyr::mutate(amino_acid_score = mean(.data$score)) %>%
65 |     dplyr::distinct({{ protein }}, .data$residue, .data$amino_acid_score)
66 | 
67 | 
68 |   if (!missing(retain_columns)) {
69 |     output <- data %>%
70 |       dplyr::select(!!enquo(retain_columns), colnames(output)[!colnames(output) %in% c(
71 |         "residue",
72 |         "amino_acid_score"
73 |       )]) %>%
74 |       dplyr::distinct() %>%
75 |       dplyr::right_join(output, by = colnames(output)[!colnames(output) %in% c(
76 |         "residue",
77 |         "amino_acid_score"
78 |       )])
79 |   }
80 | 
81 |   output
82 | }
83 | 


--------------------------------------------------------------------------------