├── .Rbuildignore
├── .coveralls.yml
├── .github
    ├── .gitignore
    ├── ISSUE_TEMPLATE
    │   └── bug_report.md
    └── workflows
    │   └── rworkflows.yml
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── attach.R
    ├── data.R
    ├── dplyr_methods.R
    ├── ggplot2_methods.R
    ├── methods.R
    ├── pillar_utilities.R
    ├── plotly_methods.R
    ├── print_method.R
    ├── tibble_methods.R
    ├── tidyr_methods.R
    ├── utilities.R
    ├── utils-pipe.R
    ├── validation.R
    └── zzz.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── data
    ├── pasilla.rda
    └── se.rda
├── dev
    └── TCGA_processing.R
├── inst
    └── NEWS.rd
├── man
    ├── as_tibble.Rd
    ├── bind_rows.Rd
    ├── count.Rd
    ├── distinct.Rd
    ├── extract.Rd
    ├── figures
    │   ├── lifecycle-archived.svg
    │   ├── lifecycle-defunct.svg
    │   ├── lifecycle-deprecated.svg
    │   ├── lifecycle-experimental.svg
    │   ├── lifecycle-maturing.svg
    │   ├── lifecycle-questioning.svg
    │   ├── lifecycle-stable.svg
    │   ├── lifecycle-superseded.svg
    │   └── plot1-1.png
    ├── filter.Rd
    ├── formatting.Rd
    ├── full_join.Rd
    ├── ggplot.Rd
    ├── group_by.Rd
    ├── group_split.Rd
    ├── inner_join.Rd
    ├── left_join.Rd
    ├── mutate.Rd
    ├── mutate_features.Rd
    ├── mutate_samples.Rd
    ├── nest.Rd
    ├── pasilla.Rd
    ├── pipe.Rd
    ├── pivot_longer.Rd
    ├── pivot_wider.Rd
    ├── plot_ly.Rd
    ├── pull.Rd
    ├── rename.Rd
    ├── right_join.Rd
    ├── rowwise.Rd
    ├── sample_n.Rd
    ├── se.Rd
    ├── select.Rd
    ├── separate.Rd
    ├── slice.Rd
    ├── summarise.Rd
    ├── tbl_format_header.Rd
    ├── tidy.Rd
    ├── unite.Rd
    └── unnest.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-dplyr_methods.R
    │   ├── test-felix.R
    │   ├── test-old_vocabulary.R
    │   ├── test-tidyr_methods.R
    │   └── test-utilities.R
└── vignettes
    ├── introduction.Rmd
    └── tidySummarizedExperiment.bib


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^vignettes/introduction_cache$
 4 | ^doc$
 5 | ^Meta$
 6 | ^codecov\.yml$
 7 | ^dev$
 8 | ^README_cache$
 9 | ^README_files$
10 | ^README\.Rmd$
11 | ^\.git$
12 | ^\.coveralls\.yml$
13 | ^\.travis\.yml$
14 | ^\.github$
15 | ^_pkgdown\.yml$
16 | ^GDCdata$


--------------------------------------------------------------------------------
/.coveralls.yml:
--------------------------------------------------------------------------------
1 | service_name: travis-pro
2 | # repo_token must not be committed; provide it via the COVERALLS_REPO_TOKEN
3 | # environment variable in CI, and revoke the token that was previously here.


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | Thanks for submitting an issue.
11 | 
12 | Please add the following information to the issue
13 | 
14 | 1. Describe the issue/bug
15 | 2. Print out the input dataset immediately before the bug occurs
16 | 3. Paste the code immediately leading to the bug
17 | 4. Print out the output, if any
18 | 5. Print out the complete error/warning message, if any
19 | 6. sessionInfo()
20 | 
21 | Thanks!
22 | 


--------------------------------------------------------------------------------
/.github/workflows/rworkflows.yml:
--------------------------------------------------------------------------------
 1 | # Runs the shared neurogenomics/rworkflows action once per matrix entry
 2 | # (OS + Bioconductor version) on pushes and pull requests to the branches below.
 3 | name: rworkflows
 4 | 'on':
 5 |   push:
 6 |     branches:
 7 |       - master
 8 |       - main
 9 |       - devel
10 |       - 'RELEASE_**'
11 |   pull_request:
12 |     branches:
13 |       - master
14 |       - main
15 |       - devel
16 |       - 'RELEASE_**'
17 | jobs:
18 |   rworkflows:
19 |     # NOTE(review): write-all is broad; confirm the action really needs it.
20 |     permissions: write-all
21 |     runs-on: ${{ matrix.config.os }}
22 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
23 |     container: ${{ matrix.config.cont }}
24 |     strategy:
25 |       fail-fast: ${{ false }}
26 |       matrix:
27 |         config:
28 |           - os: ubuntu-latest
29 |             bioc: devel
30 |             r: auto
31 |             cont: ghcr.io/bioconductor/bioconductor_docker:devel
32 |             rspm: null
33 |           - os: macOS-latest
34 |             bioc: release
35 |             r: auto
36 |             cont: null
37 |             rspm: null
38 |           - os: windows-latest
39 |             bioc: release
40 |             r: auto
41 |             cont: null
42 |             rspm: null
43 |     steps:
44 |       # NOTE(review): tracks a moving branch; consider pinning a tag or SHA.
45 |       - uses: neurogenomics/rworkflows@master
46 |         with:
47 |           run_bioccheck: ${{ false }}
48 |           run_rcmdcheck: ${{ true }}
49 |           as_cran: ${{ true }}
50 |           run_vignettes: ${{ true }}
51 |           has_testthat: ${{ true }}
52 |           run_covr: ${{ true }}
53 |           run_pkgdown: ${{ true }}
54 |           has_runit: ${{ false }}
55 |           has_latex: ${{ false }}
56 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57 |           run_docker: ${{ false }}
58 |           DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
59 |           runner_os: ${{ runner.os }}
60 |           cache_version: cache-v1
61 |           docker_registry: ghcr.io
62 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | tidySummarizedExperiment.Rproj
 6 | README_cache/*
 7 | vignettes/introduction_cache*
 8 | Meta
 9 | doc
10 | dev/*csv
11 | dev/*rds
12 | dev/*rda
13 | dev/*pdf
14 | dev/dplyr-master/*
15 | GDCdata
16 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Adapted from https://github.com/hadley/testthat/blob/master/.travis.yml
 2 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
 3 | language: r
 4 | cache: packages
 5 | r:
 6 |   - bioc-release
 7 |   - bioc-devel
 8 | env:
 9 |   - R_QPDF=true
10 | 
11 | r_github_packages:
12 |   - r-lib/covr
13 | 
14 | after_success:
15 |   - tar -C .. -xf $PKG_TARBALL
16 |   - xvfb-run Rscript -e 'covr::codecov(type=c("tests", "vignettes", "examples"))'
17 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Type: Package
 2 | Package: tidySummarizedExperiment
 3 | Title: Brings SummarizedExperiment to the Tidyverse 
 4 | Version: 1.15.1
 5 | Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
 6 |                   role = c("aut", "cre")) )
 7 | Description: The tidySummarizedExperiment package provides a set of tools for creating and
 8 |   manipulating tidy data representations of SummarizedExperiment objects. SummarizedExperiment
 9 |   is a widely used data structure in bioinformatics for storing high-throughput genomic data, 
10 |   such as gene expression or DNA sequencing data. 
11 |   The tidySummarizedExperiment package introduces a tidy framework for working with SummarizedExperiment objects. 
12 |   It allows users to convert their data into a tidy format, where each observation is a row 
13 |   and each variable is a column. This tidy representation simplifies data manipulation, 
14 |   integration with other tidyverse packages, and enables seamless integration with the broader 
15 |   ecosystem of tidy tools for data analysis.
16 | License: GPL-3
17 | Depends:
18 |     R (>= 4.3.0),
19 |     SummarizedExperiment,
20 |     ttservice (>= 0.4.0)
21 | Imports:
22 |     dplyr,
23 |     tibble (>= 3.0.4),
24 |     magrittr,
25 |     tidyr,
26 |     ggplot2,
27 |     rlang,
28 |     purrr,
29 |     lifecycle,
30 |     methods,
31 |     utils,
32 |     S4Vectors,
33 |     tidyselect,
34 |     ellipsis,
35 |     vctrs,
36 |     pillar,
37 |     stringr,
38 |     cli,
39 |     fansi,
40 |     stats,
41 |     pkgconfig
42 | Suggests:
43 |     BiocStyle,
44 |     testthat,
45 |     knitr,
46 |     markdown,
47 |     rmarkdown,
48 |     plotly
49 | VignetteBuilder: 
50 |     knitr
51 | RdMacros:
52 |     lifecycle
53 | Biarch: true
54 | biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
55 | Encoding: UTF-8
56 | LazyData: true
57 | RoxygenNote: 7.3.1
58 | Roxygen: list(markdown = TRUE)
59 | LazyDataCompression: xz
60 | URL: https://github.com/stemangiola/tidySummarizedExperiment
61 | BugReports: https://github.com/stemangiola/tidySummarizedExperiment/issues
62 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(as_tibble,SummarizedExperiment)
  4 | S3method(bind_cols,RangedSummarizedExperiment)
  5 | S3method(bind_cols,SummarizedExperiment)
  6 | S3method(bind_rows,SummarizedExperiment)
  7 | S3method(count,SummarizedExperiment)
  8 | S3method(distinct,SummarizedExperiment)
  9 | S3method(extract,SummarizedExperiment)
 10 | S3method(filter,SummarizedExperiment)
 11 | S3method(full_join,SummarizedExperiment)
 12 | S3method(ggplot,SummarizedExperiment)
 13 | S3method(group_by,SummarizedExperiment)
 14 | S3method(group_split,SummarizedExperiment)
 15 | S3method(inner_join,SummarizedExperiment)
 16 | S3method(left_join,SummarizedExperiment)
 17 | S3method(mutate,SummarizedExperiment)
 18 | S3method(nest,SummarizedExperiment)
 19 | S3method(pivot_longer,SummarizedExperiment)
 20 | S3method(pivot_wider,SummarizedExperiment)
 21 | S3method(plot_ly,SummarizedExperiment)
 22 | S3method(plot_ly,tbl_df)
 23 | S3method(print,SummarizedExperiment)
 24 | S3method(pull,SummarizedExperiment)
 25 | S3method(rename,SummarizedExperiment)
 26 | S3method(right_join,SummarizedExperiment)
 27 | S3method(rowwise,SummarizedExperiment)
 28 | S3method(sample_frac,SummarizedExperiment)
 29 | S3method(sample_n,SummarizedExperiment)
 30 | S3method(select,SummarizedExperiment)
 31 | S3method(separate,SummarizedExperiment)
 32 | S3method(slice,SummarizedExperiment)
 33 | S3method(summarise,SummarizedExperiment)
 34 | S3method(summarize,SummarizedExperiment)
 35 | S3method(tbl_format_header,tidySummarizedExperiment)
 36 | S3method(tidy,RangedSummarizedExperiment)
 37 | S3method(tidy,SummarizedExperiment)
 38 | S3method(unite,SummarizedExperiment)
 39 | S3method(unnest,tidySummarizedExperiment_nested)
 40 | export("%>%")
 41 | export(mutate_features)
 42 | export(mutate_samples)
 43 | export(tidy)
 44 | export(unnest_summarized_experiment)
 45 | importFrom(S4Vectors,"metadata<-")
 46 | importFrom(S4Vectors,DataFrame)
 47 | importFrom(S4Vectors,SimpleList)
 48 | importFrom(S4Vectors,head)
 49 | importFrom(S4Vectors,metadata)
 50 | importFrom(SummarizedExperiment,"assays<-")
 51 | importFrom(SummarizedExperiment,"colData<-")
 52 | importFrom(SummarizedExperiment,"elementMetadata<-")
 53 | importFrom(SummarizedExperiment,"rowData<-")
 54 | importFrom(SummarizedExperiment,assay)
 55 | importFrom(SummarizedExperiment,assayNames)
 56 | importFrom(SummarizedExperiment,assays)
 57 | importFrom(SummarizedExperiment,cbind)
 58 | importFrom(SummarizedExperiment,colData)
 59 | importFrom(SummarizedExperiment,elementMetadata)
 60 | importFrom(SummarizedExperiment,rbind)
 61 | importFrom(SummarizedExperiment,rowData)
 62 | importFrom(SummarizedExperiment,rowRanges)
 63 | importFrom(dplyr,count)
 64 | importFrom(dplyr,distinct)
 65 | importFrom(dplyr,distinct_at)
 66 | importFrom(dplyr,filter)
 67 | importFrom(dplyr,full_join)
 68 | importFrom(dplyr,group_by)
 69 | importFrom(dplyr,group_by_drop_default)
 70 | importFrom(dplyr,group_split)
 71 | importFrom(dplyr,inner_join)
 72 | importFrom(dplyr,left_join)
 73 | importFrom(dplyr,mutate)
 74 | importFrom(dplyr,n)
 75 | importFrom(dplyr,pull)
 76 | importFrom(dplyr,rename)
 77 | importFrom(dplyr,right_join)
 78 | importFrom(dplyr,rowwise)
 79 | importFrom(dplyr,sample_frac)
 80 | importFrom(dplyr,sample_n)
 81 | importFrom(dplyr,select)
 82 | importFrom(dplyr,select_if)
 83 | importFrom(dplyr,slice)
 84 | importFrom(dplyr,summarise)
 85 | importFrom(dplyr,summarize)
 86 | importFrom(dplyr,vars)
 87 | importFrom(ellipsis,check_dots_used)
 88 | importFrom(fansi,strwrap_ctl)
 89 | importFrom(ggplot2,aes)
 90 | importFrom(ggplot2,ggplot)
 91 | importFrom(lifecycle,deprecate_warn)
 92 | importFrom(magrittr,"%$%")
 93 | importFrom(magrittr,"%>%")
 94 | importFrom(magrittr,equals)
 95 | importFrom(magrittr,set_rownames)
 96 | importFrom(methods,.hasSlot)
 97 | importFrom(methods,as)
 98 | importFrom(methods,getMethod)
 99 | importFrom(methods,is)
100 | importFrom(pillar,align)
101 | importFrom(pillar,get_extent)
102 | importFrom(pillar,style_subtle)
103 | importFrom(pillar,tbl_format_header)
104 | importFrom(pkgconfig,get_config)
105 | importFrom(purrr,imap)
106 | importFrom(purrr,map)
107 | importFrom(purrr,map2)
108 | importFrom(purrr,map_chr)
109 | importFrom(purrr,map_int)
110 | importFrom(purrr,map_lgl)
111 | importFrom(purrr,pmap)
112 | importFrom(purrr,reduce)
113 | importFrom(purrr,when)
114 | importFrom(rlang,":=")
115 | importFrom(rlang,.data)
116 | importFrom(rlang,dots_values)
117 | importFrom(rlang,enquo)
118 | importFrom(rlang,enquos)
119 | importFrom(rlang,expr)
120 | importFrom(rlang,flatten_if)
121 | importFrom(rlang,is_spliced)
122 | importFrom(rlang,names2)
123 | importFrom(rlang,quo_is_null)
124 | importFrom(rlang,quo_name)
125 | importFrom(rlang,quo_squash)
126 | importFrom(stats,setNames)
127 | importFrom(stringr,regex)
128 | importFrom(stringr,str_detect)
129 | importFrom(stringr,str_replace)
130 | importFrom(tibble,as_tibble)
131 | importFrom(tibble,enframe)
132 | importFrom(tibble,rowid_to_column)
133 | importFrom(tibble,tibble)
134 | importFrom(tidyr,extract)
135 | importFrom(tidyr,gather)
136 | importFrom(tidyr,nest)
137 | importFrom(tidyr,pivot_longer)
138 | importFrom(tidyr,pivot_wider)
139 | importFrom(tidyr,separate)
140 | importFrom(tidyr,spread)
141 | importFrom(tidyr,unite)
142 | importFrom(tidyr,unnest)
143 | importFrom(tidyselect,eval_select)
144 | importFrom(tidyselect,one_of)
145 | importFrom(ttservice,bind_cols)
146 | importFrom(ttservice,bind_rows)
147 | importFrom(ttservice,plot_ly)
148 | importFrom(utils,packageDescription)
149 | importFrom(utils,tail)
150 | importFrom(vctrs,new_data_frame)
151 | 


--------------------------------------------------------------------------------
/R/attach.R:
--------------------------------------------------------------------------------
 1 | # Packages that tidyverse_attach() attaches when they are not attached yet.
 2 | core <- c("dplyr", "tidyr", "ttservice", "ggplot2")
 3 | # Return the members of `core` with no "package:<name>" entry on the search path.
 4 | core_unloaded <- function() {
 5 |     Filter(function(pkg) !paste0("package:", pkg) %in% search(), core)
 6 | }
 7 | 
 8 | # Attach `pkg`; if its namespace is already loaded, look it up in the library
 9 | # it was loaded from. [source: https://github.com/tidy-biology/tidyverse/issues/171]
10 | same_library <- function(pkg) {
11 |     already_loaded <- pkg %in% loadedNamespaces()
12 |     lib_dir <- if (already_loaded) dirname(getNamespaceInfo(pkg, "path")) else NULL
13 |     library(pkg, lib.loc=lib_dir, character.only=TRUE, warn.conflicts=FALSE)
14 | }
15 | 
16 | # Attach every core package that is not on the search path yet, silencing
17 | # startup messages; invisibly returns the names of those packages.
18 | tidyverse_attach <- function() {
19 |     pending <- core_unloaded()
20 |     for (pkg in pending)
21 |         suppressPackageStartupMessages(same_library(pkg))
22 |     invisible(pending)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' Read counts of RNA-seq samples of Pasilla knock-down by Brooks et al.
 2 | #'
 3 | #' A SummarizedExperiment dataset containing 
 4 | #' the transcriptome information for Drosophila melanogaster.
 5 | #'
 6 | #' @format A SummarizedExperiment containing 14599 features and 7 biological replicates.
 7 | #'
 8 | #' @source \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html}
 9 | #' @usage data(pasilla)
10 | "pasilla"
11 | 
12 | #' Read counts of RNA-seq samples derived from 
13 | #' Pasilla knock-down by Brooks et al.
14 | #'
15 | #' A SummarizedExperiment dataset containing 
16 | #' the transcriptome information for Drosophila melanogaster.
17 | #'
18 | #' @format A SummarizedExperiment containing 14599 features and 7 biological replicates.
19 | #'
20 | #' @source \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html}
21 | #' @usage data(se)
22 | "se"


--------------------------------------------------------------------------------
/R/ggplot2_methods.R:
--------------------------------------------------------------------------------
 1 | #' @name ggplot
 2 | #' @rdname ggplot
 3 | #' @inherit ggplot2::ggplot
 4 | #' @title Create a new \code{ggplot} from a \code{SummarizedExperiment}
 5 | #' @return `ggplot`
 6 | #'
 7 | #' @examples
 8 | #' library(ggplot2)
 9 | #' data(pasilla)
10 | #' pasilla %>%
11 | #'     ggplot(aes(.sample, counts)) +
12 | #'     geom_boxplot()
13 | #' 
14 | #' @importFrom purrr map
15 | #' @importFrom rlang quo_name
16 | #' @importFrom ggplot2 aes ggplot
17 | #' @export
18 | ggplot.SummarizedExperiment <- function(data=NULL, mapping=aes(),
19 |     ..., environment=parent.frame()) {
20 |     # Names of the expressions passed through `...`, as a character vector
21 |     # Deprecation of special column names
22 |     .cols <- enquos(..., .ignore_empty="all") %>% 
23 |         map(~ quo_name(.x)) %>% unlist()
24 |     if (is_sample_feature_deprecated_used(data, .cols)) {
25 |         data <- ping_old_special_column_into_metadata(data)
26 |     }
27 |     # Plot the tibble view; forward `environment` instead of dropping it
28 |     data %>%
29 |         as_tibble() %>%
30 |         ggplot2::ggplot(mapping=mapping, environment=environment)
31 | }
32 | 


--------------------------------------------------------------------------------
/R/methods.R:
--------------------------------------------------------------------------------
 1 | # show() for SummarizedExperiment: print the tidy tibble abstraction unless
 2 | # the option `restore_SummarizedExperiment_show` is TRUE or the object is a
 3 | # SingleCellExperiment, in which case the original show method is called.
 4 | #' @importFrom methods getMethod
 5 | setMethod(
 6 |     f="show", signature="SummarizedExperiment",
 7 |     definition=function(object) {
 8 |         # Scalar condition, so use short-circuit `||` rather than `|`
 9 |         use_original <-
10 |             isTRUE(getOption("restore_SummarizedExperiment_show", FALSE)) ||
11 |             # From BioC 3_14 SingleCellExperiment is SummarizedExperiment
12 |             # and we don't want to process it with tidySummarizedExperiment
13 |             is(object, "SingleCellExperiment")
14 |         if (use_original) {
15 |             f <- getMethod(
16 |                 f="show",
17 |                 signature="SummarizedExperiment",
18 |                 where=asNamespace(ns="SummarizedExperiment")
19 |             )
20 |             f(object=object)
21 |         } else {
22 |             print(object)
23 |         }
24 |     })
25 | # S4 class `tidySummarizedExperiment`, extending both SummarizedExperiment classes.
26 | setClass("tidySummarizedExperiment",
27 |     contains=c("SummarizedExperiment", "RangedSummarizedExperiment"))
28 | 
29 | #' @name tidy
30 | #' @rdname tidy
31 | #' @title tidy for `SummarizedExperiment`
32 | #'
33 | #' @param object A `SummarizedExperiment` object.
34 | #' @return The `SummarizedExperiment` object that was passed in.
35 | #'
36 | #' @examples
37 | #' data(pasilla)
38 | #' pasilla %>% tidy()
39 | #'
40 | #' @export
41 | tidy <- function(object) {
42 |     UseMethod("tidy", object)
43 | }
44 | 
45 | #' @importFrom lifecycle deprecate_warn
46 | tidy_ <- function(object) {
47 |     # Shared body of the tidy() methods: emit a lifecycle deprecation
48 |     # warning, then hand the input back untouched.
49 |     deprecation_note <-
50 |         "tidySummarizedExperiment says: tidy() is not needed anymore."
51 |     deprecate_warn(
52 |         when = "1.1.1", what = "tidy()", details = deprecation_note
53 |     )
54 | 
55 |     return(object)
56 | }
57 | 
58 | #' @importFrom methods as
59 | #' @rdname tidy
60 | #' @param object A SummarizedExperiment object
61 | #' @export
62 | tidy.SummarizedExperiment <- tidy_  # reuses tidy_(): deprecation warning, input returned as is
63 | 
64 | #' @importFrom methods as
65 | #' @rdname tidy
66 | #' @param object A SummarizedExperiment object
67 | #' @export
68 | tidy.RangedSummarizedExperiment <- tidy_  # reuses tidy_(): deprecation warning, input returned as is


--------------------------------------------------------------------------------
/R/pillar_utilities.R:
--------------------------------------------------------------------------------
 1 | NBSP <- "\U00A0"  # U+00A0 (no-break space); pillar___wrap() swaps it for " "
 2 | 
 3 | # Wrap each element of `x` as a "# "-prefixed comment no wider than `width`
 4 | # or the console, whichever is smaller; empty input gives character(0).
 5 | pillar___format_comment <- function (x, width) {
 6 |     if (!length(x)) return(character())
 7 |     max_width <- min(width, cli::console_width())
 8 |     map_chr(x, function(line)
 9 |         pillar___wrap(line, prefix="# ", width=max_width))
10 | }
11 | 
12 | #' @importFrom fansi strwrap_ctl
13 | pillar___strwrap2 <- function (x, width, indent) {
14 |     # Negative widths are clamped to 0; `exdent` is `indent` plus 2.
15 |     safe_width <- max(width, 0)
16 |     fansi::strwrap_ctl(x, width=safe_width, indent=indent, exdent=indent + 2)
17 | }
18 | 
19 | 
20 | # Collapse `...` into one string, wrap it to `width` minus the prefix's
21 | # extent, prefix every line, swap NBSP for spaces and join with newlines.
22 | pillar___wrap <- function (..., indent=0, prefix="", width) {
23 |     text <- paste0(..., collapse="")
24 |     pieces <- pillar___strwrap2(text, width - get_extent(prefix), indent)
25 |     pieces <- gsub(NBSP, " ", paste0(prefix, pieces))
26 |     paste0(pieces, collapse="\n")
27 | }
28 | 


--------------------------------------------------------------------------------
/R/plotly_methods.R:
--------------------------------------------------------------------------------
 1 | #' @name plot_ly
 2 | #' @rdname plot_ly
 3 | #' @inherit ttservice::plot_ly
 4 | #' @return `plotly`
 5 | #' 
 6 | #' @examples
 7 | #' data(se)
 8 | #' se |>
 9 | #'     plot_ly(x = ~counts)
10 | #' 
11 | #' @importFrom ttservice plot_ly
12 | #' @export
13 | plot_ly.tbl_df <- function(data=data.frame(), ..., type=NULL, name=NULL,
14 |     color=NULL, colors=NULL, alpha=NULL,
15 |     stroke=NULL, strokes=NULL, alpha_stroke=1,
16 |     size=NULL, sizes=c(10, 100),
17 |     span=NULL, spans=c(1, 20),
18 |     symbol=NULL, symbols=NULL,
19 |     linetype=NULL, linetypes=NULL,
20 |     split=NULL, frame=NULL,
21 |     width=NULL, height=NULL, source="A") {
22 | 
23 |     # Drop the "tbl_df" class before delegating: per the original note this
24 |     # is "a trick to not loop the call" (presumably avoids re-dispatching here).
25 |     plotly::plot_ly(
26 |         drop_class(data, "tbl_df"), ...,
27 |         type=type, name=name,
28 |         color=color, colors=colors, alpha=alpha,
29 |         stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke,
30 |         size=size, sizes=sizes,
31 |         span=span, spans=spans,
32 |         symbol=symbol, symbols=symbols,
33 |         linetype=linetype, linetypes=linetypes,
34 |         split=split, frame=frame,
35 |         width=width, height=height, source=source
36 |     )
37 | }
38 | 
39 | #' @name plot_ly
40 | #' @rdname plot_ly
41 | #' @inherit ttservice::plot_ly
42 | #' @return `plotly`
43 | #' 
44 | #' @examples
45 | #' data(se)
46 | #' se |>
47 | #'     plot_ly(x = ~counts)
48 | #' 
49 | #' @importFrom ttservice plot_ly
50 | #' @export
51 | plot_ly.SummarizedExperiment <- function(data=data.frame(),
52 |     ..., type=NULL, name=NULL,
53 |     color=NULL, colors=NULL, alpha=NULL,
54 |     stroke=NULL, strokes=NULL, alpha_stroke=1,
55 |     size=NULL, sizes=c(10, 100),
56 |     span=NULL, spans=c(1, 20),
57 |     symbol=NULL, symbols=NULL,
58 |     linetype=NULL, linetypes=NULL,
59 |     split=NULL, frame=NULL,
60 |     width=NULL, height=NULL, source="A") {
61 | 
62 |     # Convert the object with as_tibble() and hand the result, together
63 |     # with every plotting argument, to plotly::plot_ly().
64 |     plotly::plot_ly(
65 |         as_tibble(data), ...,
66 |         type=type, name=name,
67 |         color=color, colors=colors, alpha=alpha,
68 |         stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke,
69 |         size=size, sizes=sizes,
70 |         span=span, spans=spans,
71 |         symbol=symbol, symbols=symbols,
72 |         linetype=linetype, linetypes=linetypes,
73 |         split=split, frame=frame,
74 |         width=width, height=height, source=source
75 |     )
76 | }
77 | 


--------------------------------------------------------------------------------
/R/print_method.R:
--------------------------------------------------------------------------------
  1 | # This file is a replacement of the unexported functions in the tibble
  2 | # package, in order to specify "tibble abstraction in the header"
  3 | 
  4 | #' @name tbl_format_header
  5 | #' @rdname tbl_format_header
  6 | #' @inherit pillar::tbl_format_header
  7 | #' 
  8 | #' @examples
  9 | #' # TODO
 10 | #' 
 11 | #' @importFrom rlang names2
 12 | #' @importFrom pillar align
 13 | #' @importFrom pillar get_extent
 14 | #' @importFrom pillar style_subtle
 15 | #' @importFrom pillar tbl_format_header
 16 | #' @importFrom cli col_br_black
 17 | #' @export
 18 | tbl_format_header.tidySummarizedExperiment <- function(x, setup, ...) {
 19 |     # Summary values read from the attributes attached to `x`
 20 |     n_features <- attr(x, "number_of_features")
 21 |     n_samples <- attr(x, "number_of_samples")
 22 |     named_header <- attr(x, "named_header")
 23 |     assay_names <- attr(x, "assay_names")
 24 |     header_labels <- names2(named_header)
 25 | 
 26 |     if (all(header_labels == "")) {
 27 |         # Unnamed header: use it verbatim
 28 |         header <- named_header
 29 |     } else {
 30 |         # "<name>: <value>" entries; names go through align() with NBSP
 31 |         labelled <- paste0(
 32 |             align(paste0(header_labels, ":"), space=NBSP), " ", named_header
 33 |         )
 34 |         # Extra line with the feature / sample / assay summary
 35 |         summary_line <- cli::col_br_black(sprintf(
 36 |             " Features=%s | Samples=%s | Assays=%s",
 37 |             n_features, n_samples, paste(assay_names, collapse=", ")
 38 |         ))
 39 |         # Insert the summary right after the first header entry
 40 |         header <- append(labelled, summary_line, after = 1)
 41 |     }
 42 | 
 43 |     style_subtle(pillar___format_comment(header, width=setup$width))
 44 | }
 45 | 
 46 | #' @name formatting
 47 | #' @rdname formatting
 48 | #' @aliases print
 49 | #' @inherit tibble::formatting
 50 | #' @return Invisibly returns `x`, after printing to the console a
 51 | #'   tibble-style summary of the `SummarizedExperiment`.
 52 | #' 
 53 | #' @param n_extra Number of extra columns to print abbreviated information for,
 54 | #'   if the width is too small for the entire tibble. If `NULL`, the default,
 55 | #'   will print information about at most `tibble.max_extra_cols` extra columns.
 56 | #' 
 57 | #' @examples
 58 | #' data(pasilla)
 59 | #' print(pasilla)
 60 | #' 
 61 | #' @importFrom vctrs new_data_frame
 62 | #' @importFrom SummarizedExperiment assayNames
 63 | #' @importFrom stats setNames
 64 | #' @export
 65 | print.SummarizedExperiment <- function(x, ..., n=NULL,
 66 |     width=NULL, n_extra=NULL) {
 67 | 
 68 |   # NOTE(review): `n`, `width` and `n_extra` are not referenced in this body.
 69 |   # Fix NOTEs: `.` is only used as the pipe placeholder further down
 70 |   . <- NULL
 71 |   
 72 | 
 73 |   # Stop if any column or row names are duplicated
 74 |   if (check_if_any_dimnames_duplicated(x, dim = "cols")) {
 75 |       stop("tidySummarizedExperiment says: some column names are duplicated")
 76 |   }
 77 |   if (check_if_any_dimnames_duplicated(x, dim = "rows")) {
 78 |       stop("tidySummarizedExperiment says: some row names are duplicated")
 79 |   }
 80 | 
 81 |   # Stop if column names of assays do not overlap
 82 |   if (check_if_assays_are_NOT_overlapped(x, dim = "cols")) { 
 83 |       stop( 
 84 |           "tidySummarizedExperiment says: the assays in your SummarizedExperiment have column names, 
 85 | but they do not completely overlap." 
 86 |       )
 87 |   }
 88 |   if (check_if_assays_are_NOT_overlapped(x, dim = "rows")) { 
 89 |       stop( 
 90 |           "tidySummarizedExperiment says: the assays in your SummarizedExperiment have row names, 
 91 | but they do not completely overlap." 
 92 |       )
 93 |   }
 94 |   
 95 |     # reorder assay colnames before printing
 96 |     # Rearrange if assays has colnames and rownames
 97 |     x <- order_assays_internally_to_be_consistent(x)
 98 |     
 99 |     my_tibble <-
100 |         x |>
101 |     
102 |     # If I have more than 30 genes select first sample
103 |     when(
104 |       nrow(.) > 30 ~.[1:min(50, nrow(x)), min(1, ncol(x)), drop=FALSE] ,
105 |       ncol(.) == 0 ~ .,
106 |       ~ .[, 1:min(20, ncol(x)), drop=FALSE]
107 |     ) %>%
108 |     
109 |         as_tibble() 
110 |   
111 |     my_tibble |>
112 |         new_data_frame(class=c("tidySummarizedExperiment", "tbl")) %>%
113 |         add_attr(nrow(x),  "number_of_features") %>%
114 |         add_attr(ncol(x),  "number_of_samples") %>%
115 |         add_attr(assays(x) %>% names , "assay_names") %>%
116 |     
117 |     # Set fake dimensions for efficiency
118 |     add_attr(
119 |         sprintf(
120 |             "%s %s %s", 
121 |             x %>% dim %>% {(.)[1] * (.)[2]} %>%
122 |                 format(format="f", big.mark=",", digits=1),
123 |             cli::symbol$times,
124 |             ncol(my_tibble)
125 |         ) %>%
126 |         setNames("A SummarizedExperiment-tibble abstraction"), 
127 |         "named_header"
128 |     ) %>%
129 |     print()
130 |     invisible(x) 
131 | }
132 | 


--------------------------------------------------------------------------------
/R/tibble_methods.R:
--------------------------------------------------------------------------------
 1 | #' @name as_tibble
 2 | #' @rdname as_tibble
 3 | #' @inherit tibble::as_tibble
 4 | #' @return `tibble`
 5 | #' 
 6 | #' @examples
 7 | #' tidySummarizedExperiment::pasilla %>%
 8 | #'     as_tibble()
 9 | #'     
10 | #' tidySummarizedExperiment::pasilla %>%
11 | #'     as_tibble(.subset=-c(condition, type))
12 | #' 
13 | #' @importFrom purrr reduce
14 | #' @importFrom purrr map
15 | #' @importFrom tidyr spread
16 | #' @importFrom tibble enframe
17 | #' @importFrom SummarizedExperiment colData
18 | #' @importFrom pkgconfig get_config
19 | #' @export
20 | as_tibble.SummarizedExperiment <- function(x, ...,
21 |     .name_repair=c("check_unique", "unique", "universal", "minimal"),
22 |     rownames=pkgconfig::get_config("tibble::rownames", NULL)) {
23 |     # Thin wrapper: every argument is handed to the internal worker as is.
24 |     return(.as_tibble_optimised(
25 |         x = x, ..., rownames=rownames, .name_repair=.name_repair
26 |     ))
27 | }
28 | # Internal worker for as_tibble.SummarizedExperiment(): joins the pieces built below.
29 | .as_tibble_optimised <- function(x, skip_GRanges=FALSE, .subset=NULL,
30 |     .name_repair=c("check_unique", "unique", "universal", "minimal"),
31 |     rownames=pkgconfig::get_config("tibble::rownames", NULL)) {
32 |     # Capture `.subset` unevaluated; NULL (the default) returns all columns.
33 |     .subset <- enquo(.subset)
34 |     # Column-level annotation (colData), keyed by the s_(x)$name column.
35 |     sample_info <-
36 |         colData(x) %>%
37 |     
38 |         # If reserved column names are present add .x
39 |         change_reserved_column_names(x) %>% 
40 | 
41 |         # Convert to tibble
42 |         tibble::as_tibble(rownames=s_(x)$name) %>% 
43 |         setNames(c(s_(x)$name, colnames(colData(x))))
44 |     # get_special_datasets() tables merged on "coordinate"; empty tibble if skip_GRanges.
45 |     range_info <-
46 |         skip_GRanges %>%
47 |         when(
48 |             (.) ~ tibble() %>% list,
49 |             ~  get_special_datasets(x) 
50 |         ) %>%
51 |         reduce(left_join, by="coordinate") 
52 |     # Row-level annotation (rowData), keyed by the f_(x)$name column.
53 |     gene_info <-
54 |         rowData(x) %>% 
55 |     
56 |         # If reserved column names are present add .x
57 |         change_reserved_column_names(x)%>% 
58 |   
59 |         # Convert to tibble
60 |         tibble::as_tibble(rownames=f_(x)$name) %>% 
61 |         setNames(c(f_(x)$name, colnames(rowData(x))))
62 |     # NOTE(review): presumably the assay values in long form; confirm in get_count_datasets().
63 |     count_info <- get_count_datasets(x)
64 |   
65 |     # Return 
66 |     if (quo_is_null(.subset))
67 |     
68 |         # If I want to return all columns
69 |         count_info %>%
70 |             full_join(sample_info, by=s_(x)$name) %>%
71 |             full_join(gene_info, by=f_(x)$name) %>%
72 |             when(nrow(range_info) > 0 ~ 
73 |                 (.) %>% left_join(range_info) %>% suppressMessages(),
74 |                 ~ (.)) 
75 |     
76 |     # This function outputs a tibble after subsetting the columns
77 |     else subset_tibble_output(x, count_info, sample_info,
78 |         gene_info, range_info, !!.subset)
79 | }
80 | 


--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
 1 | #' Pipe operator
 2 | #'
 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
 4 | #'
 5 | #' @name %>%
 6 | #' @rdname pipe
 7 | #' @keywords internal
 8 | #' @export
 9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | #' @param lhs A value or the magrittr placeholder.
12 | #' @param rhs A function call using the magrittr semantics.
13 | #' @return The result of calling `rhs(lhs)`.
14 | #' 
15 | #' @examples
16 | #' 
17 | #' library(magrittr)
18 | #' 1 %>% sum(2)
19 | NULL
20 | 


--------------------------------------------------------------------------------
/R/validation.R:
--------------------------------------------------------------------------------
 1 | #' @importFrom magrittr equals
 2 | #' @importFrom dplyr n
 3 | is_rectangular <- function(.data, se) {
 4 |     # Count rows per value of `key`, then count the distinct group sizes and
 5 |     # hand that number to st(., 2).
 6 |     # NOTE(review): st() is defined outside this block - presumably a
 7 |     # "smaller than" test, i.e. all groups share one size; please confirm.
 8 |     sizes_agree <- function(key) {
 9 |         per_key <- count(.data, !!key)
10 |         size_table <- count(per_key, n, name="nn")
11 |         st(nrow(size_table), 2)
12 |     }
13 | 
14 |     # Same order as before: sample identifier first, then feature identifier
15 |     is_rectangular_sample <- sizes_agree(s_(se)$symbol)
16 |     is_rectangular_transcript <- sizes_agree(f_(se)$symbol)
17 | 
18 |     is_rectangular_sample & is_rectangular_transcript
19 | }
20 | 
21 | is_not_duplicated <- function(.data, se) {
22 |     # Number of rows for every sample/feature combination
23 |     pair_counts <- count(.data, !!s_(se)$symbol , !!f_(se)$symbol)
24 |     # TRUE only when no combination appears more than once
25 |     repeated_pairs <- filter(pair_counts, n > 1)
26 |     equals(nrow(repeated_pairs), 0)
27 | }
28 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | #' @importFrom utils packageDescription
2 | .onAttach <- function(libname, pkgname) {
3 |     invisible(tidyverse_attach())  # run the attach routine; value unused
4 | }


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "tidySummarizedExperiment - part of tidytranscriptomics"
  3 | output: github_document
  4 | always_allow_html: true
  5 | ---
  6 | 
  7 | <!-- badges: start -->
  8 | [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) [![R build status](https://github.com/stemangiola/tidySummarizedExperiment/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/stemangiola/tidySummarizedExperiment/actions)
  9 | <!-- badges: end -->
 10 | 
 11 | ```{r echo=FALSE}
 12 | knitr::opts_chunk$set(fig.path="man/figures/")
 13 | ```
 14 | **Brings SummarizedExperiment to the tidyverse!**
 15 | 
 16 | website: [stemangiola.github.io/tidySummarizedExperiment/](https://stemangiola.github.io/tidySummarizedExperiment/)
 17 | 
 18 | Another [nice introduction](https://carpentries-incubator.github.io/bioc-intro/60-next-steps/index.html) by carpentries-incubator.
 19 | 
 20 | Please also have a look at
 21 | 
 22 | - [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) for tidy manipulation of SingleCellExperiment objects
 23 | - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy manipulation of Seurat objects
 24 | - [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy analysis of RNA sequencing data
 25 | - [nanny](https://github.com/stemangiola/nanny) for tidy high-level data analysis and manipulation
 26 | - [tidygate](https://github.com/stemangiola/tidygate) for adding custom gate information to your tibble
 27 | - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles
 28 | 
 29 | 
 30 | ```{r, echo=FALSE, include=FALSE}
 31 | library(knitr)
 32 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)
 33 | ```
 34 | 
 35 | # Introduction
 36 | 
 37 | tidySummarizedExperiment provides a bridge between Bioconductor [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) [@morgan2020summarized] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the
 38 | Bioconductor *SummarizedExperiment* object as a tidyverse tibble, and provides SummarizedExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. This allows users to get the best of both Bioconductor and tidyverse worlds.
 39 | 
 40 | 
 41 | ## Functions/utilities available
 42 | 
 43 | SummarizedExperiment-compatible Functions | Description
 44 | ------------ | -------------
 45 | `all` | After all `tidySummarizedExperiment` is a SummarizedExperiment object, just better
 46 | 
 47 | tidyverse Packages | Description
 48 | ------------ | -------------
 49 | `dplyr` | Almost all `dplyr` APIs like for any tibble
 50 | `tidyr` | Almost all `tidyr` APIs like for any tibble
 51 | `ggplot2` | `ggplot` like for any tibble
 52 | `plotly` | `plot_ly` like for any tibble
 53 | 
 54 | Utilities | Description
 55 | ------------ | -------------
 56 | `as_tibble` | Convert the SummarizedExperiment object to a `tbl_df`
 57 | 
 58 | ## Installation
 59 | 
 60 | ```{r, eval=FALSE}
 61 | if (!requireNamespace("BiocManager", quietly=TRUE)) {
 62 |       install.packages("BiocManager")
 63 |   }
 64 | 
 65 | BiocManager::install("tidySummarizedExperiment")
 66 | ```
 67 | 
 68 | From Github (development)
 69 | ```{r, eval=FALSE}
 70 | devtools::install_github("stemangiola/tidySummarizedExperiment")
 71 | ```
 72 | 
 73 | Load libraries used in the examples.
 74 | 
 75 | ```{r}
 76 | library(ggplot2)
 77 | library(tidySummarizedExperiment)
 78 | ```
 79 | 
 80 | 
 81 | # Create `tidySummarizedExperiment`, the best of both worlds!
 82 | 
 83 | This is a SummarizedExperiment object but it is evaluated as a tibble. So it is fully compatible both with SummarizedExperiment and tidyverse APIs.
 84 | 
 85 | ```{r}
 86 | pasilla_tidy <- tidySummarizedExperiment::pasilla 
 87 | ```
 88 | 
 89 | **It looks like a tibble**
 90 | 
 91 | ```{r}
 92 | pasilla_tidy
 93 | ```
 94 | 
 95 | **But it is a SummarizedExperiment object after all**
 96 | 
 97 | ```{r}
 98 | assays(pasilla_tidy)
 99 | ```
100 | 
101 | 
102 | # Tidyverse commands
103 | 
104 | We can use tidyverse commands to explore the tidy SummarizedExperiment object.
105 | 
106 | We can use `slice` to choose rows by position, for example to choose the first row.
107 | 
108 | ```{r}
109 | pasilla_tidy %>%
110 |     slice(1)
111 | ```
112 | 
113 | We can use `filter` to choose rows by criteria.
114 | 
115 | ```{r}
116 | pasilla_tidy %>%
117 |     filter(condition == "untreated")
118 | ```
119 | 
120 | We can use `select` to choose columns.
121 | 
122 | ```{r}
123 | pasilla_tidy %>%
124 |     select(.sample)
125 | ```
126 | 
127 | We can use `count` to count how many rows we have for each sample.
128 | 
129 | ```{r}
130 | pasilla_tidy %>%
131 |     count(.sample)
132 | ```
133 | 
134 | We can use `distinct` to see what distinct sample information we have.
135 | 
136 | ```{r}
137 | pasilla_tidy %>%
138 |     distinct(.sample, condition, type)
139 | ```
140 | 
141 | We could use `rename` to rename a column. For example, to modify the type column name.
142 | 
143 | ```{r}
144 | pasilla_tidy %>%
145 |     rename(sequencing=type)
146 | ```
147 | 
148 | We could use `mutate` to create a column. For example, we could create a new type column that contains single
149 | and paired instead of single_end and paired_end.
150 | 
151 | ```{r}
152 | pasilla_tidy %>%
153 |     mutate(type=gsub("_end", "", type))
154 | ```
155 | 
156 | We could use `unite` to combine multiple columns into a single column.
157 | 
158 | ```{r}
159 | pasilla_tidy %>%
160 |     unite("group", c(condition, type))
161 | ```
162 | 
163 | We can also combine commands with the tidyverse pipe `%>%`.
164 | 
165 | For example, we could combine `group_by` and `summarise` to get the total counts for each sample.
166 | 
167 | ```{r}
168 | pasilla_tidy %>%
169 |     group_by(.sample) %>%
170 |     summarise(total_counts=sum(counts))
171 | ```
172 | 
173 | We could combine `group_by`, `mutate` and `filter` to get the transcripts with mean count > 0.
174 | 
175 | ```{r}
176 | pasilla_tidy %>%
177 |     group_by(.feature) %>%
178 |     mutate(mean_count=mean(counts)) %>%
179 |     filter(mean_count > 0)
180 | ```
181 | 
182 | 
183 | # Plotting
184 | 
185 | ```{r}
186 | my_theme <-
187 |     list(
188 |         scale_fill_brewer(palette="Set1"),
189 |         scale_color_brewer(palette="Set1"),
190 |         theme_bw() +
191 |             theme(
192 |                 panel.border=element_blank(),
193 |                 axis.line=element_line(),
194 |                 panel.grid.major=element_line(size=0.2),
195 |                 panel.grid.minor=element_line(size=0.1),
196 |                 text=element_text(size=12),
197 |                 legend.position="bottom",
198 |                 aspect.ratio=1,
199 |                 strip.background=element_blank(),
200 |                 axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)),
201 |                 axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10))
202 |             )
203 |     )
204 | ```
205 | 
206 | We can treat `pasilla_tidy` as a normal tibble for plotting.
207 | 
208 | Here we plot the distribution of counts per sample.
209 | 
210 | ```{r plot1}
211 | pasilla_tidy %>%
212 |     tidySummarizedExperiment::ggplot(aes(counts + 1, group=.sample, color=`type`)) +
213 |     geom_density() +
214 |     scale_x_log10() +
215 |     my_theme
216 | ```
217 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | tidySummarizedExperiment - part of tidytranscriptomics
  2 | ================
  3 | 
  4 | <!-- badges: start -->
  5 | 
  6 | [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
  7 | [![R build
  8 | status](https://github.com/stemangiola/tidySummarizedExperiment/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/stemangiola/tidySummarizedExperiment/actions)
  9 | <!-- badges: end -->
 10 | 
 11 | **Brings SummarizedExperiment to the tidyverse!** 
 12 | 
 13 | website:
 14 | [stemangiola.github.io/tidySummarizedExperiment/](https://stemangiola.github.io/tidySummarizedExperiment/)
 15 | 
 16 | Another [nice introduction](https://carpentries-incubator.github.io/bioc-intro/60-next-steps/index.html) by carpentries-incubator.
 17 | 
 18 | Please also have a look at
 19 | 
 20 | -   [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/)
 21 |     for tidy manipulation of SingleCellExperiment objects
 22 | -   [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy
 23 |     manipulation of Seurat objects
 24 | -   [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy
 25 |     analysis of RNA sequencing data
 26 | -   [nanny](https://github.com/stemangiola/nanny) for tidy high-level
 27 |     data analysis and manipulation
 28 | -   [tidygate](https://github.com/stemangiola/tidygate) for adding
 29 |     custom gate information to your tibble
 30 | -   [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for
 31 |     heatmaps produced with tidy principles
 32 | 
 33 | # Introduction
 34 | 
 35 | tidySummarizedExperiment provides a bridge between Bioconductor
 36 | [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html)
 37 | \[@morgan2020summarized\] and the tidyverse \[@wickham2019welcome\]. It
 38 | creates an invisible layer that enables viewing the Bioconductor
 39 | *SummarizedExperiment* object as a tidyverse tibble, and provides
 40 | SummarizedExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly*
 41 | functions. This allows users to get the best of both Bioconductor and
 42 | tidyverse worlds.
 43 | 
 44 | ## Functions/utilities available
 45 | 
 46 | | SummarizedExperiment-compatible Functions | Description                                                                        |
 47 | |-------------------------------------------|------------------------------------------------------------------------------------|
 48 | | `all`                                     | After all `tidySummarizedExperiment` is a SummarizedExperiment object, just better |
 49 | 
 50 | | tidyverse Packages | Description                                 |
 51 | |--------------------|---------------------------------------------|
 52 | | `dplyr`            | Almost all `dplyr` APIs like for any tibble |
 53 | | `tidyr`            | Almost all `tidyr` APIs like for any tibble |
 54 | | `ggplot2`          | `ggplot` like for any tibble                |
 55 | | `plotly`           | `plot_ly` like for any tibble               |
 56 | 
 57 | | Utilities   | Description                                           |
 58 | |-------------|-------------------------------------------------------|
 59 | | `as_tibble` | Convert the SummarizedExperiment object to a `tbl_df` |
 60 | 
 61 | ## Installation
 62 | 
 63 | ``` r
 64 | if (!requireNamespace("BiocManager", quietly=TRUE)) {
 65 |       install.packages("BiocManager")
 66 |   }
 67 | 
 68 | BiocManager::install("tidySummarizedExperiment")
 69 | ```
 70 | 
 71 | From Github (development)
 72 | 
 73 | ``` r
 74 | devtools::install_github("stemangiola/tidySummarizedExperiment")
 75 | ```
 76 | 
 77 | Load libraries used in the examples.
 78 | 
 79 | ``` r
 80 | library(ggplot2)
 81 | library(tidySummarizedExperiment)
 82 | ```
 83 | 
 84 | # Create `tidySummarizedExperiment`, the best of both worlds!
 85 | 
 86 | This is a SummarizedExperiment object but it is evaluated as a tibble.
 87 | So it is fully compatible both with SummarizedExperiment and tidyverse
 88 | APIs.
 89 | 
 90 | ``` r
 91 | pasilla_tidy <- tidySummarizedExperiment::pasilla 
 92 | ```
 93 | 
 94 | **It looks like a tibble**
 95 | 
 96 | ``` r
 97 | pasilla_tidy
 98 | ```
 99 | 
100 |     ## # A SummarizedExperiment-tibble abstraction: 102,193 × 5
101 |     ## # Transcripts=14599 | Samples=7 | Assays=counts
102 |     ##    .feature    .sample counts condition type      
103 |     ##    <chr>       <chr>    <int> <chr>     <chr>     
104 |     ##  1 FBgn0000003 untrt1       0 untreated single_end
105 |     ##  2 FBgn0000008 untrt1      92 untreated single_end
106 |     ##  3 FBgn0000014 untrt1       5 untreated single_end
107 |     ##  4 FBgn0000015 untrt1       0 untreated single_end
108 |     ##  5 FBgn0000017 untrt1    4664 untreated single_end
109 |     ##  6 FBgn0000018 untrt1     583 untreated single_end
110 |     ##  7 FBgn0000022 untrt1       0 untreated single_end
111 |     ##  8 FBgn0000024 untrt1      10 untreated single_end
112 |     ##  9 FBgn0000028 untrt1       0 untreated single_end
113 |     ## 10 FBgn0000032 untrt1    1446 untreated single_end
114 |     ## # … with 40 more rows
115 | 
116 | **But it is a SummarizedExperiment object after all**
117 | 
118 | ``` r
119 | assays(pasilla_tidy)
120 | ```
121 | 
122 |     ## List of length 1
123 |     ## names(1): counts
124 | 
125 | # Tidyverse commands
126 | 
127 | We can use tidyverse commands to explore the tidy SummarizedExperiment
128 | object.
129 | 
130 | We can use `slice` to choose rows by position, for example to choose the
131 | first row.
132 | 
133 | ``` r
134 | pasilla_tidy %>%
135 |     slice(1)
136 | ```
137 | 
138 |     ## # A SummarizedExperiment-tibble abstraction: 1 × 5
139 |     ## # Transcripts=1 | Samples=1 | Assays=counts
140 |     ##   .feature    .sample counts condition type      
141 |     ##   <chr>       <chr>    <int> <chr>     <chr>     
142 |     ## 1 FBgn0000003 untrt1       0 untreated single_end
143 | 
144 | We can use `filter` to choose rows by criteria.
145 | 
146 | ``` r
147 | pasilla_tidy %>%
148 |     filter(condition == "untreated")
149 | ```
150 | 
151 |     ## # A SummarizedExperiment-tibble abstraction: 58,396 × 5
152 |     ## # Transcripts=14599 | Samples=4 | Assays=counts
153 |     ##    .feature    .sample counts condition type      
154 |     ##    <chr>       <chr>    <int> <chr>     <chr>     
155 |     ##  1 FBgn0000003 untrt1       0 untreated single_end
156 |     ##  2 FBgn0000008 untrt1      92 untreated single_end
157 |     ##  3 FBgn0000014 untrt1       5 untreated single_end
158 |     ##  4 FBgn0000015 untrt1       0 untreated single_end
159 |     ##  5 FBgn0000017 untrt1    4664 untreated single_end
160 |     ##  6 FBgn0000018 untrt1     583 untreated single_end
161 |     ##  7 FBgn0000022 untrt1       0 untreated single_end
162 |     ##  8 FBgn0000024 untrt1      10 untreated single_end
163 |     ##  9 FBgn0000028 untrt1       0 untreated single_end
164 |     ## 10 FBgn0000032 untrt1    1446 untreated single_end
165 |     ## # … with 40 more rows
166 | 
167 | We can use `select` to choose columns.
168 | 
169 | ``` r
170 | pasilla_tidy %>%
171 |     select(.sample)
172 | ```
173 | 
174 |     ## # A tibble: 102,193 × 1
175 |     ##    .sample
176 |     ##    <chr>  
177 |     ##  1 untrt1 
178 |     ##  2 untrt1 
179 |     ##  3 untrt1 
180 |     ##  4 untrt1 
181 |     ##  5 untrt1 
182 |     ##  6 untrt1 
183 |     ##  7 untrt1 
184 |     ##  8 untrt1 
185 |     ##  9 untrt1 
186 |     ## 10 untrt1 
187 |     ## # … with 102,183 more rows
188 | 
189 | We can use `count` to count how many rows we have for each sample.
190 | 
191 | ``` r
192 | pasilla_tidy %>%
193 |     count(.sample)
194 | ```
195 | 
196 |     ## # A tibble: 7 × 2
197 |     ##   .sample     n
198 |     ##   <chr>   <int>
199 |     ## 1 trt1    14599
200 |     ## 2 trt2    14599
201 |     ## 3 trt3    14599
202 |     ## 4 untrt1  14599
203 |     ## 5 untrt2  14599
204 |     ## 6 untrt3  14599
205 |     ## 7 untrt4  14599
206 | 
207 | We can use `distinct` to see what distinct sample information we have.
208 | 
209 | ``` r
210 | pasilla_tidy %>%
211 |     distinct(.sample, condition, type)
212 | ```
213 | 
214 |     ## # A tibble: 7 × 3
215 |     ##   .sample condition type      
216 |     ##   <chr>   <chr>     <chr>     
217 |     ## 1 untrt1  untreated single_end
218 |     ## 2 untrt2  untreated single_end
219 |     ## 3 untrt3  untreated paired_end
220 |     ## 4 untrt4  untreated paired_end
221 |     ## 5 trt1    treated   single_end
222 |     ## 6 trt2    treated   paired_end
223 |     ## 7 trt3    treated   paired_end
224 | 
225 | We could use `rename` to rename a column. For example, to modify the
226 | type column name.
227 | 
228 | ``` r
229 | pasilla_tidy %>%
230 |     rename(sequencing=type)
231 | ```
232 | 
233 |     ## # A SummarizedExperiment-tibble abstraction: 102,193 × 5
234 |     ## # Transcripts=14599 | Samples=7 | Assays=counts
235 |     ##    .feature    .sample counts condition sequencing
236 |     ##    <chr>       <chr>    <int> <chr>     <chr>     
237 |     ##  1 FBgn0000003 untrt1       0 untreated single_end
238 |     ##  2 FBgn0000008 untrt1      92 untreated single_end
239 |     ##  3 FBgn0000014 untrt1       5 untreated single_end
240 |     ##  4 FBgn0000015 untrt1       0 untreated single_end
241 |     ##  5 FBgn0000017 untrt1    4664 untreated single_end
242 |     ##  6 FBgn0000018 untrt1     583 untreated single_end
243 |     ##  7 FBgn0000022 untrt1       0 untreated single_end
244 |     ##  8 FBgn0000024 untrt1      10 untreated single_end
245 |     ##  9 FBgn0000028 untrt1       0 untreated single_end
246 |     ## 10 FBgn0000032 untrt1    1446 untreated single_end
247 |     ## # … with 40 more rows
248 | 
249 | We could use `mutate` to create a column. For example, we could create a
250 | new type column that contains single and paired instead of single_end
251 | and paired_end.
252 | 
253 | ``` r
254 | pasilla_tidy %>%
255 |     mutate(type=gsub("_end", "", type))
256 | ```
257 | 
258 |     ## # A SummarizedExperiment-tibble abstraction: 102,193 × 5
259 |     ## # Transcripts=14599 | Samples=7 | Assays=counts
260 |     ##    .feature    .sample counts condition type  
261 |     ##    <chr>       <chr>    <int> <chr>     <chr> 
262 |     ##  1 FBgn0000003 untrt1       0 untreated single
263 |     ##  2 FBgn0000008 untrt1      92 untreated single
264 |     ##  3 FBgn0000014 untrt1       5 untreated single
265 |     ##  4 FBgn0000015 untrt1       0 untreated single
266 |     ##  5 FBgn0000017 untrt1    4664 untreated single
267 |     ##  6 FBgn0000018 untrt1     583 untreated single
268 |     ##  7 FBgn0000022 untrt1       0 untreated single
269 |     ##  8 FBgn0000024 untrt1      10 untreated single
270 |     ##  9 FBgn0000028 untrt1       0 untreated single
271 |     ## 10 FBgn0000032 untrt1    1446 untreated single
272 |     ## # … with 40 more rows
273 | 
274 | We could use `unite` to combine multiple columns into a single column.
275 | 
276 | ``` r
277 | pasilla_tidy %>%
278 |     unite("group", c(condition, type))
279 | ```
280 | 
281 |     ## # A SummarizedExperiment-tibble abstraction: 102,193 × 4
282 |     ## # Transcripts=14599 | Samples=7 | Assays=counts
283 |     ##    .feature    .sample counts group               
284 |     ##    <chr>       <chr>    <int> <chr>               
285 |     ##  1 FBgn0000003 untrt1       0 untreated_single_end
286 |     ##  2 FBgn0000008 untrt1      92 untreated_single_end
287 |     ##  3 FBgn0000014 untrt1       5 untreated_single_end
288 |     ##  4 FBgn0000015 untrt1       0 untreated_single_end
289 |     ##  5 FBgn0000017 untrt1    4664 untreated_single_end
290 |     ##  6 FBgn0000018 untrt1     583 untreated_single_end
291 |     ##  7 FBgn0000022 untrt1       0 untreated_single_end
292 |     ##  8 FBgn0000024 untrt1      10 untreated_single_end
293 |     ##  9 FBgn0000028 untrt1       0 untreated_single_end
294 |     ## 10 FBgn0000032 untrt1    1446 untreated_single_end
295 |     ## # … with 40 more rows
296 | 
297 | We can also combine commands with the tidyverse pipe `%>%`.
298 | 
299 | For example, we could combine `group_by` and `summarise` to get the
300 | total counts for each sample.
301 | 
302 | ``` r
303 | pasilla_tidy %>%
304 |     group_by(.sample) %>%
305 |     summarise(total_counts=sum(counts))
306 | ```
307 | 
308 |     ## # A tibble: 7 × 2
309 |     ##   .sample total_counts
310 |     ##   <chr>          <int>
311 |     ## 1 trt1        18670279
312 |     ## 2 trt2         9571826
313 |     ## 3 trt3        10343856
314 |     ## 4 untrt1      13972512
315 |     ## 5 untrt2      21911438
316 |     ## 6 untrt3       8358426
317 |     ## 7 untrt4       9841335
318 | 
319 | We could combine `group_by`, `mutate` and `filter` to get the
320 | transcripts with mean count \> 0.
321 | 
322 | ``` r
323 | pasilla_tidy %>%
324 |     group_by(.feature) %>%
325 |     mutate(mean_count=mean(counts)) %>%
326 |     filter(mean_count > 0)
327 | ```
328 | 
329 |     ## # A tibble: 86,513 × 6
330 |     ## # Groups:   .feature [12,359]
331 |     ##    .feature    .sample counts condition type       mean_count
332 |     ##    <chr>       <chr>    <int> <chr>     <chr>           <dbl>
333 |     ##  1 FBgn0000003 untrt1       0 untreated single_end      0.143
334 |     ##  2 FBgn0000008 untrt1      92 untreated single_end     99.6  
335 |     ##  3 FBgn0000014 untrt1       5 untreated single_end      1.43 
336 |     ##  4 FBgn0000015 untrt1       0 untreated single_end      0.857
337 |     ##  5 FBgn0000017 untrt1    4664 untreated single_end   4672.   
338 |     ##  6 FBgn0000018 untrt1     583 untreated single_end    461.   
339 |     ##  7 FBgn0000022 untrt1       0 untreated single_end      0.143
340 |     ##  8 FBgn0000024 untrt1      10 untreated single_end      7    
341 |     ##  9 FBgn0000028 untrt1       0 untreated single_end      0.429
342 |     ## 10 FBgn0000032 untrt1    1446 untreated single_end   1085.   
343 |     ## # … with 86,503 more rows
344 | 
345 | # Plotting
346 | 
347 | ``` r
348 | my_theme <-
349 |     list(
350 |         scale_fill_brewer(palette="Set1"),
351 |         scale_color_brewer(palette="Set1"),
352 |         theme_bw() +
353 |             theme(
354 |                 panel.border=element_blank(),
355 |                 axis.line=element_line(),
356 |                 panel.grid.major=element_line(size=0.2),
357 |                 panel.grid.minor=element_line(size=0.1),
358 |                 text=element_text(size=12),
359 |                 legend.position="bottom",
360 |                 aspect.ratio=1,
361 |                 strip.background=element_blank(),
362 |                 axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)),
363 |                 axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10))
364 |             )
365 |     )
366 | ```
367 | 
368 | We can treat `pasilla_tidy` as a normal tibble for plotting.
369 | 
370 | Here we plot the distribution of counts per sample.
371 | 
372 | ``` r
373 | pasilla_tidy %>%
374 |     tidySummarizedExperiment::ggplot(aes(counts + 1, group=.sample, color=`type`)) +
375 |     geom_density() +
376 |     scale_x_log10() +
377 |     my_theme
378 | ```
379 | 
380 | ![](man/figures/plot1-1.png)<!-- -->
381 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | destination: docs
2 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 1%
13 | 


--------------------------------------------------------------------------------
/data/pasilla.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stemangiola/tidySummarizedExperiment/4b8a4e1bdba6230abe00fc2fb0e99eaffeca2532/data/pasilla.rda


--------------------------------------------------------------------------------
/data/se.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stemangiola/tidySummarizedExperiment/4b8a4e1bdba6230abe00fc2fb0e99eaffeca2532/data/se.rda


--------------------------------------------------------------------------------
/dev/TCGA_processing.R:
--------------------------------------------------------------------------------
 1 | library(TCGAbiolinks)
 2 | library(readr)
 3 | library(tidyr)
 4 | library(dplyr)
 5 | library(ggplot2)
 6 | library(tidybulk)
 7 | library(tidySummarizedExperiment)
 8 | 
 9 | # Query the TCGA-BRCA normalized RNA-Seq gene expression quantification
10 | query <- GDCquery(project = "TCGA-BRCA",
11 |                   data.category = "Gene expression",
12 |                   data.type = "Gene expression quantification",
13 |                   platform = "Illumina HiSeq", 
14 |                   file.type  = "normalized_results",
15 |                   experimental.strategy = "RNA-Seq",
16 |                   legacy = TRUE)
17 | 
18 | # Download the queried files and prepare them as `counts_se`
19 | GDCdownload(query, method = "api", files.per.chunk = 10)
20 | counts_se <- GDCprepare(query)
21 | # Query, download and prepare the TCGA-BRCA clinical data
22 | query_clin <- GDCquery(project = "TCGA-BRCA", 
23 |                        data.category = "Clinical",
24 |                        data.type = "Clinical data",
25 |                        file.type = "txt",
26 |                        legacy = TRUE)
27 | 
28 | 
29 | GDCdownload(query_clin)
30 | 
31 | clinical_list <- GDCprepare(query_clin)
32 | 
33 | clinical_patient_brca <- clinical_list$clinical_patient_brca
34 | 
35 | groups <- c("HER2_pos", "HER2_low", "HER2_neg")
36 | # Derive her2_group from IHC score / FISH status; keep patients in `groups`
37 | tcga_her2 <- clinical_patient_brca %>% 
38 |   mutate(her2_group = case_when((her2_ihc_score == 3 | her2_fish_status == "Positive") ~ "HER2_pos",
39 |                                 ((her2_ihc_score == 1 | her2_ihc_score == 2) & her2_fish_status == "Negative") ~ "HER2_low",
40 |                                 her2_ihc_score == 0 ~ "HER2_neg"
41 |   )) %>%
42 |   filter(her2_group %in% groups) %>%
43 |   select(bcr_patient_barcode, her2_group)
44 | # Trim `barcode` with the regex, then join the HER2 groups onto the counts
45 | tcga_joined <- 
46 |   counts_se %>% 
47 |   extract(barcode, "barcode", "(.+-.+-.+).+") %>%
48 |   inner_join(tcga_her2, by = c("barcode" = "bcr_patient_barcode"))
49 | 
50 | counts_se %>% select(.sample)
51 | 


--------------------------------------------------------------------------------
/inst/NEWS.rd:
--------------------------------------------------------------------------------
 1 | \name{NEWS}
 2 | \title{News for Package \pkg{tidySummarizedExperiment}}
 3 | 
 4 | \section{Changes in version 1.4.0, Bioconductor 3.14 Release}{
 5 | \itemize{
 6 |     \item Improved join_*() functions.
 7 |     \item Changed special column names with a starting "." to avoid conflicts with pre-existing column names.
 8 |     \item Improved all method for large-scale datasets.
 9 | }}
10 | 
11 | \section{Changes in version 1.5.3, Bioconductor 3.15 Release}{
12 | \itemize{
13 |     \item Speed-up nest.
14 |     \item Adaptation to Ranged-SummarizedExperiment.
15 | }}
16 | 
17 | \section{Changes in version 1.7.3, Bioconductor 3.16 Release}{
18 | \itemize{
19 |     \item Fixed as_tibble edge case
20 |     \item Fixed print for DelayedArray
21 |     \item Improve performance for large-scale datasets
22 |     \item Fixed filter if the result is a no-gene dataset, and improved performance of filtering
23 | }}
24 | 
25 | 


--------------------------------------------------------------------------------
/man/as_tibble.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/tibble_methods.R
  3 | \name{as_tibble}
  4 | \alias{as_tibble}
  5 | \alias{as_tibble.SummarizedExperiment}
  6 | \title{Coerce lists, matrices, and more to data frames}
  7 | \usage{
  8 | \method{as_tibble}{SummarizedExperiment}(
  9 |   x,
 10 |   ...,
 11 |   .name_repair = c("check_unique", "unique", "universal", "minimal"),
 12 |   rownames = pkgconfig::get_config("tibble::rownames", NULL)
 13 | )
 14 | }
 15 | \arguments{
 16 | \item{x}{A data frame, list, matrix, or other object that could reasonably be
 17 | coerced to a tibble.}
 18 | 
 19 | \item{...}{Unused, for extensibility.}
 20 | 
 21 | \item{.name_repair}{Treatment of problematic column names:
 22 | \itemize{
 23 | \item \code{"minimal"}: No name repair or checks, beyond basic existence,
 24 | \item \code{"unique"}: Make sure names are unique and not empty,
 25 | \item \code{"check_unique"}: (default value), no name repair, but check they are
 26 | \code{unique},
 27 | \item \code{"universal"}: Make the names \code{unique} and syntactic
 28 | \item a function: apply custom name repair (e.g., \code{.name_repair = make.names}
 29 | for names in the style of base R).
 30 | \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}
 31 | }
 32 | 
 33 | This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
 34 | See there for more details on these terms and the strategies used
 35 | to enforce them.}
 36 | 
 37 | \item{rownames}{How to treat existing row names of a data frame or matrix:
 38 | \itemize{
 39 | \item \code{NULL}: remove row names. This is the default.
 40 | \item \code{NA}: keep row names.
 41 | \item A string: the name of a new column. Existing rownames are transferred
 42 | into this column and the \code{row.names} attribute is deleted.
 43 | No name repair is applied to the new column name, even if \code{x} already contains
 44 | a column of that name.
 45 | Use \code{as_tibble(rownames_to_column(...))} to safeguard against this case.
 46 | }
 47 | 
 48 | Read more in \link[tibble]{rownames}.}
 49 | }
 50 | \value{
 51 | \code{tibble}
 52 | }
 53 | \description{
 54 | \code{as_tibble()} turns an existing object, such as a data frame or
 55 | matrix, into a so-called tibble, a data frame with class \code{\link[tibble]{tbl_df}}. This is
 56 | in contrast with \code{\link[tibble:tibble]{tibble()}}, which builds a tibble from individual columns.
 57 | \code{as_tibble()} is to \code{\link[tibble:tibble]{tibble()}} as \code{\link[base:as.data.frame]{base::as.data.frame()}} is to
 58 | \code{\link[base:data.frame]{base::data.frame()}}.
 59 | 
 60 | \code{as_tibble()} is an S3 generic, with methods for:
 61 | \itemize{
 62 | \item \code{\link[base:data.frame]{data.frame}}: Thin wrapper around the \code{list} method
 63 | that implements tibble's treatment of \link[tibble]{rownames}.
 64 | \item \code{\link[base:matrix]{matrix}}, \code{\link[stats:poly]{poly}},
 65 | \code{\link[stats:ts]{ts}}, \code{\link[base:table]{table}}
 66 | \item Default: Other inputs are first coerced with \code{\link[base:as.data.frame]{base::as.data.frame()}}.
 67 | }
 68 | 
 69 | \code{as_tibble_row()} converts a vector to a tibble with one row.
 70 | If the input is a list, all elements must have size one.
 71 | 
 72 | \code{as_tibble_col()} converts a vector to a tibble with one column.
 73 | }
 74 | \section{Row names}{
 75 | 
 76 | 
 77 | The default behavior is to silently remove row names.
 78 | 
 79 | New code should explicitly convert row names to a new column using the
 80 | \code{rownames} argument.
 81 | 
 82 | For existing code that relies on the retention of row names, call
 83 | \code{pkgconfig::set_config("tibble::rownames" = NA)} in your script or in your
 84 | package's \code{\link[=.onLoad]{.onLoad()}} function.
 85 | 
 86 | }
 87 | 
 88 | \section{Life cycle}{
 89 | 
 90 | 
 91 | Using \code{as_tibble()} for vectors is superseded as of version 3.0.0,
 92 | prefer the more expressive \code{as_tibble_row()} and
 93 | \code{as_tibble_col()} variants for new code.
 94 | 
 95 | }
 96 | 
 97 | \examples{
 98 | tidySummarizedExperiment::pasilla \%>\%
 99 |     as_tibble()
100 |     
101 | tidySummarizedExperiment::pasilla \%>\%
102 |     as_tibble(.subset=-c(condition, type))
103 | 
104 | }
105 | \seealso{
106 | \code{\link[tibble:tibble]{tibble()}} constructs a tibble from individual columns. \code{\link[tibble:enframe]{enframe()}}
107 | converts a named vector to a tibble with a column of names and column of
108 | values. Name repair is implemented using \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
109 | }
110 | 


--------------------------------------------------------------------------------
/man/bind_rows.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{bind_rows}
 4 | \alias{bind_rows}
 5 | \alias{bind_rows.SummarizedExperiment}
 6 | \alias{bind_cols.SummarizedExperiment}
 7 | \alias{bind_cols}
 8 | \alias{bind_cols.RangedSummarizedExperiment}
 9 | \title{Efficiently bind multiple data frames by row and column}
10 | \usage{
11 | \method{bind_rows}{SummarizedExperiment}(..., .id = NULL, add.cell.ids = NULL)
12 | 
13 | \method{bind_cols}{SummarizedExperiment}(..., .id = NULL)
14 | 
15 | \method{bind_cols}{RangedSummarizedExperiment}(..., .id = NULL)
16 | }
17 | \arguments{
18 | \item{...}{Data frames to combine.
19 | 
20 |   Each argument can either be a data frame, a list that could be a data
21 |   frame, or a list of data frames.
22 | 
23 |   When row-binding, columns are matched by name, and any missing
24 |   columns will be filled with NA.
25 | 
26 |   When column-binding, rows are matched by position, so all data
27 |   frames must have the same number of rows. To match by value, not
28 |   position, see mutate-joins.}
29 | 
30 | \item{.id}{Data frame identifier.
31 | 
32 |   When `.id` is supplied, a new column of identifiers is
33 |   created to link each row to its original data frame. The labels
34 |   are taken from the named arguments to `bind_rows()`. When a
35 |   list of data frames is supplied, the labels are taken from the
36 |   names of the list. If no names are found a numeric sequence is
37 |   used instead.}
38 | 
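39 | \item{add.cell.ids}{Appends the corresponding values to the start of each object's cell names.} % NOTE(review): the original sentence was truncated after "values to"; this completion is presumed -- confirm against the implementation in R/dplyr_methods.R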
40 | }
41 | \value{
42 | `bind_rows()` and `bind_cols()` return the same type as
43 |   the first input, either a data frame, `tbl_df`, or `grouped_df`.
44 | 
45 | `bind_rows()` and `bind_cols()` return the same type as
46 |   the first input, either a data frame, `tbl_df`, or `grouped_df`.
47 | }
48 | \description{
49 | This is an efficient implementation of the common pattern of
50 | `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many
51 | data frames into one.
52 | 
53 | This is an efficient implementation of the common pattern of
54 | `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many
55 | data frames into one.
56 | }
57 | \details{
58 | The output of `bind_rows()` will contain a column if that column
59 | appears in any of the inputs.
60 | 
61 | The output of `bind_rows()` will contain a column if that column
62 | appears in any of the inputs.
63 | }
64 | \examples{
65 | data(se)
66 | ttservice::bind_rows(se, se)
67 | 
68 | se_bind <- se |> select(dex,  albut)
69 | se |> ttservice::bind_cols(se_bind)
70 | 
71 | }
72 | 


--------------------------------------------------------------------------------
/man/count.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{count}
 4 | \alias{count}
 5 | \alias{count.SummarizedExperiment}
 6 | \title{Count the observations in each group}
 7 | \usage{
 8 | \method{count}{SummarizedExperiment}(
 9 |   x,
10 |   ...,
11 |   wt = NULL,
12 |   sort = FALSE,
13 |   name = NULL,
14 |   .drop = group_by_drop_default(x)
15 | )
16 | }
17 | \arguments{
18 | \item{x}{A data frame, data frame extension (e.g. a tibble), or a
19 | lazy data frame (e.g. from dbplyr or dtplyr).}
20 | 
21 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group
22 | by.}
23 | 
24 | \item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights.
25 | Can be \code{NULL} or a variable:
26 | \itemize{
27 | \item If \code{NULL} (the default), counts the number of rows in each group.
28 | \item If a variable, computes \code{sum(wt)} for each group.
29 | }}
30 | 
31 | \item{sort}{If \code{TRUE}, will show the largest groups at the top.}
32 | 
33 | \item{name}{The name of the new column in the output.
34 | 
35 | If omitted, it will default to \code{n}. If there's already a column called \code{n},
36 | it will use \code{nn}. If there's a column called \code{n} and \code{nn}, it'll use
37 | \code{nnn}, and so on, adding \code{n}s until it gets a new name.}
38 | 
39 | \item{.drop}{Handling of factor levels that don't appear in the data, passed
40 | on to \code{\link[dplyr:group_by]{group_by()}}.
41 | 
42 | For \code{count()}: if \code{FALSE} will include counts for empty groups (i.e. for
43 | levels of factors that don't exist in the data).
44 | 
45 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} For \code{add_count()}: deprecated since it
46 | can't actually affect the output.}
47 | }
48 | \value{
49 | An object of the same type as \code{x}. \code{count()} and \code{add_count()}
50 | group transiently, so the output has the same groups as the input.
51 | }
52 | \description{
53 | \code{count()} lets you quickly count the unique values of one or more variables:
54 | \code{df \%>\% count(a, b)} is roughly equivalent to
55 | \code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}.
56 | \code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent
57 | to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts,
58 | switching the summary from \code{n = n()} to \code{n = sum(wt)}.
59 | 
60 | \code{add_count()} and \code{add_tally()} are equivalents to \code{count()} and \code{tally()}
61 | but use \code{mutate()} instead of \code{summarise()} so that they add a new column
62 | with group-wise counts.
63 | }
64 | \examples{
65 | data(se)
66 | se |> count(dex)
67 |     
68 | }
69 | 


--------------------------------------------------------------------------------
/man/distinct.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{distinct}
 4 | \alias{distinct}
 5 | \alias{distinct.SummarizedExperiment}
 6 | \title{Keep distinct/unique rows}
 7 | \usage{
 8 | \method{distinct}{SummarizedExperiment}(.data, ..., .keep_all = FALSE)
 9 | }
10 | \arguments{
11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
13 | more details.}
14 | 
15 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to
16 | use when determining uniqueness. If there are multiple rows for a given
17 | combination of inputs, only the first row will be preserved. If omitted,
18 | will use all variables in the data frame.}
19 | 
20 | \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}.
21 | If a combination of \code{...} is not distinct, this keeps the
22 | first row of values.}
23 | }
24 | \value{
25 | An object of the same type as \code{.data}. The output has the following
26 | properties:
27 | \itemize{
28 | \item Rows are a subset of the input but appear in the same order.
29 | \item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}.
30 | Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns.
31 | \item Groups are not modified.
32 | \item Data frame attributes are preserved.
33 | }
34 | }
35 | \description{
36 | Keep only unique/distinct rows from a data frame. This is similar
37 | to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster.
38 | }
39 | \section{Methods}{
40 | 
41 | 
42 | This function is a \strong{generic}, which means that packages can provide
43 | implementations (methods) for other classes. See the documentation of
44 | individual methods for extra arguments and differences in behaviour.
45 | 
46 | The following methods are currently available in loaded packages:
47 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}.
48 | 
49 | }
50 | 
51 | \examples{
52 | data(pasilla)
53 | pasilla |> distinct(.sample)
54 | 
55 | }
56 | 


--------------------------------------------------------------------------------
/man/extract.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tidyr_methods.R
 3 | \name{extract}
 4 | \alias{extract}
 5 | \alias{extract.SummarizedExperiment}
 6 | \title{Extract a character column into multiple columns using regular
 7 | expression groups}
 8 | \usage{
 9 | \method{extract}{SummarizedExperiment}(
10 |   data,
11 |   col,
12 |   into,
13 |   regex = "([[:alnum:]]+)",
14 |   remove = TRUE,
15 |   convert = FALSE,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{data}{A data frame.}
21 | 
22 | \item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.}
23 | 
24 | \item{into}{Names of new variables to create as character vector.
25 | Use \code{NA} to omit the variable in the output.}
26 | 
27 | \item{regex}{A string representing a regular expression used to extract the
28 | desired values. There should be one group (defined by \verb{()}) for each
29 | element of \code{into}.}
30 | 
31 | \item{remove}{If \code{TRUE}, remove input column from output data frame.}
32 | 
33 | \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with
34 | \code{as.is = TRUE} on new columns. This is useful if the component
35 | columns are integer, numeric or logical.
36 | 
37 | NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.}
38 | 
39 | \item{...}{Additional arguments passed on to methods.}
40 | }
41 | \value{
42 | \code{tidySummarizedExperiment}
43 | }
44 | \description{
45 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
46 | 
47 | \code{extract()} has been superseded in favour of \code{\link[tidyr:separate_wider_regex]{separate_wider_regex()}}
48 | because it has a more polished API and better handling of problems.
49 | Superseded functions will not go away, but will only receive critical bug
50 | fixes.
51 | 
52 | Given a regular expression with capturing groups, \code{extract()} turns
53 | each group into a new column. If the groups don't match, or the input
54 | is NA, the output will be NA.
55 | }
56 | \examples{
57 | tidySummarizedExperiment::pasilla |>
58 |     extract(type, into="sequencing", regex="([a-z]*)_end", convert=TRUE)
59 | 
60 | }
61 | \seealso{
62 | \code{\link[tidyr:separate]{separate()}} to split up by a separator.
63 | }
64 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-archived.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="110" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="110" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#e05d44" d="M53 0h57v20H53z"/><path fill="url(#b)" d="M0 0h110v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="805" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">archived</text><text x="805" y="140" transform="scale(.1)" textLength="470">archived</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-defunct.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="104" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="104" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#e05d44" d="M53 0h51v20H53z"/><path fill="url(#b)" d="M0 0h104v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="775" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">defunct</text><text x="775" y="140" transform="scale(.1)" textLength="410">defunct</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="124" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="124" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#fe7d37" d="M53 0h71v20H53z"/><path fill="url(#b)" d="M0 0h124v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="875" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">deprecated</text><text x="875" y="140" transform="scale(.1)" textLength="610">deprecated</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="136" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="136" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#fe7d37" d="M53 0h83v20H53z"/><path fill="url(#b)" d="M0 0h136v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="935" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="730">experimental</text><text x="935" y="140" transform="scale(.1)" textLength="730">experimental</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-maturing.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="114" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="114" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#007ec6" d="M53 0h61v20H53z"/><path fill="url(#b)" d="M0 0h114v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="825" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">maturing</text><text x="825" y="140" transform="scale(.1)" textLength="510">maturing</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-questioning.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="126" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="126" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#007ec6" d="M53 0h73v20H53z"/><path fill="url(#b)" d="M0 0h126v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="885" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">questioning</text><text x="885" y="140" transform="scale(.1)" textLength="630">questioning</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="96" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="96" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#4c1" d="M53 0h43v20H53z"/><path fill="url(#b)" d="M0 0h96v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="735" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">stable</text><text x="735" y="140" transform="scale(.1)" textLength="330">stable</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="128" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h55v20H0z"/><path fill="#007ec6" d="M55 0h73v20H55z"/><path fill="url(#b)" d="M0 0h128v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text><text x="285" y="140" transform="scale(.1)" textLength="450">lifecycle</text><text x="905" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">superseded</text><text x="905" y="140" transform="scale(.1)" textLength="630">superseded</text></g> </svg>


--------------------------------------------------------------------------------
/man/figures/plot1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stemangiola/tidySummarizedExperiment/4b8a4e1bdba6230abe00fc2fb0e99eaffeca2532/man/figures/plot1-1.png


--------------------------------------------------------------------------------
/man/filter.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{filter}
  4 | \alias{filter}
  5 | \alias{filter.SummarizedExperiment}
  6 | \title{Keep rows that match a condition}
  7 | \usage{
  8 | \method{filter}{SummarizedExperiment}(.data, ..., .preserve = FALSE)
  9 | }
 10 | \arguments{
 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that
 16 | return a logical value, and are defined in terms of the variables in
 17 | \code{.data}. If multiple expressions are included, they are combined with the
 18 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are
 19 | kept.}
 20 | 
 21 | \item{.preserve}{Relevant when the \code{.data} input is grouped.
 22 | If \code{.preserve = FALSE} (the default), the grouping structure
 23 | is recalculated based on the resulting data, otherwise the grouping is kept as is.}
 24 | }
 25 | \value{
 26 | An object of the same type as \code{.data}. The output has the following properties:
 27 | \itemize{
 28 | \item Rows are a subset of the input, but appear in the same order.
 29 | \item Columns are not modified.
 30 | \item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}).
 31 | \item Data frame attributes are preserved.
 32 | }
 33 | }
 34 | \description{
 35 | The \code{filter()} function is used to subset a data frame,
 36 | retaining all rows that satisfy your conditions.
 37 | To be retained, the row must produce a value of \code{TRUE} for all conditions.
 38 | Note that when a condition evaluates to \code{NA}
 39 | the row will be dropped, unlike base subsetting with \code{[}.
 40 | }
 41 | \details{
 42 | The \code{filter()} function is used to subset the rows of
 43 | \code{.data}, applying the expressions in \code{...} to the column values to determine which
 44 | rows should be retained. It can be applied to both grouped and ungrouped data (see \code{\link[dplyr:group_by]{group_by()}} and
 45 | \code{\link[dplyr:ungroup]{ungroup()}}). However, dplyr is not yet smart enough to optimise the filtering
 46 | operation on grouped datasets that do not need grouped calculations. For this
 47 | reason, filtering is often considerably faster on ungrouped data.
 48 | }
 49 | \section{Useful filter functions}{
 50 | 
 51 | 
 52 | 
 53 | There are many functions and operators that are useful when constructing the
 54 | expressions used to filter the data:
 55 | \itemize{
 56 | \item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc
 57 | \item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}}
 58 | \item \code{\link[=is.na]{is.na()}}
 59 | \item \code{\link[dplyr:between]{between()}}, \code{\link[dplyr:near]{near()}}
 60 | }
 61 | 
 62 | }
 63 | 
 64 | \section{Grouped tibbles}{
 65 | 
 66 | 
 67 | 
 68 | Because filtering expressions are computed within groups, they may
 69 | yield different results on grouped tibbles. This will be the case
 70 | as soon as an aggregating, lagging, or ranking function is
 71 | involved. Compare this ungrouped filtering:
 72 | 
 73 | \if{html}{\out{<div class="sourceCode">}}\preformatted{starwars \%>\% filter(mass > mean(mass, na.rm = TRUE))
 74 | }\if{html}{\out{</div>}}
 75 | 
 76 | With the grouped equivalent:
 77 | 
 78 | \if{html}{\out{<div class="sourceCode">}}\preformatted{starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE))
 79 | }\if{html}{\out{</div>}}
 80 | 
 81 | In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row to
 82 | the global average (taken over the whole data set), keeping only the rows with
 83 | \code{mass} greater than this global average. In contrast, the grouped version calculates
 84 | the average mass separately for each \code{gender} group, and keeps rows with \code{mass} greater
 85 | than the relevant within-gender average.
 86 | 
 87 | }
 88 | 
 89 | \section{Methods}{
 90 | 
 91 | 
 92 | This function is a \strong{generic}, which means that packages can provide
 93 | implementations (methods) for other classes. See the documentation of
 94 | individual methods for extra arguments and differences in behaviour.
 95 | 
 96 | The following methods are currently available in loaded packages:
 97 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}.
 98 | 
 99 | }
100 | 
101 | \examples{
102 | data(pasilla)
103 | pasilla |>  filter(.sample == "untrt1")
104 | 
105 | # Learn more in ?dplyr_tidy_eval
106 | 
107 | }
108 | \seealso{
109 | Other single table verbs: 
110 | \code{\link[dplyr]{arrange}()},
111 | \code{\link[dplyr]{mutate}()},
112 | \code{\link[dplyr]{reframe}()},
113 | \code{\link[dplyr]{rename}()},
114 | \code{\link[dplyr]{select}()},
115 | \code{\link[dplyr]{slice}()},
116 | \code{\link[dplyr]{summarise}()}
117 | }
118 | 


--------------------------------------------------------------------------------
/man/formatting.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/print_method.R
 3 | \name{formatting}
 4 | \alias{formatting}
 5 | \alias{print.SummarizedExperiment}
 6 | \alias{print}
 7 | \title{Printing tibbles}
 8 | \usage{
 9 | \method{print}{SummarizedExperiment}(x, ..., n = NULL, width = NULL, n_extra = NULL)
10 | }
11 | \arguments{
12 | \item{x}{Object to format or print.}
13 | 
14 | \item{...}{Passed on to \code{\link[=tbl_format_setup]{tbl_format_setup()}}.}
15 | 
16 | \item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows
17 | if less than the \code{print_max} \link[pillar:pillar_options]{option}.
18 | Otherwise, will print as many rows as specified by the
19 | \code{print_min} \link[pillar:pillar_options]{option}.}
20 | 
21 | \item{width}{Width of text output to generate. This defaults to \code{NULL}, which
22 | means use the \code{width} \link[pillar:pillar_options]{option}.}
23 | 
24 | \item{n_extra}{Number of extra columns to print abbreviated information for,
25 | if the width is too small for the entire tibble. If \code{NULL}, the default,
26 | will print information about at most \code{tibble.max_extra_cols} extra columns.}
27 | }
28 | \value{
29 | Prints a message to the console describing
30 | the contents of the \code{tidySummarizedExperiment}.
31 | }
32 | \description{
33 | One of the main features of the \code{tbl_df} class is the printing:
34 | \itemize{
35 | \item Tibbles only print as many rows and columns as fit on one screen,
36 | supplemented by a summary of the remaining rows and columns.
37 | \item Tibble reveals the type of each column, which keeps the user informed about
38 | whether a variable is, e.g., \verb{<chr>} or \verb{<fct>} (character versus factor).
39 | See \code{vignette("types")} for an overview of common
40 | type abbreviations.
41 | }
42 | 
43 | Printing can be tweaked for a one-off call by calling \code{print()} explicitly
44 | and setting arguments like \code{n} and \code{width}. More persistent control is
45 | available by setting the options described in \link[pillar:pillar_options]{pillar::pillar_options}.
46 | See also \code{vignette("digits")} for a comparison to base options,
47 | and \code{vignette("numbers")} that showcases \code{\link[tibble:num]{num()}} and \code{\link[tibble:char]{char()}}
48 | for creating columns with custom formatting options.
49 | 
50 | As of tibble 3.1.0, printing is handled entirely by the \pkg{pillar} package.
51 | If you implement a package that extends tibble,
52 | the printed output can be customized in various ways.
53 | See \code{vignette("extending", package = "pillar")} for details,
54 | and \link[pillar:pillar_options]{pillar::pillar_options} for options that control the display in the console.
55 | }
56 | \examples{
57 | data(pasilla)
58 | print(pasilla)
59 | 
60 | }
61 | 


--------------------------------------------------------------------------------
/man/full_join.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{full_join}
  4 | \alias{full_join}
  5 | \alias{full_join.SummarizedExperiment}
  6 | \title{Mutating joins}
  7 | \usage{
  8 | \method{full_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
  9 | }
 10 | \arguments{
 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
 16 | vector of variables to join by.
 17 | 
 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
 19 | variables in common across \code{x} and \code{y}. A message lists the variables so
 20 | that you can check they're correct; suppress the message by supplying \code{by}
 21 | explicitly.
 22 | 
 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
 25 | 
 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
 27 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match
 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like
 30 | \code{join_by(a, c)}.
 31 | 
 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
 34 | these types of joins.
 35 | 
 36 | For simple equality joins, you can alternatively specify a character vector
 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
 40 | 
 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
 42 | \code{\link[dplyr:cross_join]{cross_join()}}.}
 43 | 
 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source,
 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
 46 | same src as \code{x}.  This allows you to join tables across srcs, but
 47 | it is a potentially expensive operation so you must opt into it.}
 48 | 
 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and
 50 | \code{y}, these suffixes will be added to the output to disambiguate them.
 51 | Should be a character vector of length 2.}
 52 | 
 53 | \item{...}{Other parameters passed onto methods.}
 54 | }
 55 | \value{
 56 | An object of the same type as \code{x} (including the same groups). The order of
 57 | the rows and columns of \code{x} is preserved as much as possible. The output has
 58 | the following properties:
 59 | \itemize{
 60 | \item The rows are affected by the join type.
 61 | \itemize{
 62 | \item \code{inner_join()} returns matched \code{x} rows.
 63 | \item \code{left_join()} returns all \code{x} rows.
 64 | \item \code{right_join()}  returns matched \code{x} rows, followed by unmatched \code{y} rows.
 65 | \item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
 66 | }
 67 | \item Output columns include all columns from \code{x} and all non-key columns from
 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
 71 | the same name, \code{suffix}es are added to disambiguate these as well.
 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
 73 | common type between \code{x} and \code{y}.
 74 | }
 75 | }
 76 | \description{
 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on
 78 | the keys. There are four mutating joins: the inner join, and the three outer
 79 | joins.
 80 | \subsection{Inner join}{
 81 | 
 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key
 83 | in \code{y}.
 84 | 
 85 | The most important property of an inner join is that unmatched rows in either
 86 | input are not included in the result. This means that generally inner joins
 87 | are not appropriate in most analyses, because it is too easy to lose
 88 | observations.
 89 | }
 90 | 
 91 | \subsection{Outer joins}{
 92 | 
 93 | The three outer joins keep observations that appear in at least one of the
 94 | data frames:
 95 | \itemize{
 96 | \item A \code{left_join()} keeps all observations in \code{x}.
 97 | \item A \code{right_join()} keeps all observations in \code{y}.
 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
 99 | }
100 | }
101 | }
102 | \section{Many-to-many relationships}{
103 | 
104 | 
105 | 
106 | By default, dplyr guards against many-to-many relationships in equality joins
107 | by throwing a warning. These occur when both of the following are true:
108 | \itemize{
109 | \item A row in \code{x} matches multiple rows in \code{y}.
110 | \item A row in \code{y} matches multiple rows in \code{x}.
111 | }
112 | 
113 | This is typically surprising, as most joins involve a relationship of
114 | one-to-one, one-to-many, or many-to-one, and is often the result of an
115 | improperly specified join. Many-to-many relationships are particularly
116 | problematic because they can result in a Cartesian explosion of the number of
117 | rows returned from the join.
118 | 
119 | If a many-to-many relationship is expected, silence this warning by
120 | explicitly setting \code{relationship = "many-to-many"}.
121 | 
122 | In production code, it is best to preemptively set \code{relationship} to whatever
123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this
124 | forces an error to occur immediately if the data doesn't align with your
125 | expectations.
126 | 
127 | Inequality joins typically result in many-to-many relationships by nature, so
128 | they don't warn on them by default, but you should still take extra care when
129 | specifying an inequality join, because they also have the capability to
130 | return a large number of rows.
131 | 
132 | Rolling joins don't warn on many-to-many relationships either, but many
133 | rolling joins follow a many-to-one relationship, so it is often useful to
134 | set \code{relationship = "many-to-one"} to enforce this.
135 | 
136 | Note that in SQL, most database providers won't let you specify a
137 | many-to-many relationship between two tables, instead requiring that you
138 | create a third \emph{junction table} that results in two one-to-many relationships
139 | instead.
140 | 
141 | }
142 | 
143 | \section{Methods}{
144 | 
145 | 
146 | These functions are \strong{generic}s, which means that packages can provide
147 | implementations (methods) for other classes. See the documentation of
148 | individual methods for extra arguments and differences in behaviour.
149 | 
150 | Methods available in currently loaded packages:
151 | \itemize{
152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
156 | }
157 | 
158 | }
159 | 
160 | \examples{
161 | data(pasilla)
162 | 
163 | tt <- pasilla
164 | tt |> full_join(tibble::tibble(condition="treated", dose=10))
165 | 
166 | }
167 | \seealso{
168 | Other joins: 
169 | \code{\link[dplyr]{cross_join}()},
170 | \code{\link[dplyr]{filter-joins}},
171 | \code{\link[dplyr]{nest_join}()}
172 | }
173 | 


--------------------------------------------------------------------------------
/man/ggplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ggplot2_methods.R
 3 | \name{ggplot}
 4 | \alias{ggplot}
 5 | \alias{ggplot.SummarizedExperiment}
 6 | \title{Create a new \code{ggplot} from a \code{tidySummarizedExperiment}}
 7 | \usage{
 8 | \method{ggplot}{SummarizedExperiment}(data = NULL, mapping = aes(), ..., environment = parent.frame())
 9 | }
10 | \arguments{
11 | \item{data}{Default dataset to use for plot. If not already a data.frame,
12 | will be converted to one by \code{\link[ggplot2:fortify]{fortify()}}. If not specified,
13 | must be supplied in each layer added to the plot.}
14 | 
15 | \item{mapping}{Default list of aesthetic mappings to use for plot.
16 | If not specified, must be supplied in each layer added to the plot.}
17 | 
18 | \item{...}{Other arguments passed on to methods. Not currently used.}
19 | 
20 | \item{environment}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Used prior to tidy
21 | evaluation.}
22 | }
23 | \value{
24 | \code{ggplot}
25 | }
26 | \description{
27 | \code{ggplot()} initializes a ggplot object. It can be used to
28 | declare the input data frame for a graphic and to specify the
29 | set of plot aesthetics intended to be common throughout all
30 | subsequent layers unless specifically overridden.
31 | }
32 | \details{
33 | \code{ggplot()} is used to construct the initial plot object,
34 | and is almost always followed by a plus sign (\code{+}) to add
35 | components to the plot.
36 | 
37 | There are three common patterns used to invoke \code{ggplot()}:
38 | \itemize{
39 | \item \verb{ggplot(data = df, mapping = aes(x, y, other aesthetics))}
40 | \item \code{ggplot(data = df)}
41 | \item \code{ggplot()}
42 | }
43 | 
44 | The first pattern is recommended if all layers use the same
45 | data and the same set of aesthetics, although this method
46 | can also be used when adding a layer using data from another
47 | data frame.
48 | 
49 | The second pattern specifies the default data frame to use
50 | for the plot, but no aesthetics are defined up front. This
51 | is useful when one data frame is used predominantly for the
52 | plot, but the aesthetics vary from one layer to another.
53 | 
54 | The third pattern initializes a skeleton \code{ggplot} object, which
55 | is fleshed out as layers are added. This is useful when
56 | multiple data frames are used to produce different layers, as
57 | is often the case in complex graphics.
58 | 
59 | The \verb{data =} and \verb{mapping =} specifications in the arguments are optional
60 | (and are often omitted in practice), so long as the data and the mapping
61 | values are passed into the function in the right order. In the examples
62 | below, however, they are left in place for clarity.
63 | }
64 | \examples{
65 | library(ggplot2)
66 | data(pasilla)
67 | pasilla \%>\%
68 |     ggplot(aes(.sample, counts)) +
69 |     geom_boxplot()
70 | 
71 | }
72 | \seealso{
73 | The \href{https://ggplot2-book.org/getting-started}{first steps chapter} of the online ggplot2 book.
74 | }
75 | 


--------------------------------------------------------------------------------
/man/group_by.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{group_by}
  4 | \alias{group_by}
  5 | \alias{group_by.SummarizedExperiment}
  6 | \title{Group by one or more variables}
  7 | \usage{
  8 | \method{group_by}{SummarizedExperiment}(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))
  9 | }
 10 | \arguments{
 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{...}{In \code{group_by()}, variables or computations to group by.
 16 | Computations are always done on the ungrouped data frame.
 17 | To perform computations on the grouped data, you need to use
 18 | a separate \code{mutate()} step before the \code{group_by()}.
 19 | Computations are not allowed in \code{nest_by()}.
 20 | In \code{ungroup()}, variables to remove from the grouping.}
 21 | 
 22 | \item{.add}{When \code{FALSE}, the default, \code{group_by()} will
 23 | override existing groups. To add to the existing groups, use
 24 | \code{.add = TRUE}.
 25 | 
 26 | This argument was previously called \code{add}, but that prevented
 27 | creating a new grouping variable called \code{add}, and conflicts with
 28 | our naming conventions.}
 29 | 
 30 | \item{.drop}{Drop groups formed by factor levels that don't appear in the
 31 | data? The default is \code{TRUE} except when \code{.data} has been previously
 32 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.}
 33 | }
 34 | \value{
 35 | A grouped data frame with class \code{\link[dplyr]{grouped_df}},
 36 | unless the combination of \code{...} and \code{.add} yields an empty set of
 37 | grouping columns, in which case a tibble will be returned.
 38 | }
 39 | \description{
 40 | Most data operations are done on groups defined by variables.
 41 | \code{group_by()} takes an existing tbl and converts it into a grouped tbl
 42 | where operations are performed "by group". \code{ungroup()} removes grouping.
 43 | }
 44 | \section{Methods}{
 45 | 
 46 | 
 47 | These functions are \strong{generic}s, which means that packages can provide
 48 | implementations (methods) for other classes. See the documentation of
 49 | individual methods for extra arguments and differences in behaviour.
 50 | 
 51 | Methods available in currently loaded packages:
 52 | \itemize{
 53 | \item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}.
 54 | \item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}.
 55 | }
 56 | 
 57 | }
 58 | 
 59 | \section{Ordering}{
 60 | 
 61 | 
 62 | Currently, \code{group_by()} internally orders the groups in ascending order. This
 63 | results in ordered output from functions that aggregate groups, such as
 64 | \code{\link[dplyr:summarise]{summarise()}}.
 65 | 
 66 | When used as grouping columns, character vectors are ordered in the C locale
 67 | for performance and reproducibility across R sessions. If the resulting
 68 | ordering of your grouped operation matters and is dependent on the locale,
 69 | you should follow up the grouped operation with an explicit call to
 70 | \code{\link[dplyr:arrange]{arrange()}} and set the \code{.locale} argument. For example:
 71 | 
 72 | \if{html}{\out{<div class="sourceCode">}}\preformatted{data \%>\%
 73 |   group_by(chr) \%>\%
 74 |   summarise(avg = mean(x)) \%>\%
 75 |   arrange(chr, .locale = "en")
 76 | }\if{html}{\out{</div>}}
 77 | 
 78 | This is often useful as a preliminary step before generating content intended
 79 | for humans, such as an HTML table.
 80 | \subsection{Legacy behavior}{
 81 | 
 82 | Prior to dplyr 1.1.0, character vector grouping columns were ordered in the
 83 | system locale. If you need to temporarily revert to this behavior, you can
 84 | set the global option \code{dplyr.legacy_locale} to \code{TRUE}, but this should be
 85 | used sparingly and you should expect this option to be removed in a future
 86 | version of dplyr. It is better to update existing code to explicitly call
 87 | \code{arrange(.locale = )} instead. Note that setting \code{dplyr.legacy_locale} will
 88 | also force calls to \code{\link[dplyr:arrange]{arrange()}} to use the system locale.
 89 | }
 90 | 
 91 | }
 92 | 
 93 | \examples{
 94 | data(pasilla)
 95 | pasilla  |> group_by(.sample)
 96 |     
 97 | }
 98 | \seealso{
 99 | Other grouping functions: 
100 | \code{\link[dplyr]{group_map}()},
101 | \code{\link[dplyr]{group_nest}()},
102 | \code{\link[dplyr]{group_split}()},
103 | \code{\link[dplyr]{group_trim}()}
104 | }
105 | 


--------------------------------------------------------------------------------
/man/group_split.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{group_split}
 4 | \alias{group_split}
 5 | \alias{group_split.SummarizedExperiment}
 6 | \title{Split data frame by groups}
 7 | \usage{
 8 | \method{group_split}{SummarizedExperiment}(.tbl, ..., .keep = TRUE)
 9 | }
10 | \arguments{
11 | \item{.tbl}{A tbl.}
12 | 
13 | \item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification,
14 | forwarded to \code{\link[dplyr:group_by]{group_by()}}.}
15 | 
16 | \item{.keep}{Should the grouping columns be kept?}
17 | }
18 | \value{
19 | A list of tibbles. Each tibble contains the rows of \code{.tbl} for the
20 | associated group and all the columns, including the grouping variables.
21 | Note that this returns a \link[vctrs:list_of]{list_of} which is slightly
22 | stricter than a simple list but is useful for representing lists where
23 | every element has the same type.
24 | }
25 | \description{
26 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
27 | 
28 | \code{\link[dplyr:group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but:
29 | \itemize{
30 | \item It uses the grouping structure from \code{\link[dplyr:group_by]{group_by()}} and therefore is subject
31 | to the data mask
32 | \item It does not name the elements of the list based on the grouping as this
33 | only works well for a single character grouping variable. Instead,
34 | use \code{\link[dplyr:group_keys]{group_keys()}} to access a data frame that defines the groups.
35 | }
36 | 
37 | \code{group_split()} is primarily designed to work with grouped data frames.
38 | You can pass \code{...} to group and split an ungrouped data frame, but this
39 | is generally not very useful as you won't have easy access to the group
40 | metadata.
41 | }
42 | \section{Lifecycle}{
43 | 
44 | 
45 | \code{group_split()} is not stable because you can achieve very similar results by
46 | manipulating the nested column returned from
47 | \code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all
48 | within a single data structure. \code{group_split()} may be deprecated in the
49 | future.
50 | 
51 | }
52 | 
53 | \examples{
54 | data(pasilla, package = "tidySummarizedExperiment")
55 | pasilla |> group_split(condition)
56 | pasilla |> group_split(counts > 0)
57 | pasilla |> group_split(condition, counts > 0)
58 | 
59 | }
60 | \seealso{
61 | Other grouping functions: 
62 | \code{\link[dplyr]{group_by}()},
63 | \code{\link[dplyr]{group_map}()},
64 | \code{\link[dplyr]{group_nest}()},
65 | \code{\link[dplyr]{group_trim}()}
66 | }
67 | 


--------------------------------------------------------------------------------
/man/inner_join.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{inner_join}
  4 | \alias{inner_join}
  5 | \alias{inner_join.SummarizedExperiment}
  6 | \title{Mutating joins}
  7 | \usage{
  8 | \method{inner_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
  9 | }
 10 | \arguments{
 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
 16 | vector of variables to join by.
 17 | 
 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
 19 | variables in common across \code{x} and \code{y}. A message lists the variables so
 20 | that you can check they're correct; suppress the message by supplying \code{by}
 21 | explicitly.
 22 | 
 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
 25 | 
 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
 27 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match
 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like
 30 | \code{join_by(a, c)}.
 31 | 
 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
 34 | these types of joins.
 35 | 
 36 | For simple equality joins, you can alternatively specify a character vector
 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
 40 | 
 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
 42 | \code{\link[dplyr:cross_join]{cross_join()}}.}
 43 | 
 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source,
 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
 46 | same src as \code{x}.  This allows you to join tables across srcs, but
 47 | it is a potentially expensive operation so you must opt into it.}
 48 | 
 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and
 50 | \code{y}, these suffixes will be added to the output to disambiguate them.
 51 | Should be a character vector of length 2.}
 52 | 
 53 | \item{...}{Other parameters passed onto methods.}
 54 | }
 55 | \value{
 56 | An object of the same type as \code{x} (including the same groups). The order of
 57 | the rows and columns of \code{x} is preserved as much as possible. The output has
 58 | the following properties:
 59 | \itemize{
 60 | \item The rows are affected by the join type.
 61 | \itemize{
 62 | \item \code{inner_join()} returns matched \code{x} rows.
 63 | \item \code{left_join()} returns all \code{x} rows.
 64 | \item \code{right_join()}  returns matched \code{x} rows, followed by unmatched \code{y} rows.
 65 | \item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
 66 | }
 67 | \item Output columns include all columns from \code{x} and all non-key columns from
 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
 71 | the same name, \code{suffix}es are added to disambiguate these as well.
 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
 73 | common type between \code{x} and \code{y}.
 74 | }
 75 | }
 76 | \description{
 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on
 78 | the keys. There are four mutating joins: the inner join, and the three outer
 79 | joins.
 80 | \subsection{Inner join}{
 81 | 
 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key
 83 | in \code{y}.
 84 | 
 85 | The most important property of an inner join is that unmatched rows in either
 86 | input are not included in the result. This means that generally inner joins
 87 | are not appropriate in most analyses, because it is too easy to lose
 88 | observations.
 89 | }
 90 | 
 91 | \subsection{Outer joins}{
 92 | 
 93 | The three outer joins keep observations that appear in at least one of the
 94 | data frames:
 95 | \itemize{
 96 | \item A \code{left_join()} keeps all observations in \code{x}.
 97 | \item A \code{right_join()} keeps all observations in \code{y}.
 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
 99 | }
100 | }
101 | }
102 | \section{Many-to-many relationships}{
103 | 
104 | 
105 | 
106 | By default, dplyr guards against many-to-many relationships in equality joins
107 | by throwing a warning. These occur when both of the following are true:
108 | \itemize{
109 | \item A row in \code{x} matches multiple rows in \code{y}.
110 | \item A row in \code{y} matches multiple rows in \code{x}.
111 | }
112 | 
113 | This is typically surprising, as most joins involve a relationship of
114 | one-to-one, one-to-many, or many-to-one, and is often the result of an
115 | improperly specified join. Many-to-many relationships are particularly
116 | problematic because they can result in a Cartesian explosion of the number of
117 | rows returned from the join.
118 | 
119 | If a many-to-many relationship is expected, silence this warning by
120 | explicitly setting \code{relationship = "many-to-many"}.
121 | 
122 | In production code, it is best to preemptively set \code{relationship} to whatever
123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this
124 | forces an error to occur immediately if the data doesn't align with your
125 | expectations.
126 | 
127 | Inequality joins typically result in many-to-many relationships by nature, so
128 | they don't warn on them by default, but you should still take extra care when
129 | specifying an inequality join, because they also have the capability to
130 | return a large number of rows.
131 | 
132 | Rolling joins don't warn on many-to-many relationships either, but many
133 | rolling joins follow a many-to-one relationship, so it is often useful to
134 | set \code{relationship = "many-to-one"} to enforce this.
135 | 
136 | Note that in SQL, most database providers won't let you specify a
137 | many-to-many relationship between two tables, instead requiring that you
138 | create a third \emph{junction table} that results in two one-to-many relationships
139 | instead.
140 | 
141 | }
142 | 
143 | \section{Methods}{
144 | 
145 | 
146 | These functions are \strong{generic}s, which means that packages can provide
147 | implementations (methods) for other classes. See the documentation of
148 | individual methods for extra arguments and differences in behaviour.
149 | 
150 | Methods available in currently loaded packages:
151 | \itemize{
152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
156 | }
157 | 
158 | }
159 | 
160 | \examples{
161 | data(pasilla)
162 | 
163 | tt <- pasilla 
164 | tt |> inner_join(tt |>
165 |     distinct(condition) |>
166 |     mutate(new_column=1:2) |>
167 |     slice(1))
168 | 
169 | }
170 | \seealso{
171 | Other joins: 
172 | \code{\link[dplyr]{cross_join}()},
173 | \code{\link[dplyr]{filter-joins}},
174 | \code{\link[dplyr]{nest_join}()}
175 | }
176 | 


--------------------------------------------------------------------------------
/man/left_join.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{left_join}
  4 | \alias{left_join}
  5 | \alias{left_join.SummarizedExperiment}
  6 | \title{Mutating joins}
  7 | \usage{
  8 | \method{left_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
  9 | }
 10 | \arguments{
 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
 16 | vector of variables to join by.
 17 | 
 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
 19 | variables in common across \code{x} and \code{y}. A message lists the variables so
 20 | that you can check they're correct; suppress the message by supplying \code{by}
 21 | explicitly.
 22 | 
 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
 25 | 
 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
 27 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match
 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like
 30 | \code{join_by(a, c)}.
 31 | 
 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
 34 | these types of joins.
 35 | 
 36 | For simple equality joins, you can alternatively specify a character vector
 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
 40 | 
 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
 42 | \code{\link[dplyr:cross_join]{cross_join()}}.}
 43 | 
 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source,
 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
 46 | same src as \code{x}.  This allows you to join tables across srcs, but
 47 | it is a potentially expensive operation so you must opt into it.}
 48 | 
 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and
 50 | \code{y}, these suffixes will be added to the output to disambiguate them.
 51 | Should be a character vector of length 2.}
 52 | 
 53 | \item{...}{Other parameters passed onto methods.}
 54 | }
 55 | \value{
 56 | An object of the same type as \code{x} (including the same groups). The order of
 57 | the rows and columns of \code{x} is preserved as much as possible. The output has
 58 | the following properties:
 59 | \itemize{
 60 | \item The rows are affected by the join type.
 61 | \itemize{
 62 | \item \code{inner_join()} returns matched \code{x} rows.
 63 | \item \code{left_join()} returns all \code{x} rows.
 64 | \item \code{right_join()}  returns matched \code{x} rows, followed by unmatched \code{y} rows.
 65 | \item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
 66 | }
 67 | \item Output columns include all columns from \code{x} and all non-key columns from
 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
 71 | the same name, \code{suffix}es are added to disambiguate these as well.
 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
 73 | common type between \code{x} and \code{y}.
 74 | }
 75 | }
 76 | \description{
 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on
 78 | the keys. There are four mutating joins: the inner join, and the three outer
 79 | joins.
 80 | \subsection{Inner join}{
 81 | 
 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key
 83 | in \code{y}.
 84 | 
 85 | The most important property of an inner join is that unmatched rows in either
 86 | input are not included in the result. This means that generally inner joins
 87 | are not appropriate in most analyses, because it is too easy to lose
 88 | observations.
 89 | }
 90 | 
 91 | \subsection{Outer joins}{
 92 | 
 93 | The three outer joins keep observations that appear in at least one of the
 94 | data frames:
 95 | \itemize{
 96 | \item A \code{left_join()} keeps all observations in \code{x}.
 97 | \item A \code{right_join()} keeps all observations in \code{y}.
 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
 99 | }
100 | }
101 | }
102 | \section{Many-to-many relationships}{
103 | 
104 | 
105 | 
106 | By default, dplyr guards against many-to-many relationships in equality joins
107 | by throwing a warning. These occur when both of the following are true:
108 | \itemize{
109 | \item A row in \code{x} matches multiple rows in \code{y}.
110 | \item A row in \code{y} matches multiple rows in \code{x}.
111 | }
112 | 
113 | This is typically surprising, as most joins involve a relationship of
114 | one-to-one, one-to-many, or many-to-one, and is often the result of an
115 | improperly specified join. Many-to-many relationships are particularly
116 | problematic because they can result in a Cartesian explosion of the number of
117 | rows returned from the join.
118 | 
119 | If a many-to-many relationship is expected, silence this warning by
120 | explicitly setting \code{relationship = "many-to-many"}.
121 | 
122 | In production code, it is best to preemptively set \code{relationship} to whatever
123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this
124 | forces an error to occur immediately if the data doesn't align with your
125 | expectations.
126 | 
127 | Inequality joins typically result in many-to-many relationships by nature, so
128 | they don't warn on them by default, but you should still take extra care when
129 | specifying an inequality join, because they also have the capability to
130 | return a large number of rows.
131 | 
132 | Rolling joins don't warn on many-to-many relationships either, but many
133 | rolling joins follow a many-to-one relationship, so it is often useful to
134 | set \code{relationship = "many-to-one"} to enforce this.
135 | 
136 | Note that in SQL, most database providers won't let you specify a
137 | many-to-many relationship between two tables, instead requiring that you
138 | create a third \emph{junction table} that results in two one-to-many relationships
139 | instead.
140 | 
141 | }
142 | 
143 | \section{Methods}{
144 | 
145 | 
146 | These functions are \strong{generic}s, which means that packages can provide
147 | implementations (methods) for other classes. See the documentation of
148 | individual methods for extra arguments and differences in behaviour.
149 | 
150 | Methods available in currently loaded packages:
151 | \itemize{
152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
156 | }
157 | 
158 | }
159 | 
160 | \examples{
161 | data(pasilla)
162 | 
163 | tt <- pasilla 
164 | tt |> left_join(tt |>
165 |     distinct(condition) |>
166 |     mutate(new_column=1:2))
167 | 
168 | }
169 | \seealso{
170 | Other joins: 
171 | \code{\link[dplyr]{cross_join}()},
172 | \code{\link[dplyr]{filter-joins}},
173 | \code{\link[dplyr]{nest_join}()}
174 | }
175 | 


--------------------------------------------------------------------------------
/man/mutate.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{mutate}
  4 | \alias{mutate}
  5 | \alias{mutate.SummarizedExperiment}
  6 | \title{Create, modify, and delete columns}
  7 | \usage{
  8 | \method{mutate}{SummarizedExperiment}(.data, ...)
  9 | }
 10 | \arguments{
 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs.
 16 | The name gives the name of the column in the output.
 17 | 
 18 | The value can be:
 19 | \itemize{
 20 | \item A vector of length 1, which will be recycled to the correct length.
 21 | \item A vector the same length as the current group (or the whole data frame
 22 | if ungrouped).
 23 | \item \code{NULL}, to remove the column.
 24 | \item A data frame or tibble, to create multiple columns in the output.
 25 | }}
 26 | }
 27 | \value{
 28 | An object of the same type as \code{.data}. The output has the following
 29 | properties:
 30 | \itemize{
 31 | \item Columns from \code{.data} will be preserved according to the \code{.keep} argument.
 32 | \item Existing columns that are modified by \code{...} will always be returned in
 33 | their original location.
 34 | \item New columns created through \code{...} will be placed according to the
 35 | \code{.before} and \code{.after} arguments.
 36 | \item The number of rows is not affected.
 37 | \item Columns given the value \code{NULL} will be removed.
 38 | \item Groups will be recomputed if a grouping variable is mutated.
 39 | \item Data frame attributes are preserved.
 40 | }
 41 | }
 42 | \description{
 43 | \code{mutate()} creates new columns that are functions of existing variables.
 44 | It can also modify (if the name is the same as an existing
 45 | column) and delete columns (by setting their value to \code{NULL}).
 46 | }
 47 | \section{Useful mutate functions}{
 48 | 
 49 | 
 50 | \itemize{
 51 | \item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings
 52 | \item \code{\link[dplyr:lead]{lead()}}, \code{\link[dplyr:lag]{lag()}}
 53 | \item \code{\link[dplyr:dense_rank]{dense_rank()}}, \code{\link[dplyr:min_rank]{min_rank()}}, \code{\link[dplyr:percent_rank]{percent_rank()}}, \code{\link[dplyr:row_number]{row_number()}},
 54 | \code{\link[dplyr:cume_dist]{cume_dist()}}, \code{\link[dplyr:ntile]{ntile()}}
 55 | \item \code{\link[=cumsum]{cumsum()}}, \code{\link[dplyr:cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[dplyr:cumany]{cumany()}}, \code{\link[dplyr:cumall]{cumall()}}
 56 | \item \code{\link[dplyr:na_if]{na_if()}}, \code{\link[dplyr:coalesce]{coalesce()}}
 57 | \item \code{\link[dplyr:if_else]{if_else()}}, \code{\link[dplyr:recode]{recode()}}, \code{\link[dplyr:case_when]{case_when()}}
 58 | }
 59 | 
 60 | }
 61 | 
 62 | \section{Grouped tibbles}{
 63 | 
 64 | 
 65 | 
 66 | Because mutating expressions are computed within groups, they may
 67 | yield different results on grouped tibbles. This will be the case
 68 | as soon as an aggregating, lagging, or ranking function is
 69 | involved. Compare this ungrouped mutate:
 70 | 
 71 | \if{html}{\out{<div class="sourceCode">}}\preformatted{starwars \%>\%
 72 |   select(name, mass, species) \%>\%
 73 |   mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
 74 | }\if{html}{\out{</div>}}
 75 | 
 76 | With the grouped equivalent:
 77 | 
 78 | \if{html}{\out{<div class="sourceCode">}}\preformatted{starwars \%>\%
 79 |   select(name, mass, species) \%>\%
 80 |   group_by(species) \%>\%
 81 |   mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
 82 | }\if{html}{\out{</div>}}
 83 | 
 84 | The former normalises \code{mass} by the global average whereas the
 85 | latter normalises by the averages within species levels.
 86 | 
 87 | }
 88 | 
 89 | \section{Methods}{
 90 | 
 91 | 
 92 | This function is a \strong{generic}, which means that packages can provide
 93 | implementations (methods) for other classes. See the documentation of
 94 | individual methods for extra arguments and differences in behaviour.
 95 | 
 96 | Methods available in currently loaded packages:
 97 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}.
 98 | 
 99 | }
100 | 
101 | \examples{
102 | data(pasilla)
103 | pasilla |> mutate(logcounts=log2(counts))
104 | 
105 | }
106 | \seealso{
107 | Other single table verbs: 
108 | \code{\link{rename}()},
109 | \code{\link{slice}()},
110 | \code{\link{summarise}()}
111 | }
112 | \concept{single table verbs}
113 | 


--------------------------------------------------------------------------------
/man/mutate_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{mutate_features}
 4 | \alias{mutate_features}
 5 | \title{Mutate features}
 6 | \usage{
 7 | mutate_features(.data, ...)
 8 | }
 9 | \arguments{
10 | \item{.data}{a SummarizedExperiment}
11 | 
12 | \item{...}{extra arguments passed to dplyr::mutate}
13 | }
14 | \value{
15 | a SummarizedExperiment with modified rowData
16 | }
17 | \description{
18 | Allows mutate call on features (rowData)
19 | of a SummarizedExperiment
20 | }
21 | 


--------------------------------------------------------------------------------
/man/mutate_samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{mutate_samples}
 4 | \alias{mutate_samples}
 5 | \title{Mutate samples}
 6 | \usage{
 7 | mutate_samples(.data, ...)
 8 | }
 9 | \arguments{
10 | \item{.data}{a SummarizedExperiment}
11 | 
12 | \item{...}{extra arguments passed to dplyr::mutate}
13 | }
14 | \value{
15 | a SummarizedExperiment with modified colData
16 | }
17 | \description{
18 | Allows mutate call on samples (colData)
19 | of a SummarizedExperiment
20 | }
21 | 


--------------------------------------------------------------------------------
/man/nest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tidyr_methods.R
 3 | \name{nest}
 4 | \alias{nest}
 5 | \alias{nest.SummarizedExperiment}
 6 | \title{Nest rows into a list-column of data frames}
 7 | \usage{
 8 | \method{nest}{SummarizedExperiment}(.data, ..., .names_sep = NULL)
 9 | }
10 | \arguments{
11 | \item{.data}{A data frame.}
12 | 
13 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest; these will
14 | appear in the inner data frames.
15 | 
16 | Specified using name-variable pairs of the form
17 | \code{new_col = c(col1, col2, col3)}. The right hand side can be any valid
18 | tidyselect expression.
19 | 
20 | If not supplied, then \code{...} is derived as all columns \emph{not} selected by
21 | \code{.by}, and will use the column name from \code{.key}.
22 | 
23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
24 | previously you could write \code{df \%>\% nest(x, y, z)}.
25 | Convert to \code{df \%>\% nest(data = c(x, y, z))}.}
26 | 
27 | \item{.names_sep}{If \code{NULL}, the default, the inner names will come from
28 | the former outer names. If a string, the new inner names will use the
29 | outer names with \code{names_sep} automatically stripped. This makes
30 | \code{names_sep} roughly symmetric between nesting and unnesting.}
31 | }
32 | \value{
33 | \code{tidySummarizedExperiment_nested}
34 | }
35 | \description{
36 | Nesting creates a list-column of data frames; unnesting flattens it back out
37 | into regular columns. Nesting is implicitly a summarising operation: you
38 | get one row for each group defined by the non-nested columns. This is useful
39 | in conjunction with other summaries that work with whole datasets, most
40 | notably models.
41 | 
42 | Learn more in \code{vignette("nest")}.
43 | }
44 | \details{
45 | If neither \code{...} nor \code{.by} are supplied, \code{nest()} will nest all variables,
46 | and will use the column name supplied through \code{.key}.
47 | }
48 | \section{New syntax}{
49 | 
50 | 
51 | tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's
52 | designed to be more similar to other functions. Converting to the new syntax
53 | should be straightforward (guided by the message you'll receive) but if
54 | you just need to run an old analysis, you can easily revert to the previous
55 | behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows:
56 | 
57 | \if{html}{\out{<div class="sourceCode">}}\preformatted{library(tidyr)
58 | nest <- nest_legacy
59 | unnest <- unnest_legacy
60 | }\if{html}{\out{</div>}}
61 | 
62 | }
63 | 
64 | \section{Grouped data frames}{
65 | 
66 | 
67 | \code{df \%>\% nest(data = c(x, y))} specifies the columns to be nested; i.e. the
68 | columns that will appear in the inner data frame. \code{df \%>\% nest(.by = c(x, y))} specifies the columns to nest \emph{by}; i.e. the columns that will remain in
69 | the outer data frame. An alternative way to achieve the latter is to \code{nest()}
70 | a grouped data frame created by \code{\link[dplyr:group_by]{dplyr::group_by()}}. The grouping variables
71 | remain in the outer data frame and the others are nested. The result
72 | preserves the grouping of the input.
73 | 
74 | Variables supplied to \code{nest()} will override grouping variables so that
75 | \code{df \%>\% group_by(x, y) \%>\% nest(data = !z)} will be equivalent to
76 | \code{df \%>\% nest(data = !z)}.
77 | 
78 | You can't supply \code{.by} with a grouped data frame, as the groups already
79 | represent what you are nesting by.
80 | 
81 | }
82 | 
83 | \examples{
84 | tidySummarizedExperiment::pasilla |>
85 |     nest(data=-condition)
86 |     
87 | }
88 | 


--------------------------------------------------------------------------------
/man/pasilla.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{pasilla}
 5 | \alias{pasilla}
 6 | \title{Read counts of RNA-seq samples of Pasilla knock-down by Brooks et al.}
 7 | \format{
 8 | A SummarizedExperiment containing 14599 features and 7 biological replicates.
 9 | }
10 | \source{
11 | \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html}
12 | }
13 | \usage{
14 | data(pasilla)
15 | }
16 | \description{
17 | A SummarizedExperiment dataset containing
18 | the transcriptome information for Drosophila melanogaster.
19 | }
20 | \keyword{datasets}
21 | 


--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-pipe.R
 3 | \name{\%>\%}
 4 | \alias{\%>\%}
 5 | \title{Pipe operator}
 6 | \usage{
 7 | lhs \%>\% rhs
 8 | }
 9 | \arguments{
10 | \item{lhs}{A value or the magrittr placeholder.}
11 | 
12 | \item{rhs}{A function call using the magrittr semantics.}
13 | }
14 | \value{
15 | The result of calling \code{rhs(lhs)}.
16 | }
17 | \description{
18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
19 | }
20 | \examples{
21 | 
22 | library(magrittr)
23 | 1 \%>\% sum(2)
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/pivot_longer.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/tidyr_methods.R
  3 | \name{pivot_longer}
  4 | \alias{pivot_longer}
  5 | \alias{pivot_longer.SummarizedExperiment}
  6 | \title{Pivot data from wide to long}
  7 | \usage{
  8 | \method{pivot_longer}{SummarizedExperiment}(
  9 |   data,
 10 |   cols,
 11 |   ...,
 12 |   cols_vary = "fastest",
 13 |   names_to = "name",
 14 |   names_prefix = NULL,
 15 |   names_sep = NULL,
 16 |   names_pattern = NULL,
 17 |   names_ptypes = NULL,
 18 |   names_transform = NULL,
 19 |   names_repair = "check_unique",
 20 |   values_to = "value",
 21 |   values_drop_na = FALSE,
 22 |   values_ptypes = NULL,
 23 |   values_transform = NULL
 24 | )
 25 | }
 26 | \arguments{
 27 | \item{data}{A data frame to pivot.}
 28 | 
 29 | \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into
 30 | longer format.}
 31 | 
 32 | \item{...}{Additional arguments passed on to methods.}
 33 | 
 34 | \item{cols_vary}{When pivoting \code{cols} into longer format, how should the
 35 | output rows be arranged relative to their original row number?
 36 | \itemize{
 37 | \item \code{"fastest"}, the default, keeps individual rows from \code{cols} close
 38 | together in the output. This often produces intuitively ordered output
 39 | when you have at least one key column from \code{data} that is not involved in
 40 | the pivoting process.
 41 | \item \code{"slowest"} keeps individual columns from \code{cols} close together in the
 42 | output. This often produces intuitively ordered output when you utilize
 43 | all of the columns from \code{data} in the pivoting process.
 44 | }}
 45 | 
 46 | \item{names_to}{A character vector specifying the new column or columns to
 47 | create from the information stored in the column names of \code{data} specified
 48 | by \code{cols}.
 49 | \itemize{
 50 | \item If length 0, or if \code{NULL} is supplied, no columns will be created.
 51 | \item If length 1, a single column will be created which will contain the
 52 | column names specified by \code{cols}.
 53 | \item If length >1, multiple columns will be created. In this case, one of
 54 | \code{names_sep} or \code{names_pattern} must be supplied to specify how the
 55 | column names should be split. There are also two additional character
 56 | values you can take advantage of:
 57 | \itemize{
 58 | \item \code{NA} will discard the corresponding component of the column name.
 59 | \item \code{".value"} indicates that the corresponding component of the column
 60 | name defines the name of the output column containing the cell values,
 61 | overriding \code{values_to} entirely.
 62 | }
 63 | }}
 64 | 
 65 | \item{names_prefix}{A regular expression used to remove matching text
 66 | from the start of each variable name.}
 67 | 
 68 | \item{names_sep, names_pattern}{If \code{names_to} contains multiple values,
 69 | these arguments control how the column name is broken up.
 70 | 
 71 | \code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either
 72 | be a numeric vector (specifying positions to break on), or a single string
 73 | (specifying a regular expression to split on).
 74 | 
 75 | \code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular
 76 | expression containing matching groups (\verb{()}).
 77 | 
 78 | If these arguments do not give you enough control, use
 79 | \code{pivot_longer_spec()} to create a spec object and process manually as
 80 | needed.}
 81 | 
 82 | \item{names_ptypes, values_ptypes}{Optionally, a list of column name-prototype
 83 | pairs. Alternatively, a single empty prototype can be supplied, which will
 84 | be applied to all columns. A prototype (or ptype for short) is a
 85 | zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type,
 86 | class, and attributes of a vector. Use these arguments if you want to
 87 | confirm that the created columns are the types that you expect. Note that
 88 | if you want to change (instead of confirm) the types of specific columns,
 89 | you should use \code{names_transform} or \code{values_transform} instead.}
 90 | 
 91 | \item{names_transform, values_transform}{Optionally, a list of column
 92 | name-function pairs. Alternatively, a single function can be supplied,
 93 | which will be applied to all columns. Use these arguments if you need to
 94 | change the types of specific columns. For example, \code{names_transform = list(week = as.integer)} would convert a character variable called \code{week}
 95 | to an integer.
 96 | 
 97 | If not specified, the type of the columns generated from \code{names_to} will
 98 | be character, and the type of the variables generated from \code{values_to}
 99 | will be the common type of the input columns used to generate them.}
100 | 
101 | \item{names_repair}{What happens if the output has invalid column names?
102 | The default, \code{"check_unique"} is to error if the columns are duplicated.
103 | Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to
104 | de-duplicate by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}
105 | for more options.}
106 | 
107 | \item{values_to}{A string specifying the name of the column to create
108 | from the data stored in cell values. If \code{names_to} is a character
109 | containing the special \code{.value} sentinel, this value will be ignored,
110 | and the name of the value column will be derived from part of the
111 | existing column names.}
112 | 
113 | \item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA}s
114 | in the \code{values_to} column. This effectively converts explicit missing values
115 | to implicit missing values, and should generally be used only when missing
116 | values in \code{data} were created by its structure.}
117 | }
118 | \value{
119 | \code{tidySummarizedExperiment}
120 | }
121 | \description{
122 | \code{pivot_longer()} "lengthens" data, increasing the number of rows and
123 | decreasing the number of columns. The inverse transformation is
124 | \code{\link[tidyr:pivot_wider]{pivot_wider()}}.
125 | 
126 | Learn more in \code{vignette("pivot")}.
127 | }
128 | \details{
129 | \code{pivot_longer()} is an updated approach to \code{\link[tidyr:gather]{gather()}}, designed to be both
130 | simpler to use and to handle more use cases. We recommend you use
131 | \code{pivot_longer()} for new code; \code{gather()} isn't going away but is no longer
132 | under active development.
133 | }
134 | \examples{
135 | # See vignette("pivot") for examples and explanation
136 | library(dplyr)
137 | tidySummarizedExperiment::pasilla \%>\%
138 |     pivot_longer(c(condition, type),
139 |         names_to="name", values_to="value")
140 | 
141 | }
142 | 


--------------------------------------------------------------------------------
/man/pivot_wider.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/tidyr_methods.R
  3 | \name{pivot_wider}
  4 | \alias{pivot_wider}
  5 | \alias{pivot_wider.SummarizedExperiment}
  6 | \title{Pivot data from long to wide}
  7 | \usage{
  8 | \method{pivot_wider}{SummarizedExperiment}(
  9 |   data,
 10 |   ...,
 11 |   id_cols = NULL,
 12 |   id_expand = FALSE,
 13 |   names_from = name,
 14 |   names_prefix = "",
 15 |   names_sep = "_",
 16 |   names_glue = NULL,
 17 |   names_sort = FALSE,
 18 |   names_vary = "fastest",
 19 |   names_expand = FALSE,
 20 |   names_repair = "check_unique",
 21 |   values_from = value,
 22 |   values_fill = NULL,
 23 |   values_fn = NULL,
 24 |   unused_fn = NULL
 25 | )
 26 | }
 27 | \arguments{
 28 | \item{data}{A data frame to pivot.}
 29 | 
 30 | \item{...}{Additional arguments passed on to methods.}
 31 | 
 32 | \item{id_cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A set of columns that
 33 | uniquely identify each observation. Typically used when you have
 34 | redundant variables, i.e. variables whose values are perfectly correlated
 35 | with existing variables.
 36 | 
 37 | Defaults to all columns in \code{data} except for the columns specified through
 38 | \code{names_from} and \code{values_from}. If a tidyselect expression is supplied, it
 39 | will be evaluated on \code{data} after removing the columns specified through
 40 | \code{names_from} and \code{values_from}.}
 41 | 
 42 | \item{id_expand}{Should the values in the \code{id_cols} columns be expanded by
 43 | \code{\link[tidyr:expand]{expand()}} before pivoting? This results in more rows, the output will
 44 | contain a complete expansion of all possible values in \code{id_cols}. Implicit
 45 | factor levels that aren't represented in the data will become explicit.
 46 | Additionally, the row values corresponding to the expanded \code{id_cols} will
 47 | be sorted.}
 48 | 
 49 | \item{names_from, values_from}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A pair of
 50 | arguments describing which column (or columns) to get the name of the
 51 | output column (\code{names_from}), and which column (or columns) to get the
 52 | cell values from (\code{values_from}).
 53 | 
 54 | If \code{values_from} contains multiple values, the value will be added to the
 55 | front of the output column.}
 56 | 
 57 | \item{names_prefix}{String added to the start of every variable name. This is
 58 | particularly useful if \code{names_from} is a numeric vector and you want to
 59 | create syntactic variable names.}
 60 | 
 61 | \item{names_sep}{If \code{names_from} or \code{values_from} contains multiple
 62 | variables, this will be used to join their values together into a single
 63 | string to use as a column name.}
 64 | 
 65 | \item{names_glue}{Instead of \code{names_sep} and \code{names_prefix}, you can supply
 66 | a glue specification that uses the \code{names_from} columns (and special
 67 | \code{.value}) to create custom column names.}
 68 | 
 69 | \item{names_sort}{Should the column names be sorted? If \code{FALSE}, the default,
 70 | column names are ordered by first appearance.}
 71 | 
 72 | \item{names_vary}{When \code{names_from} identifies a column (or columns) with
 73 | multiple unique values, and multiple \code{values_from} columns are provided,
 74 | in what order should the resulting column names be combined?
 75 | \itemize{
 76 | \item \code{"fastest"} varies \code{names_from} values fastest, resulting in a column
 77 | naming scheme of the form: \verb{value1_name1, value1_name2, value2_name1, value2_name2}. This is the default.
 78 | \item \code{"slowest"} varies \code{names_from} values slowest, resulting in a column
 79 | naming scheme of the form: \verb{value1_name1, value2_name1, value1_name2, value2_name2}.
 80 | }}
 81 | 
 82 | \item{names_expand}{Should the values in the \code{names_from} columns be expanded
 83 | by \code{\link[tidyr:expand]{expand()}} before pivoting? This results in more columns, the output
 84 | will contain column names corresponding to a complete expansion of all
 85 | possible values in \code{names_from}. Implicit factor levels that aren't
 86 | represented in the data will become explicit. Additionally, the column
 87 | names will be sorted, identical to what \code{names_sort} would produce.}
 88 | 
 89 | \item{names_repair}{What happens if the output has invalid column names?
 90 | The default, \code{"check_unique"} is to error if the columns are duplicated.
 91 | Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to
 92 | de-duplicate by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}
 93 | for more options.}
 94 | 
 95 | \item{values_fill}{Optionally, a (scalar) value that specifies what each
 96 | \code{value} should be filled in with when missing.
 97 | 
 98 | This can be a named list if you want to apply different fill values to
 99 | different value columns.}
100 | 
101 | \item{values_fn}{Optionally, a function applied to the value in each cell
102 | in the output. You will typically use this when the combination of
103 | \code{id_cols} and \code{names_from} columns does not uniquely identify an
104 | observation.
105 | 
106 | This can be a named list if you want to apply different aggregations
107 | to different \code{values_from} columns.}
108 | 
109 | \item{unused_fn}{Optionally, a function applied to summarize the values from
110 | the unused columns (i.e. columns not identified by \code{id_cols},
111 | \code{names_from}, or \code{values_from}).
112 | 
113 | The default drops all unused columns from the result.
114 | 
115 | This can be a named list if you want to apply different aggregations
116 | to different unused columns.
117 | 
118 | \code{id_cols} must be supplied for \code{unused_fn} to be useful, since otherwise
119 | all unspecified columns will be considered \code{id_cols}.
120 | 
121 | This is similar to grouping by the \code{id_cols} then summarizing the
122 | unused columns using \code{unused_fn}.}
123 | }
124 | \value{
125 | \code{tidySummarizedExperiment}
126 | }
127 | \description{
128 | \code{pivot_wider()} "widens" data, increasing the number of columns and
129 | decreasing the number of rows. The inverse transformation is
130 | \code{\link[tidyr:pivot_longer]{pivot_longer()}}.
131 | 
132 | Learn more in \code{vignette("pivot")}.
133 | }
134 | \details{
135 | \code{pivot_wider()} is an updated approach to \code{\link[tidyr:spread]{spread()}}, designed to be both
136 | simpler to use and to handle more use cases. We recommend you use
137 | \code{pivot_wider()} for new code; \code{spread()} isn't going away but is no longer
138 | under active development.
139 | }
140 | \examples{
141 | # See vignette("pivot") for examples and explanation
142 | library(dplyr)
143 | tidySummarizedExperiment::pasilla \%>\%
144 |     pivot_wider(names_from=feature, values_from=counts)
145 | 
146 | }
147 | \seealso{
148 | \code{\link[tidyr:pivot_wider_spec]{pivot_wider_spec()}} to pivot "by hand" with a data frame that
149 | defines a pivoting specification.
150 | }
151 | 


--------------------------------------------------------------------------------
/man/plot_ly.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/plotly_methods.R
  3 | \name{plot_ly}
  4 | \alias{plot_ly}
  5 | \alias{plot_ly.tbl_df}
  6 | \alias{plot_ly.SummarizedExperiment}
  7 | \title{Initiate a plotly visualization}
  8 | \usage{
  9 | \method{plot_ly}{tbl_df}(
 10 |   data = data.frame(),
 11 |   ...,
 12 |   type = NULL,
 13 |   name = NULL,
 14 |   color = NULL,
 15 |   colors = NULL,
 16 |   alpha = NULL,
 17 |   stroke = NULL,
 18 |   strokes = NULL,
 19 |   alpha_stroke = 1,
 20 |   size = NULL,
 21 |   sizes = c(10, 100),
 22 |   span = NULL,
 23 |   spans = c(1, 20),
 24 |   symbol = NULL,
 25 |   symbols = NULL,
 26 |   linetype = NULL,
 27 |   linetypes = NULL,
 28 |   split = NULL,
 29 |   frame = NULL,
 30 |   width = NULL,
 31 |   height = NULL,
 32 |   source = "A"
 33 | )
 34 | 
 35 | \method{plot_ly}{SummarizedExperiment}(
 36 |   data = data.frame(),
 37 |   ...,
 38 |   type = NULL,
 39 |   name = NULL,
 40 |   color = NULL,
 41 |   colors = NULL,
 42 |   alpha = NULL,
 43 |   stroke = NULL,
 44 |   strokes = NULL,
 45 |   alpha_stroke = 1,
 46 |   size = NULL,
 47 |   sizes = c(10, 100),
 48 |   span = NULL,
 49 |   spans = c(1, 20),
 50 |   symbol = NULL,
 51 |   symbols = NULL,
 52 |   linetype = NULL,
 53 |   linetypes = NULL,
 54 |   split = NULL,
 55 |   frame = NULL,
 56 |   width = NULL,
 57 |   height = NULL,
 58 |   source = "A"
 59 | )
 60 | }
 61 | \arguments{
 62 | \item{data}{A data frame (optional) or \link[crosstalk:SharedData]{crosstalk::SharedData} object.}
 63 | 
 64 | \item{...}{Arguments (i.e., attributes) passed along to the trace \code{type}.
 65 | See \code{\link[plotly:schema]{schema()}} for a list of acceptable attributes for a given trace \code{type}
 66 | (by going to \code{traces} -> \code{type} -> \code{attributes}). Note that attributes
 67 | provided at this level may override other arguments
 68 | (e.g. \code{plot_ly(x = 1:10, y = 1:10, color = I("red"), marker = list(color = "blue"))}).}
 69 | 
 70 | \item{type}{A character string specifying the trace type (e.g. \code{"scatter"}, \code{"bar"}, \code{"box"}, etc).
 71 | If specified, it \emph{always} creates a trace, otherwise the plotly object is only initiated with 'global' attributes (see Details).}
 72 | 
 73 | \item{name}{Values mapped to the trace's name attribute. Since a trace can
 74 | only have one name, this argument acts very much like \code{split} in that it
 75 | creates one trace for every unique value.}
 76 | 
 77 | \item{color}{Values mapped to relevant 'fill-color' attribute(s)
 78 | (e.g. \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor},
 79 | \href{https://plotly.com/r/reference/#scatter-marker-color}{marker.color},
 80 | \href{https://plotly.com/r/reference/#scatter-textfont-color}{textfont.color}, etc.).
 81 | The mapping from data values to color codes may be controlled using
 82 | \code{colors} and \code{alpha}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{color = I("red")}).
 83 | Any color understood by \code{\link[grDevices:col2rgb]{grDevices::col2rgb()}} may be used in this way.}
 84 | 
 85 | \item{colors}{Either a colorbrewer2.org palette name (e.g. "YlOrRd" or "Blues"),
 86 | or a vector of colors to interpolate in hexadecimal "#RRGGBB" format,
 87 | or a color interpolation function like \code{colorRamp()}.}
 88 | 
 89 | \item{alpha}{A number between 0 and 1 specifying the alpha channel applied to \code{color}.
 90 | Defaults to 0.5 when mapping to \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor} and 1 otherwise.}
 91 | 
 92 | \item{stroke}{Similar to \code{color}, but values are mapped to relevant 'stroke-color' attribute(s)
 93 | (e.g., \href{https://plotly.com/r/reference/#scatter-marker-line-color}{marker.line.color}
 94 | and \href{https://plotly.com/r/reference/#scatter-line-color}{line.color}
 95 | for filled polygons). If not specified, \code{stroke} inherits from \code{color}.}
 96 | 
 97 | \item{strokes}{Similar to \code{colors}, but controls the \code{stroke} mapping.}
 98 | 
 99 | \item{alpha_stroke}{Similar to \code{alpha}, but applied to \code{stroke}.}
100 | 
101 | \item{size}{(Numeric) values mapped to relevant 'fill-size' attribute(s)
102 | (e.g., \href{https://plotly.com/r/reference/#scatter-marker-size}{marker.size},
103 | \href{https://plotly.com/r/reference/#scatter-textfont-size}{textfont.size},
104 | and \href{https://plotly.com/r/reference/#scatter-error_x-width}{error_x.width}).
105 | The mapping from data values to sizes may be controlled using
106 | \code{sizes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{size = I(30)}).}
107 | 
108 | \item{sizes}{A numeric vector of length 2 used to scale \code{size} to pixels.}
109 | 
110 | \item{span}{(Numeric) values mapped to relevant 'stroke-size' attribute(s)
111 | (e.g.,
112 | \href{https://plotly.com/r/reference/#scatter-marker-line-width}{marker.line.width},
113 | \href{https://plotly.com/r/reference/#scatter-line-width}{line.width} for filled polygons,
114 | and \href{https://plotly.com/r/reference/#scatter-error_x-thickness}{error_x.thickness}).
115 | The mapping from data values to stroke sizes may be controlled using
116 | \code{spans}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{span = I(30)}).}
117 | 
118 | \item{spans}{A numeric vector of length 2 used to scale \code{span} to pixels.}
119 | 
120 | \item{symbol}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-marker-symbol}{marker.symbol}.
121 | The mapping from data values to symbols may be controlled using
122 | \code{symbols}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{symbol = I("pentagon")}).
123 | Any \link{pch} value or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol name} may be used in this way.}
124 | 
125 | \item{symbols}{A character vector of \link{pch} values or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol names}.}
126 | 
127 | \item{linetype}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-line-dash}{line.dash}.
128 | The mapping from data values to dash types may be controlled using
129 | \code{linetypes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{linetype = I("dash")}).
130 | Any \code{lty} (see \link{par}) value or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash name} may be used in this way.}
131 | 
132 | \item{linetypes}{A character vector of \code{lty} values or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash names}.}
133 | 
134 | \item{split}{(Discrete) values used to create multiple traces (one trace per value).}
135 | 
136 | \item{frame}{(Discrete) values used to create animation frames.}
137 | 
138 | \item{width}{Width in pixels (optional, defaults to automatic sizing).}
139 | 
140 | \item{height}{Height in pixels (optional, defaults to automatic sizing).}
141 | 
142 | \item{source}{a character string of length 1. Match the value of this string
143 | with the source argument in \code{\link[plotly:event_data]{event_data()}} to retrieve the
144 | event data corresponding to a specific plot (shiny apps can have multiple plots).}
145 | }
146 | \value{
147 | \code{plotly}
148 | 
149 | \code{plotly}
150 | }
151 | \description{
152 | This function maps R objects to \href{https://plotly.com/javascript/}{plotly.js},
153 | an (MIT licensed) web-based interactive charting library. It provides
154 | abstractions for doing common things (e.g. mapping data values to
155 | fill colors (via \code{color}) or creating \link[plotly]{animation}s (via \code{frame})) and sets
156 | some different defaults to make the interface feel more 'R-like'
157 | (i.e., closer to \code{\link[=plot]{plot()}} and \code{\link[ggplot2:qplot]{ggplot2::qplot()}}).
158 | }
159 | \details{
160 | Unless \code{type} is specified, this function just initiates a plotly
161 | object with 'global' attributes that are passed onto downstream uses of
162 | \code{\link[plotly:add_trace]{add_trace()}} (or similar). A \link{formula} must always be used when
163 | referencing column name(s) in \code{data} (e.g. \code{plot_ly(mtcars, x = ~wt)}).
164 | Formulas are optional when supplying values directly, but they do
165 | help inform default axis/scale titles
166 | (e.g., \code{plot_ly(x = mtcars$wt)} vs \code{plot_ly(x = ~mtcars$wt)}).
167 | }
168 | \examples{
169 | data(se)
170 | se |>
171 |     plot_ly(x = ~counts)
172 | 
173 | data(se)
174 | se |>
175 |     plot_ly(x = ~counts)
176 | 
177 | }
178 | \references{
179 | \url{https://plotly-r.com/overview.html}
180 | }
181 | \seealso{
182 | \itemize{
183 | \item For initializing a plotly-geo object: \code{\link[plotly:plot_geo]{plot_geo()}}
184 | \item For initializing a plotly-mapbox object: \code{\link[plotly:plot_mapbox]{plot_mapbox()}}
185 | \item For translating a ggplot2 object to a plotly object: \code{\link[plotly:ggplotly]{ggplotly()}}
186 | \item For modifying any plotly object: \code{\link[plotly:layout]{layout()}}, \code{\link[plotly:add_trace]{add_trace()}}, \code{\link[plotly:style]{style()}}
187 | \item For linked brushing: \code{\link[plotly:highlight]{highlight()}}
188 | \item For arranging multiple plots: \code{\link[plotly:subplot]{subplot()}}, \code{\link[crosstalk:bscols]{crosstalk::bscols()}}
189 | \item For inspecting plotly objects: \code{\link[plotly:plotly_json]{plotly_json()}}
190 | \item For quick, accurate, and searchable plotly.js reference: \code{\link[plotly:schema]{schema()}}
191 | }
192 | }
193 | \author{
194 | Carson Sievert
195 | }
196 | 


--------------------------------------------------------------------------------
/man/pull.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{pull}
 4 | \alias{pull}
 5 | \alias{pull.SummarizedExperiment}
 6 | \title{Extract a single column}
 7 | \usage{
 8 | \method{pull}{SummarizedExperiment}(.data, var = -1, name = NULL, ...)
 9 | }
10 | \arguments{
11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
13 | more details.}
14 | 
15 | \item{var}{A variable specified as:
16 | \itemize{
17 | \item a literal variable name
18 | \item a positive integer, giving the position counting from the left
19 | \item a negative integer, giving the position counting from the right.
20 | }
21 | 
22 | The default returns the last column (on the assumption that's the
23 | column you've created most recently).
24 | 
25 | This argument is taken by expression and supports
26 | \link[rlang:topic-inject]{quasiquotation} (you can unquote column
27 | names and column locations).}
28 | 
29 | \item{name}{An optional parameter that specifies the column to be used
30 | as names for a named vector. Specified in a similar manner as \code{var}.}
31 | 
32 | \item{...}{For use by methods.}
33 | }
34 | \value{
35 | A vector the same size as \code{.data}.
36 | }
37 | \description{
38 | \code{pull()} is similar to \code{$}. It's mostly useful because it looks a little
39 | nicer in pipes, it also works with remote data frames, and it can optionally
40 | name the output.
41 | }
42 | \section{Methods}{
43 | 
44 | 
45 | This function is a \strong{generic}, which means that packages can provide
46 | implementations (methods) for other classes. See the documentation of
47 | individual methods for extra arguments and differences in behaviour.
48 | 
49 | The following methods are currently available in loaded packages:
50 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}.
51 | 
52 | }
53 | 
54 | \examples{
55 | data(pasilla)
56 | pasilla |> pull(feature)
57 |     
58 | }
59 | 


--------------------------------------------------------------------------------
/man/rename.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{rename}
 4 | \alias{rename}
 5 | \alias{rename.SummarizedExperiment}
 6 | \title{Rename columns}
 7 | \usage{
 8 | \method{rename}{SummarizedExperiment}(.data, ...)
 9 | }
10 | \arguments{
11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
13 | more details.}
14 | 
15 | \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use
16 | \code{new_name = old_name} to rename selected variables.
17 | 
18 | For \code{rename_with()}: additional arguments passed onto \code{.fn}.}
19 | }
20 | \value{
21 | An object of the same type as \code{.data}. The output has the following
22 | properties:
23 | \itemize{
24 | \item Rows are not affected.
25 | \item Column names are changed; column order is preserved.
26 | \item Data frame attributes are preserved.
27 | \item Groups are updated to reflect new names.
28 | }
29 | }
30 | \description{
31 | \code{rename()} changes the names of individual variables using
32 | \code{new_name = old_name} syntax; \code{rename_with()} renames columns using a
33 | function.
34 | }
35 | \section{Methods}{
36 | 
37 | 
38 | This function is a \strong{generic}, which means that packages can provide
39 | implementations (methods) for other classes. See the documentation of
40 | individual methods for extra arguments and differences in behaviour.
41 | 
42 | The following methods are currently available in loaded packages:
43 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}.
44 | 
45 | }
46 | 
47 | \examples{
48 | data(pasilla)
49 | pasilla |> rename(cond=condition)
50 | 
51 | }
52 | \seealso{
53 | Other single table verbs: 
54 | \code{\link{mutate}()},
55 | \code{\link{slice}()},
56 | \code{\link{summarise}()}
57 | }
58 | \concept{single table verbs}
59 | 


--------------------------------------------------------------------------------
/man/right_join.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{right_join}
  4 | \alias{right_join}
  5 | \alias{right_join.SummarizedExperiment}
  6 | \title{Mutating joins}
  7 | \usage{
  8 | \method{right_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
  9 | }
 10 | \arguments{
 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
 16 | vector of variables to join by.
 17 | 
 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
 19 | variables in common across \code{x} and \code{y}. A message lists the variables so
 20 | that you can check they're correct; suppress the message by supplying \code{by}
 21 | explicitly.
 22 | 
 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
 25 | 
 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
 27 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match
 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like
 30 | \code{join_by(a, c)}.
 31 | 
 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
 34 | these types of joins.
 35 | 
 36 | For simple equality joins, you can alternatively specify a character vector
 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
 40 | 
 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
 42 | \code{\link[dplyr:cross_join]{cross_join()}}.}
 43 | 
 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source,
 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
 46 | same src as \code{x}.  This allows you to join tables across srcs, but
 47 | it is a potentially expensive operation so you must opt into it.}
 48 | 
 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and
 50 | \code{y}, these suffixes will be added to the output to disambiguate them.
 51 | Should be a character vector of length 2.}
 52 | 
 53 | \item{...}{Other parameters passed onto methods.}
 54 | }
 55 | \value{
 56 | An object of the same type as \code{x} (including the same groups). The order of
 57 | the rows and columns of \code{x} is preserved as much as possible. The output has
 58 | the following properties:
 59 | \itemize{
 60 | \item The rows are affected by the join type.
 61 | \itemize{
 62 | \item \code{inner_join()} returns matched \code{x} rows.
 63 | \item \code{left_join()} returns all \code{x} rows.
 64 | \item \code{right_join()}  returns matched \code{x} rows, followed by unmatched \code{y} rows.
 65 | \item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
 66 | }
 67 | \item Output columns include all columns from \code{x} and all non-key columns from
 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
 71 | the same name, \code{suffix}es are added to disambiguate these as well.
 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
 73 | common type between \code{x} and \code{y}.
 74 | }
 75 | }
 76 | \description{
 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on
 78 | the keys. There are four mutating joins: the inner join, and the three outer
 79 | joins.
 80 | \subsection{Inner join}{
 81 | 
 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key
 83 | in \code{y}.
 84 | 
 85 | The most important property of an inner join is that unmatched rows in either
 86 | input are not included in the result. This means that generally inner joins
 87 | are not appropriate in most analyses, because it is too easy to lose
 88 | observations.
 89 | }
 90 | 
 91 | \subsection{Outer joins}{
 92 | 
 93 | The three outer joins keep observations that appear in at least one of the
 94 | data frames:
 95 | \itemize{
 96 | \item A \code{left_join()} keeps all observations in \code{x}.
 97 | \item A \code{right_join()} keeps all observations in \code{y}.
 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
 99 | }
100 | }
101 | }
102 | \section{Many-to-many relationships}{
103 | 
104 | 
105 | 
106 | By default, dplyr guards against many-to-many relationships in equality joins
107 | by throwing a warning. These occur when both of the following are true:
108 | \itemize{
109 | \item A row in \code{x} matches multiple rows in \code{y}.
110 | \item A row in \code{y} matches multiple rows in \code{x}.
111 | }
112 | 
113 | This is typically surprising, as most joins involve a relationship of
114 | one-to-one, one-to-many, or many-to-one, and is often the result of an
115 | improperly specified join. Many-to-many relationships are particularly
116 | problematic because they can result in a Cartesian explosion of the number of
117 | rows returned from the join.
118 | 
119 | If a many-to-many relationship is expected, silence this warning by
120 | explicitly setting \code{relationship = "many-to-many"}.
121 | 
122 | In production code, it is best to preemptively set \code{relationship} to whatever
123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this
124 | forces an error to occur immediately if the data doesn't align with your
125 | expectations.
126 | 
127 | Inequality joins typically result in many-to-many relationships by nature, so
128 | they don't warn on them by default, but you should still take extra care when
129 | specifying an inequality join, because they also have the capability to
130 | return a large number of rows.
131 | 
132 | Rolling joins don't warn on many-to-many relationships either, but many
133 | rolling joins follow a many-to-one relationship, so it is often useful to
134 | set \code{relationship = "many-to-one"} to enforce this.
135 | 
136 | Note that in SQL, most database providers won't let you specify a
137 | many-to-many relationship between two tables, instead requiring that you
138 | create a third \emph{junction table} that results in two one-to-many relationships
139 | instead.
140 | 
141 | }
142 | 
143 | \section{Methods}{
144 | 
145 | 
146 | These functions are \strong{generic}s, which means that packages can provide
147 | implementations (methods) for other classes. See the documentation of
148 | individual methods for extra arguments and differences in behaviour.
149 | 
150 | Methods available in currently loaded packages:
151 | \itemize{
152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
156 | }
157 | 
158 | }
159 | 
160 | \examples{
161 | data(pasilla)
162 | 
163 | tt <- pasilla
164 | tt |> right_join(tt |>
165 |     distinct(condition) |>
166 |     mutate(new_column=1:2) |>
167 |     slice(1))
168 | 
169 | }
170 | \seealso{
171 | Other joins: 
172 | \code{\link[dplyr]{cross_join}()},
173 | \code{\link[dplyr]{filter-joins}},
174 | \code{\link[dplyr]{nest_join}()}
175 | }
176 | 


--------------------------------------------------------------------------------
/man/rowwise.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{rowwise}
 4 | \alias{rowwise}
 5 | \alias{rowwise.SummarizedExperiment}
 6 | \title{Group input by rows}
 7 | \usage{
 8 | \method{rowwise}{SummarizedExperiment}(data, ...)
 9 | }
10 | \arguments{
11 | \item{data}{Input data frame.}
12 | 
13 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Variables to be preserved
14 | when calling \code{\link[dplyr:summarise]{summarise()}}. This is typically a set of variables whose
15 | combination uniquely identify each row.
16 | 
17 | \strong{NB}: unlike \code{group_by()} you can not create new variables here but
18 | instead you can select multiple variables with (e.g.) \code{everything()}.}
19 | }
20 | \value{
21 | A row-wise data frame with class \code{rowwise_df}. Note that a
22 | \code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}.
23 | }
24 | \description{
25 | \code{rowwise()} allows you to compute on a data frame a row-at-a-time.
26 | This is most useful when a vectorised function doesn't exist.
27 | 
28 | Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[dplyr:summarise]{summarise()}},
29 | which returns a \link[dplyr]{grouped_df}. You can explicitly ungroup with \code{\link[dplyr:ungroup]{ungroup()}}
30 | or \code{\link[dplyr:as_tibble]{as_tibble()}}, or convert to a \link[dplyr]{grouped_df} with \code{\link[dplyr:group_by]{group_by()}}.
31 | }
32 | \section{List-columns}{
33 | 
34 | 
35 | Because a rowwise data frame has exactly one row per group it offers a small
36 | convenience for working with list-columns. Normally, \code{summarise()} and
37 | \code{mutate()} extract a group's worth of data with \code{[}. But when you index
38 | a list in this way, you get back another list. When you're working with
39 | a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your
40 | life a little easier.
41 | 
42 | }
43 | 
44 | \examples{
45 | # TODO
46 | 
47 | }
48 | \seealso{
49 | \code{\link[dplyr:nest_by]{nest_by()}} for a convenient way of creating rowwise data frames
50 | with nested data.
51 | }
52 | 


--------------------------------------------------------------------------------
/man/sample_n.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{sample_n}
 4 | \alias{sample_n}
 5 | \alias{sample_n.SummarizedExperiment}
 6 | \alias{sample_frac}
 7 | \alias{sample_frac.SummarizedExperiment}
 8 | \title{Sample n rows from a table}
 9 | \usage{
10 | \method{sample_n}{SummarizedExperiment}(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...)
11 | 
12 | \method{sample_frac}{SummarizedExperiment}(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...)
13 | }
14 | \arguments{
15 | \item{tbl}{A data.frame.}
16 | 
17 | \item{size}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}>
18 | For \code{sample_n()}, the number of rows to select.
19 | For \code{sample_frac()}, the fraction of rows to select.
20 | If \code{tbl} is grouped, \code{size} applies to each group.}
21 | 
22 | \item{replace}{Sample with or without replacement?}
23 | 
24 | \item{weight}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sampling weights.
25 | This must evaluate to a vector of non-negative numbers the same length as
26 | the input. Weights are automatically standardised to sum to 1.}
27 | 
28 | \item{.env}{DEPRECATED.}
29 | 
30 | \item{...}{ignored}
31 | }
32 | \value{
33 | \code{tidySummarizedExperiment}
34 | }
35 | \description{
36 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
37 | \code{sample_n()} and \code{sample_frac()} have been superseded in favour of
38 | \code{\link[dplyr:slice_sample]{slice_sample()}}. While they will not be deprecated in the near future,
39 | retirement means that we will only perform critical bug fixes, so we recommend
40 | moving to the newer alternative.
41 | 
42 | These functions were superseded because we realised it was more convenient to
43 | have two mutually exclusive arguments to one function, rather than two
44 | separate functions. This also made it possible to clean up a few other smaller
45 | design issues with \code{sample_n()}/\code{sample_frac()}:
46 | \itemize{
47 | \item The connection to \code{slice()} was not obvious.
48 | \item The name of the first argument, \code{tbl}, is inconsistent with other
49 | single table verbs which use \code{.data}.
50 | \item The \code{size} argument uses tidy evaluation, which is surprising and
51 | undocumented.
52 | \item It was easier to remove the deprecated \code{.env} argument.
53 | \item \code{...} was in a suboptimal position.
54 | }
55 | }
56 | \examples{
57 | data(pasilla)
58 | pasilla |> sample_n(50)
59 | pasilla |> sample_frac(0.1)
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/man/se.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{se}
 5 | \alias{se}
 6 | \title{Read counts of RNA-seq samples derived from
 7 | Pasilla knock-down by Brooks et al.}
 8 | \format{
 9 | A SummarizedExperiment object containing 14599 features and 7 biological replicates.
10 | }
11 | \source{
12 | \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html}
13 | }
14 | \usage{
15 | data(se)
16 | }
17 | \description{
18 | A SummarizedExperiment dataset containing
19 | the transcriptome information for Drosophila melanogaster.
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/select.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{select}
  4 | \alias{select}
  5 | \alias{select.SummarizedExperiment}
  6 | \title{Keep or drop columns using their names and types}
  7 | \usage{
  8 | \method{select}{SummarizedExperiment}(.data, ...)
  9 | }
 10 | \arguments{
 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 13 | more details.}
 14 | 
 15 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted
 16 | expressions separated by commas. Variable names can be used as if they
 17 | were positions in the data frame, so expressions like \code{x:y} can
 18 | be used to select a range of variables.}
 19 | }
 20 | \value{
 21 | An object of the same type as \code{.data}. The output has the following
 22 | properties:
 23 | \itemize{
 24 | \item Rows are not affected.
 25 | \item Output columns are a subset of input columns, potentially with a different
 26 | order. Columns will be renamed if \code{new_name = old_name} form is used.
 27 | \item Data frame attributes are preserved.
 28 | \item Groups are maintained; you can't select off grouping variables.
 29 | }
 30 | }
 31 | \description{
 32 | Select (and optionally rename) variables in a data frame, using a concise
 33 | mini-language that makes it easy to refer to variables based on their name
 34 | (e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the
 35 | right) or type (e.g. \code{where(is.numeric)} selects all numeric columns).
 36 | \subsection{Overview of selection features}{
 37 | 
 38 | Tidyverse selections implement a dialect of R where operators make
 39 | it easy to select variables:
 40 | \itemize{
 41 | \item \code{:} for selecting a range of consecutive variables.
 42 | \item \code{!} for taking the complement of a set of variables.
 43 | \item \code{&} and \code{|} for selecting the intersection or the union of two
 44 | sets of variables.
 45 | \item \code{c()} for combining selections.
 46 | }
 47 | 
 48 | In addition, you can use \strong{selection helpers}. Some helpers select specific
 49 | columns:
 50 | \itemize{
 51 | \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables.
 52 | \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset.
 53 | \item \code{\link[dplyr:group_cols]{group_cols()}}: Select all grouping columns.
 54 | }
 55 | 
 56 | Other helpers select variables by matching patterns in their names:
 57 | \itemize{
 58 | \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix.
 59 | \item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix.
 60 | \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string.
 61 | \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression.
 62 | \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03.
 63 | }
 64 | 
 65 | Or from variables stored in a character vector:
 66 | \itemize{
 67 | \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All
 68 | names must be present, otherwise an out-of-bounds error is
 69 | thrown.
 70 | \item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown
 71 | for names that don't exist.
 72 | }
 73 | 
 74 | Or using a predicate function:
 75 | \itemize{
 76 | \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those
 77 | for which the function returns \code{TRUE}.
 78 | }
 79 | }
 80 | }
 81 | \section{Methods}{
 82 | 
 83 | 
 84 | This function is a \strong{generic}, which means that packages can provide
 85 | implementations (methods) for other classes. See the documentation of
 86 | individual methods for extra arguments and differences in behaviour.
 87 | 
 88 | The following methods are currently available in loaded packages:
 89 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}.
 90 | 
 91 | }
 92 | 
 93 | \section{Examples}{
 94 | 
 95 | 
 96 | 
 97 | Here we show the usage for the basic selection operators. See the
 98 | specific help pages to learn about helpers like \code{\link[dplyr:starts_with]{starts_with()}}.
 99 | 
100 | The selection language can be used in functions like
101 | \code{dplyr::select()} or \code{tidyr::pivot_longer()}. Let's first attach
102 | the tidyverse:
103 | 
104 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{library(tidyverse)
105 | 
106 | # For better printing
107 | iris <- as_tibble(iris)
108 | }\if{html}{\out{</div>}}
109 | 
110 | Select variables by name:
111 | 
112 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars \%>\% select(height)
113 | #> # A tibble: 87 x 1
114 | #>   height
115 | #>    <int>
116 | #> 1    172
117 | #> 2    167
118 | #> 3     96
119 | #> 4    202
120 | #> # i 83 more rows
121 | 
122 | iris \%>\% pivot_longer(Sepal.Length)
123 | #> # A tibble: 150 x 6
124 | #>   Sepal.Width Petal.Length Petal.Width Species name         value
125 | #>         <dbl>        <dbl>       <dbl> <fct>   <chr>        <dbl>
126 | #> 1         3.5          1.4         0.2 setosa  Sepal.Length   5.1
127 | #> 2         3            1.4         0.2 setosa  Sepal.Length   4.9
128 | #> 3         3.2          1.3         0.2 setosa  Sepal.Length   4.7
129 | #> 4         3.1          1.5         0.2 setosa  Sepal.Length   4.6
130 | #> # i 146 more rows
131 | }\if{html}{\out{</div>}}
132 | 
133 | Select multiple variables by separating them with commas. Note how
134 | the order of columns is determined by the order of inputs:
135 | 
136 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars \%>\% select(homeworld, height, mass)
137 | #> # A tibble: 87 x 3
138 | #>   homeworld height  mass
139 | #>   <chr>      <int> <dbl>
140 | #> 1 Tatooine     172    77
141 | #> 2 Tatooine     167    75
142 | #> 3 Naboo         96    32
143 | #> 4 Tatooine     202   136
144 | #> # i 83 more rows
145 | }\if{html}{\out{</div>}}
146 | 
147 | Functions like \code{tidyr::pivot_longer()} don't take variables with
148 | dots. In this case use \code{c()} to select multiple variables:
149 | 
150 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{iris \%>\% pivot_longer(c(Sepal.Length, Petal.Length))
151 | #> # A tibble: 300 x 5
152 | #>   Sepal.Width Petal.Width Species name         value
153 | #>         <dbl>       <dbl> <fct>   <chr>        <dbl>
154 | #> 1         3.5         0.2 setosa  Sepal.Length   5.1
155 | #> 2         3.5         0.2 setosa  Petal.Length   1.4
156 | #> 3         3           0.2 setosa  Sepal.Length   4.9
157 | #> 4         3           0.2 setosa  Petal.Length   1.4
158 | #> # i 296 more rows
159 | }\if{html}{\out{</div>}}
160 | \subsection{Operators:}{
161 | 
162 | The \code{:} operator selects a range of consecutive variables:
163 | 
164 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars \%>\% select(name:mass)
165 | #> # A tibble: 87 x 3
166 | #>   name           height  mass
167 | #>   <chr>           <int> <dbl>
168 | #> 1 Luke Skywalker    172    77
169 | #> 2 C-3PO             167    75
170 | #> 3 R2-D2              96    32
171 | #> 4 Darth Vader       202   136
172 | #> # i 83 more rows
173 | }\if{html}{\out{</div>}}
174 | 
175 | The \code{!} operator negates a selection:
176 | 
177 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars \%>\% select(!(name:mass))
178 | #> # A tibble: 87 x 11
179 | #>   hair_color skin_color  eye_color birth_year sex   gender    homeworld species
180 | #>   <chr>      <chr>       <chr>          <dbl> <chr> <chr>     <chr>     <chr>  
181 | #> 1 blond      fair        blue            19   male  masculine Tatooine  Human  
182 | #> 2 <NA>       gold        yellow         112   none  masculine Tatooine  Droid  
183 | #> 3 <NA>       white, blue red             33   none  masculine Naboo     Droid  
184 | #> 4 none       white       yellow          41.9 male  masculine Tatooine  Human  
185 | #> # i 83 more rows
186 | #> # i 3 more variables: films <list>, vehicles <list>, starships <list>
187 | 
188 | iris \%>\% select(!c(Sepal.Length, Petal.Length))
189 | #> # A tibble: 150 x 3
190 | #>   Sepal.Width Petal.Width Species
191 | #>         <dbl>       <dbl> <fct>  
192 | #> 1         3.5         0.2 setosa 
193 | #> 2         3           0.2 setosa 
194 | #> 3         3.2         0.2 setosa 
195 | #> 4         3.1         0.2 setosa 
196 | #> # i 146 more rows
197 | 
198 | iris \%>\% select(!ends_with("Width"))
199 | #> # A tibble: 150 x 3
200 | #>   Sepal.Length Petal.Length Species
201 | #>          <dbl>        <dbl> <fct>  
202 | #> 1          5.1          1.4 setosa 
203 | #> 2          4.9          1.4 setosa 
204 | #> 3          4.7          1.3 setosa 
205 | #> 4          4.6          1.5 setosa 
206 | #> # i 146 more rows
207 | }\if{html}{\out{</div>}}
208 | 
209 | \code{&} and \code{|} take the intersection or the union of two selections:
210 | 
211 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{iris \%>\% select(starts_with("Petal") & ends_with("Width"))
212 | #> # A tibble: 150 x 1
213 | #>   Petal.Width
214 | #>         <dbl>
215 | #> 1         0.2
216 | #> 2         0.2
217 | #> 3         0.2
218 | #> 4         0.2
219 | #> # i 146 more rows
220 | 
221 | iris \%>\% select(starts_with("Petal") | ends_with("Width"))
222 | #> # A tibble: 150 x 3
223 | #>   Petal.Length Petal.Width Sepal.Width
224 | #>          <dbl>       <dbl>       <dbl>
225 | #> 1          1.4         0.2         3.5
226 | #> 2          1.4         0.2         3  
227 | #> 3          1.3         0.2         3.2
228 | #> 4          1.5         0.2         3.1
229 | #> # i 146 more rows
230 | }\if{html}{\out{</div>}}
231 | 
232 | To take the difference between two selections, combine the \code{&} and
233 | \code{!} operators:
234 | 
235 | \if{html}{\out{<div class="sourceCode r">}}\preformatted{iris \%>\% select(starts_with("Petal") & !ends_with("Width"))
236 | #> # A tibble: 150 x 1
237 | #>   Petal.Length
238 | #>          <dbl>
239 | #> 1          1.4
240 | #> 2          1.4
241 | #> 3          1.3
242 | #> 4          1.5
243 | #> # i 146 more rows
244 | }\if{html}{\out{</div>}}
245 | }
246 | 
247 | }
248 | 
249 | \examples{
250 | data(pasilla)
251 | pasilla |> select(.sample, .feature, counts)
252 | 
253 | }
254 | \seealso{
255 | Other single table verbs: 
256 | \code{\link[dplyr]{arrange}()},
257 | \code{\link[dplyr]{filter}()},
258 | \code{\link[dplyr]{mutate}()},
259 | \code{\link[dplyr]{reframe}()},
260 | \code{\link[dplyr]{rename}()},
261 | \code{\link[dplyr]{slice}()},
262 | \code{\link[dplyr]{summarise}()}
263 | }
264 | 


--------------------------------------------------------------------------------
/man/separate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tidyr_methods.R
 3 | \name{separate}
 4 | \alias{separate}
 5 | \alias{separate.SummarizedExperiment}
 6 | \title{Separate a character column into multiple columns with a regular
 7 | expression or numeric locations}
 8 | \usage{
 9 | \method{separate}{SummarizedExperiment}(
10 |   data,
11 |   col,
12 |   into,
13 |   sep = "[^[:alnum:]]+",
14 |   remove = TRUE,
15 |   convert = FALSE,
16 |   extra = "warn",
17 |   fill = "warn",
18 |   ...
19 | )
20 | }
21 | \arguments{
22 | \item{data}{A data frame.}
23 | 
24 | \item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.}
25 | 
26 | \item{into}{Names of new variables to create as character vector.
27 | Use \code{NA} to omit the variable in the output.}
28 | 
29 | \item{sep}{Separator between columns.
30 | 
31 | If character, \code{sep} is interpreted as a regular expression. The default
32 | value is a regular expression that matches any sequence of
33 | non-alphanumeric values.
34 | 
35 | If numeric, \code{sep} is interpreted as character positions to split at. Positive
36 | values start at 1 at the far-left of the string; negative values start at -1 at
37 | the far-right of the string. The length of \code{sep} should be one less than
38 | \code{into}.}
39 | 
40 | \item{remove}{If \code{TRUE}, remove input column from output data frame.}
41 | 
42 | \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with
43 | \code{as.is = TRUE} on new columns. This is useful if the component
44 | columns are integer, numeric or logical.
45 | 
46 | NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.}
47 | 
48 | \item{extra}{If \code{sep} is a character vector, this controls what
49 | happens when there are too many pieces. There are three valid options:
50 | \itemize{
51 | \item \code{"warn"} (the default): emit a warning and drop extra values.
52 | \item \code{"drop"}: drop any extra values without a warning.
53 | \item \code{"merge"}: only splits at most \code{length(into)} times
54 | }}
55 | 
56 | \item{fill}{If \code{sep} is a character vector, this controls what
57 | happens when there are not enough pieces. There are three valid options:
58 | \itemize{
59 | \item \code{"warn"} (the default): emit a warning and fill from the right
60 | \item \code{"right"}: fill with missing values on the right
61 | \item \code{"left"}: fill with missing values on the left
62 | }}
63 | 
64 | \item{...}{Additional arguments passed on to methods.}
65 | }
66 | \value{
67 | \code{tidySummarizedExperiment}
68 | }
69 | \description{
70 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
71 | 
72 | \code{separate()} has been superseded in favour of \code{\link[tidyr:separate_wider_position]{separate_wider_position()}}
73 | and \code{\link[tidyr:separate_wider_delim]{separate_wider_delim()}} because the two functions make the two uses
74 | more obvious, the API is more polished, and the handling of problems is
75 | better. Superseded functions will not go away, but will only receive
76 | critical bug fixes.
77 | 
78 | Given either a regular expression or a vector of character positions,
79 | \code{separate()} turns a single character column into multiple columns.
80 | }
81 | \examples{
82 | un <- tidySummarizedExperiment::pasilla |>
83 |     unite("group", c(condition, type))
84 | un |> separate(col=group, into=c("condition", "type"))
85 | 
86 | }
87 | \seealso{
88 | \code{\link[tidyr:unite]{unite()}}, the complement, \code{\link[tidyr:extract]{extract()}} which uses regular
89 | expression capturing groups.
90 | }
91 | 


--------------------------------------------------------------------------------
/man/slice.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr_methods.R
 3 | \name{slice}
 4 | \alias{slice}
 5 | \alias{slice.SummarizedExperiment}
 6 | \alias{slice_head}
 7 | \alias{slice_tail}
 8 | \alias{slice_sample}
 9 | \alias{slice_min}
10 | \alias{slice_max}
11 | \title{Subset rows using their positions}
12 | \usage{
13 | \method{slice}{SummarizedExperiment}(.data, ..., .preserve = FALSE)
14 | }
15 | \arguments{
16 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
17 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
18 | more details.}
19 | 
20 | \item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}>
21 | Integer row values.
22 | 
23 | Provide either positive values to keep, or negative values to drop.
24 | The values provided must be either all positive or all negative.
25 | Indices beyond the number of rows in the input are silently ignored.
26 | 
27 | For \verb{slice_*()}, these arguments are passed on to methods.}
28 | 
29 | \item{.preserve}{Relevant when the \code{.data} input is grouped.
30 | If \code{.preserve = FALSE} (the default), the grouping structure
31 | is recalculated based on the resulting data, otherwise the grouping is kept as is.}
32 | }
33 | \value{
34 | An object of the same type as \code{.data}. The output has the following
35 | properties:
36 | \itemize{
37 | \item Each row may appear 0, 1, or many times in the output.
38 | \item Columns are not modified.
39 | \item Groups are not modified.
40 | \item Data frame attributes are preserved.
41 | }
42 | }
43 | \description{
44 | \code{slice()} lets you index rows by their (integer) locations. It allows you
45 | to select, remove, and duplicate rows. It is accompanied by a number of
46 | helpers for common use cases:
47 | \itemize{
48 | \item \code{slice_head()} and \code{slice_tail()} select the first or last rows.
49 | \item \code{slice_sample()} randomly selects rows.
50 | \item \code{slice_min()} and \code{slice_max()} select rows with the smallest or largest
51 | values of a variable.
52 | }
53 | 
54 | If \code{.data} is a \link[dplyr]{grouped_df}, the operation will be performed on each group,
55 | so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in
56 | each group.
57 | }
58 | \details{
59 | Slice does not work with relational databases because they have no
60 | intrinsic notion of row order. If you want to perform the equivalent
61 | operation, use \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:row_number]{row_number()}}.
62 | }
63 | \section{Methods}{
64 | 
65 | 
66 | These functions are \strong{generic}s, which means that packages can provide
67 | implementations (methods) for other classes. See the documentation of
68 | individual methods for extra arguments and differences in behaviour.
69 | 
70 | Methods available in currently loaded packages:
71 | \itemize{
72 | \item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
73 | \item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
74 | \item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
75 | \item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
76 | \item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
77 | \item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
78 | }
79 | 
80 | }
81 | 
82 | \examples{
83 | data(pasilla)
84 | pasilla |> slice(1)
85 | 
86 | }
87 | \seealso{
88 | Other single table verbs: 
89 | \code{\link{mutate}()},
90 | \code{\link{rename}()},
91 | \code{\link{summarise}()}
92 | }
93 | \concept{single table verbs}
94 | 


--------------------------------------------------------------------------------
/man/summarise.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dplyr_methods.R
  3 | \name{summarise}
  4 | \alias{summarise}
  5 | \alias{summarise.SummarizedExperiment}
  6 | \alias{summarize}
  7 | \alias{summarize.SummarizedExperiment}
  8 | \title{Summarise each group down to one row}
  9 | \usage{
 10 | \method{summarise}{SummarizedExperiment}(.data, ...)
 11 | 
 12 | \method{summarize}{SummarizedExperiment}(.data, ...)
 13 | }
 14 | \arguments{
 15 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
 16 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
 17 | more details.}
 18 | 
 19 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of
 20 | summary functions. The name will be the name of the variable in the result.
 21 | 
 22 | The value can be:
 23 | \itemize{
 24 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
 25 | \item A data frame, to add multiple columns from a single expression.
 26 | }
 27 | 
 28 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was
 29 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.}
 30 | }
 31 | \value{
 32 | An object \emph{usually} of the same type as \code{.data}.
 33 | \itemize{
 34 | \item The rows come from the underlying \code{\link[dplyr:group_keys]{group_keys()}}.
 35 | \item The columns are a combination of the grouping keys and the summary
 36 | expressions that you provide.
 37 | \item The grouping structure is controlled by the \verb{.groups=} argument, the
 38 | output may be another \link[dplyr]{grouped_df}, a \link[dplyr]{tibble} or a \link[dplyr]{rowwise} data frame.
 39 | \item Data frame attributes are \strong{not} preserved, because \code{summarise()}
 40 | fundamentally creates a new data frame.
 41 | }
 42 | }
 43 | \description{
 44 | \code{summarise()} creates a new data frame. It returns one row for each
 45 | combination of grouping variables; if there are no grouping variables, the
 46 | output will have a single row summarising all observations in the input. It
 47 | will contain one column for each grouping variable and one column for each of
 48 | the summary statistics that you have specified.
 49 | 
 50 | \code{summarise()} and \code{summarize()} are synonyms.
 51 | }
 52 | \section{Useful functions}{
 53 | 
 54 | 
 55 | \itemize{
 56 | \item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}}
 57 | \item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}}
 58 | \item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}
 59 | \item Position: \code{\link[dplyr:first]{first()}}, \code{\link[dplyr:last]{last()}}, \code{\link[dplyr:nth]{nth()}}
 60 | \item Count: \code{\link[dplyr:n]{n()}}, \code{\link[dplyr:n_distinct]{n_distinct()}}
 61 | \item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}}
 62 | }
 63 | 
 64 | }
 65 | 
 66 | \section{Backend variations}{
 67 | 
 68 | 
 69 | 
 70 | The data frame backend supports creating a variable and using it in the
 71 | same summary. This means that previously created summary variables can be
 72 | further transformed or combined within the summary, as in \code{\link[dplyr:mutate]{mutate()}}.
 73 | However, it also means that summary variables with the same names as previous
 74 | variables overwrite them, making those variables unavailable to later summary
 75 | variables.
 76 | 
 77 | This behaviour may not be supported in other backends. To avoid unexpected
 78 | results, consider using new names for your summary variables, especially when
 79 | creating multiple summaries.
 80 | 
 81 | }
 82 | 
 83 | \section{Methods}{
 84 | 
 85 | 
 86 | This function is a \strong{generic}, which means that packages can provide
 87 | implementations (methods) for other classes. See the documentation of
 88 | individual methods for extra arguments and differences in behaviour.
 89 | 
 90 | The following methods are currently available in loaded packages:
 91 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}.
 92 | 
 93 | }
 94 | 
 95 | \examples{
 96 | data(pasilla)
 97 | pasilla |> summarise(mean(counts))
 98 | 
 99 | }
100 | \seealso{
101 | Other single table verbs: 
102 | \code{\link{mutate}()},
103 | \code{\link{rename}()},
104 | \code{\link{slice}()}
105 | }
106 | \concept{single table verbs}
107 | 


--------------------------------------------------------------------------------
/man/tbl_format_header.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/print_method.R
 3 | \name{tbl_format_header}
 4 | \alias{tbl_format_header}
 5 | \alias{tbl_format_header.tidySummarizedExperiment}
 6 | \title{Format the header of a tibble}
 7 | \usage{
 8 | \method{tbl_format_header}{tidySummarizedExperiment}(x, setup, ...)
 9 | }
10 | \arguments{
11 | \item{x}{A tibble-like object.}
12 | 
13 | \item{setup}{A setup object returned from \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.}
14 | 
15 | \item{...}{These dots are for future extensions and must be empty.}
16 | }
17 | \value{
18 | A character vector.
19 | }
20 | \description{
21 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
22 | 
23 | For easier customization, the formatting of a tibble is split
24 | into three components: header, body, and footer.
25 | The \code{tbl_format_header()} method is responsible for formatting the header
26 | of a tibble.
27 | 
28 | Override this method if you need to change the appearance
29 | of the entire header.
30 | If you only need to change or extend the components shown in the header,
31 | override or extend \code{\link[pillar:tbl_sum]{tbl_sum()}} for your class which is called by the
32 | default method.
33 | }
34 | \examples{
35 | # TODO
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/man/tidy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{tidy}
 4 | \alias{tidy}
 5 | \alias{tidy.SummarizedExperiment}
 6 | \alias{tidy.RangedSummarizedExperiment}
 7 | \title{tidy for \code{SummarizedExperiment}}
 8 | \usage{
 9 | tidy(object)
10 | 
11 | \method{tidy}{SummarizedExperiment}(object)
12 | 
13 | \method{tidy}{RangedSummarizedExperiment}(object)
14 | }
15 | \arguments{
16 | \item{object}{A SummarizedExperiment object}
17 | }
18 | \value{
19 | A \code{tidySummarizedExperiment} object.
20 | }
21 | \description{
22 | tidy for \code{SummarizedExperiment}
23 | }
24 | \examples{
25 | data(pasilla)
26 | pasilla \%>\% tidy()
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/man/unite.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tidyr_methods.R
 3 | \name{unite}
 4 | \alias{unite}
 5 | \alias{unite.SummarizedExperiment}
 6 | \title{Unite multiple columns into one by pasting strings together}
 7 | \usage{
 8 | \method{unite}{SummarizedExperiment}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE)
 9 | }
10 | \arguments{
11 | \item{data}{A data frame.}
12 | 
13 | \item{col}{The name of the new column, as a string or symbol.
14 | 
15 | This argument is passed by expression and supports
16 | \link[rlang:topic-inject]{quasiquotation} (you can unquote strings
17 | and symbols). The name is captured from the expression with
18 | \code{\link[rlang:defusing-advanced]{rlang::ensym()}} (note that this kind of interface where
19 | symbols do not represent actual objects is now discouraged in the
20 | tidyverse; we support it here for backward compatibility).}
21 | 
22 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite}
23 | 
24 | \item{sep}{Separator to use between values.}
25 | 
26 | \item{remove}{If \code{TRUE}, remove input columns from output data frame.}
27 | 
28 | \item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting
29 | each value.}
30 | }
31 | \value{
32 | \code{tidySummarizedExperiment}
33 | }
34 | \description{
35 | Convenience function to paste together multiple columns into one.
36 | }
37 | \examples{
38 | tidySummarizedExperiment::pasilla |>
39 |     unite("group", c(condition, type))
40 |     
41 | }
42 | \seealso{
43 | \code{\link[tidyr:separate]{separate()}}, the complement.
44 | }
45 | 


--------------------------------------------------------------------------------
/man/unnest.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/tidyr_methods.R
  3 | \name{unnest}
  4 | \alias{unnest}
  5 | \alias{unnest.tidySummarizedExperiment_nested}
  6 | \alias{unnest_summarized_experiment}
  7 | \title{Unnest a list-column of data frames into rows and columns}
  8 | \usage{
  9 | \method{unnest}{tidySummarizedExperiment_nested}(
 10 |   data,
 11 |   cols,
 12 |   ...,
 13 |   keep_empty = FALSE,
 14 |   ptype = NULL,
 15 |   names_sep = NULL,
 16 |   names_repair = "check_unique",
 17 |   .drop,
 18 |   .id,
 19 |   .sep,
 20 |   .preserve
 21 | )
 22 | 
 23 | unnest_summarized_experiment(
 24 |   data,
 25 |   cols,
 26 |   ...,
 27 |   keep_empty = FALSE,
 28 |   ptype = NULL,
 29 |   names_sep = NULL,
 30 |   names_repair = "check_unique",
 31 |   .drop,
 32 |   .id,
 33 |   .sep,
 34 |   .preserve
 35 | )
 36 | }
 37 | \arguments{
 38 | \item{data}{A data frame.}
 39 | 
 40 | \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> List-columns to unnest.
 41 | 
 42 | When selecting multiple columns, values from the same row will be recycled
 43 | to their common size.}
 44 | 
 45 | \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
 46 | previously you could write \code{df \%>\% unnest(x, y, z)}.
 47 | Convert to \code{df \%>\% unnest(c(x, y, z))}. If you previously created a new
 48 | variable in \code{unnest()} you'll now need to do it explicitly with \code{mutate()}.
 49 | Convert \code{df \%>\% unnest(y = fun(x, y, z))}
 50 | to \code{df \%>\% mutate(y = fun(x, y, z)) \%>\% unnest(y)}.}
 51 | 
 52 | \item{keep_empty}{By default, you get one row of output for each element
 53 | of the list that you are unchopping/unnesting. This means that if there's a
 54 | size-0 element (like \code{NULL} or an empty data frame or vector), then that
 55 | entire row will be dropped from the output. If you want to preserve all
 56 | rows, use \code{keep_empty = TRUE} to replace size-0 elements with a single row
 57 | of missing values.}
 58 | 
 59 | \item{ptype}{Optionally, a named list of column name-prototype pairs to
 60 | coerce \code{cols} to, overriding the default that will be guessed from
 61 | combining the individual values. Alternatively, a single empty ptype
 62 | can be supplied, which will be applied to all \code{cols}.}
 63 | 
 64 | \item{names_sep}{If \code{NULL}, the default, the outer names will come from the
 65 | inner names. If a string, the outer names will be formed by pasting
 66 | together the outer and the inner column names, separated by \code{names_sep}.}
 67 | 
 68 | \item{names_repair}{Used to check that output data frame has valid
 69 | names. Must be one of the following options:
 70 | \itemize{
 71 | \item \code{"minimal"}: no name repair or checks, beyond basic existence,
 72 | \item \code{"unique"}: make sure names are unique and not empty,
 73 | \item \code{"check_unique"}: (the default), no name repair, but check they are unique,
 74 | \item \code{"universal"}: make the names unique and syntactic
 75 | \item a function: apply custom name repair.
 76 | \item \link[tidyr]{tidyr_legacy}: use the name repair from tidyr 0.8.
 77 | \item a formula: a purrr-style anonymous function (see \code{\link[rlang:as_function]{rlang::as_function()}})
 78 | }
 79 | 
 80 | See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more details on these terms and the
 81 | strategies used to enforce them.}
 82 | 
 83 | \item{.drop, .preserve}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
 84 | all list-columns are now preserved; If there are any that you
 85 | don't want in the output use \code{select()} to remove them prior to
 86 | unnesting.}
 87 | 
 88 | \item{.id}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
 89 | convert \code{df \%>\% unnest(x, .id = "id")} to \verb{df \%>\% mutate(id = names(x)) \%>\% unnest(x)}.}
 90 | 
 91 | \item{.sep}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
 92 | use \code{names_sep} instead.}
 93 | }
 94 | \value{
 95 | \code{tidySummarizedExperiment}
 96 | }
 97 | \description{
 98 | Unnest expands a list-column containing data frames into rows and columns.
 99 | }
100 | \section{New syntax}{
101 | 
102 | 
103 | tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's
104 | designed to be more similar to other functions. Converting to the new syntax
105 | should be straightforward (guided by the message you'll receive) but if
106 | you just need to run an old analysis, you can easily revert to the previous
107 | behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows:
108 | 
109 | \if{html}{\out{<div class="sourceCode">}}\preformatted{library(tidyr)
110 | nest <- nest_legacy
111 | unnest <- unnest_legacy
112 | }\if{html}{\out{</div>}}
113 | 
114 | }
115 | 
116 | \examples{
117 | tidySummarizedExperiment::pasilla |>
118 |     nest(data=-condition) |>
119 |     unnest(data)
120 | 
121 | tidySummarizedExperiment::pasilla |>
122 |     nest(data=-condition) |>
123 |     unnest_summarized_experiment(data)
124 | 
125 | }
126 | \seealso{
127 | Other rectangling: 
128 | \code{\link[tidyr]{hoist}()},
129 | \code{\link[tidyr]{unnest_longer}()},
130 | \code{\link[tidyr]{unnest_wider}()}
131 | }
132 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(tidySummarizedExperiment)
3 | # Entry point used by R CMD check: run the package's testthat suite.
4 | test_check("tidySummarizedExperiment")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-dplyr_methods.R:
--------------------------------------------------------------------------------
  1 | context("dplyr test")  # NOTE(review): context() is deprecated under testthat 3rd edition; confirm which edition this package uses
  2 | 
  3 | library(tidySummarizedExperiment)
  4 | 
  5 | 
  6 | test_that("bind_rows", {
  7 |     # Binding the object to itself must not repeat any sample/feature pair.
  8 |     doubled <- bind_rows(pasilla, pasilla)
  9 |     pair_counts <- count(doubled, .sample, .feature)
 10 | 
 11 |     # Tally the count values and keep only those greater than one.
 12 |     repeated <- filter(dplyr::count(pair_counts, n), n > 1)
 13 | 
 14 |     expect_equal(nrow(repeated), 0)
 15 | })
 16 | 
 17 | test_that("distinct", {
 18 |     # distinct() on a single column should give a one-column result.
 19 |     conditions <- distinct(pasilla, condition)
 20 | 
 21 |     expect_equal(ncol(conditions), 1)
 22 | })
 23 | 
 24 | test_that("filter", {
 25 |     # Row count expected after keeping only the untreated condition.
 26 |     untreated <- filter(pasilla, condition == "untreated")
 27 | 
 28 |     expect_equal(nrow(untreated), 14599)
 29 | })
 30 | 
 31 | test_that("group_by", {
 32 |     # The grouped result is expected to report five columns.
 33 |     grouped <- group_by(pasilla, condition)
 34 | 
 35 |     expect_equal(ncol(grouped), 5)
 36 | })
 37 | 
 38 | test_that("summarise", {
 39 |     # An ungrouped summary is expected to have exactly one row.
 40 |     summary_tbl <- summarise(pasilla, mean(counts))
 41 | 
 42 |     expect_equal(nrow(summary_tbl), 1)
 43 | })
 44 | 
 45 | test_that("mutate", {
 46 |     # Overwriting condition with a constant leaves one distinct value.
 47 |     constant <- mutate(pasilla, condition = 1)
 48 |     values <- distinct(constant, condition)
 49 | 
 50 |     expect_equal(nrow(values), 1)
 51 | })
 52 | 
 53 | test_that("rename", {
 54 |     # The renamed column must be selectable under its new name.
 55 |     renamed <- rename(pasilla, groups = condition)
 56 |     picked <- select(renamed, groups)
 57 | 
 58 |     expect_equal(ncol(picked), 1)
 59 | })
 60 | 
 61 | test_that("left_join", {
 62 |     # Lookup table: the distinct conditions plus one new column.
 63 |     lookup <- mutate(distinct(pasilla, condition), new_column = 1:2)
 64 | 
 65 |     # Joining it should add exactly one column to colData.
 66 |     joined <- left_join(pasilla, lookup)
 67 |     n_before <- ncol(colData(pasilla))
 68 |     n_after <- ncol(colData(joined))
 69 | 
 70 |     expect_equal(
 71 |         n_after,
 72 |         sum(n_before, 1)
 73 |     )
 74 | })
 75 | 
 76 | test_that("left_join 0 samples", {
 77 |     # NOTE(review): `pasilla[0, ]` subsets by the first (row) index,
 78 |     # while the test name mentions samples -- confirm the intent.
 79 |     lookup <- pasilla %>%
 80 |         distinct(condition) %>%
 81 |         mutate(new_column = 1)
 82 | 
 83 |     joined <- pasilla[0, ] %>% left_join(lookup)
 84 |     new_values <- joined %>% as_tibble() %>% pull(new_column)
 85 | 
 86 |     expect_equal(unique(new_values), 1)
 87 | })
 88 | 
 89 | test_that("inner_join", {
 90 |     # Join against a lookup table cut down to its first row.
 91 |     first_condition <- pasilla %>%
 92 |         distinct(condition) %>%
 93 |         mutate(new_column = 1:2) %>%
 94 |         slice(1)
 95 |     expect_equal(ncol(pasilla %>% inner_join(first_condition)), 4)
 96 | })
 97 | 
 98 | test_that("right_join", {
 99 |     # Join against a lookup table cut down to its first row.
100 |     first_condition <- pasilla %>%
101 |         distinct(condition) %>%
102 |         mutate(new_column = 1:2) %>%
103 |         slice(1)
104 |     expect_equal(ncol(pasilla %>% right_join(first_condition)), 4)
105 | })
106 | 
107 | test_that("full_join", {
108 |     extra_rows <- tibble::tibble(condition = "A", other = 1:4)
109 |     joined <- pasilla %>% full_join(extra_rows)
110 |     expect_equal(nrow(joined), 102197)
111 | })
112 | 
113 | test_that("slice", {
114 |     # Keeping only the first position should leave one column.
115 |     first_only <- pasilla %>% slice(1)
116 | 
117 |     expect_equal(ncol(first_only), 1)
118 | })
119 | 
120 | test_that("select", {
121 |     # Dropping a column is expected to keep the SummarizedExperiment class.
122 |     without_condition <- pasilla %>% select(-condition)
123 |     expect_equal(
124 |         as.character(class(without_condition)),
125 |         "SummarizedExperiment"
126 |     )
127 | 
128 |     # Selecting only `condition` is expected to give back a tibble.
129 |     condition_only <- pasilla %>% select(condition)
130 |     expect_equal(
131 |         as.character(class(condition_only))[1], "tbl_df"
132 |     )
133 | })
134 | 
135 | test_that("sample_n", {
136 |     # Draw 50 rows at random and check the size of the result.
137 |     drawn <- pasilla %>% sample_n(50)
138 | 
139 |     expect_equal(nrow(drawn), 50)
140 | })
141 | 
142 | test_that("sample_frac", {
143 |     # Draw a tenth of the rows at random and check the size of the result.
144 |     drawn <- pasilla %>% sample_frac(0.1)
145 | 
146 |     expect_equal(nrow(drawn), 10219)
147 | })
148 | 
149 | test_that("count", {
150 |     # One row is expected per value of `condition`.
151 |     per_condition <- pasilla %>% count(condition)
152 | 
153 |     expect_equal(nrow(per_condition), 2)
154 | })
155 | 
156 | test_that("mutate counts", {
157 |   # Copying an assay through mutate() should reproduce its values.
158 |   copied <- tidySummarizedExperiment::pasilla |> mutate(counts_2 = counts)
159 |   expect_equal(
160 |     copied |> pull(counts),
161 |     copied |> pull(counts_2)
162 |   )
163 | 
164 |   # Attach a second assay whose columns are in reverse order (7:1).
165 |   se <- tidySummarizedExperiment::pasilla
166 |   assays(se, withDimnames = FALSE)$counts_2 <- assays(se)$counts[, 7:1]
167 | 
168 |   # pull() is still expected to return equal values for both assays.
169 |   expect_equal(
170 |     se |> pull(counts),
171 |     se |> pull(counts_2)
172 |   )
173 | 
174 |   # Internal column-overlap check: expected FALSE for the full object ...
175 |   overlap_all <- se |>
176 |     tidySummarizedExperiment:::check_if_assays_are_NOT_overlapped(dim = "cols")
177 |   expect_equal(overlap_all, FALSE)
178 | 
179 |   # ... and TRUE once only the first column is kept.
180 |   overlap_one <- se[, 1] |>
181 |     tidySummarizedExperiment:::check_if_assays_are_NOT_overlapped(dim = "cols")
182 |   expect_equal(overlap_one, TRUE)
183 | })
184 | 
185 | test_that("group_split splits character columns", {
186 |   data(pasilla)
187 |   # One piece is expected per value of `condition`.
188 |   pieces <- pasilla |> group_split(condition)
189 | 
190 |   expect_equal(length(pieces), 2)
191 | })
192 | 
193 | test_that("group_split splits logical comparisons", {
194 |   data(pasilla)
195 |   # One piece is expected per outcome of the comparison `counts > 0`.
196 |   pieces <- pasilla |> group_split(counts > 0)
197 | 
198 |   expect_equal(length(pieces), 2)
199 | })
200 | 
201 | test_that("group_split splits with multiple arguments", {
202 |   data(pasilla)
203 |   # `condition` crossed with the outcome of `counts > 0`: four pieces.
204 |   pieces <- pasilla |> group_split(condition, counts > 0)
205 | 
206 |   expect_equal(length(pieces), 4)
207 | })
208 | 
209 | test_that("mutate features", {
210 |   # The new feature-level column should come back from rowData() unchanged.
211 |   with_new <- pasilla %>% mutate_features(new = 1:nrow(pasilla))
212 |   feature_table <- with_new %>%
213 |     rowData() %>%
214 |     as_tibble()
215 |   expect_equal(feature_table %>% pull(new), 1:nrow(pasilla))
216 | })
217 | 
218 | test_that("mutate samples", {
219 |   # The new sample-level column should come back from colData() unchanged.
220 |   with_new <- pasilla %>% mutate_samples(new = 1:ncol(pasilla))
221 |   sample_table <- with_new %>%
222 |     colData() %>%
223 |     as_tibble()
224 |   expect_equal(sample_table %>% pull(new), 1:ncol(pasilla))
225 | })
226 | 


--------------------------------------------------------------------------------
/tests/testthat/test-felix.R:
--------------------------------------------------------------------------------
 1 | context("felix test")
 2 | 
 3 | library(magrittr)
 4 | library(tidySummarizedExperiment)
 5 | 
 6 | # Create dataset; every dimension is derived from nrows/ncols.
 7 | nrows <- 200; ncols <- 6
 8 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
 9 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, nrows - 50)),
10 |                      IRanges(floor(runif(nrows, 1e5, 1e6)), width=100),
11 |                      strand=sample(c("+", "-"), nrows, TRUE),
12 |                      feature_id=sprintf("ID%03d", seq_len(nrows)))
13 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), ncols / 2),
14 |                      row.names=LETTERS[seq_len(ncols)])
15 | rse <- SummarizedExperiment(assays=SimpleList(counts=counts),
16 |                             rowRanges=rowRanges, colData=colData)
17 | 
18 | 
19 | 
20 | test_that("Example 1 all columns included", {
21 |     # One row per feature/sample combination: 200 * 6 = 1200.
22 |     long_table <- rse %>%
23 |         as_tibble()
24 | 
25 |     expect_equal(nrow(long_table), 1200)
26 | })
27 | 
28 | 
29 | test_that("Example 2", {
30 |     # Add columns named `sample` and `transcript`, then check the
31 |     # long table still has one row per feature/sample combination.
32 |     colData(rse)$sample <- seq_len(ncol(rse))
33 |     rowData(rse)$transcript <- seq_len(nrow(rse))
34 | 
35 |     long_table <- rse %>% as_tibble()
36 |     expect_equal(nrow(long_table), 1200)
37 | })
38 | 
39 | test_that("Example 3", {
40 |     # Disabled variant, kept for reference:
41 |     # rowRanges(rse) <- split(rowRanges(rse),seq_len(nrow(rse)))
42 |     # 
43 |     # rse  %>% 
44 |     #     as_tibble() %>%
45 |     #     nrow() %>%
46 |     #     expect_equal(1200)
47 | 
48 |     # A feature column called `transcript` must not change the row count.
49 |     rowData(rse)$transcript <- seq_len(nrow(rse))
50 | 
51 |     with_transcript <- rse %>% as_tibble()
52 |     expect_equal(nrow(with_transcript), 1200)
53 | 
54 |     # With column names removed, both special columns must still be selectable.
55 |     colnames(rse) <- NULL
56 | 
57 |     unnamed <- rse %>%
58 |         as_tibble() %>%
59 |         select(.sample, .feature)
60 | 
61 |     expect_equal(ncol(unnamed), 2)
62 | })
63 | 
64 | test_that("Example 4 from tidybulk", {
65 |    # The conversion must succeed and yield a tibble; asserting the class keeps the test from being empty.
66 |    expect_s3_class(se %>% as_tibble(), "tbl_df")
67 | })


--------------------------------------------------------------------------------
/tests/testthat/test-old_vocabulary.R:
--------------------------------------------------------------------------------
  1 | context("old vocabulary")
  2 | 
  3 | library(tidySummarizedExperiment)
  4 | 
  5 | warning_message = "the special columns including sample/feature"  # pattern handed to expect_warning() in the tests below
  6 | 
  7 | test_that("distinct", {
  8 |     # Old-style `sample` column name: the warning is expected.
  9 |     expect_warning(pasilla %>% distinct(sample, condition),
 10 |                    warning_message)
 11 | })
 12 | 
 13 | test_that("filter", {
 14 |     # Old-style `feature` column name: the warning is expected.
 15 |     expect_warning(pasilla %>% filter(feature == "FBgn0000003"),
 16 |                    warning_message)
 17 | })
 18 | 
 19 | test_that("group_by", {
 20 |     # Grouping by the old `sample` name is expected to warn.
 21 |     expect_warning(pasilla %>% group_by(sample),
 22 |                    warning_message)
 23 | })
 24 | 
 25 | test_that("summarise", {
 26 |     # Summarising over the old `sample` name is expected to warn.
 27 |     expect_warning(pasilla %>% summarise(unique(sample )),
 28 |                    warning_message)
 29 | })
 30 | 
 31 | test_that("mutate", {
 32 |     # Old `sample` name on the right-hand side of mutate().
 33 |     expect_warning(pasilla %>% mutate(condition = sample),
 34 |                    warning_message)
 35 | 
 36 |     # Same, followed by a select() that also uses the old name.
 37 |     expect_warning(pasilla %>%
 38 |                        mutate(sample_name=toupper(sample)) %>%
 39 |                        select(sample, sample_name), warning_message)
 40 | })
 41 | 
 42 | test_that("left_join", {
 43 |     # The lookup tables are built inside expect_warning() on purpose:
 44 |     # the distinct() on an old column name is part of the checked expression.
 45 |     expect_warning(
 46 |         pasilla %>%
 47 |             left_join(pasilla %>%
 48 |                           distinct(sample) %>%
 49 |                           mutate(new_column = 1:7)),
 50 |         warning_message)
 51 | 
 52 |     expect_warning(
 53 |         pasilla %>% left_join(
 54 |             pasilla %>% distinct(feature) %>% mutate(new_column = 1:14599 )),
 55 |         warning_message)
 56 | })
 57 | 
 58 | test_that("inner_join", {
 59 |     expect_warning(
 60 |         pasilla %>%
 61 |             inner_join(pasilla %>% distinct(sample) %>% mutate(new_column = 1:7)),
 62 |         warning_message
 63 |     )
 64 | })
 65 | 
 66 | test_that("right_join", {
 67 |     expect_warning(
 68 |         pasilla %>%
 69 |             right_join(pasilla %>% distinct(sample) %>% mutate(new_column = 1:7)),
 70 |         warning_message
 71 |     )
 72 | })
 73 | 
 74 | test_that("full_join", {
 75 |     expect_warning(
 76 |         pasilla %>%
 77 |             full_join(pasilla %>% distinct(sample) %>% mutate(new_column = 1:7)),
 78 |         warning_message
 79 |     )
 80 | })
 81 | 
 82 | test_that("select", {
 83 |     # Old `sample`/`feature` names inside select().
 84 |     expect_warning(pasilla %>% select(sample, feature, counts, condition),
 85 |                    warning_message)
 86 | 
 87 |     # Selecting only `condition` is expected to return a tibble.
 88 |     condition_only <- pasilla %>% select(condition)
 89 |     expect_equal(
 90 |         as.character(class(condition_only))[1],
 91 |         "tbl_df"
 92 |     )
 93 | })
 94 | 
 95 | test_that("count", {
 96 |     # Counting by the old `sample` name is expected to warn.
 97 |     expect_warning(pasilla %>% count(sample, condition),
 98 |                    warning_message)
 99 | })
100 | 
101 | test_that("pull", {
102 |     # NOTE(review): `condition` is passed as a further argument to pull() -- confirm this is intended.
103 |     expect_warning(pasilla %>% pull(sample, condition),
104 |                    warning_message)
105 | })
106 | 
107 | 
108 | library(magrittr)
109 | library(tidySummarizedExperiment)
110 | 
111 | # Shared fixture: pasilla plus a constant character column `col2`.
112 | tt <-
113 |     pasilla %>% mutate(col2 = "other_col")
114 | 
115 | test_that("nest_unnest", {
116 |     # Round trip keyed on the old `sample` column name; the whole
117 |     # pipeline is evaluated inside expect_warning().
118 |     expect_warning(
119 |         tt %>%
120 |             nest(data = -sample) %>%
121 |             unnest(data),
122 |         warning_message
123 |     )
124 | })
125 | 
126 | 
127 | test_that("unite separate", {
128 |     # NOTE(review): `un` receives whatever expect_warning() returns; the
129 |     # second step assumes that is the united object -- confirm for the
130 |     # testthat edition in use.
131 |     un <- expect_warning(
132 |         tt %>% unite("new_col", c(condition, sample), sep = ":", remove = FALSE),
133 |         warning_message
134 |     )
135 | 
136 |     expect_warning(
137 |         un %>% separate(
138 |             col = feature,
139 |             into = c("orig.ident", "condition"),
140 |             sep = ":", remove = FALSE
141 |         ), warning_message)
142 | })
143 | 
144 | test_that("extract", {
145 |     expect_warning(
146 |         tt %>% extract(sample,
147 |             into = "g",
148 |             regex = "other_([a-z]+)",
149 |             convert = TRUE, remove=FALSE),
150 |         warning_message)
151 | })
152 | 
153 | test_that("pivot_longer", {
154 |     # Pivoting is expected to return a tibble.
155 |     lengthened <- tt %>%
156 |         pivot_longer(c(sample, condition),
157 |             names_to = "name",
158 |             values_to = "value")
159 | 
160 |     expect_equal(class(lengthened)[1], "tbl_df")
161 | })
162 | 
163 | test_that("pivot_wider", {
164 |     # Widening is expected to return a tibble.
165 |     widened <- tt %>%
166 |         pivot_wider(names_from=feature, values_from=counts)
167 | 
168 |     expect_equal(class(widened)[1], "tbl_df")
169 | })
170 | 


--------------------------------------------------------------------------------
/tests/testthat/test-tidyr_methods.R:
--------------------------------------------------------------------------------
  1 | context("tidyr test")
  2 | 
  3 | library(magrittr)
  4 | library(tidySummarizedExperiment)
  5 | 
  6 | # Shared fixture: pasilla plus a constant character column `col2`.
  7 | tt <-
  8 |     pasilla %>% mutate(col2="other_col")
  9 | 
 10 | # Create SummarizedExperiment object for testing; every dimension below
 11 | # is derived from nrows/ncols so the two values are the single source.
 12 | nrows <- 200; ncols <- 6
 13 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
 14 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, nrows - 50)),
 15 |                      IRanges(floor(runif(nrows, 1e5, 1e6)), width=100),
 16 |                      strand=sample(c("+", "-"), nrows, TRUE),
 17 |                      feature_id=sprintf("ID%03d", seq_len(nrows)))
 18 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), ncols / 2),
 19 |                      row.names=LETTERS[seq_len(ncols)])
 20 | rse <- SummarizedExperiment(assays=SimpleList(counts=counts),
 21 |                             rowRanges=rowRanges, colData=colData)
 22 | rownames(rse) <- sprintf("ID%03d", seq_len(nrows))
 23 | 
 24 | test_that("RangedSummarizedExperiment_nest_unnest", {
 25 |   # Round trip: nest every column except .sample, then unnest again.
 26 |   rse_nested <- rse %>%
 27 |     nest(data = -.sample)
 28 | 
 29 |   rse_unnested <- rse_nested %>%
 30 |     unnest(data)
 31 | 
 32 |   # The round trip must leave the colData and rowRanges slots unchanged.
 33 |   expect_equal(rse@colData, rse_unnested@colData)
 34 |   expect_equal(rse@rowRanges, rse_unnested@rowRanges)
 35 | })
 36 | 
 37 | test_that("nest_unnest", {
 38 | 
 39 |     # Expected counts of the first row, one entry per sample.
 40 |     sample_names <- c(
 41 |         "untrt1", "untrt2", "untrt3", "untrt4",
 42 |         "trt1", "trt2", "trt3"
 43 |     )
 44 |     y <- tibble::tibble(
 45 |         .sample = sample_names,
 46 |         counts = c(0L, 0L, 0L, 0L, 0L, 0L, 1L)
 47 |     )
 48 | 
 49 |     # Nest by condition, restore, then look at the first row only.
 50 |     round_trip <- tt %>%
 51 |         nest(data = -condition) %>%
 52 |         unnest(data)
 53 | 
 54 |     x <- round_trip %>%
 55 |         head(n = 1) %>%
 56 |         select(.sample, counts)
 57 | 
 58 | 
 59 |     expect_equal(x, y)
 60 | })
 61 | 
 62 | test_that("nest_unnest_slice_1",{
 63 |     # Smoke test: unnesting one nested row must not raise an error.
 64 |     # expect_error(..., NA) states that explicitly.
 65 |     expect_error(
 66 |         tt %>% nest(data = -condition) %>% slice(1) %>% unnest(data),
 67 |         NA
 68 |     )
 69 | })
 70 | 
 71 | test_that("nest_0_samples",{
 72 |   # Smoke test: nesting an object with zero columns must not raise an error.
 73 |   rowData(tt)$n = rep(1, nrow(tt))
 74 | 
 75 |   expect_error(
 76 |     tt[,0] |> nest(data = -n),
 77 |     NA)
 78 | })
 79 | 
 80 | test_that("unite separate", {
 81 |     # Paste `condition` and `col2` together into `new_col`.
 82 |     un <- tt %>% unite("new_col", c(condition, col2), sep = ":")
 83 | 
 84 |     first_value <- un %>%
 85 |         select(new_col) %>%
 86 |         slice(1) %>%
 87 |         pull(new_col)
 88 |     expect_equal(first_value, "untreated:other_col")
 89 | 
 90 |     # Split `new_col` back into the two original columns.
 91 |     se <- un %>%
 92 |         separate(
 93 |             col = new_col,
 94 |             into = c( "condition", "col2"),
 95 |             sep = ":"
 96 |         )
 97 | 
 98 |     # `.sample` must still be selectable afterwards.
 99 |     sample_only <- se %>% select(.sample)
100 |     expect_equal(ncol(sample_only), 1)
101 | })
102 | 
103 | test_that("extract()", {
104 |     # Capture the part of `col2` after "other_" into a new column `g`.
105 |     extracted <- tt %>%
106 |         extract(col2,
107 |             into="g", regex="other_([a-z]+)",
108 |             convert = TRUE)
109 | 
110 |     expect_equal(class(extracted %>% pull(g)), "character")
111 | })
112 | 
113 | test_that("pivot_longer()", {
114 |     # Pivoting is expected to return a tibble.
115 |     lengthened <- tt %>%
116 |         pivot_longer(c(.sample, condition), names_to = "name",
117 |             values_to = "value")
118 | 
119 |     expect_equal(class(lengthened)[1], "tbl_df")
120 | })
121 | 
122 | test_that("nest_unnest_by_feature_chunk", {
123 |   # Assign the first half of the features to chunk 1, the rest to chunk 2.
124 |   chunks <-
125 |     tibble::tibble(.feature = rownames(tt)) |>
126 |     mutate(chunk___ = c(
127 |       rep(1, times = floor(dplyr::n()/2)),
128 |       rep(2, times = ceiling(dplyr::n()/2))
129 |     ))
130 | 
131 |   # One uniform random value per feature, joined onto the object.
132 |   statistics_for_features <-
133 |     tibble::tibble(.feature = rownames(tt)) |>
134 |     mutate(pvalue = runif( dplyr::n(), min = 0, max = 1))
135 |   tt <- tt |> left_join(statistics_for_features)
136 | 
137 |   # Nest by chunk, unnest again and drop the helper column.
138 |   tt_unnested <- tt |>
139 |     left_join(chunks) |>
140 |     nest(se_chunk = -chunk___) |>
141 |     unnest(se_chunk) |>
142 |     select(-chunk___)
143 | 
144 |   # NOTE(review): the result below is discarded, so this test makes no
145 |   # expectation -- decide whether it should be asserted.
146 |   identical(tt, tt_unnested)
147 | })


--------------------------------------------------------------------------------
/vignettes/introduction.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Overview of the tidySummarizedExperiment package"
  3 | author: "Stefano Mangiola"
  4 | date: "`r Sys.Date()`"
  5 | package: tidySummarizedExperiment
  6 | output:
  7 |   BiocStyle::html_document:
  8 |     toc_float: true
  9 | bibliography: tidySummarizedExperiment.bib
 10 | vignette: >
 11 |   %\VignetteEngine{knitr::knitr}
 12 |   %\VignetteIndexEntry{Overview of the tidySummarizedExperiment package}
 13 |   %\usepackage[UTF-8]{inputenc}
 14 | ---
 15 | 
 16 | <!-- badges: start -->
 17 | [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
 18 | <!-- badges: end -->
 19 | 
 20 | 
 21 | **Brings SummarizedExperiment to the tidyverse!**
 22 | 
 23 | website: [stemangiola.github.io/tidySummarizedExperiment/](https://stemangiola.github.io/tidySummarizedExperiment/)
 24 | 
 25 | Please also have a look at
 26 | 
 27 | - [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) for tidy manipulation of SingleCellExperiment objects
 28 | - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy manipulation of Seurat objects
 29 | - [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy analysis of RNA sequencing data
 30 | - [nanny](https://github.com/stemangiola/nanny) for tidy high-level data analysis and manipulation
 31 | - [tidygate](https://github.com/stemangiola/tidygate) for adding custom gate information to your tibble
 32 | - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles
 33 | 
 34 | 
 35 | ```{r, echo=FALSE, include=FALSE}
 36 | library(knitr)
 37 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)
 38 | ```
 39 | 
 40 | # Introduction
 41 | 
 42 | tidySummarizedExperiment provides a bridge between Bioconductor [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) [@morgan2020summarized] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the
 43 | Bioconductor *SummarizedExperiment* object as a tidyverse tibble, and provides SummarizedExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. This allows users to get the best of both Bioconductor and tidyverse worlds.
 44 | 
 45 | 
 46 | ## Functions/utilities available
 47 | 
 48 | SummarizedExperiment-compatible Functions | Description
 49 | ------------ | -------------
 50 | `all` | After all, a `tidySummarizedExperiment` is a SummarizedExperiment object, just better
 51 | 
 52 | tidyverse Packages | Description
 53 | ------------ | -------------
 54 | `dplyr` | Almost all `dplyr` APIs like for any tibble
 55 | `tidyr` | Almost all `tidyr` APIs like for any tibble
 56 | `ggplot2` | `ggplot` like for any tibble
 57 | `plotly` | `plot_ly` like for any tibble
 58 | 
 59 | Utilities | Description
 60 | ------------ | -------------
 61 | `as_tibble` | Convert the object to a `tbl_df` with one row per feature/sample pair
 62 | 
 63 | ## Installation
 64 | 
 65 | ```{r, eval=FALSE}
 66 | if (!requireNamespace("BiocManager", quietly=TRUE)) {
 67 |       install.packages("BiocManager")
 68 |   }
 69 | 
 70 | BiocManager::install("tidySummarizedExperiment")
 71 | ```
 72 | 
 73 | From GitHub (development version)
 74 | ```{r, eval=FALSE}
 75 | devtools::install_github("stemangiola/tidySummarizedExperiment")
 76 | ```
 77 | 
 78 | Load libraries used in the examples.
 79 | 
 80 | ```{r}
 81 | library(ggplot2)
 82 | library(tidySummarizedExperiment)
 83 | ```
 84 | 
 85 | 
 86 | # Create `tidySummarizedExperiment`, the best of both worlds!
 87 | 
 88 | This is a SummarizedExperiment object but it is evaluated as a tibble, so it is fully compatible with both the SummarizedExperiment and tidyverse APIs.
 89 | 
 90 | ```{r}
 91 | pasilla_tidy <- tidySummarizedExperiment::pasilla 
 92 | ```
 93 | 
 94 | **It looks like a tibble**
 95 | 
 96 | ```{r}
 97 | pasilla_tidy
 98 | ```
 99 | 
100 | **But it is a SummarizedExperiment object after all**
101 | 
102 | ```{r}
103 | assays(pasilla_tidy)
104 | ```
105 | 
106 | 
107 | # Tidyverse commands
108 | 
109 | We can use tidyverse commands to explore the tidy SummarizedExperiment object.
110 | 
111 | We can use `slice` to choose rows by position, for example to choose the first row.
112 | 
113 | ```{r}
114 | pasilla_tidy %>%
115 |     slice(1)
116 | ```
117 | 
118 | We can use `filter` to choose rows by criteria.
119 | 
120 | ```{r}
121 | pasilla_tidy %>%
122 |     filter(condition == "untreated")
123 | ```
124 | 
125 | We can use `select` to choose columns.
126 | 
127 | ```{r}
128 | pasilla_tidy %>%
129 |     select(.sample)
130 | ```
131 | 
132 | We can use `count` to count how many rows we have for each sample.
133 | 
134 | ```{r}
135 | pasilla_tidy %>%
136 |     count(.sample)
137 | ```
138 | 
139 | We can use `distinct` to see what distinct sample information we have.
140 | 
141 | ```{r}
142 | pasilla_tidy %>%
143 |     distinct(.sample, condition, type)
144 | ```
145 | 
146 | We could use `rename` to rename a column. For example, to modify the type column name.
147 | 
148 | ```{r}
149 | pasilla_tidy %>%
150 |     rename(sequencing=type)
151 | ```
152 | 
153 | We could use `mutate` to create a column. For example, we could create a new type column that contains single
154 | and paired instead of single_end and paired_end.
155 | 
156 | ```{r}
157 | pasilla_tidy %>%
158 |     mutate(type=gsub("_end", "", type))
159 | ```
160 | 
161 | We could use `unite` to combine multiple columns into a single column.
162 | 
163 | ```{r}
164 | pasilla_tidy %>%
165 |     unite("group", c(condition, type))
166 | ```
167 | 
168 | We can also combine commands with the tidyverse pipe `%>%`.
169 | 
170 | For example, we could combine `group_by` and `summarise` to get the total counts for each sample.
171 | 
172 | ```{r}
173 | pasilla_tidy %>%
174 |     group_by(.sample) %>%
175 |     summarise(total_counts=sum(counts))
176 | ```
177 | 
178 | We could combine `group_by`, `mutate` and `filter` to get the transcripts with mean count > 0.
179 | 
180 | ```{r}
181 | pasilla_tidy %>%
182 |     group_by(.feature) %>%
183 |     mutate(mean_count=mean(counts)) %>%
184 |     filter(mean_count > 0)
185 | ```
186 | 
187 | 
188 | # Plotting
189 | 
190 | ```{r}
191 | my_theme <-
192 |     list(
193 |         scale_fill_brewer(palette="Set1"),
194 |         scale_color_brewer(palette="Set1"),
195 |         theme_bw() +
196 |             theme(
197 |                 panel.border=element_blank(),
198 |                 axis.line=element_line(),
199 |                 panel.grid.major=element_line(size=0.2),
200 |                 panel.grid.minor=element_line(size=0.1),
201 |                 text=element_text(size=12),
202 |                 legend.position="bottom",
203 |                 aspect.ratio=1,
204 |                 strip.background=element_blank(),
205 |                 axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)),
206 |                 axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10))
207 |             )
208 |     )
209 | ```
210 | 
211 | We can treat `pasilla_tidy` as a normal tibble for plotting.
212 | 
213 | Here we plot the distribution of counts per sample.
214 | 
215 | ```{r plot1}
216 | pasilla_tidy %>%
217 |     ggplot(aes(counts + 1, group=.sample, color=`type`)) +
218 |     geom_density() +
219 |     scale_x_log10() +
220 |     my_theme
221 | ```
222 | 
223 | # Session Info
224 | 
225 | ```{r}
226 | sessionInfo()
227 | ```
228 | 
229 | # References
230 | 


--------------------------------------------------------------------------------
/vignettes/tidySummarizedExperiment.bib:
--------------------------------------------------------------------------------
 1 | @Manual{morgan2020summarized,
 2 |     title = {SummarizedExperiment: SummarizedExperiment container},
 3 |     author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès},
 4 |     year = {2020},
 5 |     note = {R package version 1.19.6},
 6 |   }
 7 | 
 8 | @article{wickham2019welcome,
 9 |   title={Welcome to the Tidyverse},
10 |   author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others},
11 |   journal={Journal of Open Source Software},
12 |   volume={4},
13 |   number={43},
14 |   pages={1686},
15 |   year={2019}
16 | }
17 | 


--------------------------------------------------------------------------------