├── .Rbuildignore ├── .coveralls.yml ├── .github ├── .gitignore ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ └── rworkflows.yml ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R ├── attach.R ├── data.R ├── dplyr_methods.R ├── ggplot2_methods.R ├── methods.R ├── pillar_utilities.R ├── plotly_methods.R ├── print_method.R ├── tibble_methods.R ├── tidyr_methods.R ├── utilities.R ├── utils-pipe.R ├── validation.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── data ├── pasilla.rda └── se.rda ├── dev └── TCGA_processing.R ├── inst └── NEWS.rd ├── man ├── as_tibble.Rd ├── bind_rows.Rd ├── count.Rd ├── distinct.Rd ├── extract.Rd ├── figures │ ├── lifecycle-archived.svg │ ├── lifecycle-defunct.svg │ ├── lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-maturing.svg │ ├── lifecycle-questioning.svg │ ├── lifecycle-stable.svg │ ├── lifecycle-superseded.svg │ └── plot1-1.png ├── filter.Rd ├── formatting.Rd ├── full_join.Rd ├── ggplot.Rd ├── group_by.Rd ├── group_split.Rd ├── inner_join.Rd ├── left_join.Rd ├── mutate.Rd ├── mutate_features.Rd ├── mutate_samples.Rd ├── nest.Rd ├── pasilla.Rd ├── pipe.Rd ├── pivot_longer.Rd ├── pivot_wider.Rd ├── plot_ly.Rd ├── pull.Rd ├── rename.Rd ├── right_join.Rd ├── rowwise.Rd ├── sample_n.Rd ├── se.Rd ├── select.Rd ├── separate.Rd ├── slice.Rd ├── summarise.Rd ├── tbl_format_header.Rd ├── tidy.Rd ├── unite.Rd └── unnest.Rd ├── tests ├── testthat.R └── testthat │ ├── test-dplyr_methods.R │ ├── test-felix.R │ ├── test-old_vocabulary.R │ ├── test-tidyr_methods.R │ └── test-utilities.R └── vignettes ├── introduction.Rmd └── tidySummarizedExperiment.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^vignettes/introduction_cache$ 4 | ^doc$ 5 | ^Meta$ 6 | ^codecov\.yml$ 7 | ^dev$ 8 | ^README_cache$ 9 | ^README_files$ 10 | README.Rmd 11 | ^.git$ 12 | .coveralls.yml 13 | .travis.yml 
14 | ^.github$ 15 | ^\.github$ 16 | _pkgdown.yml 17 | ^GDCdata$ -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | service_name: travis-pro 2 | repo_token: O4NscPehU4qrWznFtQRiyJJBIOyRgPzsB 3 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for submitting an issue. 11 | 12 | Please add the following information to the issue 13 | 14 | 1. Describe the issue/bug 15 | 2. Print out the input dataset immediately before the bug occurs 16 | 3. Paste the code immediately leading to the bug 17 | 4. Print out of the output, if any 18 | 5. Print out of the complete error/warning message, if any 19 | 6. sessionInfo() 20 | 21 | Thanks! 
22 | -------------------------------------------------------------------------------- /.github/workflows/rworkflows.yml: -------------------------------------------------------------------------------- 1 | name: rworkflows 2 | 'on': 3 | push: 4 | branches: 5 | - master 6 | - main 7 | - devel 8 | - RELEASE_** 9 | pull_request: 10 | branches: 11 | - master 12 | - main 13 | - devel 14 | - RELEASE_** 15 | jobs: 16 | rworkflows: 17 | permissions: write-all 18 | runs-on: ${{ matrix.config.os }} 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | container: ${{ matrix.config.cont }} 21 | strategy: 22 | fail-fast: ${{ false }} 23 | matrix: 24 | config: 25 | - os: ubuntu-latest 26 | bioc: devel 27 | r: auto 28 | cont: ghcr.io/bioconductor/bioconductor_docker:devel 29 | rspm: ~ 30 | - os: macOS-latest 31 | bioc: release 32 | r: auto 33 | cont: ~ 34 | rspm: ~ 35 | - os: windows-latest 36 | bioc: release 37 | r: auto 38 | cont: ~ 39 | rspm: ~ 40 | steps: 41 | - uses: neurogenomics/rworkflows@master 42 | with: 43 | run_bioccheck: ${{ false }} 44 | run_rcmdcheck: ${{ true }} 45 | as_cran: ${{ true }} 46 | run_vignettes: ${{ true }} 47 | has_testthat: ${{ true }} 48 | run_covr: ${{ true }} 49 | run_pkgdown: ${{ true }} 50 | has_runit: ${{ false }} 51 | has_latex: ${{ false }} 52 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 53 | run_docker: ${{ false }} 54 | DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} 55 | runner_os: ${{ runner.os }} 56 | cache_version: cache-v1 57 | docker_registry: ghcr.io 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | tidySummarizedExperiment.Rproj 6 | README_cache/* 7 | vignettes/introduction_cache* 8 | tidySummarizedExperiment.Rproj 9 | Meta 10 | doc 11 | dev/*csv 12 | dev/*rds 13 | dev/*rda 14 | dev/*pdf 15 | dev/dplyr-master/* 16 | 
tidySummarizedExperiment.Rproj 17 | GDCdata 18 | /doc/ 19 | /Meta/ 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/hadley/testthat/blob/master/.travis.yml 2 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 3 | language: r 4 | cache: packages 5 | r: 6 | - bioc-release 7 | - bioc-devel 8 | env: 9 | - R_QPDF=true 10 | 11 | r_github_packages: 12 | - r-lib/covr 13 | 14 | after_success: 15 | - tar -C .. -xf $PKG_TARBALL 16 | - xvfb-run Rscript -e 'covr::codecov(type=c("tests", "vignettes", "examples"))' 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: tidySummarizedExperiment 3 | Title: Brings SummarizedExperiment to the Tidyverse 4 | Version: 1.15.1 5 | Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", 6 | role = c("aut", "cre")) ) 7 | Description: The tidySummarizedExperiment package provides a set of tools for creating and 8 | manipulating tidy data representations of SummarizedExperiment objects. SummarizedExperiment 9 | is a widely used data structure in bioinformatics for storing high-throughput genomic data, 10 | such as gene expression or DNA sequencing data. 11 | The tidySummarizedExperiment package introduces a tidy framework for working with SummarizedExperiment objects. 12 | It allows users to convert their data into a tidy format, where each observation is a row 13 | and each variable is a column. This tidy representation simplifies data manipulation, 14 | integration with other tidyverse packages, and enables seamless integration with the broader 15 | ecosystem of tidy tools for data analysis. 
16 | License: GPL-3 17 | Depends: 18 | R (>= 4.3.0), 19 | SummarizedExperiment, 20 | ttservice (>= 0.4.0) 21 | Imports: 22 | dplyr, 23 | tibble (>= 3.0.4), 24 | magrittr, 25 | tidyr, 26 | ggplot2, 27 | rlang, 28 | purrr, 29 | lifecycle, 30 | methods, 31 | utils, 32 | S4Vectors, 33 | tidyselect, 34 | ellipsis, 35 | vctrs, 36 | pillar, 37 | stringr, 38 | cli, 39 | fansi, 40 | stats, 41 | pkgconfig 42 | Suggests: 43 | BiocStyle, 44 | testthat, 45 | knitr, 46 | markdown, 47 | rmarkdown, 48 | plotly 49 | VignetteBuilder: 50 | knitr 51 | RdMacros: 52 | lifecycle 53 | Biarch: true 54 | biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics 55 | Encoding: UTF-8 56 | LazyData: true 57 | RoxygenNote: 7.3.1 58 | Roxygen: list(markdown = TRUE) 59 | LazyDataCompression: xz 60 | URL: https://github.com/stemangiola/tidySummarizedExperiment 61 | BugReports: https://github.com/stemangiola/tidySummarizedExperiment/issues 62 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as_tibble,SummarizedExperiment) 4 | S3method(bind_cols,RangedSummarizedExperiment) 5 | S3method(bind_cols,SummarizedExperiment) 6 | S3method(bind_rows,SummarizedExperiment) 7 | S3method(count,SummarizedExperiment) 8 | S3method(distinct,SummarizedExperiment) 9 | S3method(extract,SummarizedExperiment) 10 | S3method(filter,SummarizedExperiment) 11 | S3method(full_join,SummarizedExperiment) 12 | S3method(ggplot,SummarizedExperiment) 13 | S3method(group_by,SummarizedExperiment) 14 | S3method(group_split,SummarizedExperiment) 15 | S3method(inner_join,SummarizedExperiment) 16 | S3method(left_join,SummarizedExperiment) 17 | S3method(mutate,SummarizedExperiment) 18 | S3method(nest,SummarizedExperiment) 19 | 
S3method(pivot_longer,SummarizedExperiment) 20 | S3method(pivot_wider,SummarizedExperiment) 21 | S3method(plot_ly,SummarizedExperiment) 22 | S3method(plot_ly,tbl_df) 23 | S3method(print,SummarizedExperiment) 24 | S3method(pull,SummarizedExperiment) 25 | S3method(rename,SummarizedExperiment) 26 | S3method(right_join,SummarizedExperiment) 27 | S3method(rowwise,SummarizedExperiment) 28 | S3method(sample_frac,SummarizedExperiment) 29 | S3method(sample_n,SummarizedExperiment) 30 | S3method(select,SummarizedExperiment) 31 | S3method(separate,SummarizedExperiment) 32 | S3method(slice,SummarizedExperiment) 33 | S3method(summarise,SummarizedExperiment) 34 | S3method(summarize,SummarizedExperiment) 35 | S3method(tbl_format_header,tidySummarizedExperiment) 36 | S3method(tidy,RangedSummarizedExperiment) 37 | S3method(tidy,SummarizedExperiment) 38 | S3method(unite,SummarizedExperiment) 39 | S3method(unnest,tidySummarizedExperiment_nested) 40 | export("%>%") 41 | export(mutate_features) 42 | export(mutate_samples) 43 | export(tidy) 44 | export(unnest_summarized_experiment) 45 | importFrom(S4Vectors,"metadata<-") 46 | importFrom(S4Vectors,DataFrame) 47 | importFrom(S4Vectors,SimpleList) 48 | importFrom(S4Vectors,head) 49 | importFrom(S4Vectors,metadata) 50 | importFrom(SummarizedExperiment,"assays<-") 51 | importFrom(SummarizedExperiment,"colData<-") 52 | importFrom(SummarizedExperiment,"elementMetadata<-") 53 | importFrom(SummarizedExperiment,"rowData<-") 54 | importFrom(SummarizedExperiment,assay) 55 | importFrom(SummarizedExperiment,assayNames) 56 | importFrom(SummarizedExperiment,assays) 57 | importFrom(SummarizedExperiment,cbind) 58 | importFrom(SummarizedExperiment,colData) 59 | importFrom(SummarizedExperiment,elementMetadata) 60 | importFrom(SummarizedExperiment,rbind) 61 | importFrom(SummarizedExperiment,rowData) 62 | importFrom(SummarizedExperiment,rowRanges) 63 | importFrom(dplyr,count) 64 | importFrom(dplyr,distinct) 65 | importFrom(dplyr,distinct_at) 66 | 
importFrom(dplyr,filter) 67 | importFrom(dplyr,full_join) 68 | importFrom(dplyr,group_by) 69 | importFrom(dplyr,group_by_drop_default) 70 | importFrom(dplyr,group_split) 71 | importFrom(dplyr,inner_join) 72 | importFrom(dplyr,left_join) 73 | importFrom(dplyr,mutate) 74 | importFrom(dplyr,n) 75 | importFrom(dplyr,pull) 76 | importFrom(dplyr,rename) 77 | importFrom(dplyr,right_join) 78 | importFrom(dplyr,rowwise) 79 | importFrom(dplyr,sample_frac) 80 | importFrom(dplyr,sample_n) 81 | importFrom(dplyr,select) 82 | importFrom(dplyr,select_if) 83 | importFrom(dplyr,slice) 84 | importFrom(dplyr,summarise) 85 | importFrom(dplyr,summarize) 86 | importFrom(dplyr,vars) 87 | importFrom(ellipsis,check_dots_used) 88 | importFrom(fansi,strwrap_ctl) 89 | importFrom(ggplot2,aes) 90 | importFrom(ggplot2,ggplot) 91 | importFrom(lifecycle,deprecate_warn) 92 | importFrom(magrittr,"%$%") 93 | importFrom(magrittr,"%>%") 94 | importFrom(magrittr,equals) 95 | importFrom(magrittr,set_rownames) 96 | importFrom(methods,.hasSlot) 97 | importFrom(methods,as) 98 | importFrom(methods,getMethod) 99 | importFrom(methods,is) 100 | importFrom(pillar,align) 101 | importFrom(pillar,get_extent) 102 | importFrom(pillar,style_subtle) 103 | importFrom(pillar,tbl_format_header) 104 | importFrom(pkgconfig,get_config) 105 | importFrom(purrr,imap) 106 | importFrom(purrr,map) 107 | importFrom(purrr,map2) 108 | importFrom(purrr,map_chr) 109 | importFrom(purrr,map_int) 110 | importFrom(purrr,map_lgl) 111 | importFrom(purrr,pmap) 112 | importFrom(purrr,reduce) 113 | importFrom(purrr,when) 114 | importFrom(rlang,":=") 115 | importFrom(rlang,.data) 116 | importFrom(rlang,dots_values) 117 | importFrom(rlang,enquo) 118 | importFrom(rlang,enquos) 119 | importFrom(rlang,expr) 120 | importFrom(rlang,flatten_if) 121 | importFrom(rlang,is_spliced) 122 | importFrom(rlang,names2) 123 | importFrom(rlang,quo_is_null) 124 | importFrom(rlang,quo_name) 125 | importFrom(rlang,quo_squash) 126 | importFrom(stats,setNames) 127 | 
importFrom(stringr,regex) 128 | importFrom(stringr,str_detect) 129 | importFrom(stringr,str_replace) 130 | importFrom(tibble,as_tibble) 131 | importFrom(tibble,enframe) 132 | importFrom(tibble,rowid_to_column) 133 | importFrom(tibble,tibble) 134 | importFrom(tidyr,extract) 135 | importFrom(tidyr,gather) 136 | importFrom(tidyr,nest) 137 | importFrom(tidyr,pivot_longer) 138 | importFrom(tidyr,pivot_wider) 139 | importFrom(tidyr,separate) 140 | importFrom(tidyr,spread) 141 | importFrom(tidyr,unite) 142 | importFrom(tidyr,unnest) 143 | importFrom(tidyselect,eval_select) 144 | importFrom(tidyselect,one_of) 145 | importFrom(ttservice,bind_cols) 146 | importFrom(ttservice,bind_rows) 147 | importFrom(ttservice,plot_ly) 148 | importFrom(utils,packageDescription) 149 | importFrom(utils,tail) 150 | importFrom(vctrs,new_data_frame) 151 | -------------------------------------------------------------------------------- /R/attach.R: -------------------------------------------------------------------------------- 1 | core <- c("dplyr", "tidyr", "ttservice", "ggplot2") 2 | 3 | core_unloaded <- function() { 4 | search <- paste0("package:", core) 5 | core[!search %in% search()] 6 | } 7 | 8 | # Attach the package from the same library it was loaded from before. 9 | # [source: https://github.com/tidy-biology/tidyverse/issues/171] 10 | same_library <- function(pkg) { 11 | loc <- if (pkg %in% loadedNamespaces()) 12 | dirname(getNamespaceInfo(pkg, "path")) 13 | library(pkg, lib.loc=loc, character.only=TRUE, warn.conflicts=FALSE) 14 | } 15 | 16 | tidyverse_attach <- function() { 17 | to_load <- core_unloaded() 18 | 19 | suppressPackageStartupMessages( 20 | lapply(to_load, same_library)) 21 | 22 | invisible(to_load) 23 | } 24 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Read counts of RNA-seq samples of Pasilla knock-down by Brooks et al. 
2 | #' 3 | #' A SummarizedExperiment dataset containing 4 | #' the transcriptome information for Drosophila melanogaster. 5 | #' 6 | #' @format containing 14599 features and 7 biological replicates. 7 | #' 8 | #' @source \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html} 9 | #' @usage data(pasilla) 10 | "pasilla" 11 | 12 | #' Read counts of RNA-seq samples derived from 13 | #' Pasilla knock-down by Brooks et al. 14 | #' 15 | #' A SummarizedExperiment dataset containing 16 | #' the transcriptome information for Drosophila melanogaster. 17 | #' 18 | #' @format containing 14599 features and 7 biological replicates. 19 | #' 20 | #' @source \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html} 21 | #' @usage data(se) 22 | "se" -------------------------------------------------------------------------------- /R/ggplot2_methods.R: -------------------------------------------------------------------------------- 1 | #' @name ggplot 2 | #' @rdname ggplot 3 | #' @inherit ggplot2::ggplot 4 | #' @title Create a new \code{ggplot} from a \code{SummarizedExperiment} 5 | #' @return `ggplot` 6 | #' 7 | #' @examples 8 | #' library(ggplot2) 9 | #' data(pasilla) 10 | #' pasilla %>% 11 | #' ggplot(aes(.sample, counts)) + 12 | #' geom_boxplot() 13 | #' 14 | #' @importFrom purrr map 15 | #' @importFrom rlang quo_name 16 | #' @importFrom ggplot2 aes ggplot 17 | #' @export 18 | ggplot.SummarizedExperiment <- function(data=NULL, mapping=aes(), 19 | ..., environment=parent.frame()) { 20 | 21 | # Deprecation of special column names 22 | .cols <- enquos(..., .ignore_empty="all") %>% 23 | map(~ quo_name(.x)) %>% unlist() 24 | if (is_sample_feature_deprecated_used(data, .cols)) { 25 | data <- ping_old_special_column_into_metadata(data) 26 | } 27 | 28 | data %>% 29 | as_tibble() %>% 30 | ggplot2::ggplot(mapping=mapping) 31 | } 32 | -------------------------------------------------------------------------------- /R/methods.R: 
-------------------------------------------------------------------------------- 1 | #' @importFrom methods getMethod 2 | setMethod( 3 | f="show", 4 | signature="SummarizedExperiment", 5 | definition=function(object) { 6 | if (isTRUE(x=getOption(x="restore_SummarizedExperiment_show", 7 | default = FALSE)) || 8 | # If the object is a SingleCellExperiment 9 | # # From BioC 3_14 SingleCellExperiment is SummarizedExperiment and 10 | # # we don't want to process with tidySummarizedExperiment 11 | is(object, "SingleCellExperiment") 12 | ) { 13 | f <- getMethod( 14 | f="show", 15 | signature="SummarizedExperiment", 16 | where=asNamespace(ns="SummarizedExperiment") 17 | ) 18 | f(object=object) 19 | } else { 20 | object %>% 21 | print() 22 | } 23 | } 24 | ) 25 | 26 | setClass("tidySummarizedExperiment", 27 | contains=c("SummarizedExperiment", "RangedSummarizedExperiment")) 28 | 29 | #' @name tidy 30 | #' @rdname tidy 31 | #' @title tidy for `SummarizedExperiment` 32 | #' 33 | #' @param object A `SummarizedExperiment` object. 34 | #' @return The input object, unchanged (`tidy()` is deprecated and no longer needed). 35 | #' 36 | #' @examples 37 | #' data(pasilla) 38 | #' pasilla %>% tidy() 39 | #' 40 | #' @export 41 | tidy <- function(object) { 42 | UseMethod("tidy", object) 43 | } 44 | 45 | #' @importFrom lifecycle deprecate_warn 46 | tidy_ <- function(object) { 47 | 48 | # DEPRECATE 49 | deprecate_warn( 50 | when = "1.1.1", 51 | what = "tidy()", 52 | details = "tidySummarizedExperiment says: tidy() is not needed anymore." 
53 | ) 54 | 55 | object 56 | } 57 | 58 | #' @importFrom methods as 59 | #' @rdname tidy 60 | #' @param object A SummarizedExperiment object 61 | #' @export 62 | tidy.SummarizedExperiment <- tidy_ 63 | 64 | #' @importFrom methods as 65 | #' @rdname tidy 66 | #' @param object A SummarizedExperiment object 67 | #' @export 68 | tidy.RangedSummarizedExperiment <- tidy_ -------------------------------------------------------------------------------- /R/pillar_utilities.R: -------------------------------------------------------------------------------- 1 | NBSP <- "\U00A0" 2 | 3 | pillar___format_comment <- function (x, width) 4 | { 5 | if (length(x) == 0L) { 6 | return(character()) 7 | } 8 | map_chr(x, pillar___wrap, prefix="# ", 9 | width=min(width, cli::console_width())) 10 | } 11 | 12 | #' @importFrom fansi strwrap_ctl 13 | pillar___strwrap2 <- function (x, width, indent) 14 | { 15 | fansi::strwrap_ctl(x, width=max(width, 0), indent=indent, 16 | exdent=indent + 2) 17 | } 18 | 19 | 20 | pillar___wrap <- function (..., indent=0, prefix="", width) 21 | { 22 | x <- paste0(..., collapse="") 23 | wrapped <- pillar___strwrap2(x, width - get_extent(prefix), indent) 24 | wrapped <- paste0(prefix, wrapped) 25 | wrapped <- gsub(NBSP, " ", wrapped) 26 | paste0(wrapped, collapse="\n") 27 | } 28 | -------------------------------------------------------------------------------- /R/plotly_methods.R: -------------------------------------------------------------------------------- 1 | #' @name plot_ly 2 | #' @rdname plot_ly 3 | #' @inherit ttservice::plot_ly 4 | #' @return `plotly` 5 | #' 6 | #' @examples 7 | #' data(se) 8 | #' se |> 9 | #' plot_ly(x = ~counts) 10 | #' 11 | #' @importFrom ttservice plot_ly 12 | #' @export 13 | plot_ly.tbl_df <- function(data=data.frame(), ..., type=NULL, name=NULL, 14 | color=NULL, colors=NULL, alpha=NULL, 15 | stroke=NULL, strokes=NULL, alpha_stroke=1, 16 | size=NULL, sizes=c(10, 100), 17 | span=NULL, spans=c(1, 20), 18 | symbol=NULL, symbols=NULL, 19 
| linetype=NULL, linetypes=NULL, 20 | split=NULL, frame=NULL, 21 | width=NULL, height=NULL, source="A") { 22 | data |> 23 | 24 | # This is a trick to not loop the call 25 | drop_class("tbl_df") |> 26 | plotly::plot_ly(..., 27 | type=type, name=name, 28 | color=color, colors=colors, alpha=alpha, 29 | stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke, 30 | size=size, sizes=sizes, 31 | span=span, spans=spans, 32 | symbol=symbol, symbols=symbols, 33 | linetype=linetype, linetypes=linetypes, 34 | split=split, frame=frame, 35 | width=width, height=height, source=source 36 | ) 37 | } 38 | 39 | #' @name plot_ly 40 | #' @rdname plot_ly 41 | #' @inherit ttservice::plot_ly 42 | #' @return `plotly` 43 | #' 44 | #' @examples 45 | #' data(se) 46 | #' se |> 47 | #' plot_ly(x = ~counts) 48 | #' 49 | #' @importFrom ttservice plot_ly 50 | #' @export 51 | plot_ly.SummarizedExperiment <- function(data=data.frame(), 52 | ..., type=NULL, name=NULL, 53 | color=NULL, colors=NULL, alpha=NULL, 54 | stroke=NULL, strokes=NULL, alpha_stroke=1, 55 | size=NULL, sizes=c(10, 100), 56 | span=NULL, spans=c(1, 20), 57 | symbol=NULL, symbols=NULL, 58 | linetype=NULL, linetypes=NULL, 59 | split=NULL, frame=NULL, 60 | width=NULL, height=NULL, source="A") { 61 | data |> 62 | 63 | # This is a trick to not loop the call 64 | as_tibble() |> 65 | plotly::plot_ly(..., 66 | type=type, name=name, 67 | color=color, colors=colors, alpha=alpha, 68 | stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke, 69 | size=size, sizes=sizes, 70 | span=span, spans=spans, 71 | symbol=symbol, symbols=symbols, 72 | linetype=linetype, linetypes=linetypes, 73 | split=split, frame=frame, 74 | width=width, height=height, source=source 75 | ) 76 | } 77 | -------------------------------------------------------------------------------- /R/print_method.R: -------------------------------------------------------------------------------- 1 | # This file is a replacement of the unexported functions in the tibble 2 | # package, 
in order to specify "tibble abstraction in the header" 3 | 4 | #' @name tbl_format_header 5 | #' @rdname tbl_format_header 6 | #' @inherit pillar::tbl_format_header 7 | #' 8 | #' @examples 9 | #' # TODO 10 | #' 11 | #' @importFrom rlang names2 12 | #' @importFrom pillar align 13 | #' @importFrom pillar get_extent 14 | #' @importFrom pillar style_subtle 15 | #' @importFrom pillar tbl_format_header 16 | #' @importFrom cli col_br_black 17 | #' @export 18 | tbl_format_header.tidySummarizedExperiment <- function(x, setup, ...) { 19 | 20 | number_of_features <- x |> attr("number_of_features") 21 | number_of_samples <- x |> attr("number_of_samples") 22 | named_header <- x |> attr("named_header") 23 | assay_names <- x |> attr("assay_names") 24 | 25 | 26 | if (all(names2(named_header) == "")) { 27 | header <- named_header 28 | } else { 29 | header <- 30 | paste0( 31 | align(paste0(names2(named_header), ":"), space=NBSP), 32 | " ", 33 | named_header 34 | ) %>% 35 | # Insert a Features/Samples/Assays summary after the first header element 36 | append( cli::col_br_black( sprintf( 37 | " Features=%s | Samples=%s | Assays=%s", 38 | number_of_features, 39 | number_of_samples, 40 | assay_names %>% paste(collapse=", ") 41 | )), after = 1) 42 | } 43 | style_subtle(pillar___format_comment(header, width=setup$width)) 44 | } 45 | 46 | #' @name formatting 47 | #' @rdname formatting 48 | #' @aliases print 49 | #' @inherit tibble::formatting 50 | #' @return Prints a message to the console describing 51 | #' the contents of the `tidySummarizedExperiment`. 52 | #' 53 | #' @param n_extra Number of extra columns to print abbreviated information for, 54 | #' if the width is too small for the entire tibble. If `NULL`, the default, 55 | #' will print information about at most `tibble.max_extra_cols` extra columns. 
56 | #' 57 | #' @examples 58 | #' data(pasilla) 59 | #' print(pasilla) 60 | #' 61 | #' @importFrom vctrs new_data_frame 62 | #' @importFrom SummarizedExperiment assayNames 63 | #' @importFrom stats setNames 64 | #' @export 65 | print.SummarizedExperiment <- function(x, ..., n=NULL, 66 | width=NULL, n_extra=NULL) { 67 | 68 | 69 | # Fix NOTEs 70 | . <- NULL 71 | 72 | 73 | # Stop if any column or row names are duplicated 74 | if (check_if_any_dimnames_duplicated(x, dim = "cols")) { 75 | stop("tidySummarizedExperiment says: some column names are duplicated") 76 | } 77 | if (check_if_any_dimnames_duplicated(x, dim = "rows")) { 78 | stop("tidySummarizedExperiment says: some row names are duplicated") 79 | } 80 | 81 | # Stop if column names of assays do not overlap 82 | if (check_if_assays_are_NOT_overlapped(x, dim = "cols")) { 83 | stop( 84 | "tidySummarizedExperiment says: the assays in your SummarizedExperiment have column names, 85 | but they do not completely overlap." 86 | ) 87 | } 88 | if (check_if_assays_are_NOT_overlapped(x, dim = "rows")) { 89 | stop( 90 | "tidySummarizedExperiment says: the assays in your SummarizedExperiment have row names, 91 | but they do not completely overlap." 92 | ) 93 | } 94 | 95 | # reorder assay colnames before printing 96 | # Rearrange if assays has colnames and rownames 97 | x <- order_assays_internally_to_be_consistent(x) 98 | 99 | my_tibble <- 100 | x |> 101 | 102 | # If I have more than 30 genes select first sample 103 | when( 104 | nrow(.) > 30 ~.[1:min(50, nrow(x)), min(1, ncol(x)), drop=FALSE] , 105 | ncol(.) 
== 0 ~ ., 106 | ~ .[, 1:min(20, ncol(x)), drop=FALSE] 107 | ) %>% 108 | 109 | as_tibble() 110 | 111 | my_tibble |> 112 | new_data_frame(class=c("tidySummarizedExperiment", "tbl")) %>% 113 | add_attr(nrow(x), "number_of_features") %>% 114 | add_attr(ncol(x), "number_of_samples") %>% 115 | add_attr(assays(x) %>% names , "assay_names") %>% 116 | 117 | # Set fake dimensions for efficiency 118 | add_attr( 119 | sprintf( 120 | "%s %s %s", 121 | x %>% dim %>% {(.)[1] * (.)[2]} %>% 122 | format(format="f", big.mark=",", digits=1), 123 | cli::symbol$times, 124 | ncol(my_tibble) 125 | ) %>% 126 | setNames("A SummarizedExperiment-tibble abstraction"), 127 | "named_header" 128 | ) %>% 129 | print() 130 | invisible(x) 131 | } 132 | -------------------------------------------------------------------------------- /R/tibble_methods.R: -------------------------------------------------------------------------------- 1 | #' @name as_tibble 2 | #' @rdname as_tibble 3 | #' @inherit tibble::as_tibble 4 | #' @return `tibble` 5 | #' 6 | #' @examples 7 | #' tidySummarizedExperiment::pasilla %>% 8 | #' as_tibble() 9 | #' 10 | #' tidySummarizedExperiment::pasilla %>% 11 | #' as_tibble(.subset=-c(condition, type)) 12 | #' 13 | #' @importFrom purrr reduce 14 | #' @importFrom purrr map 15 | #' @importFrom tidyr spread 16 | #' @importFrom tibble enframe 17 | #' @importFrom SummarizedExperiment colData 18 | #' @importFrom pkgconfig get_config 19 | #' @export 20 | as_tibble.SummarizedExperiment <- function(x, ..., 21 | .name_repair=c("check_unique", "unique", "universal", "minimal"), 22 | rownames=pkgconfig::get_config("tibble::rownames", NULL)) { 23 | 24 | .as_tibble_optimised(x = x, ..., 25 | .name_repair=.name_repair, rownames=rownames) 26 | 27 | } 28 | 29 | .as_tibble_optimised <- function(x, skip_GRanges=FALSE, .subset=NULL, 30 | .name_repair=c("check_unique", "unique", "universal", "minimal"), 31 | rownames=pkgconfig::get_config("tibble::rownames", NULL)) { 32 | 33 | .subset <- 
enquo(.subset) 34 | 35 | sample_info <- 36 | colData(x) %>% 37 | 38 | # If reserved column names are present add .x 39 | change_reserved_column_names(x) %>% 40 | 41 | # Convert to tibble 42 | tibble::as_tibble(rownames=s_(x)$name) %>% 43 | setNames(c(s_(x)$name, colnames(colData(x)))) 44 | 45 | range_info <- 46 | skip_GRanges %>% 47 | when( 48 | (.) ~ tibble() %>% list, 49 | ~ get_special_datasets(x) 50 | ) %>% 51 | reduce(left_join, by="coordinate") 52 | 53 | gene_info <- 54 | rowData(x) %>% 55 | 56 | # If reserved column names are present add .x 57 | change_reserved_column_names(x)%>% 58 | 59 | # Convert to tibble 60 | tibble::as_tibble(rownames=f_(x)$name) %>% 61 | setNames(c(f_(x)$name, colnames(rowData(x)))) 62 | 63 | count_info <- get_count_datasets(x) 64 | 65 | # Return 66 | if (quo_is_null(.subset)) 67 | 68 | # If I want to return all columns 69 | count_info %>% 70 | full_join(sample_info, by=s_(x)$name) %>% 71 | full_join(gene_info, by=f_(x)$name) %>% 72 | when(nrow(range_info) > 0 ~ 73 | (.) %>% left_join(range_info) %>% suppressMessages(), 74 | ~ (.)) 75 | 76 | # This function outputs a tibble after subsetting the columns 77 | else subset_tibble_output(x, count_info, sample_info, 78 | gene_info, range_info, !!.subset) 79 | } 80 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | #' @param lhs A value or the magrittr placeholder. 12 | #' @param rhs A function call using the magrittr semantics. 13 | #' @return The result of calling `rhs(lhs)`. 
14 | #' 15 | #' @examples 16 | #' 17 | #' library(magrittr) 18 | #' 1 %>% sum(2) 19 | NULL 20 | -------------------------------------------------------------------------------- /R/validation.R: -------------------------------------------------------------------------------- 1 | #' @importFrom magrittr equals 2 | #' @importFrom dplyr n 3 | is_rectangular <- function(.data, se) { 4 | is_rectangular_sample <- 5 | .data %>% 6 | count(!!s_(se)$symbol ) %>% 7 | count(n, name="nn") %>% 8 | nrow() %>% 9 | st(2) 10 | 11 | is_rectangular_transcript <- 12 | .data %>% 13 | count(!!f_(se)$symbol) %>% 14 | count(n, name="nn") %>% 15 | nrow() %>% 16 | st(2) 17 | 18 | is_rectangular_sample & is_rectangular_transcript 19 | } 20 | 21 | is_not_duplicated <- function(.data, se) { 22 | .data %>% 23 | count(!!s_(se)$symbol , !!f_(se)$symbol) %>% 24 | filter(n > 1) %>% 25 | nrow() %>% 26 | equals(0) 27 | } 28 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @importFrom utils packageDescription 2 | .onAttach <- function(libname, pkgname) { 3 | attached <- tidyverse_attach() 4 | } -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "tidySummarizedExperiment - part of tidytranscriptomics" 3 | output: github_document 4 | always_allow_html: true 5 | --- 6 | 7 | 8 | [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) [![R build status](https://github.com/stemangiola/tidySummarizedExperiment/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/stemangiola/tidySummarizedExperiment/actions) 9 | 10 | 11 | ```{r echo=FALSE} 12 | knitr::opts_chunk$set(fig.path="man/figures/") 13 | ``` 14 | **Brings SummarizedExperiment to the tidyverse!** 15 | 16 | 
website: [stemangiola.github.io/tidySummarizedExperiment/](https://stemangiola.github.io/tidySummarizedExperiment/) 17 | 18 | Another [nice introduction](https://carpentries-incubator.github.io/bioc-intro/60-next-steps/index.html) by carpentries-incubator. 19 | 20 | Please also have a look at 21 | 22 | - [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) for tidy manipulation of SingleCellExperiment objects 23 | - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy manipulation of Seurat objects 24 | - [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy analysis of RNA sequencing data 25 | - [nanny](https://github.com/stemangiola/nanny) for tidy high-level data analysis and manipulation 26 | - [tidygate](https://github.com/stemangiola/tidygate) for adding custom gate information to your tibble 27 | - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles 28 | 29 | 30 | ```{r, echo=FALSE, include=FALSE} 31 | library(knitr) 32 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 33 | ``` 34 | 35 | # Introduction 36 | 37 | tidySummarizedExperiment provides a bridge between Bioconductor [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) [@morgan2020summarized] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the 38 | Bioconductor *SummarizedExperiment* object as a tidyverse tibble, and provides SummarizedExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. This allows users to get the best of both Bioconductor and tidyverse worlds. 
39 | 40 | 41 | ## Functions/utilities available 42 | 43 | SummarizedExperiment-compatible Functions | Description 44 | ------------ | ------------- 45 | `all` | After all `tidySummarizedExperiment` is a SummarizedExperiment object, just better 46 | 47 | tidyverse Packages | Description 48 | ------------ | ------------- 49 | `dplyr` | Almost all `dplyr` APIs like for any tibble 50 | `tidyr` | Almost all `tidyr` APIs like for any tibble 51 | `ggplot2` | `ggplot` like for any tibble 52 | `plotly` | `plot_ly` like for any tibble 53 | 54 | Utilities | Description 55 | ------------ | ------------- 56 | `as_tibble` | Convert cell-wise information to a `tbl_df` 57 | 58 | ## Installation 59 | 60 | ```{r, eval=FALSE} 61 | if (!requireNamespace("BiocManager", quietly=TRUE)) { 62 | install.packages("BiocManager") 63 | } 64 | 65 | BiocManager::install("tidySummarizedExperiment") 66 | ``` 67 | 68 | From Github (development) 69 | ```{r, eval=FALSE} 70 | devtools::install_github("stemangiola/tidySummarizedExperiment") 71 | ``` 72 | 73 | Load libraries used in the examples. 74 | 75 | ```{r} 76 | library(ggplot2) 77 | library(tidySummarizedExperiment) 78 | ``` 79 | 80 | 81 | # Create `tidySummarizedExperiment`, the best of both worlds! 82 | 83 | This is a SummarizedExperiment object but it is evaluated as a tibble. So it is fully compatible both with SummarizedExperiment and tidyverse APIs. 84 | 85 | ```{r} 86 | pasilla_tidy <- tidySummarizedExperiment::pasilla 87 | ``` 88 | 89 | **It looks like a tibble** 90 | 91 | ```{r} 92 | pasilla_tidy 93 | ``` 94 | 95 | **But it is a SummarizedExperiment object after all** 96 | 97 | ```{r} 98 | assays(pasilla_tidy) 99 | ``` 100 | 101 | 102 | # Tidyverse commands 103 | 104 | We can use tidyverse commands to explore the tidy SummarizedExperiment object. 105 | 106 | We can use `slice` to choose rows by position, for example to choose the first row. 
107 | 108 | ```{r} 109 | pasilla_tidy %>% 110 | slice(1) 111 | ``` 112 | 113 | We can use `filter` to choose rows by criteria. 114 | 115 | ```{r} 116 | pasilla_tidy %>% 117 | filter(condition == "untreated") 118 | ``` 119 | 120 | We can use `select` to choose columns. 121 | 122 | ```{r} 123 | pasilla_tidy %>% 124 | select(.sample) 125 | ``` 126 | 127 | We can use `count` to count how many rows we have for each sample. 128 | 129 | ```{r} 130 | pasilla_tidy %>% 131 | count(.sample) 132 | ``` 133 | 134 | We can use `distinct` to see what distinct sample information we have. 135 | 136 | ```{r} 137 | pasilla_tidy %>% 138 | distinct(.sample, condition, type) 139 | ``` 140 | 141 | We could use `rename` to rename a column. For example, to modify the type column name. 142 | 143 | ```{r} 144 | pasilla_tidy %>% 145 | rename(sequencing=type) 146 | ``` 147 | 148 | We could use `mutate` to create a column. For example, we could create a new type column that contains single 149 | and paired instead of single_end and paired_end. 150 | 151 | ```{r} 152 | pasilla_tidy %>% 153 | mutate(type=gsub("_end", "", type)) 154 | ``` 155 | 156 | We could use `unite` to combine multiple columns into a single column. 157 | 158 | ```{r} 159 | pasilla_tidy %>% 160 | unite("group", c(condition, type)) 161 | ``` 162 | 163 | We can also combine commands with the tidyverse pipe `%>%`. 164 | 165 | For example, we could combine `group_by` and `summarise` to get the total counts for each sample. 166 | 167 | ```{r} 168 | pasilla_tidy %>% 169 | group_by(.sample) %>% 170 | summarise(total_counts=sum(counts)) 171 | ``` 172 | 173 | We could combine `group_by`, `mutate` and `filter` to get the transcripts with mean count > 0. 
174 | 175 | ```{r} 176 | pasilla_tidy %>% 177 | group_by(.feature) %>% 178 | mutate(mean_count=mean(counts)) %>% 179 | filter(mean_count > 0) 180 | ``` 181 | 182 | 183 | # Plotting 184 | 185 | ```{r} 186 | my_theme <- 187 | list( 188 | scale_fill_brewer(palette="Set1"), 189 | scale_color_brewer(palette="Set1"), 190 | theme_bw() + 191 | theme( 192 | panel.border=element_blank(), 193 | axis.line=element_line(), 194 | panel.grid.major=element_line(size=0.2), 195 | panel.grid.minor=element_line(size=0.1), 196 | text=element_text(size=12), 197 | legend.position="bottom", 198 | aspect.ratio=1, 199 | strip.background=element_blank(), 200 | axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)), 201 | axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10)) 202 | ) 203 | ) 204 | ``` 205 | 206 | We can treat `pasilla_tidy` as a normal tibble for plotting. 207 | 208 | Here we plot the distribution of counts per sample. 209 | 210 | ```{r plot1} 211 | pasilla_tidy %>% 212 | tidySummarizedExperiment::ggplot(aes(counts + 1, group=.sample, color=`type`)) + 213 | geom_density() + 214 | scale_x_log10() + 215 | my_theme 216 | ``` 217 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | tidySummarizedExperiment - part of tidytranscriptomics 2 | ================ 3 | 4 | 5 | 6 | [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) 7 | [![R build 8 | status](https://github.com/stemangiola/tidySummarizedExperiment/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/stemangiola/tidySummarizedExperiment/actions) 9 | 10 | 11 | **Brings SummarizedExperiment to the tidyverse!** 12 | 13 | website: 14 | [stemangiola.github.io/tidySummarizedExperiment/](https://stemangiola.github.io/tidySummarizedExperiment/) 15 | 16 | Another [nice 
introduction](https://carpentries-incubator.github.io/bioc-intro/60-next-steps/index.html) by carpentries-incubator. 17 | 18 | Please also have a look at 19 | 20 | - [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) 21 | for tidy manipulation of SingleCellExperiment objects 22 | - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy 23 | manipulation of Seurat objects 24 | - [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy 25 | analysis of RNA sequencing data 26 | - [nanny](https://github.com/stemangiola/nanny) for tidy high-level 27 | data analysis and manipulation 28 | - [tidygate](https://github.com/stemangiola/tidygate) for adding 29 | custom gate information to your tibble 30 | - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for 31 | heatmaps produced with tidy principles 32 | 33 | # Introduction 34 | 35 | tidySummarizedExperiment provides a bridge between Bioconductor 36 | [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) 37 | \[@morgan2020summarized\] and the tidyverse \[@wickham2019welcome\]. It 38 | creates an invisible layer that enables viewing the Bioconductor 39 | *SummarizedExperiment* object as a tidyverse tibble, and provides 40 | SummarizedExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* 41 | functions. This allows users to get the best of both Bioconductor and 42 | tidyverse worlds. 
43 | 44 | ## Functions/utilities available 45 | 46 | | SummarizedExperiment-compatible Functions | Description | 47 | |-------------------------------------------|------------------------------------------------------------------------------------| 48 | | `all` | After all `tidySummarizedExperiment` is a SummarizedExperiment object, just better | 49 | 50 | | tidyverse Packages | Description | 51 | |--------------------|---------------------------------------------| 52 | | `dplyr` | Almost all `dplyr` APIs like for any tibble | 53 | | `tidyr` | Almost all `tidyr` APIs like for any tibble | 54 | | `ggplot2` | `ggplot` like for any tibble | 55 | | `plotly` | `plot_ly` like for any tibble | 56 | 57 | | Utilities | Description | 58 | |-------------|---------------------------------------------| 59 | | `as_tibble` | Convert cell-wise information to a `tbl_df` | 60 | 61 | ## Installation 62 | 63 | ``` r 64 | if (!requireNamespace("BiocManager", quietly=TRUE)) { 65 | install.packages("BiocManager") 66 | } 67 | 68 | BiocManager::install("tidySummarizedExperiment") 69 | ``` 70 | 71 | From Github (development) 72 | 73 | ``` r 74 | devtools::install_github("stemangiola/tidySummarizedExperiment") 75 | ``` 76 | 77 | Load libraries used in the examples. 78 | 79 | ``` r 80 | library(ggplot2) 81 | library(tidySummarizedExperiment) 82 | ``` 83 | 84 | # Create `tidySummarizedExperiment`, the best of both worlds! 85 | 86 | This is a SummarizedExperiment object but it is evaluated as a tibble. 87 | So it is fully compatible both with SummarizedExperiment and tidyverse 88 | APIs. 
89 | 90 | ``` r 91 | pasilla_tidy <- tidySummarizedExperiment::pasilla 92 | ``` 93 | 94 | **It looks like a tibble** 95 | 96 | ``` r 97 | pasilla_tidy 98 | ``` 99 | 100 | ## # A SummarizedExperiment-tibble abstraction: 102,193 × 5 101 | ## # Transcripts=14599 | Samples=7 | Assays=counts 102 | ## .feature .sample counts condition type 103 | ## 104 | ## 1 FBgn0000003 untrt1 0 untreated single_end 105 | ## 2 FBgn0000008 untrt1 92 untreated single_end 106 | ## 3 FBgn0000014 untrt1 5 untreated single_end 107 | ## 4 FBgn0000015 untrt1 0 untreated single_end 108 | ## 5 FBgn0000017 untrt1 4664 untreated single_end 109 | ## 6 FBgn0000018 untrt1 583 untreated single_end 110 | ## 7 FBgn0000022 untrt1 0 untreated single_end 111 | ## 8 FBgn0000024 untrt1 10 untreated single_end 112 | ## 9 FBgn0000028 untrt1 0 untreated single_end 113 | ## 10 FBgn0000032 untrt1 1446 untreated single_end 114 | ## # … with 40 more rows 115 | 116 | **But it is a SummarizedExperiment object after all** 117 | 118 | ``` r 119 | assays(pasilla_tidy) 120 | ``` 121 | 122 | ## List of length 1 123 | ## names(1): counts 124 | 125 | # Tidyverse commands 126 | 127 | We can use tidyverse commands to explore the tidy SummarizedExperiment 128 | object. 129 | 130 | We can use `slice` to choose rows by position, for example to choose the 131 | first row. 132 | 133 | ``` r 134 | pasilla_tidy %>% 135 | slice(1) 136 | ``` 137 | 138 | ## # A SummarizedExperiment-tibble abstraction: 1 × 5 139 | ## # Transcripts=1 | Samples=1 | Assays=counts 140 | ## .feature .sample counts condition type 141 | ## 142 | ## 1 FBgn0000003 untrt1 0 untreated single_end 143 | 144 | We can use `filter` to choose rows by criteria. 
145 | 146 | ``` r 147 | pasilla_tidy %>% 148 | filter(condition == "untreated") 149 | ``` 150 | 151 | ## # A SummarizedExperiment-tibble abstraction: 58,396 × 5 152 | ## # Transcripts=14599 | Samples=4 | Assays=counts 153 | ## .feature .sample counts condition type 154 | ## 155 | ## 1 FBgn0000003 untrt1 0 untreated single_end 156 | ## 2 FBgn0000008 untrt1 92 untreated single_end 157 | ## 3 FBgn0000014 untrt1 5 untreated single_end 158 | ## 4 FBgn0000015 untrt1 0 untreated single_end 159 | ## 5 FBgn0000017 untrt1 4664 untreated single_end 160 | ## 6 FBgn0000018 untrt1 583 untreated single_end 161 | ## 7 FBgn0000022 untrt1 0 untreated single_end 162 | ## 8 FBgn0000024 untrt1 10 untreated single_end 163 | ## 9 FBgn0000028 untrt1 0 untreated single_end 164 | ## 10 FBgn0000032 untrt1 1446 untreated single_end 165 | ## # … with 40 more rows 166 | 167 | We can use `select` to choose columns. 168 | 169 | ``` r 170 | pasilla_tidy %>% 171 | select(.sample) 172 | ``` 173 | 174 | ## # A tibble: 102,193 × 1 175 | ## .sample 176 | ## 177 | ## 1 untrt1 178 | ## 2 untrt1 179 | ## 3 untrt1 180 | ## 4 untrt1 181 | ## 5 untrt1 182 | ## 6 untrt1 183 | ## 7 untrt1 184 | ## 8 untrt1 185 | ## 9 untrt1 186 | ## 10 untrt1 187 | ## # … with 102,183 more rows 188 | 189 | We can use `count` to count how many rows we have for each sample. 190 | 191 | ``` r 192 | pasilla_tidy %>% 193 | count(.sample) 194 | ``` 195 | 196 | ## # A tibble: 7 × 2 197 | ## .sample n 198 | ## 199 | ## 1 trt1 14599 200 | ## 2 trt2 14599 201 | ## 3 trt3 14599 202 | ## 4 untrt1 14599 203 | ## 5 untrt2 14599 204 | ## 6 untrt3 14599 205 | ## 7 untrt4 14599 206 | 207 | We can use `distinct` to see what distinct sample information we have. 
208 | 209 | ``` r 210 | pasilla_tidy %>% 211 | distinct(.sample, condition, type) 212 | ``` 213 | 214 | ## # A tibble: 7 × 3 215 | ## .sample condition type 216 | ## 217 | ## 1 untrt1 untreated single_end 218 | ## 2 untrt2 untreated single_end 219 | ## 3 untrt3 untreated paired_end 220 | ## 4 untrt4 untreated paired_end 221 | ## 5 trt1 treated single_end 222 | ## 6 trt2 treated paired_end 223 | ## 7 trt3 treated paired_end 224 | 225 | We could use `rename` to rename a column. For example, to modify the 226 | type column name. 227 | 228 | ``` r 229 | pasilla_tidy %>% 230 | rename(sequencing=type) 231 | ``` 232 | 233 | ## # A SummarizedExperiment-tibble abstraction: 102,193 × 5 234 | ## # Transcripts=14599 | Samples=7 | Assays=counts 235 | ## .feature .sample counts condition sequencing 236 | ## 237 | ## 1 FBgn0000003 untrt1 0 untreated single_end 238 | ## 2 FBgn0000008 untrt1 92 untreated single_end 239 | ## 3 FBgn0000014 untrt1 5 untreated single_end 240 | ## 4 FBgn0000015 untrt1 0 untreated single_end 241 | ## 5 FBgn0000017 untrt1 4664 untreated single_end 242 | ## 6 FBgn0000018 untrt1 583 untreated single_end 243 | ## 7 FBgn0000022 untrt1 0 untreated single_end 244 | ## 8 FBgn0000024 untrt1 10 untreated single_end 245 | ## 9 FBgn0000028 untrt1 0 untreated single_end 246 | ## 10 FBgn0000032 untrt1 1446 untreated single_end 247 | ## # … with 40 more rows 248 | 249 | We could use `mutate` to create a column. For example, we could create a 250 | new type column that contains single and paired instead of single_end 251 | and paired_end. 
252 | 253 | ``` r 254 | pasilla_tidy %>% 255 | mutate(type=gsub("_end", "", type)) 256 | ``` 257 | 258 | ## # A SummarizedExperiment-tibble abstraction: 102,193 × 5 259 | ## # Transcripts=14599 | Samples=7 | Assays=counts 260 | ## .feature .sample counts condition type 261 | ## 262 | ## 1 FBgn0000003 untrt1 0 untreated single 263 | ## 2 FBgn0000008 untrt1 92 untreated single 264 | ## 3 FBgn0000014 untrt1 5 untreated single 265 | ## 4 FBgn0000015 untrt1 0 untreated single 266 | ## 5 FBgn0000017 untrt1 4664 untreated single 267 | ## 6 FBgn0000018 untrt1 583 untreated single 268 | ## 7 FBgn0000022 untrt1 0 untreated single 269 | ## 8 FBgn0000024 untrt1 10 untreated single 270 | ## 9 FBgn0000028 untrt1 0 untreated single 271 | ## 10 FBgn0000032 untrt1 1446 untreated single 272 | ## # … with 40 more rows 273 | 274 | We could use `unite` to combine multiple columns into a single column. 275 | 276 | ``` r 277 | pasilla_tidy %>% 278 | unite("group", c(condition, type)) 279 | ``` 280 | 281 | ## # A SummarizedExperiment-tibble abstraction: 102,193 × 4 282 | ## # Transcripts=14599 | Samples=7 | Assays=counts 283 | ## .feature .sample counts group 284 | ## 285 | ## 1 FBgn0000003 untrt1 0 untreated_single_end 286 | ## 2 FBgn0000008 untrt1 92 untreated_single_end 287 | ## 3 FBgn0000014 untrt1 5 untreated_single_end 288 | ## 4 FBgn0000015 untrt1 0 untreated_single_end 289 | ## 5 FBgn0000017 untrt1 4664 untreated_single_end 290 | ## 6 FBgn0000018 untrt1 583 untreated_single_end 291 | ## 7 FBgn0000022 untrt1 0 untreated_single_end 292 | ## 8 FBgn0000024 untrt1 10 untreated_single_end 293 | ## 9 FBgn0000028 untrt1 0 untreated_single_end 294 | ## 10 FBgn0000032 untrt1 1446 untreated_single_end 295 | ## # … with 40 more rows 296 | 297 | We can also combine commands with the tidyverse pipe `%>%`. 298 | 299 | For example, we could combine `group_by` and `summarise` to get the 300 | total counts for each sample. 
301 | 302 | ``` r 303 | pasilla_tidy %>% 304 | group_by(.sample) %>% 305 | summarise(total_counts=sum(counts)) 306 | ``` 307 | 308 | ## # A tibble: 7 × 2 309 | ## .sample total_counts 310 | ## 311 | ## 1 trt1 18670279 312 | ## 2 trt2 9571826 313 | ## 3 trt3 10343856 314 | ## 4 untrt1 13972512 315 | ## 5 untrt2 21911438 316 | ## 6 untrt3 8358426 317 | ## 7 untrt4 9841335 318 | 319 | We could combine `group_by`, `mutate` and `filter` to get the 320 | transcripts with mean count \> 0. 321 | 322 | ``` r 323 | pasilla_tidy %>% 324 | group_by(.feature) %>% 325 | mutate(mean_count=mean(counts)) %>% 326 | filter(mean_count > 0) 327 | ``` 328 | 329 | ## # A tibble: 86,513 × 6 330 | ## # Groups: .feature [12,359] 331 | ## .feature .sample counts condition type mean_count 332 | ## 333 | ## 1 FBgn0000003 untrt1 0 untreated single_end 0.143 334 | ## 2 FBgn0000008 untrt1 92 untreated single_end 99.6 335 | ## 3 FBgn0000014 untrt1 5 untreated single_end 1.43 336 | ## 4 FBgn0000015 untrt1 0 untreated single_end 0.857 337 | ## 5 FBgn0000017 untrt1 4664 untreated single_end 4672. 338 | ## 6 FBgn0000018 untrt1 583 untreated single_end 461. 339 | ## 7 FBgn0000022 untrt1 0 untreated single_end 0.143 340 | ## 8 FBgn0000024 untrt1 10 untreated single_end 7 341 | ## 9 FBgn0000028 untrt1 0 untreated single_end 0.429 342 | ## 10 FBgn0000032 untrt1 1446 untreated single_end 1085. 
343 | ## # … with 86,503 more rows 344 | 345 | # Plotting 346 | 347 | ``` r 348 | my_theme <- 349 | list( 350 | scale_fill_brewer(palette="Set1"), 351 | scale_color_brewer(palette="Set1"), 352 | theme_bw() + 353 | theme( 354 | panel.border=element_blank(), 355 | axis.line=element_line(), 356 | panel.grid.major=element_line(size=0.2), 357 | panel.grid.minor=element_line(size=0.1), 358 | text=element_text(size=12), 359 | legend.position="bottom", 360 | aspect.ratio=1, 361 | strip.background=element_blank(), 362 | axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)), 363 | axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10)) 364 | ) 365 | ) 366 | ``` 367 | 368 | We can treat `pasilla_tidy` as a normal tibble for plotting. 369 | 370 | Here we plot the distribution of counts per sample. 371 | 372 | ``` r 373 | pasilla_tidy %>% 374 | tidySummarizedExperiment::ggplot(aes(counts + 1, group=.sample, color=`type`)) + 375 | geom_density() + 376 | scale_x_log10() + 377 | my_theme 378 | ``` 379 | 380 | ![](man/figures/plot1-1.png) 381 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | patch: 10 | default: 11 | target: auto 12 | threshold: 1% 13 | -------------------------------------------------------------------------------- /data/pasilla.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stemangiola/tidySummarizedExperiment/4b8a4e1bdba6230abe00fc2fb0e99eaffeca2532/data/pasilla.rda 
-------------------------------------------------------------------------------- /data/se.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stemangiola/tidySummarizedExperiment/4b8a4e1bdba6230abe00fc2fb0e99eaffeca2532/data/se.rda -------------------------------------------------------------------------------- /dev/TCGA_processing.R: -------------------------------------------------------------------------------- 1 | library(TCGAbiolinks) 2 | library(readr) 3 | library(tidyr) 4 | library(dplyr) 5 | library(ggplot2) 6 | library(tidybulk) 7 | library(tidySummarizedExperiment) 8 | 9 | 10 | query <- GDCquery(project = "TCGA-BRCA", 11 | data.category = "Gene expression", 12 | data.type = "Gene expression quantification", 13 | platform = "Illumina HiSeq", 14 | file.type = "normalized_results", 15 | experimental.strategy = "RNA-Seq", 16 | legacy = TRUE) 17 | 18 | 19 | GDCdownload(query, method = "api", files.per.chunk = 10) 20 | counts_se <- GDCprepare(query) 21 | 22 | query_clin <- GDCquery(project = "TCGA-BRCA", 23 | data.category = "Clinical", 24 | data.type = "Clinical data", 25 | file.type = "txt", 26 | legacy = TRUE) 27 | 28 | 29 | GDCdownload(query_clin) 30 | 31 | clinical_list <- GDCprepare(query_clin) 32 | 33 | clinical_patient_brca <- clinical_list$clinical_patient_brca 34 | 35 | groups <- c("HER2_pos", "HER2_low", "HER2_neg") 36 | 37 | tcga_her2 <- clinical_patient_brca %>% # label each patient from IHC score / FISH status; rows matching no rule get NA and are dropped by the filter below 38 | mutate(her2_group = case_when((her2_ihc_score == 3 | her2_fish_status == "Positive") ~ "HER2_pos", 39 | ((her2_ihc_score == 1 | her2_ihc_score == 2) & her2_fish_status == "Negative") ~ "HER2_low", 40 | her2_ihc_score == 0 ~ "HER2_neg" 41 | )) %>% 42 | filter(her2_group %in% groups) %>% 43 | select(bcr_patient_barcode, her2_group) 44 | 45 | tcga_joined <- # expression data joined to the HER2 labels on barcode = bcr_patient_barcode 46 | counts_se %>% 47 | extract(barcode, "barcode", "(.+-.+-.+).+") %>% # pipe the extract() result into inner_join() as its first argument 48 | inner_join(tcga_her2, by = c("barcode" = "bcr_patient_barcode")) 49 | 50 | counts_se %>% 
select(.sample) 51 | -------------------------------------------------------------------------------- /inst/NEWS.rd: -------------------------------------------------------------------------------- 1 | \name{NEWS} 2 | \title{News for Package \pkg{tidySummarizedExperiment}} 3 | 4 | \section{Changes in version 1.4.0, Bioconductor 3.14 Release}{ 5 | \itemize{ 6 | \item Improved join_*() functions. 7 | \item Changed special column names with a starting "." to avoid conflicts with pre-existing column names. 8 | \item Improved all method for large-scale datasets. 9 | }} 10 | 11 | \section{Changes in version 1.5.3, Bioconductor 3.15 Release}{ 12 | \itemize{ 13 | \item Speed-up nest. 14 | \item Adaptation to Ranged-SummarizedExperiment. 15 | }} 16 | 17 | \section{Changes in version 1.7.3, Bioconductor 3.16 Release}{ 18 | \itemize{ 19 | \item Fixed as_tibble edge case 20 | \item Fixed print for DelayedArray 21 | \item Improve performance for large-scale datasets 22 | \item Fixed filter if the result is a no-gene dataset, and improved performance of filtering 23 | }} 24 | 25 | -------------------------------------------------------------------------------- /man/as_tibble.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tibble_methods.R 3 | \name{as_tibble} 4 | \alias{as_tibble} 5 | \alias{as_tibble.SummarizedExperiment} 6 | \title{Coerce lists, matrices, and more to data frames} 7 | \usage{ 8 | \method{as_tibble}{SummarizedExperiment}( 9 | x, 10 | ..., 11 | .name_repair = c("check_unique", "unique", "universal", "minimal"), 12 | rownames = pkgconfig::get_config("tibble::rownames", NULL) 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{A data frame, list, matrix, or other object that could reasonably be 17 | coerced to a tibble.} 18 | 19 | \item{...}{Unused, for extensibility.} 20 | 21 | \item{.name_repair}{Treatment of problematic column names: 22 | \itemize{ 
23 | \item \code{"minimal"}: No name repair or checks, beyond basic existence, 24 | \item \code{"unique"}: Make sure names are unique and not empty, 25 | \item \code{"check_unique"}: (default value), no name repair, but check they are 26 | \code{unique}, 27 | \item \code{"universal"}: Make the names \code{unique} and syntactic 28 | \item a function: apply custom name repair (e.g., \code{.name_repair = make.names} 29 | for names in the style of base R). 30 | \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}} 31 | } 32 | 33 | This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. 34 | See there for more details on these terms and the strategies used 35 | to enforce them.} 36 | 37 | \item{rownames}{How to treat existing row names of a data frame or matrix: 38 | \itemize{ 39 | \item \code{NULL}: remove row names. This is the default. 40 | \item \code{NA}: keep row names. 41 | \item A string: the name of a new column. Existing rownames are transferred 42 | into this column and the \code{row.names} attribute is deleted. 43 | No name repair is applied to the new column name, even if \code{x} already contains 44 | a column of that name. 45 | Use \code{as_tibble(rownames_to_column(...))} to safeguard against this case. 46 | } 47 | 48 | Read more in \link[tibble]{rownames}.} 49 | } 50 | \value{ 51 | \code{tibble} 52 | } 53 | \description{ 54 | \code{as_tibble()} turns an existing object, such as a data frame or 55 | matrix, into a so-called tibble, a data frame with class \code{\link[tibble]{tbl_df}}. This is 56 | in contrast with \code{\link[tibble:tibble]{tibble()}}, which builds a tibble from individual columns. 57 | \code{as_tibble()} is to \code{\link[tibble:tibble]{tibble()}} as \code{\link[base:as.data.frame]{base::as.data.frame()}} is to 58 | \code{\link[base:data.frame]{base::data.frame()}}. 
59 | 60 | \code{as_tibble()} is an S3 generic, with methods for: 61 | \itemize{ 62 | \item \code{\link[base:data.frame]{data.frame}}: Thin wrapper around the \code{list} method 63 | that implements tibble's treatment of \link[tibble]{rownames}. 64 | \item \code{\link[base:matrix]{matrix}}, \code{\link[stats:poly]{poly}}, 65 | \code{\link[stats:ts]{ts}}, \code{\link[base:table]{table}} 66 | \item Default: Other inputs are first coerced with \code{\link[base:as.data.frame]{base::as.data.frame()}}. 67 | } 68 | 69 | \code{as_tibble_row()} converts a vector to a tibble with one row. 70 | If the input is a list, all elements must have size one. 71 | 72 | \code{as_tibble_col()} converts a vector to a tibble with one column. 73 | } 74 | \section{Row names}{ 75 | 76 | 77 | The default behavior is to silently remove row names. 78 | 79 | New code should explicitly convert row names to a new column using the 80 | \code{rownames} argument. 81 | 82 | For existing code that relies on the retention of row names, call 83 | \code{pkgconfig::set_config("tibble::rownames" = NA)} in your script or in your 84 | package's \code{\link[=.onLoad]{.onLoad()}} function. 85 | 86 | } 87 | 88 | \section{Life cycle}{ 89 | 90 | 91 | Using \code{as_tibble()} for vectors is superseded as of version 3.0.0, 92 | prefer the more expressive \code{as_tibble_row()} and 93 | \code{as_tibble_col()} variants for new code. 94 | 95 | } 96 | 97 | \examples{ 98 | tidySummarizedExperiment::pasilla \%>\% 99 | as_tibble() 100 | 101 | tidySummarizedExperiment::pasilla \%>\% 102 | as_tibble(.subset=-c(condition, type)) 103 | 104 | } 105 | \seealso{ 106 | \code{\link[tibble:tibble]{tibble()}} constructs a tibble from individual columns. \code{\link[tibble:enframe]{enframe()}} 107 | converts a named vector to a tibble with a column of names and column of 108 | values. Name repair is implemented using \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. 
109 | } 110 | -------------------------------------------------------------------------------- /man/bind_rows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{bind_rows} 4 | \alias{bind_rows} 5 | \alias{bind_rows.SummarizedExperiment} 6 | \alias{bind_cols.SummarizedExperiment} 7 | \alias{bind_cols} 8 | \alias{bind_cols.RangedSummarizedExperiment} 9 | \title{Efficiently bind multiple data frames by row and column} 10 | \usage{ 11 | \method{bind_rows}{SummarizedExperiment}(..., .id = NULL, add.cell.ids = NULL) 12 | 13 | \method{bind_cols}{SummarizedExperiment}(..., .id = NULL) 14 | 15 | \method{bind_cols}{RangedSummarizedExperiment}(..., .id = NULL) 16 | } 17 | \arguments{ 18 | \item{...}{Data frames to combine. 19 | 20 | Each argument can either be a data frame, a list that could be a data 21 | frame, or a list of data frames. 22 | 23 | When row-binding, columns are matched by name, and any missing 24 | columns will be filled with NA. 25 | 26 | When column-binding, rows are matched by position, so all data 27 | frames must have the same number of rows. To match by value, not 28 | position, see mutate-joins.} 29 | 30 | \item{.id}{Data frame identifier. 31 | 32 | When `.id` is supplied, a new column of identifiers is 33 | created to link each row to its original data frame. The labels 34 | are taken from the named arguments to `bind_rows()`. When a 35 | list of data frames is supplied, the labels are taken from the 36 | names of the list. If no names are found a numeric sequence is 37 | used instead.} 38 | 39 | \item{add.cell.ids}{Appends the corresponding values to} 40 | } 41 | \value{ 42 | `bind_rows()` and `bind_cols()` return the same type as 43 | the first input, either a data frame, `tbl_df`, or `grouped_df`. 
44 | 45 | `bind_rows()` and `bind_cols()` return the same type as 46 | the first input, either a data frame, `tbl_df`, or `grouped_df`. 47 | } 48 | \description{ 49 | This is an efficient implementation of the common pattern of 50 | `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many 51 | data frames into one. 52 | 53 | This is an efficient implementation of the common pattern of 54 | `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many 55 | data frames into one. 56 | } 57 | \details{ 58 | The output of `bind_rows()` will contain a column if that column 59 | appears in any of the inputs. 60 | 61 | The output of `bind_rows()` will contain a column if that column 62 | appears in any of the inputs. 63 | } 64 | \examples{ 65 | data(se) 66 | ttservice::bind_rows(se, se) 67 | 68 | se_bind <- se |> select(dex, albut) 69 | se |> ttservice::bind_cols(se_bind) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /man/count.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{count} 4 | \alias{count} 5 | \alias{count.SummarizedExperiment} 6 | \title{Count the observations in each group} 7 | \usage{ 8 | \method{count}{SummarizedExperiment}( 9 | x, 10 | ..., 11 | wt = NULL, 12 | sort = FALSE, 13 | name = NULL, 14 | .drop = group_by_drop_default(x) 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{A data frame, data frame extension (e.g. a tibble), or a 19 | lazy data frame (e.g. from dbplyr or dtplyr).} 20 | 21 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group 22 | by.} 23 | 24 | \item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights. 25 | Can be \code{NULL} or a variable: 26 | \itemize{ 27 | \item If \code{NULL} (the default), counts the number of rows in each group. 
28 | \item If a variable, computes \code{sum(wt)} for each group. 29 | }} 30 | 31 | \item{sort}{If \code{TRUE}, will show the largest groups at the top.} 32 | 33 | \item{name}{The name of the new column in the output. 34 | 35 | If omitted, it will default to \code{n}. If there's already a column called \code{n}, 36 | it will use \code{nn}. If there's a column called \code{n} and \code{nn}, it'll use 37 | \code{nnn}, and so on, adding \code{n}s until it gets a new name.} 38 | 39 | \item{.drop}{Handling of factor levels that don't appear in the data, passed 40 | on to \code{\link[dplyr:group_by]{group_by()}}. 41 | 42 | For \code{count()}: if \code{FALSE} will include counts for empty groups (i.e. for 43 | levels of factors that don't exist in the data). 44 | 45 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} For \code{add_count()}: deprecated since it 46 | can't actually affect the output.} 47 | } 48 | \value{ 49 | An object of the same type as \code{.data}. \code{count()} and \code{add_count()} 50 | group transiently, so the output has the same groups as the input. 51 | } 52 | \description{ 53 | \code{count()} lets you quickly count the unique values of one or more variables: 54 | \code{df \%>\% count(a, b)} is roughly equivalent to 55 | \code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}. 56 | \code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent 57 | to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts, 58 | switching the summary from \code{n = n()} to \code{n = sum(wt)}. 59 | 60 | \code{add_count()} and \code{add_tally()} are equivalents to \code{count()} and \code{tally()} 61 | but use \code{mutate()} instead of \code{summarise()} so that they add a new column 62 | with group-wise counts. 
63 | } 64 | \examples{ 65 | data(se) 66 | se |> count(dex) 67 | 68 | } 69 | -------------------------------------------------------------------------------- /man/distinct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{distinct} 4 | \alias{distinct} 5 | \alias{distinct.SummarizedExperiment} 6 | \title{Keep distinct/unique rows} 7 | \usage{ 8 | \method{distinct}{SummarizedExperiment}(.data, ..., .keep_all = FALSE) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to 16 | use when determining uniqueness. If there are multiple rows for a given 17 | combination of inputs, only the first row will be preserved. If omitted, 18 | will use all variables in the data frame.} 19 | 20 | \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. 21 | If a combination of \code{...} is not distinct, this keeps the 22 | first row of values.} 23 | } 24 | \value{ 25 | An object of the same type as \code{.data}. The output has the following 26 | properties: 27 | \itemize{ 28 | \item Rows are a subset of the input but appear in the same order. 29 | \item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}. 30 | Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns. 31 | \item Groups are not modified. 32 | \item Data frame attributes are preserved. 33 | } 34 | } 35 | \description{ 36 | Keep only unique/distinct rows from a data frame. This is similar 37 | to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster. 
38 | } 39 | \section{Methods}{ 40 | 41 | 42 | This function is a \strong{generic}, which means that packages can provide 43 | implementations (methods) for other classes. See the documentation of 44 | individual methods for extra arguments and differences in behaviour. 45 | 46 | The following methods are currently available in loaded packages: 47 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}. 48 | 49 | } 50 | 51 | \examples{ 52 | data(pasilla) 53 | pasilla |> distinct(.sample) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{extract} 4 | \alias{extract} 5 | \alias{extract.SummarizedExperiment} 6 | \title{Extract a character column into multiple columns using regular 7 | expression groups} 8 | \usage{ 9 | \method{extract}{SummarizedExperiment}( 10 | data, 11 | col, 12 | into, 13 | regex = "([[:alnum:]]+)", 14 | remove = TRUE, 15 | convert = FALSE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{A data frame.} 21 | 22 | \item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.} 23 | 24 | \item{into}{Names of new variables to create as character vector. 25 | Use \code{NA} to omit the variable in the output.} 26 | 27 | \item{regex}{A string representing a regular expression used to extract the 28 | desired values. There should be one group (defined by \verb{()}) for each 29 | element of \code{into}.} 30 | 31 | \item{remove}{If \code{TRUE}, remove input column from output data frame.} 32 | 33 | \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with 34 | \code{as.is = TRUE} on new columns. This is useful if the component 35 | columns are integer, numeric or logical. 
36 | 37 | NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.} 38 | 39 | \item{...}{Additional arguments passed on to methods.} 40 | } 41 | \value{ 42 | \code{tidySummarizedExperiment} 43 | } 44 | \description{ 45 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} 46 | 47 | \code{extract()} has been superseded in favour of \code{\link[tidyr:separate_wider_regex]{separate_wider_regex()}} 48 | because it has a more polished API and better handling of problems. 49 | Superseded functions will not go away, but will only receive critical bug 50 | fixes. 51 | 52 | Given a regular expression with capturing groups, \code{extract()} turns 53 | each group into a new column. If the groups don't match, or the input 54 | is NA, the output will be NA. 55 | } 56 | \examples{ 57 | tidySummarizedExperiment::pasilla |> 58 | extract(type, into="sequencing", regex="([a-z]*)_end", convert=TRUE) 59 | 60 | } 61 | \seealso{ 62 | \code{\link[tidyr:separate]{separate()}} to split up by a separator. 
63 | } 64 | -------------------------------------------------------------------------------- /man/figures/lifecycle-archived.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclearchivedarchived -------------------------------------------------------------------------------- /man/figures/lifecycle-defunct.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycledefunctdefunct -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycledeprecateddeprecated -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycleexperimentalexperimental -------------------------------------------------------------------------------- /man/figures/lifecycle-maturing.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclematuringmaturing -------------------------------------------------------------------------------- /man/figures/lifecycle-questioning.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclequestioningquestioning -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclestablestable -------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclesupersededsuperseded 
-------------------------------------------------------------------------------- /man/figures/plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stemangiola/tidySummarizedExperiment/4b8a4e1bdba6230abe00fc2fb0e99eaffeca2532/man/figures/plot1-1.png -------------------------------------------------------------------------------- /man/filter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{filter} 4 | \alias{filter} 5 | \alias{filter.SummarizedExperiment} 6 | \title{Keep rows that match a condition} 7 | \usage{ 8 | \method{filter}{SummarizedExperiment}(.data, ..., .preserve = FALSE) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that 16 | return a logical value, and are defined in terms of the variables in 17 | \code{.data}. If multiple expressions are included, they are combined with the 18 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are 19 | kept.} 20 | 21 | \item{.preserve}{Relevant when the \code{.data} input is grouped. 22 | If \code{.preserve = FALSE} (the default), the grouping structure 23 | is recalculated based on the resulting data, otherwise the grouping is kept as is.} 24 | } 25 | \value{ 26 | An object of the same type as \code{.data}. The output has the following properties: 27 | \itemize{ 28 | \item Rows are a subset of the input, but appear in the same order. 29 | \item Columns are not modified. 30 | \item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}). 31 | \item Data frame attributes are preserved. 
32 | } 33 | } 34 | \description{ 35 | The \code{filter()} function is used to subset a data frame, 36 | retaining all rows that satisfy your conditions. 37 | To be retained, the row must produce a value of \code{TRUE} for all conditions. 38 | Note that when a condition evaluates to \code{NA} 39 | the row will be dropped, unlike base subsetting with \code{[}. 40 | } 41 | \details{ 42 | The \code{filter()} function is used to subset the rows of 43 | \code{.data}, applying the expressions in \code{...} to the column values to determine which 44 | rows should be retained. It can be applied to both grouped and ungrouped data (see \code{\link[dplyr:group_by]{group_by()}} and 45 | \code{\link[dplyr:ungroup]{ungroup()}}). However, dplyr is not yet smart enough to optimise the filtering 46 | operation on grouped datasets that do not need grouped calculations. For this 47 | reason, filtering is often considerably faster on ungrouped data. 48 | } 49 | \section{Useful filter functions}{ 50 | 51 | 52 | 53 | There are many functions and operators that are useful when constructing the 54 | expressions used to filter the data: 55 | \itemize{ 56 | \item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc 57 | \item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}} 58 | \item \code{\link[=is.na]{is.na()}} 59 | \item \code{\link[dplyr:between]{between()}}, \code{\link[dplyr:near]{near()}} 60 | } 61 | 62 | } 63 | 64 | \section{Grouped tibbles}{ 65 | 66 | 67 | 68 | Because filtering expressions are computed within groups, they may 69 | yield different results on grouped tibbles. This will be the case 70 | as soon as an aggregating, lagging, or ranking function is 71 | involved. Compare this ungrouped filtering: 72 | 73 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{starwars \%>\% filter(mass > mean(mass, na.rm = TRUE)) 74 | }\if{html}{\out{
</div>}} 75 | 76 | With the grouped equivalent: 77 | 78 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE)) 79 | }\if{html}{\out{
</div>}} 80 | 81 | In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row to 82 | the global average (taken over the whole data set), keeping only the rows with 83 | \code{mass} greater than this global average. In contrast, the grouped version calculates 84 | the average mass separately for each \code{gender} group, and keeps rows with \code{mass} greater 85 | than the relevant within-gender average. 86 | 87 | } 88 | 89 | \section{Methods}{ 90 | 91 | 92 | This function is a \strong{generic}, which means that packages can provide 93 | implementations (methods) for other classes. See the documentation of 94 | individual methods for extra arguments and differences in behaviour. 95 | 96 | The following methods are currently available in loaded packages: 97 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. 98 | 99 | } 100 | 101 | \examples{ 102 | data(pasilla) 103 | pasilla |> filter(.sample == "untrt1") 104 | 105 | # Learn more in ?dplyr_tidy_eval 106 | 107 | } 108 | \seealso{ 109 | Other single table verbs: 110 | \code{\link[dplyr]{arrange}()}, 111 | \code{\link[dplyr]{mutate}()}, 112 | \code{\link[dplyr]{reframe}()}, 113 | \code{\link[dplyr]{rename}()}, 114 | \code{\link[dplyr]{select}()}, 115 | \code{\link[dplyr]{slice}()}, 116 | \code{\link[dplyr]{summarise}()} 117 | } 118 | -------------------------------------------------------------------------------- /man/formatting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print_method.R 3 | \name{formatting} 4 | \alias{formatting} 5 | \alias{print.SummarizedExperiment} 6 | \alias{print} 7 | \title{Printing tibbles} 8 | \usage{ 9 | \method{print}{SummarizedExperiment}(x, ..., n = NULL, width = NULL, n_extra = NULL) 10 | } 11 | \arguments{ 12 | \item{x}{Object to format or print.} 13 | 14 | \item{...}{Passed on to 
\code{\link[=tbl_format_setup]{tbl_format_setup()}}.} 15 | 16 | \item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows 17 | if less than the \code{print_max} \link[pillar:pillar_options]{option}. 18 | Otherwise, will print as many rows as specified by the 19 | \code{print_min} \link[pillar:pillar_options]{option}.} 20 | 21 | \item{width}{Width of text output to generate. This defaults to \code{NULL}, which 22 | means use the \code{width} \link[pillar:pillar_options]{option}.} 23 | 24 | \item{n_extra}{Number of extra columns to print abbreviated information for, 25 | if the width is too small for the entire tibble. If \code{NULL}, the default, 26 | will print information about at most \code{tibble.max_extra_cols} extra columns.} 27 | } 28 | \value{ 29 | Prints a message to the console describing 30 | the contents of the \code{tidySummarizedExperiment}. 31 | } 32 | \description{ 33 | One of the main features of the \code{tbl_df} class is the printing: 34 | \itemize{ 35 | \item Tibbles only print as many rows and columns as fit on one screen, 36 | supplemented by a summary of the remaining rows and columns. 37 | \item Tibble reveals the type of each column, which keeps the user informed about 38 | whether a variable is, e.g., \verb{<chr>} or \verb{<fct>} (character versus factor). 39 | See \code{vignette("types")} for an overview of common 40 | type abbreviations. 41 | } 42 | 43 | Printing can be tweaked for a one-off call by calling \code{print()} explicitly 44 | and setting arguments like \code{n} and \code{width}. More persistent control is 45 | available by setting the options described in \link[pillar:pillar_options]{pillar::pillar_options}. 46 | See also \code{vignette("digits")} for a comparison to base options, 47 | and \code{vignette("numbers")} that showcases \code{\link[tibble:num]{num()}} and \code{\link[tibble:char]{char()}} 48 | for creating columns with custom formatting options. 
49 | 50 | As of tibble 3.1.0, printing is handled entirely by the \pkg{pillar} package. 51 | If you implement a package that extends tibble, 52 | the printed output can be customized in various ways. 53 | See \code{vignette("extending", package = "pillar")} for details, 54 | and \link[pillar:pillar_options]{pillar::pillar_options} for options that control the display in the console. 55 | } 56 | \examples{ 57 | data(pasilla) 58 | print(pasilla) 59 | 60 | } 61 | -------------------------------------------------------------------------------- /man/full_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{full_join} 4 | \alias{full_join} 5 | \alias{full_join.SummarizedExperiment} 6 | \title{Mutating joins} 7 | \usage{ 8 | \method{full_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) 9 | } 10 | \arguments{ 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character 16 | vector of variables to join by. 17 | 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all 19 | variables in common across \code{x} and \code{y}. A message lists the variables so 20 | that you can check they're correct; suppress the message by supplying \code{by} 21 | explicitly. 22 | 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. 25 | 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with 27 | multiple expressions. 
For example, \code{join_by(a == b, c == d)} will match 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like 30 | \code{join_by(a, c)}. 31 | 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on 34 | these types of joins. 35 | 36 | For simple equality joins, you can alternatively specify a character vector 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. 40 | 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see 42 | \code{\link[dplyr:cross_join]{cross_join()}}.} 43 | 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source, 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the 46 | same src as \code{x}. This allows you to join tables across srcs, but 47 | it is a potentially expensive operation so you must opt into it.} 48 | 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and 50 | \code{y}, these suffixes will be added to the output to disambiguate them. 51 | Should be a character vector of length 2.} 52 | 53 | \item{...}{Other parameters passed on to methods.} 54 | } 55 | \value{ 56 | An object of the same type as \code{x} (including the same groups). The order of 57 | the rows and columns of \code{x} is preserved as much as possible. The output has 58 | the following properties: 59 | \itemize{ 60 | \item The rows are affected by the join type. 61 | \itemize{ 62 | \item \code{inner_join()} returns matched \code{x} rows. 63 | \item \code{left_join()} returns all \code{x} rows. 
64 | \item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. 65 | \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. 66 | } 67 | \item Output columns include all columns from \code{x} and all non-key columns from 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have 71 | the same name, \code{suffix}es are added to disambiguate these as well. 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their 73 | common type between \code{x} and \code{y}. 74 | } 75 | } 76 | \description{ 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on 78 | the keys. There are four mutating joins: the inner join, and the three outer 79 | joins. 80 | \subsection{Inner join}{ 81 | 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key 83 | in \code{y}. 84 | 85 | The most important property of an inner join is that unmatched rows in either 86 | input are not included in the result. This means that generally inner joins 87 | are not appropriate in most analyses, because it is too easy to lose 88 | observations. 89 | } 90 | 91 | \subsection{Outer joins}{ 92 | 93 | The three outer joins keep observations that appear in at least one of the 94 | data frames: 95 | \itemize{ 96 | \item A \code{left_join()} keeps all observations in \code{x}. 97 | \item A \code{right_join()} keeps all observations in \code{y}. 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. 99 | } 100 | } 101 | } 102 | \section{Many-to-many relationships}{ 103 | 104 | 105 | 106 | By default, dplyr guards against many-to-many relationships in equality joins 107 | by throwing a warning. 
These occur when both of the following are true: 108 | \itemize{ 109 | \item A row in \code{x} matches multiple rows in \code{y}. 110 | \item A row in \code{y} matches multiple rows in \code{x}. 111 | } 112 | 113 | This is typically surprising, as most joins involve a relationship of 114 | one-to-one, one-to-many, or many-to-one, and is often the result of an 115 | improperly specified join. Many-to-many relationships are particularly 116 | problematic because they can result in a Cartesian explosion of the number of 117 | rows returned from the join. 118 | 119 | If a many-to-many relationship is expected, silence this warning by 120 | explicitly setting \code{relationship = "many-to-many"}. 121 | 122 | In production code, it is best to preemptively set \code{relationship} to whatever 123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this 124 | forces an error to occur immediately if the data doesn't align with your 125 | expectations. 126 | 127 | Inequality joins typically result in many-to-many relationships by nature, so 128 | they don't warn on them by default, but you should still take extra care when 129 | specifying an inequality join, because they also have the capability to 130 | return a large number of rows. 131 | 132 | Rolling joins don't warn on many-to-many relationships either, but many 133 | rolling joins follow a many-to-one relationship, so it is often useful to 134 | set \code{relationship = "many-to-one"} to enforce this. 135 | 136 | Note that in SQL, most database providers won't let you specify a 137 | many-to-many relationship between two tables, instead requiring that you 138 | create a third \emph{junction table} that results in two one-to-many relationships 139 | instead. 140 | 141 | } 142 | 143 | \section{Methods}{ 144 | 145 | 146 | These functions are \strong{generic}s, which means that packages can provide 147 | implementations (methods) for other classes. 
See the documentation of 148 | individual methods for extra arguments and differences in behaviour. 149 | 150 | Methods available in currently loaded packages: 151 | \itemize{ 152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. 153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. 154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. 155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 156 | } 157 | 158 | } 159 | 160 | \examples{ 161 | data(pasilla) 162 | 163 | tt <- pasilla 164 | tt |> full_join(tibble::tibble(condition="treated", dose=10)) 165 | 166 | } 167 | \seealso{ 168 | Other joins: 169 | \code{\link[dplyr]{cross_join}()}, 170 | \code{\link[dplyr]{filter-joins}}, 171 | \code{\link[dplyr]{nest_join}()} 172 | } 173 | -------------------------------------------------------------------------------- /man/ggplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ggplot2_methods.R 3 | \name{ggplot} 4 | \alias{ggplot} 5 | \alias{ggplot.SummarizedExperiment} 6 | \title{Create a new \code{ggplot} from a \code{tidySummarizedExperiment}} 7 | \usage{ 8 | \method{ggplot}{SummarizedExperiment}(data = NULL, mapping = aes(), ..., environment = parent.frame()) 9 | } 10 | \arguments{ 11 | \item{data}{Default dataset to use for plot. If not already a data.frame, 12 | will be converted to one by \code{\link[ggplot2:fortify]{fortify()}}. If not specified, 13 | must be supplied in each layer added to the plot.} 14 | 15 | \item{mapping}{Default list of aesthetic mappings to use for plot. 16 | If not specified, must be supplied in each layer added to the plot.} 17 | 18 | \item{...}{Other arguments passed on to methods. 
Not currently used.} 19 | 20 | \item{environment}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Used prior to tidy 21 | evaluation.} 22 | } 23 | \value{ 24 | \code{ggplot} 25 | } 26 | \description{ 27 | \code{ggplot()} initializes a ggplot object. It can be used to 28 | declare the input data frame for a graphic and to specify the 29 | set of plot aesthetics intended to be common throughout all 30 | subsequent layers unless specifically overridden. 31 | } 32 | \details{ 33 | \code{ggplot()} is used to construct the initial plot object, 34 | and is almost always followed by a plus sign (\code{+}) to add 35 | components to the plot. 36 | 37 | There are three common patterns used to invoke \code{ggplot()}: 38 | \itemize{ 39 | \item \verb{ggplot(data = df, mapping = aes(x, y, other aesthetics))} 40 | \item \code{ggplot(data = df)} 41 | \item \code{ggplot()} 42 | } 43 | 44 | The first pattern is recommended if all layers use the same 45 | data and the same set of aesthetics, although this method 46 | can also be used when adding a layer using data from another 47 | data frame. 48 | 49 | The second pattern specifies the default data frame to use 50 | for the plot, but no aesthetics are defined up front. This 51 | is useful when one data frame is used predominantly for the 52 | plot, but the aesthetics vary from one layer to another. 53 | 54 | The third pattern initializes a skeleton \code{ggplot} object, which 55 | is fleshed out as layers are added. This is useful when 56 | multiple data frames are used to produce different layers, as 57 | is often the case in complex graphics. 58 | 59 | The \verb{data =} and \verb{mapping =} specifications in the arguments are optional 60 | (and are often omitted in practice), so long as the data and the mapping 61 | values are passed into the function in the right order. 
In the examples 62 | below, however, they are left in place for clarity. 63 | } 64 | \examples{ 65 | library(ggplot2) 66 | data(pasilla) 67 | pasilla \%>\% 68 | ggplot(aes(.sample, counts)) + 69 | geom_boxplot() 70 | 71 | } 72 | \seealso{ 73 | The \href{https://ggplot2-book.org/getting-started}{first steps chapter} of the online ggplot2 book. 74 | } 75 | -------------------------------------------------------------------------------- /man/group_by.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{group_by} 4 | \alias{group_by} 5 | \alias{group_by.SummarizedExperiment} 6 | \title{Group by one or more variables} 7 | \usage{ 8 | \method{group_by}{SummarizedExperiment}(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{...}{In \code{group_by()}, variables or computations to group by. 16 | Computations are always done on the ungrouped data frame. 17 | To perform computations on the grouped data, you need to use 18 | a separate \code{mutate()} step before the \code{group_by()}. 19 | Computations are not allowed in \code{nest_by()}. 20 | In \code{ungroup()}, variables to remove from the grouping.} 21 | 22 | \item{.add}{When \code{FALSE}, the default, \code{group_by()} will 23 | override existing groups. To add to the existing groups, use 24 | \code{.add = TRUE}. 25 | 26 | This argument was previously called \code{add}, but that prevented 27 | creating a new grouping variable called \code{add}, and conflicts with 28 | our naming conventions.} 29 | 30 | \item{.drop}{Drop groups formed by factor levels that don't appear in the 31 | data? 
The default is \code{TRUE} except when \code{.data} has been previously 32 | grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.} 33 | } 34 | \value{ 35 | A grouped data frame with class \code{\link[dplyr]{grouped_df}}, 36 | unless the combination of \code{...} and \code{.add} yields an empty set of 37 | grouping columns, in which case a tibble will be returned. 38 | } 39 | \description{ 40 | Most data operations are done on groups defined by variables. 41 | \code{group_by()} takes an existing tbl and converts it into a grouped tbl 42 | where operations are performed "by group". \code{ungroup()} removes grouping. 43 | } 44 | \section{Methods}{ 45 | 46 | 47 | These functions are \strong{generic}s, which means that packages can provide 48 | implementations (methods) for other classes. See the documentation of 49 | individual methods for extra arguments and differences in behaviour. 50 | 51 | Methods available in currently loaded packages: 52 | \itemize{ 53 | \item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}. 54 | \item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}. 55 | } 56 | 57 | } 58 | 59 | \section{Ordering}{ 60 | 61 | 62 | Currently, \code{group_by()} internally orders the groups in ascending order. This 63 | results in ordered output from functions that aggregate groups, such as 64 | \code{\link[dplyr:summarise]{summarise()}}. 65 | 66 | When used as grouping columns, character vectors are ordered in the C locale 67 | for performance and reproducibility across R sessions. If the resulting 68 | ordering of your grouped operation matters and is dependent on the locale, 69 | you should follow up the grouped operation with an explicit call to 70 | \code{\link[dplyr:arrange]{arrange()}} and set the \code{.locale} argument. For example: 71 | 72 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{data \%>\% 73 | group_by(chr) \%>\% 74 | summarise(avg = mean(x)) \%>\% 75 | arrange(chr, .locale = "en") 76 | }\if{html}{\out{
</div>}} 77 | 78 | This is often useful as a preliminary step before generating content intended 79 | for humans, such as an HTML table. 80 | \subsection{Legacy behavior}{ 81 | 82 | Prior to dplyr 1.1.0, character vector grouping columns were ordered in the 83 | system locale. If you need to temporarily revert to this behavior, you can 84 | set the global option \code{dplyr.legacy_locale} to \code{TRUE}, but this should be 85 | used sparingly and you should expect this option to be removed in a future 86 | version of dplyr. It is better to update existing code to explicitly call 87 | \code{arrange(.locale = )} instead. Note that setting \code{dplyr.legacy_locale} will 88 | also force calls to \code{\link[dplyr:arrange]{arrange()}} to use the system locale. 89 | } 90 | 91 | } 92 | 93 | \examples{ 94 | data(pasilla) 95 | pasilla |> group_by(.sample) 96 | 97 | } 98 | \seealso{ 99 | Other grouping functions: 100 | \code{\link[dplyr]{group_map}()}, 101 | \code{\link[dplyr]{group_nest}()}, 102 | \code{\link[dplyr]{group_split}()}, 103 | \code{\link[dplyr]{group_trim}()} 104 | } 105 | -------------------------------------------------------------------------------- /man/group_split.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{group_split} 4 | \alias{group_split} 5 | \alias{group_split.SummarizedExperiment} 6 | \title{Split data frame by groups} 7 | \usage{ 8 | \method{group_split}{SummarizedExperiment}(.tbl, ..., .keep = TRUE) 9 | } 10 | \arguments{ 11 | \item{.tbl}{A tbl.} 12 | 13 | \item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification, 14 | forwarded to \code{\link[dplyr:group_by]{group_by()}}.} 15 | 16 | \item{.keep}{Should the grouping columns be kept?} 17 | } 18 | \value{ 19 | A list of tibbles. 
Each tibble contains the rows of \code{.tbl} for the 20 | associated group and all the columns, including the grouping variables. 21 | Note that this returns a \link[vctrs:list_of]{list_of} which is slightly 22 | stricter than a simple list but is useful for representing lists where 23 | every element has the same type. 24 | } 25 | \description{ 26 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 27 | 28 | \code{\link[dplyr:group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but: 29 | \itemize{ 30 | \item It uses the grouping structure from \code{\link[dplyr:group_by]{group_by()}} and therefore is subject 31 | to the data mask 32 | \item It does not name the elements of the list based on the grouping as this 33 | only works well for a single character grouping variable. Instead, 34 | use \code{\link[dplyr:group_keys]{group_keys()}} to access a data frame that defines the groups. 35 | } 36 | 37 | \code{group_split()} is primarily designed to work with grouped data frames. 38 | You can pass \code{...} to group and split an ungrouped data frame, but this 39 | is generally not very useful as you won't have easy access to the group 40 | metadata. 41 | } 42 | \section{Lifecycle}{ 43 | 44 | 45 | \code{group_split()} is not stable because you can achieve very similar results by 46 | manipulating the nested column returned from 47 | \code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all 48 | within a single data structure. \code{group_split()} may be deprecated in the 49 | future.
50 | 51 | } 52 | 53 | \examples{ 54 | data(pasilla, package = "tidySummarizedExperiment") 55 | pasilla |> group_split(condition) 56 | pasilla |> group_split(counts > 0) 57 | pasilla |> group_split(condition, counts > 0) 58 | 59 | } 60 | \seealso{ 61 | Other grouping functions: 62 | \code{\link[dplyr]{group_by}()}, 63 | \code{\link[dplyr]{group_map}()}, 64 | \code{\link[dplyr]{group_nest}()}, 65 | \code{\link[dplyr]{group_trim}()} 66 | } 67 | -------------------------------------------------------------------------------- /man/inner_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{inner_join} 4 | \alias{inner_join} 5 | \alias{inner_join.SummarizedExperiment} 6 | \title{Mutating joins} 7 | \usage{ 8 | \method{inner_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) 9 | } 10 | \arguments{ 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character 16 | vector of variables to join by. 17 | 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all 19 | variables in common across \code{x} and \code{y}. A message lists the variables so 20 | that you can check they're correct; suppress the message by supplying \code{by} 21 | explicitly. 22 | 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. 25 | 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with 27 | multiple expressions. 
For example, \code{join_by(a == b, c == d)} will match 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like 30 | \code{join_by(a, c)}. 31 | 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on 34 | these types of joins. 35 | 36 | For simple equality joins, you can alternatively specify a character vector 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. 40 | 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see 42 | \code{\link[dplyr:cross_join]{cross_join()}}.} 43 | 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source, 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the 46 | same src as \code{x}. This allows you to join tables across srcs, but 47 | it is a potentially expensive operation so you must opt into it.} 48 | 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and 50 | \code{y}, these suffixes will be added to the output to disambiguate them. 51 | Should be a character vector of length 2.} 52 | 53 | \item{...}{Other parameters passed onto methods.} 54 | } 55 | \value{ 56 | An object of the same type as \code{x} (including the same groups). The order of 57 | the rows and columns of \code{x} is preserved as much as possible. The output has 58 | the following properties: 59 | \itemize{ 60 | \item The rows are affected by the join type. 61 | \itemize{ 62 | \item \code{inner_join()} returns matched \code{x} rows. 63 | \item \code{left_join()} returns all \code{x} rows.
64 | \item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. 65 | \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. 66 | } 67 | \item Output columns include all columns from \code{x} and all non-key columns from 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have 71 | the same name, \code{suffix}es are added to disambiguate these as well. 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their 73 | common type between \code{x} and \code{y}. 74 | } 75 | } 76 | \description{ 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on 78 | the keys. There are four mutating joins: the inner join, and the three outer 79 | joins. 80 | \subsection{Inner join}{ 81 | 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key 83 | in \code{y}. 84 | 85 | The most important property of an inner join is that unmatched rows in either 86 | input are not included in the result. This means that generally inner joins 87 | are not appropriate in most analyses, because it is too easy to lose 88 | observations. 89 | } 90 | 91 | \subsection{Outer joins}{ 92 | 93 | The three outer joins keep observations that appear in at least one of the 94 | data frames: 95 | \itemize{ 96 | \item A \code{left_join()} keeps all observations in \code{x}. 97 | \item A \code{right_join()} keeps all observations in \code{y}. 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. 99 | } 100 | } 101 | } 102 | \section{Many-to-many relationships}{ 103 | 104 | 105 | 106 | By default, dplyr guards against many-to-many relationships in equality joins 107 | by throwing a warning.
These occur when both of the following are true: 108 | \itemize{ 109 | \item A row in \code{x} matches multiple rows in \code{y}. 110 | \item A row in \code{y} matches multiple rows in \code{x}. 111 | } 112 | 113 | This is typically surprising, as most joins involve a relationship of 114 | one-to-one, one-to-many, or many-to-one, and is often the result of an 115 | improperly specified join. Many-to-many relationships are particularly 116 | problematic because they can result in a Cartesian explosion of the number of 117 | rows returned from the join. 118 | 119 | If a many-to-many relationship is expected, silence this warning by 120 | explicitly setting \code{relationship = "many-to-many"}. 121 | 122 | In production code, it is best to preemptively set \code{relationship} to whatever 123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this 124 | forces an error to occur immediately if the data doesn't align with your 125 | expectations. 126 | 127 | Inequality joins typically result in many-to-many relationships by nature, so 128 | they don't warn on them by default, but you should still take extra care when 129 | specifying an inequality join, because they also have the capability to 130 | return a large number of rows. 131 | 132 | Rolling joins don't warn on many-to-many relationships either, but many 133 | rolling joins follow a many-to-one relationship, so it is often useful to 134 | set \code{relationship = "many-to-one"} to enforce this. 135 | 136 | Note that in SQL, most database providers won't let you specify a 137 | many-to-many relationship between two tables, instead requiring that you 138 | create a third \emph{junction table} that results in two one-to-many relationships 139 | instead. 140 | 141 | } 142 | 143 | \section{Methods}{ 144 | 145 | 146 | These functions are \strong{generic}s, which means that packages can provide 147 | implementations (methods) for other classes. 
See the documentation of 148 | individual methods for extra arguments and differences in behaviour. 149 | 150 | Methods available in currently loaded packages: 151 | \itemize{ 152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. 153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. 154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. 155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 156 | } 157 | 158 | } 159 | 160 | \examples{ 161 | data(pasilla) 162 | 163 | tt <- pasilla 164 | tt |> inner_join(tt |> 165 | distinct(condition) |> 166 | mutate(new_column=1:2) |> 167 | slice(1)) 168 | 169 | } 170 | \seealso{ 171 | Other joins: 172 | \code{\link[dplyr]{cross_join}()}, 173 | \code{\link[dplyr]{filter-joins}}, 174 | \code{\link[dplyr]{nest_join}()} 175 | } 176 | -------------------------------------------------------------------------------- /man/left_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{left_join} 4 | \alias{left_join} 5 | \alias{left_join.SummarizedExperiment} 6 | \title{Mutating joins} 7 | \usage{ 8 | \method{left_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) 9 | } 10 | \arguments{ 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character 16 | vector of variables to join by. 17 | 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all 19 | variables in common across \code{x} and \code{y}. 
A message lists the variables so 20 | that you can check they're correct; suppress the message by supplying \code{by} 21 | explicitly. 22 | 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. 25 | 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with 27 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like 30 | \code{join_by(a, c)}. 31 | 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on 34 | these types of joins. 35 | 36 | For simple equality joins, you can alternatively specify a character vector 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. 40 | 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see 42 | \code{\link[dplyr:cross_join]{cross_join()}}.} 43 | 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source, 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the 46 | same src as \code{x}. This allows you to join tables across srcs, but 47 | it is a potentially expensive operation so you must opt into it.} 48 | 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and 50 | \code{y}, these suffixes will be added to the output to disambiguate them. 
51 | Should be a character vector of length 2.} 52 | 53 | \item{...}{Other parameters passed onto methods.} 54 | } 55 | \value{ 56 | An object of the same type as \code{x} (including the same groups). The order of 57 | the rows and columns of \code{x} is preserved as much as possible. The output has 58 | the following properties: 59 | \itemize{ 60 | \item The rows are affected by the join type. 61 | \itemize{ 62 | \item \code{inner_join()} returns matched \code{x} rows. 63 | \item \code{left_join()} returns all \code{x} rows. 64 | \item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. 65 | \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. 66 | } 67 | \item Output columns include all columns from \code{x} and all non-key columns from 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have 71 | the same name, \code{suffix}es are added to disambiguate these as well. 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their 73 | common type between \code{x} and \code{y}. 74 | } 75 | } 76 | \description{ 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on 78 | the keys. There are four mutating joins: the inner join, and the three outer 79 | joins. 80 | \subsection{Inner join}{ 81 | 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key 83 | in \code{y}. 84 | 85 | The most important property of an inner join is that unmatched rows in either 86 | input are not included in the result. This means that generally inner joins 87 | are not appropriate in most analyses, because it is too easy to lose 88 | observations.
89 | } 90 | 91 | \subsection{Outer joins}{ 92 | 93 | The three outer joins keep observations that appear in at least one of the 94 | data frames: 95 | \itemize{ 96 | \item A \code{left_join()} keeps all observations in \code{x}. 97 | \item A \code{right_join()} keeps all observations in \code{y}. 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. 99 | } 100 | } 101 | } 102 | \section{Many-to-many relationships}{ 103 | 104 | 105 | 106 | By default, dplyr guards against many-to-many relationships in equality joins 107 | by throwing a warning. These occur when both of the following are true: 108 | \itemize{ 109 | \item A row in \code{x} matches multiple rows in \code{y}. 110 | \item A row in \code{y} matches multiple rows in \code{x}. 111 | } 112 | 113 | This is typically surprising, as most joins involve a relationship of 114 | one-to-one, one-to-many, or many-to-one, and is often the result of an 115 | improperly specified join. Many-to-many relationships are particularly 116 | problematic because they can result in a Cartesian explosion of the number of 117 | rows returned from the join. 118 | 119 | If a many-to-many relationship is expected, silence this warning by 120 | explicitly setting \code{relationship = "many-to-many"}. 121 | 122 | In production code, it is best to preemptively set \code{relationship} to whatever 123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this 124 | forces an error to occur immediately if the data doesn't align with your 125 | expectations. 126 | 127 | Inequality joins typically result in many-to-many relationships by nature, so 128 | they don't warn on them by default, but you should still take extra care when 129 | specifying an inequality join, because they also have the capability to 130 | return a large number of rows. 
131 | 132 | Rolling joins don't warn on many-to-many relationships either, but many 133 | rolling joins follow a many-to-one relationship, so it is often useful to 134 | set \code{relationship = "many-to-one"} to enforce this. 135 | 136 | Note that in SQL, most database providers won't let you specify a 137 | many-to-many relationship between two tables, instead requiring that you 138 | create a third \emph{junction table} that results in two one-to-many relationships 139 | instead. 140 | 141 | } 142 | 143 | \section{Methods}{ 144 | 145 | 146 | These functions are \strong{generic}s, which means that packages can provide 147 | implementations (methods) for other classes. See the documentation of 148 | individual methods for extra arguments and differences in behaviour. 149 | 150 | Methods available in currently loaded packages: 151 | \itemize{ 152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. 153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. 154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. 155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 
156 | } 157 | 158 | } 159 | 160 | \examples{ 161 | data(pasilla) 162 | 163 | tt <- pasilla 164 | tt |> left_join(tt |> 165 | distinct(condition) |> 166 | mutate(new_column=1:2)) 167 | 168 | } 169 | \seealso{ 170 | Other joins: 171 | \code{\link[dplyr]{cross_join}()}, 172 | \code{\link[dplyr]{filter-joins}}, 173 | \code{\link[dplyr]{nest_join}()} 174 | } 175 | -------------------------------------------------------------------------------- /man/mutate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{mutate} 4 | \alias{mutate} 5 | \alias{mutate.SummarizedExperiment} 6 | \title{Create, modify, and delete columns} 7 | \usage{ 8 | \method{mutate}{SummarizedExperiment}(.data, ...) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. 16 | The name gives the name of the column in the output. 17 | 18 | The value can be: 19 | \itemize{ 20 | \item A vector of length 1, which will be recycled to the correct length. 21 | \item A vector the same length as the current group (or the whole data frame 22 | if ungrouped). 23 | \item \code{NULL}, to remove the column. 24 | \item A data frame or tibble, to create multiple columns in the output. 25 | }} 26 | } 27 | \value{ 28 | An object of the same type as \code{.data}. The output has the following 29 | properties: 30 | \itemize{ 31 | \item Columns from \code{.data} will be preserved according to the \code{.keep} argument. 32 | \item Existing columns that are modified by \code{...} will always be returned in 33 | their original location. 
34 | \item New columns created through \code{...} will be placed according to the 35 | \code{.before} and \code{.after} arguments. 36 | \item The number of rows is not affected. 37 | \item Columns given the value \code{NULL} will be removed. 38 | \item Groups will be recomputed if a grouping variable is mutated. 39 | \item Data frame attributes are preserved. 40 | } 41 | } 42 | \description{ 43 | \code{mutate()} creates new columns that are functions of existing variables. 44 | It can also modify (if the name is the same as an existing 45 | column) and delete columns (by setting their value to \code{NULL}). 46 | } 47 | \section{Useful mutate functions}{ 48 | 49 | 50 | \itemize{ 51 | \item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings 52 | \item \code{\link[dplyr:lead]{lead()}}, \code{\link[dplyr:lag]{lag()}} 53 | \item \code{\link[dplyr:dense_rank]{dense_rank()}}, \code{\link[dplyr:min_rank]{min_rank()}}, \code{\link[dplyr:percent_rank]{percent_rank()}}, \code{\link[dplyr:row_number]{row_number()}}, 54 | \code{\link[dplyr:cume_dist]{cume_dist()}}, \code{\link[dplyr:ntile]{ntile()}} 55 | \item \code{\link[=cumsum]{cumsum()}}, \code{\link[dplyr:cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[dplyr:cumany]{cumany()}}, \code{\link[dplyr:cumall]{cumall()}} 56 | \item \code{\link[dplyr:na_if]{na_if()}}, \code{\link[dplyr:coalesce]{coalesce()}} 57 | \item \code{\link[dplyr:if_else]{if_else()}}, \code{\link[dplyr:recode]{recode()}}, \code{\link[dplyr:case_when]{case_when()}} 58 | } 59 | 60 | } 61 | 62 | \section{Grouped tibbles}{ 63 | 64 | 65 | 66 | Because mutating expressions are computed within groups, they may 67 | yield different results on grouped tibbles. This will be the case 68 | as soon as an aggregating, lagging, or ranking function is 69 | involved. Compare this ungrouped mutate: 70 | 71 | \if{html}{\out{
}}\preformatted{starwars \%>\% 72 | select(name, mass, species) \%>\% 73 | mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) 74 | }\if{html}{\out{
}} 75 | 76 | With the grouped equivalent: 77 | 78 | \if{html}{\out{
}}\preformatted{starwars \%>\% 79 | select(name, mass, species) \%>\% 80 | group_by(species) \%>\% 81 | mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) 82 | }\if{html}{\out{
}} 83 | 84 | The former normalises \code{mass} by the global average whereas the 85 | latter normalises by the averages within species levels. 86 | 87 | } 88 | 89 | \section{Methods}{ 90 | 91 | 92 | This function is a \strong{generic}, which means that packages can provide 93 | implementations (methods) for other classes. See the documentation of 94 | individual methods for extra arguments and differences in behaviour. 95 | 96 | Methods available in currently loaded packages: 97 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. 98 | 99 | } 100 | 101 | \examples{ 102 | data(pasilla) 103 | pasilla |> mutate(logcounts=log2(counts)) 104 | 105 | } 106 | \seealso{ 107 | Other single table verbs: 108 | \code{\link{rename}()}, 109 | \code{\link{slice}()}, 110 | \code{\link{summarise}()} 111 | } 112 | \concept{single table verbs} 113 | -------------------------------------------------------------------------------- /man/mutate_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{mutate_features} 4 | \alias{mutate_features} 5 | \title{Mutate features} 6 | \usage{ 7 | mutate_features(.data, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{a SummarizedExperiment} 11 | 12 | \item{...}{extra arguments passed to dplyr::mutate} 13 | } 14 | \value{ 15 | a SummarizedExperiment with modified rowData 16 | } 17 | \description{ 18 | Allows mutate call on features (rowData) 19 | of a SummarizedExperiment 20 | } 21 | -------------------------------------------------------------------------------- /man/mutate_samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{mutate_samples} 4 | \alias{mutate_samples} 5 | \title{Mutate samples} 6 | \usage{ 7 | mutate_samples(.data, ...) 
8 | } 9 | \arguments{ 10 | \item{.data}{a SummarizedExperiment} 11 | 12 | \item{...}{extra arguments passed to dplyr::mutate} 13 | } 14 | \value{ 15 | a SummarizedExperiment with modified colData 16 | } 17 | \description{ 18 | Allows mutate call on samples (colData) 19 | of a SummarizedExperiment 20 | } 21 | -------------------------------------------------------------------------------- /man/nest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{nest} 4 | \alias{nest} 5 | \alias{nest.SummarizedExperiment} 6 | \title{Nest rows into a list-column of data frames} 7 | \usage{ 8 | \method{nest}{SummarizedExperiment}(.data, ..., .names_sep = NULL) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame.} 12 | 13 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest; these will 14 | appear in the inner data frames. 15 | 16 | Specified using name-variable pairs of the form 17 | \code{new_col = c(col1, col2, col3)}. The right hand side can be any valid 18 | tidyselect expression. 19 | 20 | If not supplied, then \code{...} is derived as all columns \emph{not} selected by 21 | \code{.by}, and will use the column name from \code{.key}. 22 | 23 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: 24 | previously you could write \code{df \%>\% nest(x, y, z)}. 25 | Convert to \code{df \%>\% nest(data = c(x, y, z))}.} 26 | 27 | \item{.names_sep}{If \code{NULL}, the default, the inner names will come from 28 | the former outer names. If a string, the new inner names will use the 29 | outer names with \code{names_sep} automatically stripped. 
This makes 30 | \code{names_sep} roughly symmetric between nesting and unnesting.} 31 | } 32 | \value{ 33 | \code{tidySummarizedExperiment_nested} 34 | } 35 | \description{ 36 | Nesting creates a list-column of data frames; unnesting flattens it back out 37 | into regular columns. Nesting is implicitly a summarising operation: you 38 | get one row for each group defined by the non-nested columns. This is useful 39 | in conjunction with other summaries that work with whole datasets, most 40 | notably models. 41 | 42 | Learn more in \code{vignette("nest")}. 43 | } 44 | \details{ 45 | If neither \code{...} nor \code{.by} are supplied, \code{nest()} will nest all variables, 46 | and will use the column name supplied through \code{.key}. 47 | } 48 | \section{New syntax}{ 49 | 50 | 51 | tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's 52 | designed to be more similar to other functions. Converting to the new syntax 53 | should be straightforward (guided by the message you'll receive) but if 54 | you just need to run an old analysis, you can easily revert to the previous 55 | behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows: 56 | 57 | \if{html}{\out{
}}\preformatted{library(tidyr) 58 | nest <- nest_legacy 59 | unnest <- unnest_legacy 60 | }\if{html}{\out{
}} 61 | 62 | } 63 | 64 | \section{Grouped data frames}{ 65 | 66 | 67 | \code{df \%>\% nest(data = c(x, y))} specifies the columns to be nested; i.e. the 68 | columns that will appear in the inner data frame. \code{df \%>\% nest(.by = c(x, y))} specifies the columns to nest \emph{by}; i.e. the columns that will remain in 69 | the outer data frame. An alternative way to achieve the latter is to \code{nest()} 70 | a grouped data frame created by \code{\link[dplyr:group_by]{dplyr::group_by()}}. The grouping variables 71 | remain in the outer data frame and the others are nested. The result 72 | preserves the grouping of the input. 73 | 74 | Variables supplied to \code{nest()} will override grouping variables so that 75 | \code{df \%>\% group_by(x, y) \%>\% nest(data = !z)} will be equivalent to 76 | \code{df \%>\% nest(data = !z)}. 77 | 78 | You can't supply \code{.by} with a grouped data frame, as the groups already 79 | represent what you are nesting by. 80 | 81 | } 82 | 83 | \examples{ 84 | tidySummarizedExperiment::pasilla |> 85 | nest(data=-condition) 86 | 87 | } 88 | -------------------------------------------------------------------------------- /man/pasilla.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{pasilla} 5 | \alias{pasilla} 6 | \title{Read counts of RNA-seq samples of Pasilla knock-down by Brooks et al.} 7 | \format{ 8 | containing 14599 features and 7 biological replicates. 9 | } 10 | \source{ 11 | \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html} 12 | } 13 | \usage{ 14 | data(pasilla) 15 | } 16 | \description{ 17 | A SummarizedExperiment dataset containing 18 | the transcriptome information for Drosophila melanogaster.
19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \arguments{ 10 | \item{lhs}{A value or the magrittr placeholder.} 11 | 12 | \item{rhs}{A function call using the magrittr semantics.} 13 | } 14 | \value{ 15 | The result of calling \code{rhs(lhs)}. 16 | } 17 | \description{ 18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 19 | } 20 | \examples{ 21 | 22 | library(magrittr) 23 | 1 \%>\% sum(2) 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/pivot_longer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{pivot_longer} 4 | \alias{pivot_longer} 5 | \alias{pivot_longer.SummarizedExperiment} 6 | \title{Pivot data from wide to long} 7 | \usage{ 8 | \method{pivot_longer}{SummarizedExperiment}( 9 | data, 10 | cols, 11 | ..., 12 | cols_vary = "fastest", 13 | names_to = "name", 14 | names_prefix = NULL, 15 | names_sep = NULL, 16 | names_pattern = NULL, 17 | names_ptypes = NULL, 18 | names_transform = NULL, 19 | names_repair = "check_unique", 20 | values_to = "value", 21 | values_drop_na = FALSE, 22 | values_ptypes = NULL, 23 | values_transform = NULL 24 | ) 25 | } 26 | \arguments{ 27 | \item{data}{A data frame to pivot.} 28 | 29 | \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into 30 | longer format.} 31 | 32 | \item{...}{Additional arguments passed on to methods.} 33 | 34 | \item{cols_vary}{When pivoting \code{cols} into longer format, how should the 35 
| output rows be arranged relative to their original row number? 36 | \itemize{ 37 | \item \code{"fastest"}, the default, keeps individual rows from \code{cols} close 38 | together in the output. This often produces intuitively ordered output 39 | when you have at least one key column from \code{data} that is not involved in 40 | the pivoting process. 41 | \item \code{"slowest"} keeps individual columns from \code{cols} close together in the 42 | output. This often produces intuitively ordered output when you utilize 43 | all of the columns from \code{data} in the pivoting process. 44 | }} 45 | 46 | \item{names_to}{A character vector specifying the new column or columns to 47 | create from the information stored in the column names of \code{data} specified 48 | by \code{cols}. 49 | \itemize{ 50 | \item If length 0, or if \code{NULL} is supplied, no columns will be created. 51 | \item If length 1, a single column will be created which will contain the 52 | column names specified by \code{cols}. 53 | \item If length >1, multiple columns will be created. In this case, one of 54 | \code{names_sep} or \code{names_pattern} must be supplied to specify how the 55 | column names should be split. There are also two additional character 56 | values you can take advantage of: 57 | \itemize{ 58 | \item \code{NA} will discard the corresponding component of the column name. 59 | \item \code{".value"} indicates that the corresponding component of the column 60 | name defines the name of the output column containing the cell values, 61 | overriding \code{values_to} entirely. 62 | } 63 | }} 64 | 65 | \item{names_prefix}{A regular expression used to remove matching text 66 | from the start of each variable name.} 67 | 68 | \item{names_sep, names_pattern}{If \code{names_to} contains multiple values, 69 | these arguments control how the column name is broken up. 
70 | 71 | \code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either 72 | be a numeric vector (specifying positions to break on), or a single string 73 | (specifying a regular expression to split on). 74 | 75 | \code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular 76 | expression containing matching groups (\verb{()}). 77 | 78 | If these arguments do not give you enough control, use 79 | \code{pivot_longer_spec()} to create a spec object and process manually as 80 | needed.} 81 | 82 | \item{names_ptypes, values_ptypes}{Optionally, a list of column name-prototype 83 | pairs. Alternatively, a single empty prototype can be supplied, which will 84 | be applied to all columns. A prototype (or ptype for short) is a 85 | zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type, 86 | class, and attributes of a vector. Use these arguments if you want to 87 | confirm that the created columns are the types that you expect. Note that 88 | if you want to change (instead of confirm) the types of specific columns, 89 | you should use \code{names_transform} or \code{values_transform} instead.} 90 | 91 | \item{names_transform, values_transform}{Optionally, a list of column 92 | name-function pairs. Alternatively, a single function can be supplied, 93 | which will be applied to all columns. Use these arguments if you need to 94 | change the types of specific columns. For example, \code{names_transform = list(week = as.integer)} would convert a character variable called \code{week} 95 | to an integer. 96 | 97 | If not specified, the type of the columns generated from \code{names_to} will 98 | be character, and the type of the variables generated from \code{values_to} 99 | will be the common type of the input columns used to generate them.} 100 | 101 | \item{names_repair}{What happens if the output has invalid column names? 
102 | The default, \code{"check_unique"} is to error if the columns are duplicated. 103 | Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to 104 | de-duplicate by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} 105 | for more options.} 106 | 107 | \item{values_to}{A string specifying the name of the column to create 108 | from the data stored in cell values. If \code{names_to} is a character 109 | containing the special \code{.value} sentinel, this value will be ignored, 110 | and the name of the value column will be derived from part of the 111 | existing column names.} 112 | 113 | \item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA}s 114 | in the \code{values_to} column. This effectively converts explicit missing values 115 | to implicit missing values, and should generally be used only when missing 116 | values in \code{data} were created by its structure.} 117 | } 118 | \value{ 119 | \code{tidySummarizedExperiment} 120 | } 121 | \description{ 122 | \code{pivot_longer()} "lengthens" data, increasing the number of rows and 123 | decreasing the number of columns. The inverse transformation is 124 | \code{\link[tidyr:pivot_wider]{pivot_wider()}}. 125 | 126 | Learn more in \code{vignette("pivot")}. 127 | } 128 | \details{ 129 | \code{pivot_longer()} is an updated approach to \code{\link[tidyr:gather]{gather()}}, designed to be both 130 | simpler to use and to handle more use cases. We recommend you use 131 | \code{pivot_longer()} for new code; \code{gather()} isn't going away but is no longer 132 | under active development. 
133 | } 134 | \examples{ 135 | # See vignette("pivot") for examples and explanation 136 | library(dplyr) 137 | tidySummarizedExperiment::pasilla \%>\% 138 | pivot_longer(c(condition, type), 139 | names_to="name", values_to="value") 140 | 141 | } 142 | -------------------------------------------------------------------------------- /man/pivot_wider.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{pivot_wider} 4 | \alias{pivot_wider} 5 | \alias{pivot_wider.SummarizedExperiment} 6 | \title{Pivot data from long to wide} 7 | \usage{ 8 | \method{pivot_wider}{SummarizedExperiment}( 9 | data, 10 | ..., 11 | id_cols = NULL, 12 | id_expand = FALSE, 13 | names_from = name, 14 | names_prefix = "", 15 | names_sep = "_", 16 | names_glue = NULL, 17 | names_sort = FALSE, 18 | names_vary = "fastest", 19 | names_expand = FALSE, 20 | names_repair = "check_unique", 21 | values_from = value, 22 | values_fill = NULL, 23 | values_fn = NULL, 24 | unused_fn = NULL 25 | ) 26 | } 27 | \arguments{ 28 | \item{data}{A data frame to pivot.} 29 | 30 | \item{...}{Additional arguments passed on to methods.} 31 | 32 | \item{id_cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A set of columns that 33 | uniquely identify each observation. Typically used when you have 34 | redundant variables, i.e. variables whose values are perfectly correlated 35 | with existing variables. 36 | 37 | Defaults to all columns in \code{data} except for the columns specified through 38 | \code{names_from} and \code{values_from}. If a tidyselect expression is supplied, it 39 | will be evaluated on \code{data} after removing the columns specified through 40 | \code{names_from} and \code{values_from}.} 41 | 42 | \item{id_expand}{Should the values in the \code{id_cols} columns be expanded by 43 | \code{\link[tidyr:expand]{expand()}} before pivoting? 
This results in more rows, the output will 44 | contain a complete expansion of all possible values in \code{id_cols}. Implicit 45 | factor levels that aren't represented in the data will become explicit. 46 | Additionally, the row values corresponding to the expanded \code{id_cols} will 47 | be sorted.} 48 | 49 | \item{names_from, values_from}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A pair of 50 | arguments describing which column (or columns) to get the name of the 51 | output column (\code{names_from}), and which column (or columns) to get the 52 | cell values from (\code{values_from}). 53 | 54 | If \code{values_from} contains multiple values, the value will be added to the 55 | front of the output column.} 56 | 57 | \item{names_prefix}{String added to the start of every variable name. This is 58 | particularly useful if \code{names_from} is a numeric vector and you want to 59 | create syntactic variable names.} 60 | 61 | \item{names_sep}{If \code{names_from} or \code{values_from} contains multiple 62 | variables, this will be used to join their values together into a single 63 | string to use as a column name.} 64 | 65 | \item{names_glue}{Instead of \code{names_sep} and \code{names_prefix}, you can supply 66 | a glue specification that uses the \code{names_from} columns (and special 67 | \code{.value}) to create custom column names.} 68 | 69 | \item{names_sort}{Should the column names be sorted? If \code{FALSE}, the default, 70 | column names are ordered by first appearance.} 71 | 72 | \item{names_vary}{When \code{names_from} identifies a column (or columns) with 73 | multiple unique values, and multiple \code{values_from} columns are provided, 74 | in what order should the resulting column names be combined? 75 | \itemize{ 76 | \item \code{"fastest"} varies \code{names_from} values fastest, resulting in a column 77 | naming scheme of the form: \verb{value1_name1, value1_name2, value2_name1, value2_name2}. This is the default. 
78 | \item \code{"slowest"} varies \code{names_from} values slowest, resulting in a column 79 | naming scheme of the form: \verb{value1_name1, value2_name1, value1_name2, value2_name2}. 80 | }} 81 | 82 | \item{names_expand}{Should the values in the \code{names_from} columns be expanded 83 | by \code{\link[tidyr:expand]{expand()}} before pivoting? This results in more columns; the output 84 | will contain column names corresponding to a complete expansion of all 85 | possible values in \code{names_from}. Implicit factor levels that aren't 86 | represented in the data will become explicit. Additionally, the column 87 | names will be sorted, identical to what \code{names_sort} would produce.} 88 | 89 | \item{names_repair}{What happens if the output has invalid column names? 90 | The default, \code{"check_unique"} is to error if the columns are duplicated. 91 | Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to 92 | de-duplicate by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} 93 | for more options.} 94 | 95 | \item{values_fill}{Optionally, a (scalar) value that specifies what each 96 | \code{value} should be filled in with when missing. 97 | 98 | This can be a named list if you want to apply different fill values to 99 | different value columns.} 100 | 101 | \item{values_fn}{Optionally, a function applied to the value in each cell 102 | in the output. You will typically use this when the combination of 103 | \code{id_cols} and \code{names_from} columns does not uniquely identify an 104 | observation. 105 | 106 | This can be a named list if you want to apply different aggregations 107 | to different \code{values_from} columns.} 108 | 109 | \item{unused_fn}{Optionally, a function applied to summarize the values from 110 | the unused columns (i.e. columns not identified by \code{id_cols}, 111 | \code{names_from}, or \code{values_from}). 112 | 113 | The default drops all unused columns from the result. 
114 | 115 | This can be a named list if you want to apply different aggregations 116 | to different unused columns. 117 | 118 | \code{id_cols} must be supplied for \code{unused_fn} to be useful, since otherwise 119 | all unspecified columns will be considered \code{id_cols}. 120 | 121 | This is similar to grouping by the \code{id_cols} then summarizing the 122 | unused columns using \code{unused_fn}.} 123 | } 124 | \value{ 125 | \code{tidySummarizedExperiment} 126 | } 127 | \description{ 128 | \code{pivot_wider()} "widens" data, increasing the number of columns and 129 | decreasing the number of rows. The inverse transformation is 130 | \code{\link[tidyr:pivot_longer]{pivot_longer()}}. 131 | 132 | Learn more in \code{vignette("pivot")}. 133 | } 134 | \details{ 135 | \code{pivot_wider()} is an updated approach to \code{\link[tidyr:spread]{spread()}}, designed to be both 136 | simpler to use and to handle more use cases. We recommend you use 137 | \code{pivot_wider()} for new code; \code{spread()} isn't going away but is no longer 138 | under active development. 139 | } 140 | \examples{ 141 | # See vignette("pivot") for examples and explanation 142 | library(dplyr) 143 | tidySummarizedExperiment::pasilla \%>\% 144 | pivot_wider(names_from=feature, values_from=counts) 145 | 146 | } 147 | \seealso{ 148 | \code{\link[tidyr:pivot_wider_spec]{pivot_wider_spec()}} to pivot "by hand" with a data frame that 149 | defines a pivoting specification. 
150 | } 151 | -------------------------------------------------------------------------------- /man/plot_ly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotly_methods.R 3 | \name{plot_ly} 4 | \alias{plot_ly} 5 | \alias{plot_ly.tbl_df} 6 | \alias{plot_ly.SummarizedExperiment} 7 | \title{Initiate a plotly visualization} 8 | \usage{ 9 | \method{plot_ly}{tbl_df}( 10 | data = data.frame(), 11 | ..., 12 | type = NULL, 13 | name = NULL, 14 | color = NULL, 15 | colors = NULL, 16 | alpha = NULL, 17 | stroke = NULL, 18 | strokes = NULL, 19 | alpha_stroke = 1, 20 | size = NULL, 21 | sizes = c(10, 100), 22 | span = NULL, 23 | spans = c(1, 20), 24 | symbol = NULL, 25 | symbols = NULL, 26 | linetype = NULL, 27 | linetypes = NULL, 28 | split = NULL, 29 | frame = NULL, 30 | width = NULL, 31 | height = NULL, 32 | source = "A" 33 | ) 34 | 35 | \method{plot_ly}{SummarizedExperiment}( 36 | data = data.frame(), 37 | ..., 38 | type = NULL, 39 | name = NULL, 40 | color = NULL, 41 | colors = NULL, 42 | alpha = NULL, 43 | stroke = NULL, 44 | strokes = NULL, 45 | alpha_stroke = 1, 46 | size = NULL, 47 | sizes = c(10, 100), 48 | span = NULL, 49 | spans = c(1, 20), 50 | symbol = NULL, 51 | symbols = NULL, 52 | linetype = NULL, 53 | linetypes = NULL, 54 | split = NULL, 55 | frame = NULL, 56 | width = NULL, 57 | height = NULL, 58 | source = "A" 59 | ) 60 | } 61 | \arguments{ 62 | \item{data}{A data frame (optional) or \link[crosstalk:SharedData]{crosstalk::SharedData} object.} 63 | 64 | \item{...}{Arguments (i.e., attributes) passed along to the trace \code{type}. 65 | See \code{\link[plotly:schema]{schema()}} for a list of acceptable attributes for a given trace \code{type} 66 | (by going to \code{traces} -> \code{type} -> \code{attributes}). Note that attributes 67 | provided at this level may override other arguments 68 | (e.g. 
\code{plot_ly(x = 1:10, y = 1:10, color = I("red"), marker = list(color = "blue"))}).} 69 | 70 | \item{type}{A character string specifying the trace type (e.g. \code{"scatter"}, \code{"bar"}, \code{"box"}, etc). 71 | If specified, it \emph{always} creates a trace, otherwise the function just initiates a plotly object with 'global' attributes that are passed onto downstream uses of \code{add_trace()} (or similar).} 72 | 73 | \item{name}{Values mapped to the trace's name attribute. Since a trace can 74 | only have one name, this argument acts very much like \code{split} in that it 75 | creates one trace for every unique value.} 76 | 77 | \item{color}{Values mapped to relevant 'fill-color' attribute(s) 78 | (e.g. \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor}, 79 | \href{https://plotly.com/r/reference/#scatter-marker-color}{marker.color}, 80 | \href{https://plotly.com/r/reference/#scatter-textfont-color}{textfont.color}, etc.). 81 | The mapping from data values to color codes may be controlled using 82 | \code{colors} and \code{alpha}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{color = I("red")}). 83 | Any color understood by \code{\link[grDevices:col2rgb]{grDevices::col2rgb()}} may be used in this way.} 84 | 85 | \item{colors}{Either a colorbrewer2.org palette name (e.g. "YlOrRd" or "Blues"), 86 | or a vector of colors to interpolate in hexadecimal "#RRGGBB" format, 87 | or a color interpolation function like \code{colorRamp()}.} 88 | 89 | \item{alpha}{A number between 0 and 1 specifying the alpha channel applied to \code{color}. 90 | Defaults to 0.5 when mapping to \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor} and 1 otherwise.} 91 | 92 | \item{stroke}{Similar to \code{color}, but values are mapped to relevant 'stroke-color' attribute(s) 93 | (e.g., \href{https://plotly.com/r/reference/#scatter-marker-line-color}{marker.line.color} 94 | and \href{https://plotly.com/r/reference/#scatter-line-color}{line.color} 95 | for filled polygons). 
If not specified, \code{stroke} inherits from \code{color}.} 96 | 97 | \item{strokes}{Similar to \code{colors}, but controls the \code{stroke} mapping.} 98 | 99 | \item{alpha_stroke}{Similar to \code{alpha}, but applied to \code{stroke}.} 100 | 101 | \item{size}{(Numeric) values mapped to relevant 'fill-size' attribute(s) 102 | (e.g., \href{https://plotly.com/r/reference/#scatter-marker-size}{marker.size}, 103 | \href{https://plotly.com/r/reference/#scatter-textfont-size}{textfont.size}, 104 | and \href{https://plotly.com/r/reference/#scatter-error_x-width}{error_x.width}). 105 | The mapping from data values to pixel sizes may be controlled using 106 | \code{sizes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{size = I(30)}).} 107 | 108 | \item{sizes}{A numeric vector of length 2 used to scale \code{size} to pixels.} 109 | 110 | \item{span}{(Numeric) values mapped to relevant 'stroke-size' attribute(s) 111 | (e.g., 112 | \href{https://plotly.com/r/reference/#scatter-marker-line-width}{marker.line.width}, 113 | \href{https://plotly.com/r/reference/#scatter-line-width}{line.width} for filled polygons, 114 | and \href{https://plotly.com/r/reference/#scatter-error_x-thickness}{error_x.thickness}). 115 | The mapping from data values to pixel widths may be controlled using 116 | \code{spans}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{span = I(30)}).} 117 | 118 | \item{spans}{A numeric vector of length 2 used to scale \code{span} to pixels.} 119 | 120 | \item{symbol}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-marker-symbol}{marker.symbol}. 121 | The mapping from data values to symbols may be controlled using 122 | \code{symbols}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{symbol = I("pentagon")}). 
123 | Any \link{pch} value or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol name} may be used in this way.} 124 | 125 | \item{symbols}{A character vector of \link{pch} values or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol names}.} 126 | 127 | \item{linetype}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-line-dash}{line.dash}. 128 | The mapping from data values to line dashes may be controlled using 129 | \code{linetypes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{linetype = I("dash")}). 130 | Any \code{lty} (see \link{par}) value or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash name} may be used in this way.} 131 | 132 | \item{linetypes}{A character vector of \code{lty} values or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash names}.} 133 | 134 | \item{split}{(Discrete) values used to create multiple traces (one trace per value).} 135 | 136 | \item{frame}{(Discrete) values used to create animation frames.} 137 | 138 | \item{width}{Width in pixels (optional, defaults to automatic sizing).} 139 | 140 | \item{height}{Height in pixels (optional, defaults to automatic sizing).} 141 | 142 | \item{source}{A character string of length 1. Match the value of this string 143 | with the source argument in \code{\link[plotly:event_data]{event_data()}} to retrieve the 144 | event data corresponding to a specific plot (shiny apps can have multiple plots).} 145 | } 146 | \value{ 147 | \code{plotly} 148 | 149 | \code{plotly} 150 | } 151 | \description{ 152 | This function maps R objects to \href{https://plotly.com/javascript/}{plotly.js}, 153 | an (MIT licensed) web-based interactive charting library. It provides 154 | abstractions for doing common things (e.g. 
mapping data values to 155 | fill colors (via \code{color}) or creating \link[plotly]{animation}s (via \code{frame})) and sets 156 | some different defaults to make the interface feel more 'R-like' 157 | (i.e., closer to \code{\link[=plot]{plot()}} and \code{\link[ggplot2:qplot]{ggplot2::qplot()}}). 158 | } 159 | \details{ 160 | Unless \code{type} is specified, this function just initiates a plotly 161 | object with 'global' attributes that are passed onto downstream uses of 162 | \code{\link[plotly:add_trace]{add_trace()}} (or similar). A \link{formula} must always be used when 163 | referencing column name(s) in \code{data} (e.g. \code{plot_ly(mtcars, x = ~wt)}). 164 | Formulas are optional when supplying values directly, but they do 165 | help inform default axis/scale titles 166 | (e.g., \code{plot_ly(x = mtcars$wt)} vs \code{plot_ly(x = ~mtcars$wt)}) 167 | } 168 | \examples{ 169 | data(se) 170 | se |> 171 | plot_ly(x = ~counts) 172 | 173 | data(se) 174 | se |> 175 | plot_ly(x = ~counts) 176 | 177 | } 178 | \references{ 179 | \url{https://plotly-r.com/overview.html} 180 | } 181 | \seealso{ 182 | \itemize{ 183 | \item For initializing a plotly-geo object: \code{\link[plotly:plot_geo]{plot_geo()}} 184 | \item For initializing a plotly-mapbox object: \code{\link[plotly:plot_mapbox]{plot_mapbox()}} 185 | \item For translating a ggplot2 object to a plotly object: \code{\link[plotly:ggplotly]{ggplotly()}} 186 | \item For modifying any plotly object: \code{\link[plotly:layout]{layout()}}, \code{\link[plotly:add_trace]{add_trace()}}, \code{\link[plotly:style]{style()}} 187 | \item For linked brushing: \code{\link[plotly:highlight]{highlight()}} 188 | \item For arranging multiple plots: \code{\link[plotly:subplot]{subplot()}}, \code{\link[crosstalk:bscols]{crosstalk::bscols()}} 189 | \item For inspecting plotly objects: \code{\link[plotly:plotly_json]{plotly_json()}} 190 | \item For quick, accurate, and searchable plotly.js reference: 
\code{\link[plotly:schema]{schema()}} 191 | } 192 | } 193 | \author{ 194 | Carson Sievert 195 | } 196 | -------------------------------------------------------------------------------- /man/pull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{pull} 4 | \alias{pull} 5 | \alias{pull.SummarizedExperiment} 6 | \title{Extract a single column} 7 | \usage{ 8 | \method{pull}{SummarizedExperiment}(.data, var = -1, name = NULL, ...) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{var}{A variable specified as: 16 | \itemize{ 17 | \item a literal variable name 18 | \item a positive integer, giving the position counting from the left 19 | \item a negative integer, giving the position counting from the right. 20 | } 21 | 22 | The default returns the last column (on the assumption that's the 23 | column you've created most recently). 24 | 25 | This argument is taken by expression and supports 26 | \link[rlang:topic-inject]{quasiquotation} (you can unquote column 27 | names and column locations).} 28 | 29 | \item{name}{An optional parameter that specifies the column to be used 30 | as names for a named vector. Specified in a similar manner as \code{var}.} 31 | 32 | \item{...}{For use by methods.} 33 | } 34 | \value{ 35 | A vector the same size as \code{.data}. 36 | } 37 | \description{ 38 | \code{pull()} is similar to \code{$}. It's mostly useful because it looks a little 39 | nicer in pipes, it also works with remote data frames, and it can optionally 40 | name the output. 41 | } 42 | \section{Methods}{ 43 | 44 | 45 | This function is a \strong{generic}, which means that packages can provide 46 | implementations (methods) for other classes. 
See the documentation of 47 | individual methods for extra arguments and differences in behaviour. 48 | 49 | The following methods are currently available in loaded packages: 50 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. 51 | 52 | } 53 | 54 | \examples{ 55 | data(pasilla) 56 | pasilla |> pull(feature) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /man/rename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{rename} 4 | \alias{rename} 5 | \alias{rename.SummarizedExperiment} 6 | \title{Rename columns} 7 | \usage{ 8 | \method{rename}{SummarizedExperiment}(.data, ...) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use 16 | \code{new_name = old_name} to rename selected variables. 17 | 18 | For \code{rename_with()}: additional arguments passed onto \code{.fn}.} 19 | } 20 | \value{ 21 | An object of the same type as \code{.data}. The output has the following 22 | properties: 23 | \itemize{ 24 | \item Rows are not affected. 25 | \item Column names are changed; column order is preserved. 26 | \item Data frame attributes are preserved. 27 | \item Groups are updated to reflect new names. 28 | } 29 | } 30 | \description{ 31 | \code{rename()} changes the names of individual variables using 32 | \code{new_name = old_name} syntax; \code{rename_with()} renames columns using a 33 | function. 34 | } 35 | \section{Methods}{ 36 | 37 | 38 | This function is a \strong{generic}, which means that packages can provide 39 | implementations (methods) for other classes. 
See the documentation of 40 | individual methods for extra arguments and differences in behaviour. 41 | 42 | The following methods are currently available in loaded packages: 43 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}. 44 | 45 | } 46 | 47 | \examples{ 48 | data(pasilla) 49 | pasilla |> rename(cond=condition) 50 | 51 | } 52 | \seealso{ 53 | Other single table verbs: 54 | \code{\link{mutate}()}, 55 | \code{\link{slice}()}, 56 | \code{\link{summarise}()} 57 | } 58 | \concept{single table verbs} 59 | -------------------------------------------------------------------------------- /man/right_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{right_join} 4 | \alias{right_join} 5 | \alias{right_join.SummarizedExperiment} 6 | \title{Mutating joins} 7 | \usage{ 8 | \method{right_join}{SummarizedExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) 9 | } 10 | \arguments{ 11 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or 12 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character 16 | vector of variables to join by. 17 | 18 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all 19 | variables in common across \code{x} and \code{y}. A message lists the variables so 20 | that you can check they're correct; suppress the message by supplying \code{by} 21 | explicitly. 22 | 23 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} 24 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. 
25 | 26 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with 27 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match 28 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between 29 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like 30 | \code{join_by(a, c)}. 31 | 32 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap 33 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on 34 | these types of joins. 35 | 36 | For simple equality joins, you can alternatively specify a character vector 37 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} 38 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, 39 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. 40 | 41 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see 42 | \code{\link[dplyr:cross_join]{cross_join()}}.} 43 | 44 | \item{copy}{If \code{x} and \code{y} are not from the same data source, 45 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the 46 | same src as \code{x}. This allows you to join tables across srcs, but 47 | it is a potentially expensive operation so you must opt into it.} 48 | 49 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and 50 | \code{y}, these suffixes will be added to the output to disambiguate them. 51 | Should be a character vector of length 2.} 52 | 53 | \item{...}{Other parameters passed onto methods.} 54 | } 55 | \value{ 56 | An object of the same type as \code{x} (including the same groups). The order of 57 | the rows and columns of \code{x} is preserved as much as possible. The output has 58 | the following properties: 59 | \itemize{ 60 | \item The rows are affected by the join type. 
61 | \itemize{ 62 | \item \code{inner_join()} returns matched \code{x} rows. 63 | \item \code{left_join()} returns all \code{x} rows. 64 | \item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. 65 | \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. 66 | } 67 | \item Output columns include all columns from \code{x} and all non-key columns from 68 | \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. 69 | \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added 70 | to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have 71 | the same name, \code{suffix}es are added to disambiguate these as well. 72 | \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their 73 | common type between \code{x} and \code{y}. 74 | } 75 | } 76 | \description{ 77 | Mutating joins add columns from \code{y} to \code{x}, matching observations based on 78 | the keys. There are four mutating joins: the inner join, and the three outer 79 | joins. 80 | \subsection{Inner join}{ 81 | 82 | An \code{inner_join()} only keeps observations from \code{x} that have a matching key 83 | in \code{y}. 84 | 85 | The most important property of an inner join is that unmatched rows in either 86 | input are not included in the result. This means that generally inner joins 87 | are not appropriate in most analyses, because it is too easy to lose 88 | observations. 89 | } 90 | 91 | \subsection{Outer joins}{ 92 | 93 | The three outer joins keep observations that appear in at least one of the 94 | data frames: 95 | \itemize{ 96 | \item A \code{left_join()} keeps all observations in \code{x}. 97 | \item A \code{right_join()} keeps all observations in \code{y}. 98 | \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. 
99 | } 100 | } 101 | } 102 | \section{Many-to-many relationships}{ 103 | 104 | 105 | 106 | By default, dplyr guards against many-to-many relationships in equality joins 107 | by throwing a warning. These occur when both of the following are true: 108 | \itemize{ 109 | \item A row in \code{x} matches multiple rows in \code{y}. 110 | \item A row in \code{y} matches multiple rows in \code{x}. 111 | } 112 | 113 | This is typically surprising, as most joins involve a relationship of 114 | one-to-one, one-to-many, or many-to-one, and is often the result of an 115 | improperly specified join. Many-to-many relationships are particularly 116 | problematic because they can result in a Cartesian explosion of the number of 117 | rows returned from the join. 118 | 119 | If a many-to-many relationship is expected, silence this warning by 120 | explicitly setting \code{relationship = "many-to-many"}. 121 | 122 | In production code, it is best to preemptively set \code{relationship} to whatever 123 | relationship you expect to exist between the keys of \code{x} and \code{y}, as this 124 | forces an error to occur immediately if the data doesn't align with your 125 | expectations. 126 | 127 | Inequality joins typically result in many-to-many relationships by nature, so 128 | they don't warn on them by default, but you should still take extra care when 129 | specifying an inequality join, because they also have the capability to 130 | return a large number of rows. 131 | 132 | Rolling joins don't warn on many-to-many relationships either, but many 133 | rolling joins follow a many-to-one relationship, so it is often useful to 134 | set \code{relationship = "many-to-one"} to enforce this. 135 | 136 | Note that in SQL, most database providers won't let you specify a 137 | many-to-many relationship between two tables, instead requiring that you 138 | create a third \emph{junction table} that results in two one-to-many relationships 139 | instead. 
140 | 141 | } 142 | 143 | \section{Methods}{ 144 | 145 | 146 | These functions are \strong{generic}s, which means that packages can provide 147 | implementations (methods) for other classes. See the documentation of 148 | individual methods for extra arguments and differences in behaviour. 149 | 150 | Methods available in currently loaded packages: 151 | \itemize{ 152 | \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. 153 | \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. 154 | \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. 155 | \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 156 | } 157 | 158 | } 159 | 160 | \examples{ 161 | data(pasilla) 162 | 163 | tt <- pasilla 164 | tt |> right_join(tt |> 165 | distinct(condition) |> 166 | mutate(new_column=1:2) |> 167 | slice(1)) 168 | 169 | } 170 | \seealso{ 171 | Other joins: 172 | \code{\link[dplyr]{cross_join}()}, 173 | \code{\link[dplyr]{filter-joins}}, 174 | \code{\link[dplyr]{nest_join}()} 175 | } 176 | -------------------------------------------------------------------------------- /man/rowwise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{rowwise} 4 | \alias{rowwise} 5 | \alias{rowwise.SummarizedExperiment} 6 | \title{Group input by rows} 7 | \usage{ 8 | \method{rowwise}{SummarizedExperiment}(data, ...) 9 | } 10 | \arguments{ 11 | \item{data}{Input data frame.} 12 | 13 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Variables to be preserved 14 | when calling \code{\link[dplyr:summarise]{summarise()}}. This is typically a set of variables whose 15 | combination uniquely identify each row. 
16 | 17 | \strong{NB}: unlike \code{group_by()} you can not create new variables here but 18 | instead you can select multiple variables with (e.g.) \code{everything()}.} 19 | } 20 | \value{ 21 | A row-wise data frame with class \code{rowwise_df}. Note that a 22 | \code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}. 23 | } 24 | \description{ 25 | \code{rowwise()} allows you to compute on a data frame a row-at-a-time. 26 | This is most useful when a vectorised function doesn't exist. 27 | 28 | Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[dplyr:summarise]{summarise()}}, 29 | which returns a \link[dplyr]{grouped_df}. You can explicitly ungroup with \code{\link[dplyr:ungroup]{ungroup()}} 30 | or \code{\link[dplyr:as_tibble]{as_tibble()}}, or convert to a \link[dplyr]{grouped_df} with \code{\link[dplyr:group_by]{group_by()}}. 31 | } 32 | \section{List-columns}{ 33 | 34 | 35 | Because a rowwise has exactly one row per group it offers a small 36 | convenience for working with list-columns. Normally, \code{summarise()} and 37 | \code{mutate()} extract a group's worth of data with \code{[}. But when you index 38 | a list in this way, you get back another list. When you're working with 39 | a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your 40 | life a little easier. 41 | 42 | } 43 | 44 | \examples{ 45 | # TODO 46 | 47 | } 48 | \seealso{ 49 | \code{\link[dplyr:nest_by]{nest_by()}} for a convenient way of creating rowwise data frames 50 | with nested data. 
51 | } 52 | -------------------------------------------------------------------------------- /man/sample_n.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{sample_n} 4 | \alias{sample_n} 5 | \alias{sample_n.SummarizedExperiment} 6 | \alias{sample_frac} 7 | \alias{sample_frac.SummarizedExperiment} 8 | \title{Sample n rows from a table} 9 | \usage{ 10 | \method{sample_n}{SummarizedExperiment}(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) 11 | 12 | \method{sample_frac}{SummarizedExperiment}(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) 13 | } 14 | \arguments{ 15 | \item{tbl}{A data.frame.} 16 | 17 | \item{size}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> 18 | For \code{sample_n()}, the number of rows to select. 19 | For \code{sample_frac()}, the fraction of rows to select. 20 | If \code{tbl} is grouped, \code{size} applies to each group.} 21 | 22 | \item{replace}{Sample with or without replacement?} 23 | 24 | \item{weight}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sampling weights. 25 | This must evaluate to a vector of non-negative numbers the same length as 26 | the input. Weights are automatically standardised to sum to 1.} 27 | 28 | \item{.env}{DEPRECATED.} 29 | 30 | \item{...}{ignored} 31 | } 32 | \value{ 33 | \code{tidySummarizedExperiment} 34 | } 35 | \description{ 36 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} 37 | \code{sample_n()} and \code{sample_frac()} have been superseded in favour of 38 | \code{\link[dplyr:slice_sample]{slice_sample()}}. While they will not be deprecated in the near future, 39 | retirement means that we will only perform critical bug fixes, so we recommend 40 | moving to the newer alternative. 
41 | 42 | These functions were superseded because we realised it was more convenient to 43 | have two mutually exclusive arguments to one function, rather than two 44 | separate functions. This also made it possible to clean up a few other smaller 45 | design issues with \code{sample_n()}/\code{sample_frac()}: 46 | \itemize{ 47 | \item The connection to \code{slice()} was not obvious. 48 | \item The name of the first argument, \code{tbl}, is inconsistent with other 49 | single table verbs which use \code{.data}. 50 | \item The \code{size} argument uses tidy evaluation, which is surprising and 51 | undocumented. 52 | \item It was easier to remove the deprecated \code{.env} argument. 53 | \item \code{...} was in a suboptimal position. 54 | } 55 | } 56 | \examples{ 57 | data(pasilla) 58 | pasilla |> sample_n(50) 59 | pasilla |> sample_frac(0.1) 60 | 61 | } 62 | -------------------------------------------------------------------------------- /man/se.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{se} 5 | \alias{se} 6 | \title{Read counts of RNA-seq samples derived from 7 | Pasilla knock-down by Brooks et al.} 8 | \format{ 9 | A SummarizedExperiment containing 14599 features and 7 biological replicates. 10 | } 11 | \source{ 12 | \url{https://bioconductor.org/packages/release/data/experiment/html/pasilla.html} 13 | } 14 | \usage{ 15 | data(se) 16 | } 17 | \description{ 18 | A SummarizedExperiment dataset containing 19 | the transcriptome information for Drosophila melanogaster. 
20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /man/select.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{select} 4 | \alias{select} 5 | \alias{select.SummarizedExperiment} 6 | \title{Keep or drop columns using their names and types} 7 | \usage{ 8 | \method{select}{SummarizedExperiment}(.data, ...) 9 | } 10 | \arguments{ 11 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 12 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 13 | more details.} 14 | 15 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted 16 | expressions separated by commas. Variable names can be used as if they 17 | were positions in the data frame, so expressions like \code{x:y} can 18 | be used to select a range of variables.} 19 | } 20 | \value{ 21 | An object of the same type as \code{.data}. The output has the following 22 | properties: 23 | \itemize{ 24 | \item Rows are not affected. 25 | \item Output columns are a subset of input columns, potentially with a different 26 | order. Columns will be renamed if \code{new_name = old_name} form is used. 27 | \item Data frame attributes are preserved. 28 | \item Groups are maintained; you can't select off grouping variables. 29 | } 30 | } 31 | \description{ 32 | Select (and optionally rename) variables in a data frame, using a concise 33 | mini-language that makes it easy to refer to variables based on their name 34 | (e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the 35 | right) or type (e.g. \code{where(is.numeric)} selects all numeric columns). 
36 | \subsection{Overview of selection features}{ 37 | 38 | Tidyverse selections implement a dialect of R where operators make 39 | it easy to select variables: 40 | \itemize{ 41 | \item \code{:} for selecting a range of consecutive variables. 42 | \item \code{!} for taking the complement of a set of variables. 43 | \item \code{&} and \code{|} for selecting the intersection or the union of two 44 | sets of variables. 45 | \item \code{c()} for combining selections. 46 | } 47 | 48 | In addition, you can use \strong{selection helpers}. Some helpers select specific 49 | columns: 50 | \itemize{ 51 | \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. 52 | \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. 53 | \item \code{\link[dplyr:group_cols]{group_cols()}}: Select all grouping columns. 54 | } 55 | 56 | Other helpers select variables by matching patterns in their names: 57 | \itemize{ 58 | \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. 59 | \item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. 60 | \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. 61 | \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. 62 | \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. 63 | } 64 | 65 | Or from variables stored in a character vector: 66 | \itemize{ 67 | \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All 68 | names must be present, otherwise an out-of-bounds error is 69 | thrown. 70 | \item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown 71 | for names that don't exist. 
72 | } 73 | 74 | Or using a predicate function: 75 | \itemize{ 76 | \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those 77 | for which the function returns \code{TRUE}. 78 | } 79 | } 80 | } 81 | \section{Methods}{ 82 | 83 | 84 | This function is a \strong{generic}, which means that packages can provide 85 | implementations (methods) for other classes. See the documentation of 86 | individual methods for extra arguments and differences in behaviour. 87 | 88 | The following methods are currently available in loaded packages: 89 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. 90 | 91 | } 92 | 93 | \section{Examples}{ 94 | 95 | 96 | 97 | Here we show the usage for the basic selection operators. See the 98 | specific help pages to learn about helpers like \code{\link[dplyr:starts_with]{starts_with()}}. 99 | 100 | The selection language can be used in functions like 101 | \code{dplyr::select()} or \code{tidyr::pivot_longer()}. Let's first attach 102 | the tidyverse: 103 | 104 | \if{html}{\out{
}}\preformatted{library(tidyverse) 105 | 106 | # For better printing 107 | iris <- as_tibble(iris) 108 | }\if{html}{\out{
}} 109 | 110 | Select variables by name: 111 | 112 | \if{html}{\out{
}}\preformatted{starwars \%>\% select(height) 113 | #> # A tibble: 87 x 1 114 | #> height 115 | #> 116 | #> 1 172 117 | #> 2 167 118 | #> 3 96 119 | #> 4 202 120 | #> # i 83 more rows 121 | 122 | iris \%>\% pivot_longer(Sepal.Length) 123 | #> # A tibble: 150 x 6 124 | #> Sepal.Width Petal.Length Petal.Width Species name value 125 | #> 126 | #> 1 3.5 1.4 0.2 setosa Sepal.Length 5.1 127 | #> 2 3 1.4 0.2 setosa Sepal.Length 4.9 128 | #> 3 3.2 1.3 0.2 setosa Sepal.Length 4.7 129 | #> 4 3.1 1.5 0.2 setosa Sepal.Length 4.6 130 | #> # i 146 more rows 131 | }\if{html}{\out{
}} 132 | 133 | Select multiple variables by separating them with commas. Note how 134 | the order of columns is determined by the order of inputs: 135 | 136 | \if{html}{\out{
}}\preformatted{starwars \%>\% select(homeworld, height, mass) 137 | #> # A tibble: 87 x 3 138 | #> homeworld height mass 139 | #> 140 | #> 1 Tatooine 172 77 141 | #> 2 Tatooine 167 75 142 | #> 3 Naboo 96 32 143 | #> 4 Tatooine 202 136 144 | #> # i 83 more rows 145 | }\if{html}{\out{
}} 146 | 147 | Functions like \code{tidyr::pivot_longer()} don't take variables with 148 | dots. In this case use \code{c()} to select multiple variables: 149 | 150 | \if{html}{\out{
}}\preformatted{iris \%>\% pivot_longer(c(Sepal.Length, Petal.Length)) 151 | #> # A tibble: 300 x 5 152 | #> Sepal.Width Petal.Width Species name value 153 | #> 154 | #> 1 3.5 0.2 setosa Sepal.Length 5.1 155 | #> 2 3.5 0.2 setosa Petal.Length 1.4 156 | #> 3 3 0.2 setosa Sepal.Length 4.9 157 | #> 4 3 0.2 setosa Petal.Length 1.4 158 | #> # i 296 more rows 159 | }\if{html}{\out{
}} 160 | \subsection{Operators:}{ 161 | 162 | The \code{:} operator selects a range of consecutive variables: 163 | 164 | \if{html}{\out{
}}\preformatted{starwars \%>\% select(name:mass) 165 | #> # A tibble: 87 x 3 166 | #> name height mass 167 | #> 168 | #> 1 Luke Skywalker 172 77 169 | #> 2 C-3PO 167 75 170 | #> 3 R2-D2 96 32 171 | #> 4 Darth Vader 202 136 172 | #> # i 83 more rows 173 | }\if{html}{\out{
}} 174 | 175 | The \code{!} operator negates a selection: 176 | 177 | \if{html}{\out{
}}\preformatted{starwars \%>\% select(!(name:mass)) 178 | #> # A tibble: 87 x 11 179 | #> hair_color skin_color eye_color birth_year sex gender homeworld species 180 | #> 181 | #> 1 blond fair blue 19 male masculine Tatooine Human 182 | #> 2 gold yellow 112 none masculine Tatooine Droid 183 | #> 3 white, blue red 33 none masculine Naboo Droid 184 | #> 4 none white yellow 41.9 male masculine Tatooine Human 185 | #> # i 83 more rows 186 | #> # i 3 more variables: films , vehicles , starships 187 | 188 | iris \%>\% select(!c(Sepal.Length, Petal.Length)) 189 | #> # A tibble: 150 x 3 190 | #> Sepal.Width Petal.Width Species 191 | #> 192 | #> 1 3.5 0.2 setosa 193 | #> 2 3 0.2 setosa 194 | #> 3 3.2 0.2 setosa 195 | #> 4 3.1 0.2 setosa 196 | #> # i 146 more rows 197 | 198 | iris \%>\% select(!ends_with("Width")) 199 | #> # A tibble: 150 x 3 200 | #> Sepal.Length Petal.Length Species 201 | #> 202 | #> 1 5.1 1.4 setosa 203 | #> 2 4.9 1.4 setosa 204 | #> 3 4.7 1.3 setosa 205 | #> 4 4.6 1.5 setosa 206 | #> # i 146 more rows 207 | }\if{html}{\out{
}} 208 | 209 | \code{&} and \code{|} take the intersection or the union of two selections: 210 | 211 | \if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & ends_with("Width")) 212 | #> # A tibble: 150 x 1 213 | #> Petal.Width 214 | #> 215 | #> 1 0.2 216 | #> 2 0.2 217 | #> 3 0.2 218 | #> 4 0.2 219 | #> # i 146 more rows 220 | 221 | iris \%>\% select(starts_with("Petal") | ends_with("Width")) 222 | #> # A tibble: 150 x 3 223 | #> Petal.Length Petal.Width Sepal.Width 224 | #> 225 | #> 1 1.4 0.2 3.5 226 | #> 2 1.4 0.2 3 227 | #> 3 1.3 0.2 3.2 228 | #> 4 1.5 0.2 3.1 229 | #> # i 146 more rows 230 | }\if{html}{\out{
}} 231 | 232 | To take the difference between two selections, combine the \code{&} and 233 | \code{!} operators: 234 | 235 | \if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & !ends_with("Width")) 236 | #> # A tibble: 150 x 1 237 | #> Petal.Length 238 | #> 239 | #> 1 1.4 240 | #> 2 1.4 241 | #> 3 1.3 242 | #> 4 1.5 243 | #> # i 146 more rows 244 | }\if{html}{\out{
}} 245 | } 246 | 247 | } 248 | 249 | \examples{ 250 | data(pasilla) 251 | pasilla |> select(.sample, .feature, counts) 252 | 253 | } 254 | \seealso{ 255 | Other single table verbs: 256 | \code{\link[dplyr]{arrange}()}, 257 | \code{\link[dplyr]{filter}()}, 258 | \code{\link[dplyr]{mutate}()}, 259 | \code{\link[dplyr]{reframe}()}, 260 | \code{\link[dplyr]{rename}()}, 261 | \code{\link[dplyr]{slice}()}, 262 | \code{\link[dplyr]{summarise}()} 263 | } 264 | -------------------------------------------------------------------------------- /man/separate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{separate} 4 | \alias{separate} 5 | \alias{separate.SummarizedExperiment} 6 | \title{Separate a character column into multiple columns with a regular 7 | expression or numeric locations} 8 | \usage{ 9 | \method{separate}{SummarizedExperiment}( 10 | data, 11 | col, 12 | into, 13 | sep = "[^[:alnum:]]+", 14 | remove = TRUE, 15 | convert = FALSE, 16 | extra = "warn", 17 | fill = "warn", 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{data}{A data frame.} 23 | 24 | \item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.} 25 | 26 | \item{into}{Names of new variables to create as a character vector. 27 | Use \code{NA} to omit the variable in the output.} 28 | 29 | \item{sep}{Separator between columns. 30 | 31 | If character, \code{sep} is interpreted as a regular expression. The default 32 | value is a regular expression that matches any sequence of 33 | non-alphanumeric values. 34 | 35 | If numeric, \code{sep} is interpreted as character positions to split at. Positive 36 | values start at 1 at the far-left of the string; negative values start at -1 at 37 | the far-right of the string. 
The length of \code{sep} should be one less than 38 | \code{into}.} 39 | 40 | \item{remove}{If \code{TRUE}, remove input column from output data frame.} 41 | 42 | \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with 43 | \code{as.is = TRUE} on new columns. This is useful if the component 44 | columns are integer, numeric or logical. 45 | 46 | NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.} 47 | 48 | \item{extra}{If \code{sep} is a character vector, this controls what 49 | happens when there are too many pieces. There are three valid options: 50 | \itemize{ 51 | \item \code{"warn"} (the default): emit a warning and drop extra values. 52 | \item \code{"drop"}: drop any extra values without a warning. 53 | \item \code{"merge"}: only splits at most \code{length(into)} times 54 | }} 55 | 56 | \item{fill}{If \code{sep} is a character vector, this controls what 57 | happens when there are not enough pieces. There are three valid options: 58 | \itemize{ 59 | \item \code{"warn"} (the default): emit a warning and fill from the right 60 | \item \code{"right"}: fill with missing values on the right 61 | \item \code{"left"}: fill with missing values on the left 62 | }} 63 | 64 | \item{...}{Additional arguments passed on to methods.} 65 | } 66 | \value{ 67 | \code{tidySummarizedExperiment} 68 | } 69 | \description{ 70 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} 71 | 72 | \code{separate()} has been superseded in favour of \code{\link[tidyr:separate_wider_position]{separate_wider_position()}} 73 | and \code{\link[tidyr:separate_wider_delim]{separate_wider_delim()}} because the two functions make the two uses 74 | more obvious, the API is more polished, and the handling of problems is 75 | better. Superseded functions will not go away, but will only receive 76 | critical bug fixes. 
77 | 78 | Given either a regular expression or a vector of character positions, 79 | \code{separate()} turns a single character column into multiple columns. 80 | } 81 | \examples{ 82 | un <- tidySummarizedExperiment::pasilla |> 83 | unite("group", c(condition, type)) 84 | un |> separate(col=group, into=c("condition", "type")) 85 | 86 | } 87 | \seealso{ 88 | \code{\link[tidyr:unite]{unite()}}, the complement, \code{\link[tidyr:extract]{extract()}} which uses regular 89 | expression capturing groups. 90 | } 91 | -------------------------------------------------------------------------------- /man/slice.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{slice} 4 | \alias{slice} 5 | \alias{slice.SummarizedExperiment} 6 | \alias{slice_head} 7 | \alias{slice_tail} 8 | \alias{slice_sample} 9 | \alias{slice_min} 10 | \alias{slice_max} 11 | \title{Subset rows using their positions} 12 | \usage{ 13 | \method{slice}{SummarizedExperiment}(.data, ..., .preserve = FALSE) 14 | } 15 | \arguments{ 16 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 17 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 18 | more details.} 19 | 20 | \item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}> 21 | Integer row values. 22 | 23 | Provide either positive values to keep, or negative values to drop. 24 | The values provided must be either all positive or all negative. 25 | Indices beyond the number of rows in the input are silently ignored. 26 | 27 | For \verb{slice_*()}, these arguments are passed on to methods.} 28 | 29 | \item{.preserve}{Relevant when the \code{.data} input is grouped. 
30 | If \code{.preserve = FALSE} (the default), the grouping structure 31 | is recalculated based on the resulting data, otherwise the grouping is kept as is.} 32 | } 33 | \value{ 34 | An object of the same type as \code{.data}. The output has the following 35 | properties: 36 | \itemize{ 37 | \item Each row may appear 0, 1, or many times in the output. 38 | \item Columns are not modified. 39 | \item Groups are not modified. 40 | \item Data frame attributes are preserved. 41 | } 42 | } 43 | \description{ 44 | \code{slice()} lets you index rows by their (integer) locations. It allows you 45 | to select, remove, and duplicate rows. It is accompanied by a number of 46 | helpers for common use cases: 47 | \itemize{ 48 | \item \code{slice_head()} and \code{slice_tail()} select the first or last rows. 49 | \item \code{slice_sample()} randomly selects rows. 50 | \item \code{slice_min()} and \code{slice_max()} select rows with the smallest or largest 51 | values of a variable. 52 | } 53 | 54 | If \code{.data} is a \link[dplyr]{grouped_df}, the operation will be performed on each group, 55 | so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in 56 | each group. 57 | } 58 | \details{ 59 | Slice does not work with relational databases because they have no 60 | intrinsic notion of row order. If you want to perform the equivalent 61 | operation, use \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:row_number]{row_number()}}. 62 | } 63 | \section{Methods}{ 64 | 65 | 66 | These functions are \strong{generic}s, which means that packages can provide 67 | implementations (methods) for other classes. See the documentation of 68 | individual methods for extra arguments and differences in behaviour. 69 | 70 | Methods available in currently loaded packages: 71 | \itemize{ 72 | \item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. 
73 | \item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. 74 | \item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. 75 | \item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. 76 | \item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. 77 | \item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. 78 | } 79 | 80 | } 81 | 82 | \examples{ 83 | data(pasilla) 84 | pasilla |> slice(1) 85 | 86 | } 87 | \seealso{ 88 | Other single table verbs: 89 | \code{\link{mutate}()}, 90 | \code{\link{rename}()}, 91 | \code{\link{summarise}()} 92 | } 93 | \concept{single table verbs} 94 | -------------------------------------------------------------------------------- /man/summarise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyr_methods.R 3 | \name{summarise} 4 | \alias{summarise} 5 | \alias{summarise.SummarizedExperiment} 6 | \alias{summarize} 7 | \alias{summarize.SummarizedExperiment} 8 | \title{Summarise each group down to one row} 9 | \usage{ 10 | \method{summarise}{SummarizedExperiment}(.data, ...) 11 | 12 | \method{summarize}{SummarizedExperiment}(.data, ...) 13 | } 14 | \arguments{ 15 | \item{.data}{A data frame, data frame extension (e.g. a tibble), or a 16 | lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 17 | more details.} 18 | 19 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of 20 | summary functions. The name will be the name of the variable in the result. 21 | 22 | The value can be: 23 | \itemize{ 24 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. 25 | \item A data frame, to add multiple columns from a single expression. 
26 | } 27 | 28 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was 29 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.} 30 | } 31 | \value{ 32 | An object \emph{usually} of the same type as \code{.data}. 33 | \itemize{ 34 | \item The rows come from the underlying \code{\link[dplyr:group_keys]{group_keys()}}. 35 | \item The columns are a combination of the grouping keys and the summary 36 | expressions that you provide. 37 | \item The grouping structure is controlled by the \verb{.groups=} argument, the 38 | output may be another \link[dplyr]{grouped_df}, a \link[dplyr]{tibble} or a \link[dplyr]{rowwise} data frame. 39 | \item Data frame attributes are \strong{not} preserved, because \code{summarise()} 40 | fundamentally creates a new data frame. 41 | } 42 | } 43 | \description{ 44 | \code{summarise()} creates a new data frame. It returns one row for each 45 | combination of grouping variables; if there are no grouping variables, the 46 | output will have a single row summarising all observations in the input. It 47 | will contain one column for each grouping variable and one column for each of 48 | the summary statistics that you have specified. 49 | 50 | \code{summarise()} and \code{summarize()} are synonyms. 
51 | } 52 | \section{Useful functions}{ 53 | 54 | 55 | \itemize{ 56 | \item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}} 57 | \item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}} 58 | \item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, 59 | \item Position: \code{\link[dplyr:first]{first()}}, \code{\link[dplyr:last]{last()}}, \code{\link[dplyr:nth]{nth()}}, 60 | \item Count: \code{\link[dplyr:n]{n()}}, \code{\link[dplyr:n_distinct]{n_distinct()}} 61 | \item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}} 62 | } 63 | 64 | } 65 | 66 | \section{Backend variations}{ 67 | 68 | 69 | 70 | The data frame backend supports creating a variable and using it in the 71 | same summary. This means that previously created summary variables can be 72 | further transformed or combined within the summary, as in \code{\link[dplyr:mutate]{mutate()}}. 73 | However, it also means that summary variables with the same names as previous 74 | variables overwrite them, making those variables unavailable to later summary 75 | variables. 76 | 77 | This behaviour may not be supported in other backends. To avoid unexpected 78 | results, consider using new names for your summary variables, especially when 79 | creating multiple summaries. 80 | 81 | } 82 | 83 | \section{Methods}{ 84 | 85 | 86 | This function is a \strong{generic}, which means that packages can provide 87 | implementations (methods) for other classes. See the documentation of 88 | individual methods for extra arguments and differences in behaviour. 89 | 90 | The following methods are currently available in loaded packages: 91 | \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}. 
92 | 93 | } 94 | 95 | \examples{ 96 | data(pasilla) 97 | pasilla |> summarise(mean(counts)) 98 | 99 | } 100 | \seealso{ 101 | Other single table verbs: 102 | \code{\link{mutate}()}, 103 | \code{\link{rename}()}, 104 | \code{\link{slice}()} 105 | } 106 | \concept{single table verbs} 107 | -------------------------------------------------------------------------------- /man/tbl_format_header.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print_method.R 3 | \name{tbl_format_header} 4 | \alias{tbl_format_header} 5 | \alias{tbl_format_header.tidySummarizedExperiment} 6 | \title{Format the header of a tibble} 7 | \usage{ 8 | \method{tbl_format_header}{tidySummarizedExperiment}(x, setup, ...) 9 | } 10 | \arguments{ 11 | \item{x}{A tibble-like object.} 12 | 13 | \item{setup}{A setup object returned from \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.} 14 | 15 | \item{...}{These dots are for future extensions and must be empty.} 16 | } 17 | \value{ 18 | A character vector. 19 | } 20 | \description{ 21 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 22 | 23 | For easier customization, the formatting of a tibble is split 24 | into three components: header, body, and footer. 25 | The \code{tbl_format_header()} method is responsible for formatting the header 26 | of a tibble. 27 | 28 | Override this method if you need to change the appearance 29 | of the entire header. 30 | If you only need to change or extend the components shown in the header, 31 | override or extend \code{\link[pillar:tbl_sum]{tbl_sum()}} for your class which is called by the 32 | default method. 
33 | } 34 | \examples{ 35 | # TODO 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/tidy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{tidy} 4 | \alias{tidy} 5 | \alias{tidy.SummarizedExperiment} 6 | \alias{tidy.RangedSummarizedExperiment} 7 | \title{tidy for \code{SummarizedExperiment}} 8 | \usage{ 9 | tidy(object) 10 | 11 | \method{tidy}{SummarizedExperiment}(object) 12 | 13 | \method{tidy}{RangedSummarizedExperiment}(object) 14 | } 15 | \arguments{ 16 | \item{object}{A SummarizedExperiment object} 17 | } 18 | \value{ 19 | A \code{tidySummarizedExperiment} object. 20 | } 21 | \description{ 22 | tidy for \code{SummarizedExperiment} 23 | } 24 | \examples{ 25 | data(pasilla) 26 | pasilla \%>\% tidy() 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/unite.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{unite} 4 | \alias{unite} 5 | \alias{unite.SummarizedExperiment} 6 | \title{Unite multiple columns into one by pasting strings together} 7 | \usage{ 8 | \method{unite}{SummarizedExperiment}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) 9 | } 10 | \arguments{ 11 | \item{data}{A data frame.} 12 | 13 | \item{col}{The name of the new column, as a string or symbol. 14 | 15 | This argument is passed by expression and supports 16 | \link[rlang:topic-inject]{quasiquotation} (you can unquote strings 17 | and symbols). 
The name is captured from the expression with 18 | \code{\link[rlang:defusing-advanced]{rlang::ensym()}} (note that this kind of interface where 19 | symbols do not represent actual objects is now discouraged in the 20 | tidyverse; we support it here for backward compatibility).} 21 | 22 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite} 23 | 24 | \item{sep}{Separator to use between values.} 25 | 26 | \item{remove}{If \code{TRUE}, remove input columns from output data frame.} 27 | 28 | \item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting 29 | each value.} 30 | } 31 | \value{ 32 | \code{tidySummarizedExperiment} 33 | } 34 | \description{ 35 | Convenience function to paste together multiple columns into one. 36 | } 37 | \examples{ 38 | tidySummarizedExperiment::pasilla |> 39 | unite("group", c(condition, type)) 40 | 41 | } 42 | \seealso{ 43 | \code{\link[tidyr:separate]{separate()}}, the complement. 44 | } 45 | -------------------------------------------------------------------------------- /man/unnest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyr_methods.R 3 | \name{unnest} 4 | \alias{unnest} 5 | \alias{unnest.tidySummarizedExperiment_nested} 6 | \alias{unnest_summarized_experiment} 7 | \title{Unnest a list-column of data frames into rows and columns} 8 | \usage{ 9 | \method{unnest}{tidySummarizedExperiment_nested}( 10 | data, 11 | cols, 12 | ..., 13 | keep_empty = FALSE, 14 | ptype = NULL, 15 | names_sep = NULL, 16 | names_repair = "check_unique", 17 | .drop, 18 | .id, 19 | .sep, 20 | .preserve 21 | ) 22 | 23 | unnest_summarized_experiment( 24 | data, 25 | cols, 26 | ..., 27 | keep_empty = FALSE, 28 | ptype = NULL, 29 | names_sep = NULL, 30 | names_repair = "check_unique", 31 | .drop, 32 | .id, 33 | .sep, 34 | .preserve 35 | ) 36 | } 37 | \arguments{ 38 | \item{data}{A data 
frame.} 39 | 40 | \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> List-columns to unnest. 41 | 42 | When selecting multiple columns, values from the same row will be recycled 43 | to their common size.} 44 | 45 | \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: 46 | previously you could write \code{df \%>\% unnest(x, y, z)}. 47 | Convert to \code{df \%>\% unnest(c(x, y, z))}. If you previously created a new 48 | variable in \code{unnest()} you'll now need to do it explicitly with \code{mutate()}. 49 | Convert \code{df \%>\% unnest(y = fun(x, y, z))} 50 | to \code{df \%>\% mutate(y = fun(x, y, z)) \%>\% unnest(y)}.} 51 | 52 | \item{keep_empty}{By default, you get one row of output for each element 53 | of the list that you are unchopping/unnesting. This means that if there's a 54 | size-0 element (like \code{NULL} or an empty data frame or vector), then that 55 | entire row will be dropped from the output. If you want to preserve all 56 | rows, use \code{keep_empty = TRUE} to replace size-0 elements with a single row 57 | of missing values.} 58 | 59 | \item{ptype}{Optionally, a named list of column name-prototype pairs to 60 | coerce \code{cols} to, overriding the default that will be guessed from 61 | combining the individual values. Alternatively, a single empty ptype 62 | can be supplied, which will be applied to all \code{cols}.} 63 | 64 | \item{names_sep}{If \code{NULL}, the default, the outer names will come from the 65 | inner names. If a string, the outer names will be formed by pasting 66 | together the outer and the inner column names, separated by \code{names_sep}.} 67 | 68 | \item{names_repair}{Used to check that output data frame has valid 69 | names. 
Must be one of the following options: 70 | \itemize{ 71 | \item \verb{"minimal"}: no name repair or checks, beyond basic existence, 72 | \item \verb{"unique"}: make sure names are unique and not empty, 73 | \item \verb{"check_unique"}: (the default), no name repair, but check they are unique, 74 | \item \verb{"universal"}: make the names unique and syntactic 75 | \item a function: apply custom name repair. 76 | \item \link[tidyr]{tidyr_legacy}: use the name repair from tidyr 0.8. 77 | \item a formula: a purrr-style anonymous function (see \code{\link[rlang:as_function]{rlang::as_function()}}) 78 | } 79 | 80 | See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more details on these terms and the 81 | strategies used to enforce them.} 82 | 83 | \item{.drop, .preserve}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: 84 | all list-columns are now preserved; if there are any that you 85 | don't want in the output use \code{select()} to remove them prior to 86 | unnesting.} 87 | 88 | \item{.id}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: 89 | convert \code{df \%>\% unnest(x, .id = "id")} to \verb{df \%>\% mutate(id = names(x)) \%>\% unnest(x)}.} 90 | 91 | \item{.sep}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: 92 | use \code{names_sep} instead.} 93 | } 94 | \value{ 95 | \code{tidySummarizedExperiment} 96 | } 97 | \description{ 98 | Unnest expands a list-column containing data frames into rows and columns. 99 | } 100 | \section{New syntax}{ 101 | 102 | 103 | tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's 104 | designed to be more similar to other functions. 
Converting to the new syntax 105 | should be straightforward (guided by the message you'll receive) but if 106 | you just need to run an old analysis, you can easily revert to the previous 107 | behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows: 108 | 109 | \if{html}{\out{
}}\preformatted{library(tidyr) 110 | nest <- nest_legacy 111 | unnest <- unnest_legacy 112 | }\if{html}{\out{
}} 113 | 114 | } 115 | 116 | \examples{ 117 | tidySummarizedExperiment::pasilla |> 118 | nest(data=-condition) |> 119 | unnest(data) 120 | 121 | tidySummarizedExperiment::pasilla |> 122 | nest(data=-condition) |> 123 | unnest_summarized_experiment(data) 124 | 125 | } 126 | \seealso{ 127 | Other rectangling: 128 | \code{\link[tidyr]{hoist}()}, 129 | \code{\link[tidyr]{unnest_longer}()}, 130 | \code{\link[tidyr]{unnest_wider}()} 131 | } 132 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(tidySummarizedExperiment) 3 | 4 | test_check("tidySummarizedExperiment") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-dplyr_methods.R: -------------------------------------------------------------------------------- 1 | context("dplyr test") 2 | 3 | library(tidySummarizedExperiment) 4 | 5 | 6 | test_that("bind_rows", { 7 | pasilla_bind <- bind_rows(pasilla, pasilla) 8 | 9 | pasilla_bind %>% 10 | count(.sample, .feature) %>% 11 | dplyr::count(n) %>% 12 | filter(n > 1) %>% 13 | nrow() %>% 14 | expect_equal(0) 15 | }) 16 | 17 | test_that("distinct", { 18 | pasilla %>% 19 | distinct(condition) %>% 20 | ncol() %>% 21 | expect_equal(1) 22 | }) 23 | 24 | test_that("filter", { 25 | pasilla %>% 26 | filter(condition == "untreated") %>% 27 | nrow() %>% 28 | expect_equal(14599) 29 | }) 30 | 31 | test_that("group_by", { 32 | pasilla %>% 33 | group_by(condition) %>% 34 | ncol() %>% 35 | expect_equal(5) 36 | }) 37 | 38 | test_that("summarise", { 39 | pasilla %>% 40 | summarise(mean(counts)) %>% 41 | nrow() %>% 42 | expect_equal(1) 43 | }) 44 | 45 | test_that("mutate", { 46 | pasilla %>% 47 | mutate(condition = 1) %>% 48 | distinct(condition) %>% 49 | nrow() %>% 50 | expect_equal(1) 51 | }) 52 | 53 | test_that("rename", { 54 | pasilla %>% 55 | rename(groups = condition) 
%>% 56 | select(groups) %>% 57 | ncol() %>% 58 | expect_equal(1) 59 | }) 60 | 61 | test_that("left_join", { 62 | expect_equal( 63 | pasilla %>% 64 | left_join(pasilla %>% 65 | distinct(condition) %>% 66 | mutate(new_column = 1:2)) %>% 67 | colData() %>% 68 | ncol(), 69 | pasilla %>% 70 | colData() %>% 71 | ncol() %>% 72 | sum(1) 73 | ) 74 | }) 75 | 76 | test_that("left_join 0 samples", { 77 | 78 | pasilla[0,] %>% 79 | left_join(pasilla %>% 80 | distinct(condition) %>% 81 | mutate(new_column = 1)) |> 82 | as_tibble() |> 83 | pull(new_column) %>% 84 | unique() |> 85 | expect_equal(1) 86 | 87 | }) 88 | 89 | test_that("inner_join", { 90 | pasilla %>% inner_join(pasilla %>% 91 | distinct(condition) %>% 92 | mutate(new_column = 1:2) %>% 93 | slice(1)) %>% 94 | ncol() %>% 95 | expect_equal(4) 96 | }) 97 | 98 | test_that("right_join", { 99 | pasilla %>% right_join(pasilla %>% 100 | distinct(condition) %>% 101 | mutate(new_column = 1:2) %>% 102 | slice(1)) %>% 103 | ncol() %>% 104 | expect_equal(4) 105 | }) 106 | 107 | test_that("full_join", { 108 | pasilla %>% 109 | full_join(tibble::tibble(condition = "A", other = 1:4)) %>% nrow() %>% 110 | expect_equal(102197) 111 | }) 112 | 113 | test_that("slice", { 114 | pasilla %>% 115 | slice(1) %>% 116 | ncol() %>% 117 | expect_equal(1) 118 | }) 119 | 120 | test_that("select", { 121 | pasilla %>% 122 | select(-condition) %>% 123 | class() %>% 124 | as.character() %>% 125 | expect_equal("SummarizedExperiment") 126 | 127 | pasilla %>% 128 | select(condition) %>% 129 | class() %>% 130 | as.character() %>% 131 | .[1] %>% 132 | expect_equal("tbl_df") 133 | }) 134 | 135 | test_that("sample_n", { 136 | pasilla %>% 137 | sample_n(50) %>% 138 | nrow() %>% 139 | expect_equal(50) 140 | }) 141 | 142 | test_that("sample_frac", { 143 | pasilla %>% 144 | sample_frac(0.1) %>% 145 | nrow() %>% 146 | expect_equal(10219) 147 | }) 148 | 149 | test_that("count", { 150 | pasilla %>% 151 | count(condition) %>% 152 | nrow() %>% 153 | expect_equal(2) 154 | 
}) 155 | 156 | test_that("mutate counts", { 157 | 158 | se = tidySummarizedExperiment::pasilla |> mutate(counts_2 = counts) 159 | 160 | se |> 161 | pull(counts) |> 162 | expect_equal( 163 | se |> pull(counts_2) 164 | ) 165 | 166 | se = tidySummarizedExperiment::pasilla 167 | assays(se, withDimnames = FALSE)$counts_2 = assays(se)$counts[,7:1] 168 | 169 | se |> 170 | pull(counts) |> 171 | expect_equal( 172 | se |> pull(counts_2) 173 | ) 174 | 175 | se |> 176 | tidySummarizedExperiment:::check_if_assays_are_NOT_overlapped(dim = "cols") |> 177 | expect_equal(FALSE) 178 | 179 | se[,1] |> 180 | tidySummarizedExperiment:::check_if_assays_are_NOT_overlapped(dim = "cols") |> 181 | expect_equal(TRUE) 182 | 183 | }) 184 | 185 | test_that("group_split splits character columns", { 186 | data(pasilla) 187 | pasilla |> 188 | group_split(condition) |> 189 | length() |> 190 | expect_equal(2) 191 | }) 192 | 193 | test_that("group_split splits logical comparisons", { 194 | data(pasilla) 195 | pasilla |> 196 | group_split(counts > 0) |> 197 | length() |> 198 | expect_equal(2) 199 | }) 200 | 201 | test_that("group_split splits with mutliple arguments", { 202 | data(pasilla) 203 | pasilla |> 204 | group_split(condition, counts > 0) |> 205 | length() |> 206 | expect_equal(4) 207 | }) 208 | 209 | test_that("mutate features", { 210 | pasilla %>% 211 | mutate_features(new = 1:nrow(pasilla)) %>% 212 | rowData() %>% 213 | as_tibble() %>% 214 | pull(new) %>% 215 | expect_equal(1:nrow(pasilla)) 216 | }) 217 | 218 | test_that("mutate samples", { 219 | pasilla %>% 220 | mutate_samples(new = 1:ncol(pasilla)) %>% 221 | colData() %>% 222 | as_tibble() %>% 223 | pull(new) %>% 224 | expect_equal(1:ncol(pasilla)) 225 | }) 226 | -------------------------------------------------------------------------------- /tests/testthat/test-felix.R: -------------------------------------------------------------------------------- 1 | context("felix test") 2 | 3 | library(magrittr) 4 | 
library(tidySummarizedExperiment) 5 | 6 | # Create dataset 7 | nrows <- 200; ncols <- 6 8 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) 9 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)), 10 | IRanges(floor(runif(200, 1e5, 1e6)), width=100), 11 | strand=sample(c("+", "-"), 200, TRUE), 12 | feature_id=sprintf("ID%03d", 1:200)) 13 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), 14 | row.names=LETTERS[1:6]) 15 | rse <- SummarizedExperiment(assays=SimpleList(counts=counts), 16 | rowRanges=rowRanges, colData=colData) 17 | 18 | 19 | 20 | test_that("Example 1 all columns included", { 21 | 22 | rse %>% 23 | as_tibble() %>% 24 | nrow() %>% 25 | expect_equal(1200) 26 | }) 27 | 28 | 29 | test_that("Example 2", { 30 | 31 | colData(rse)$sample <- seq_len(ncol(rse)) 32 | rowData(rse)$transcript <- seq_len(nrow(rse)) 33 | rse %>% 34 | as_tibble() %>% 35 | nrow() %>% 36 | expect_equal(1200) 37 | }) 38 | 39 | test_that("Example 3", { 40 | 41 | # rowRanges(rse) <- split(rowRanges(rse),seq_len(nrow(rse))) 42 | # 43 | # rse %>% 44 | # as_tibble() %>% 45 | # nrow() %>% 46 | # expect_equal(1200) 47 | 48 | rowData(rse)$transcript <- seq_len(nrow(rse)) 49 | 50 | rse %>% 51 | as_tibble() %>% 52 | nrow() %>% 53 | expect_equal(1200) 54 | 55 | colnames(rse) <- NULL 56 | 57 | rse %>% 58 | as_tibble() %>% 59 | select(.sample, .feature) %>% 60 | ncol() %>% 61 | expect_equal(2) 62 | }) 63 | 64 | test_that("Example 4 from tidybulk", { 65 | 66 | x = se %>% as_tibble() 67 | }) -------------------------------------------------------------------------------- /tests/testthat/test-old_vocabulary.R: -------------------------------------------------------------------------------- 1 | context("old vocabulary") 2 | 3 | library(tidySummarizedExperiment) 4 | 5 | warning_message = "the special columns including sample/feature" 6 | 7 | test_that("distinct", { 8 | pasilla %>% 9 | distinct(sample, condition) %>% 10 | expect_warning(warning_message) 11 | }) 12 | 13 | 
test_that("filter", { 14 | pasilla %>% 15 | filter(feature == "FBgn0000003") %>% 16 | expect_warning(warning_message) 17 | }) 18 | 19 | test_that("group_by", { 20 | pasilla %>% 21 | group_by(sample) %>% 22 | expect_warning(warning_message) 23 | }) 24 | 25 | test_that("summarise", { 26 | pasilla %>% 27 | summarise(unique(sample )) %>% 28 | expect_warning(warning_message) 29 | }) 30 | 31 | test_that("mutate", { 32 | pasilla %>% 33 | mutate(condition = sample) %>% 34 | expect_warning(warning_message) 35 | 36 | pasilla %>% 37 | mutate(sample_name=toupper(sample)) %>% 38 | select(sample, sample_name) %>% 39 | expect_warning(warning_message) 40 | }) 41 | 42 | test_that("left_join", { 43 | pasilla %>% 44 | left_join(pasilla %>% 45 | distinct(sample) %>% 46 | mutate(new_column = 1:7)) %>% 47 | expect_warning(warning_message) 48 | 49 | 50 | pasilla %>% 51 | left_join(pasilla %>% 52 | distinct(feature) %>% 53 | mutate(new_column = 1:14599 )) %>% 54 | expect_warning(warning_message) 55 | 56 | }) 57 | 58 | test_that("inner_join", { 59 | pasilla %>% 60 | inner_join(pasilla %>% 61 | distinct(sample) %>% 62 | mutate(new_column = 1:7)) %>% 63 | expect_warning(warning_message) 64 | }) 65 | 66 | test_that("right_join", { 67 | pasilla %>% 68 | right_join(pasilla %>% 69 | distinct(sample) %>% 70 | mutate(new_column = 1:7)) %>% 71 | expect_warning(warning_message) 72 | }) 73 | 74 | test_that("full_join", { 75 | pasilla %>% 76 | full_join(pasilla %>% 77 | distinct(sample) %>% 78 | mutate(new_column = 1:7)) %>% 79 | expect_warning(warning_message) 80 | }) 81 | 82 | test_that("select", { 83 | pasilla %>% 84 | select(sample, feature, counts, condition) %>% 85 | expect_warning(warning_message) 86 | 87 | pasilla %>% 88 | select(condition) %>% 89 | class() %>% 90 | as.character() %>% 91 | .[1] %>% 92 | expect_equal("tbl_df") 93 | }) 94 | 95 | test_that("count", { 96 | pasilla %>% 97 | count(sample, condition) %>% 98 | expect_warning(warning_message) 99 | }) 100 | 101 | test_that("pull", { 102 
| pasilla %>% 103 | pull(sample, condition) %>% 104 | expect_warning(warning_message) 105 | }) 106 | 107 | 108 | library(magrittr) 109 | library(tidySummarizedExperiment) 110 | 111 | tt <- 112 | pasilla %>% 113 | mutate(col2 = "other_col") 114 | 115 | test_that("nest_unnest", { 116 | 117 | 118 | 119 | tt %>% 120 | nest(data = -sample) %>% 121 | unnest(data) %>% 122 | expect_warning(warning_message) 123 | 124 | }) 125 | 126 | 127 | test_that("unite separate", { 128 | un <- 129 | tt %>% 130 | unite("new_col", c(condition, sample), sep = ":", remove = FALSE) %>% 131 | expect_warning(warning_message) 132 | 133 | un %>% 134 | separate( 135 | col = feature, 136 | into = c("orig.ident", "condition"), 137 | sep = ":", remove = FALSE 138 | ) %>% 139 | expect_warning(warning_message) 140 | 141 | 142 | }) 143 | 144 | test_that("extract", { 145 | tt %>% 146 | extract(sample, 147 | into = "g", 148 | regex = "other_([a-z]+)", 149 | convert = TRUE, remove=FALSE) %>% 150 | expect_warning(warning_message) 151 | }) 152 | 153 | test_that("pivot_longer", { 154 | tt %>% 155 | pivot_longer(c(sample, condition), 156 | names_to = "name", 157 | values_to = "value") %>% 158 | class() %>% 159 | .[1] %>% 160 | expect_equal("tbl_df") 161 | }) 162 | 163 | test_that("pivot_wider", { 164 | tt %>% 165 | pivot_wider(names_from=feature, values_from=counts) %>% 166 | class() %>% 167 | .[1] %>% 168 | expect_equal("tbl_df") 169 | }) 170 | -------------------------------------------------------------------------------- /tests/testthat/test-tidyr_methods.R: -------------------------------------------------------------------------------- 1 | context("tidyr test") 2 | 3 | library(magrittr) 4 | library(tidySummarizedExperiment) 5 | 6 | tt <- 7 | pasilla %>% 8 | mutate(col2="other_col") 9 | 10 | # Create SummarizedExperiment object for testing 11 | nrows <- 200 12 | ncols <- 6 13 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) 14 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)), 15 | 
IRanges(floor(runif(200, 1e5, 1e6)), width=100), 16 | strand=sample(c("+", "-"), 200, TRUE), 17 | feature_id=sprintf("ID%03d", 1:200)) 18 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), 19 | row.names=LETTERS[1:6]) 20 | rse <- SummarizedExperiment(assays=SimpleList(counts=counts), 21 | rowRanges=rowRanges, colData=colData) 22 | rownames(rse) <- sprintf("ID%03d", 1:200) 23 | 24 | test_that("RangedSummarizedExperiment_nest_unnest", { 25 | tryCatch({ 26 | rse_nested <- rse %>% 27 | nest(data = -.sample) 28 | 29 | rse_unnested <- rse_nested %>% 30 | unnest(data) 31 | }) 32 | 33 | expect_equal(rse@colData, rse_unnested@colData) 34 | expect_equal(rse@rowRanges, rse_unnested@rowRanges) 35 | }) 36 | 37 | test_that("nest_unnest", { 38 | 39 | y <- tibble::tibble( 40 | .sample = c( 41 | "untrt1", 42 | "untrt2", 43 | "untrt3", 44 | "untrt4", 45 | "trt1", 46 | "trt2", 47 | "trt3" 48 | ), 49 | counts = c(0L, 0L, 0L, 0L, 0L, 0L, 1L) 50 | ) 51 | 52 | x <- tt %>% 53 | nest(data = -condition) %>% 54 | unnest(data) %>% 55 | head(n = 1) %>% 56 | select(.sample, counts) 57 | 58 | 59 | expect_equal(x, y) 60 | }) 61 | 62 | test_that("nest_unnest_slice_1",{ 63 | 64 | tt %>% 65 | nest(data = -condition) %>% 66 | slice(1) %>% 67 | unnest(data) 68 | 69 | }) 70 | 71 | test_that("nest_0_samples",{ 72 | 73 | rowData(tt)$n = rep(1, nrow(tt)) 74 | 75 | tt[,0] |> 76 | nest(data = -n) 77 | 78 | }) 79 | 80 | test_that("unite separate", { 81 | un <- tt %>% unite("new_col", c(condition, col2), sep = ":") 82 | 83 | un %>% 84 | select(new_col) %>% 85 | slice(1) %>% 86 | pull(new_col) %>% 87 | expect_equal("untreated:other_col") 88 | 89 | se <- 90 | un %>% 91 | separate( 92 | col = new_col, 93 | into = c( "condition", "col2"), 94 | sep = ":" 95 | ) 96 | 97 | se %>% 98 | select(.sample) %>% 99 | ncol() %>% 100 | expect_equal(1) 101 | }) 102 | 103 | test_that("extract()", { 104 | tt %>% 105 | extract(col2, 106 | into="g", regex="other_([a-z]+)", 107 | convert = TRUE) %>% 108 | pull(g) %>% 109 
| class() %>% 110 | expect_equal("character") 111 | }) 112 | 113 | test_that("pivot_longer()", { 114 | tt %>% 115 | pivot_longer(c(.sample, condition), names_to = "name", 116 | values_to = "value") %>% 117 | class() %>% 118 | .[1] %>% 119 | expect_equal("tbl_df") 120 | }) 121 | 122 | test_that("nest_unnest_by_feature_chunk", { 123 | 124 | chunks = 125 | tibble::tibble(.feature = rownames(tt)) |> 126 | mutate(chunk___ = c( 127 | rep(1, times = floor(dplyr::n()/2)), 128 | rep(2, times = ceiling(dplyr::n()/2)) 129 | )) 130 | 131 | statistics_for_features = 132 | tibble::tibble(.feature = rownames(tt)) |> 133 | mutate(pvalue = runif( dplyr::n(), min = 0, max = 1)) 134 | 135 | tt = tt |> left_join(statistics_for_features) 136 | 137 | tt_unnested = 138 | tt |> 139 | left_join(chunks) |> 140 | nest(se_chunk = -chunk___) |> 141 | unnest(se_chunk) |> 142 | select(-chunk___) 143 | 144 | identical(tt, tt_unnested) 145 | 146 | }) -------------------------------------------------------------------------------- /vignettes/introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Overview of the tidySummarizedExperiment package" 3 | author: "Stefano Mangiola" 4 | date: "`r Sys.Date()`" 5 | package: tidySummarizedExperiment 6 | output: 7 | BiocStyle::html_document: 8 | toc_float: true 9 | bibliography: tidySummarizedExperiment.bib 10 | vignette: > 11 | %\VignetteEngine{knitr::knitr} 12 | %\VignetteIndexEntry{Overview of the tidySummarizedExperiment package} 13 | %\usepackage[UTF-8]{inputenc} 14 | --- 15 | 16 | 17 | [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) 18 | 19 | 20 | 21 | **Brings SummarizedExperiment to the tidyverse!** 22 | 23 | website: [stemangiola.github.io/tidySummarizedExperiment/](https://stemangiola.github.io/tidySummarizedExperiment/) 24 | 25 | Please also have a look at 26 | 27 | - 
[tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) for tidy manipulation of SingleCellExperiment objects 28 | - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy manipulation of Seurat objects 29 | - [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy analysis of RNA sequencing data 30 | - [nanny](https://github.com/stemangiola/nanny) for tidy high-level data analysis and manipulation 31 | - [tidygate](https://github.com/stemangiola/tidygate) for adding custom gate information to your tibble 32 | - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles 33 | 34 | 35 | ```{r, echo=FALSE, include=FALSE} 36 | library(knitr) 37 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 38 | ``` 39 | 40 | # Introduction 41 | 42 | tidySummarizedExperiment provides a bridge between Bioconductor [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html) [@morgan2020summarized] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the 43 | Bioconductor *SummarizedExperiment* object as a tidyverse tibble, and provides SummarizedExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. This allows users to get the best of both Bioconductor and tidyverse worlds. 
44 | 45 | 46 | ## Functions/utilities available 47 | 48 | SummarizedExperiment-compatible Functions | Description 49 | ------------ | ------------- 50 | `all` | After all, `tidySummarizedExperiment` is a SummarizedExperiment object, just better 51 | 52 | tidyverse Packages | Description 53 | ------------ | ------------- 54 | `dplyr` | Almost all `dplyr` APIs, as for any tibble 55 | `tidyr` | Almost all `tidyr` APIs, as for any tibble 56 | `ggplot2` | `ggplot`, as for any tibble 57 | `plotly` | `plot_ly`, as for any tibble 58 | 59 | Utilities | Description 60 | ------------ | ------------- 61 | `as_tibble` | Convert sample- and feature-wise information to a `tbl_df` 62 | 63 | ## Installation 64 | 65 | ```{r, eval=FALSE} 66 | if (!requireNamespace("BiocManager", quietly=TRUE)) { 67 | install.packages("BiocManager") 68 | } 69 | 70 | BiocManager::install("tidySummarizedExperiment") 71 | ``` 72 | 73 | From GitHub (development) 74 | ```{r, eval=FALSE} 75 | devtools::install_github("stemangiola/tidySummarizedExperiment") 76 | ``` 77 | 78 | Load libraries used in the examples. 79 | 80 | ```{r} 81 | library(ggplot2) 82 | library(tidySummarizedExperiment) 83 | ``` 84 | 85 | 86 | # Create `tidySummarizedExperiment`, the best of both worlds! 87 | 88 | This is a SummarizedExperiment object but it is evaluated as a tibble. So it is fully compatible both with SummarizedExperiment and tidyverse APIs. 89 | 90 | ```{r} 91 | pasilla_tidy <- tidySummarizedExperiment::pasilla 92 | ``` 93 | 94 | **It looks like a tibble** 95 | 96 | ```{r} 97 | pasilla_tidy 98 | ``` 99 | 100 | **But it is a SummarizedExperiment object after all** 101 | 102 | ```{r} 103 | assays(pasilla_tidy) 104 | ``` 105 | 106 | 107 | # Tidyverse commands 108 | 109 | We can use tidyverse commands to explore the tidy SummarizedExperiment object. 110 | 111 | We can use `slice` to choose rows by position, for example to choose the first row. 
112 | 113 | ```{r} 114 | pasilla_tidy %>% 115 | slice(1) 116 | ``` 117 | 118 | We can use `filter` to choose rows by criteria. 119 | 120 | ```{r} 121 | pasilla_tidy %>% 122 | filter(condition == "untreated") 123 | ``` 124 | 125 | We can use `select` to choose columns. 126 | 127 | ```{r} 128 | pasilla_tidy %>% 129 | select(.sample) 130 | ``` 131 | 132 | We can use `count` to count how many rows we have for each sample. 133 | 134 | ```{r} 135 | pasilla_tidy %>% 136 | count(.sample) 137 | ``` 138 | 139 | We can use `distinct` to see what distinct sample information we have. 140 | 141 | ```{r} 142 | pasilla_tidy %>% 143 | distinct(.sample, condition, type) 144 | ``` 145 | 146 | We could use `rename` to rename a column. For example, to modify the type column name. 147 | 148 | ```{r} 149 | pasilla_tidy %>% 150 | rename(sequencing=type) 151 | ``` 152 | 153 | We could use `mutate` to create a column. For example, we could create a new type column that contains single 154 | and paired instead of single_end and paired_end. 155 | 156 | ```{r} 157 | pasilla_tidy %>% 158 | mutate(type=gsub("_end", "", type)) 159 | ``` 160 | 161 | We could use `unite` to combine multiple columns into a single column. 162 | 163 | ```{r} 164 | pasilla_tidy %>% 165 | unite("group", c(condition, type)) 166 | ``` 167 | 168 | We can also combine commands with the tidyverse pipe `%>%`. 169 | 170 | For example, we could combine `group_by` and `summarise` to get the total counts for each sample. 171 | 172 | ```{r} 173 | pasilla_tidy %>% 174 | group_by(.sample) %>% 175 | summarise(total_counts=sum(counts)) 176 | ``` 177 | 178 | We could combine `group_by`, `mutate` and `filter` to get the transcripts with mean count > 0. 
179 | 180 | ```{r} 181 | pasilla_tidy %>% 182 | group_by(.feature) %>% 183 | mutate(mean_count=mean(counts)) %>% 184 | filter(mean_count > 0) 185 | ``` 186 | 187 | 188 | # Plotting 189 | 190 | ```{r} 191 | my_theme <- 192 | list( 193 | scale_fill_brewer(palette="Set1"), 194 | scale_color_brewer(palette="Set1"), 195 | theme_bw() + 196 | theme( 197 | panel.border=element_blank(), 198 | axis.line=element_line(), 199 | panel.grid.major=element_line(size=0.2), 200 | panel.grid.minor=element_line(size=0.1), 201 | text=element_text(size=12), 202 | legend.position="bottom", 203 | aspect.ratio=1, 204 | strip.background=element_blank(), 205 | axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)), 206 | axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10)) 207 | ) 208 | ) 209 | ``` 210 | 211 | We can treat `pasilla_tidy` as a normal tibble for plotting. 212 | 213 | Here we plot the distribution of counts per sample. 214 | 215 | ```{r plot1} 216 | pasilla_tidy %>% 217 | ggplot(aes(counts + 1, group=.sample, color=`type`)) + 218 | geom_density() + 219 | scale_x_log10() + 220 | my_theme 221 | ``` 222 | 223 | # Session Info 224 | 225 | ```{r} 226 | sessionInfo() 227 | ``` 228 | 229 | # References 230 | -------------------------------------------------------------------------------- /vignettes/tidySummarizedExperiment.bib: -------------------------------------------------------------------------------- 1 | @Manual{morgan2020summarized, 2 | title = {SummarizedExperiment: SummarizedExperiment container}, 3 | author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès}, 4 | year = {2020}, 5 | note = {R package version 1.19.6}, 6 | } 7 | 8 | @article{wickham2019welcome, 9 | title={Welcome to the Tidyverse}, 10 | author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others}, 11 | 
journal={Journal of Open Source Software}, 12 | volume={4}, 13 | number={43}, 14 | pages={1686}, 15 | year={2019} 16 | } 17 | --------------------------------------------------------------------------------