├── .github ├── .gitignore └── workflows │ ├── test-coverage.yaml │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── .Rbuildignore ├── vignettes ├── images │ └── icons │ │ ├── tip.png │ │ ├── up.png │ │ ├── wip.png │ │ ├── home.png │ │ ├── next.png │ │ ├── note.png │ │ ├── prev.png │ │ ├── caution.png │ │ ├── download.png │ │ ├── example.png │ │ ├── warning.png │ │ ├── wip-full.png │ │ ├── INFO.txt │ │ ├── callouts │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ ├── 9.png │ │ ├── 10.png │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.png │ │ ├── 14.png │ │ └── 15.png │ │ ├── important.png │ │ └── callout-border.png └── custom.css ├── inst ├── presentations │ └── facile-overview.key ├── testdata │ ├── test-sample-covariates.rds │ ├── generate-TCGA-tesdata.R │ └── expected-meta.yaml ├── extdata │ ├── ensembl-v75-gene-info.csv.gz │ ├── exampleFacileDataSet │ │ ├── data.h5 │ │ ├── data.sqlite │ │ ├── custom-annotation │ │ │ └── README.txt │ │ └── meta.yaml │ └── test │ │ └── sample-meta-definitions.yaml └── scripts │ └── retrieve-parathyroidSE-gene-info.R ├── man ├── figures │ └── her2_cnv_vs_expression.png ├── gene_info_tbl.Rd ├── set_class.Rd ├── assay_units.Rd ├── fetch_sample_statistics.default.Rd ├── sample_stats_tbl.Rd ├── exampleFacileDataSet.Rd ├── feature_types.Rd ├── executeSQL.Rd ├── sqlFromFile.Rd ├── adata.Rd ├── nameit.Rd ├── is.FacileDataSet.Rd ├── pdata.Rd ├── fdata.Rd ├── assay_feature_type.Rd ├── pdata_metadata.Rd ├── primary_key.Rd ├── with_feature_info.Rd ├── features.Rd ├── labeled.Rd ├── hdf5fn.Rd ├── has_feature_type.Rd ├── spread_covariates.Rd ├── flog_level.Rd ├── covariate_definitions.Rd ├── fetch_sample_statistics.Rd ├── samples.Rd ├── fetch_assay_score.Rd ├── feature_name_map.Rd ├── dbfn.Rd ├── eavdef_for_column.Rd ├── initializeFacileDataSet.Rd ├── validate_covariate_def_list.Rd ├── assay_sample_info.Rd ├── assay_info_over_samples.Rd ├── eav_encode.Rd ├── 
extract_transcribed_info_from_ensembl_gtf.Rd ├── create_assay_feature_descriptor.Rd ├── check_facile_data_set.Rd ├── conform_data_frame.Rd ├── eav_encode_covariate.Rd ├── organism.FacileDataSet.Rd ├── covariate_meta_info.Rd ├── check_facile_data_store.Rd ├── test-helpers.Rd ├── ds_annot.Rd ├── append_facile_table.Rd ├── parse_sample_criterion.Rd ├── assemble_example_dataset.Rd ├── fds.Rd ├── dot-level_biotypes.Rd ├── samples.FacileDataSet.Rd ├── as_facile_frame.Rd ├── fetch_assay_score.FacileDataSet.Rd ├── join_samples.Rd ├── append_facile_feature_info.Rd ├── assay_feature_info.FacileDataSet.Rd ├── assay_info.Rd ├── save_custom_sample_covariates.Rd ├── filter_features.FacileDataSet.Rd ├── reexports.Rd ├── cSurv.Rd ├── as.EAVtable.Rd ├── fetch_samples.FacileDataSet.Rd ├── fetch_custom_sample_covariates.FacileDataSet.Rd ├── infer_feature_type.Rd ├── check_categorical.Rd ├── eav_metadata_merge.Rd ├── facet_frame.Rd ├── spread_assay_data.Rd ├── normalize_assay_data.Rd ├── meta-info.Rd ├── FacileData-package.Rd ├── fetch_sample_statistics.FacileDataSet.Rd ├── summary.eav_covariates.Rd ├── cast_covariate.Rd ├── biocbox.Rd ├── freplace_na.Rd ├── simple-eav-decode-functions.Rd ├── facilitate.Rd ├── assertions.Rd ├── eav-right-censor.Rd ├── filter_samples.FacileDataSet.Rd ├── flog.Rd ├── addFacileAssaySet.Rd ├── sample-covariates.Rd ├── remove_batch_effect.Rd ├── FacileDataSet.Rd ├── fetch_assay_data.Rd └── as.BiocContainer.Rd ├── tests ├── testthat.R └── testthat │ ├── test-samples.R │ ├── test-csurv.R │ ├── test-feature-info.R │ ├── test-assay-normalization.R │ ├── test-feature-types.R │ ├── test-FacileDataSet.R │ ├── test-replace_na.R │ ├── test-as.FacileDataSet.R │ ├── test-EAV.R │ ├── test-assay-data.R │ ├── test-biocbox.R │ ├── test-entity-attribute-value.R │ └── test-bioc-assay-containers.R ├── codecov.yml ├── TODO.Rmd ├── R ├── NSE-filter-features.R ├── package.R ├── sql.R ├── test-helpers.R ├── sample-info.R ├── feature-types.R ├── csurv.R ├── utilities.R ├── zzz.R 
├── replace_na.R ├── NSE-filter-samples.R └── assemble_example_dataset.R ├── DESCRIPTION ├── NEWS.md ├── pkgdown └── extra.css ├── README.md └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^docs$ 4 | ^\.github$ 5 | -------------------------------------------------------------------------------- /vignettes/images/icons/tip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/tip.png -------------------------------------------------------------------------------- /vignettes/images/icons/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/up.png -------------------------------------------------------------------------------- /vignettes/images/icons/wip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/wip.png -------------------------------------------------------------------------------- /vignettes/images/icons/home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/home.png -------------------------------------------------------------------------------- 
/vignettes/images/icons/next.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/next.png -------------------------------------------------------------------------------- /vignettes/images/icons/note.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/note.png -------------------------------------------------------------------------------- /vignettes/images/icons/prev.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/prev.png -------------------------------------------------------------------------------- /vignettes/images/icons/caution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/caution.png -------------------------------------------------------------------------------- /vignettes/images/icons/download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/download.png -------------------------------------------------------------------------------- /vignettes/images/icons/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/example.png -------------------------------------------------------------------------------- /vignettes/images/icons/warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/warning.png 
-------------------------------------------------------------------------------- /vignettes/images/icons/wip-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/wip-full.png -------------------------------------------------------------------------------- /vignettes/images/icons/INFO.txt: -------------------------------------------------------------------------------- 1 | These icons were taken from asciidoc: 2 | 3 | http://www.methods.co.nz/asciidoc/images/icons/ 4 | -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/1.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/2.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/3.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/4.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/5.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/6.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/7.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/8.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/9.png -------------------------------------------------------------------------------- /vignettes/images/icons/important.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/important.png -------------------------------------------------------------------------------- /inst/presentations/facile-overview.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/presentations/facile-overview.key 
-------------------------------------------------------------------------------- /inst/testdata/test-sample-covariates.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/testdata/test-sample-covariates.rds -------------------------------------------------------------------------------- /man/figures/her2_cnv_vs_expression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/man/figures/her2_cnv_vs_expression.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/10.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/11.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/12.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/13.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/14.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/14.png -------------------------------------------------------------------------------- /vignettes/images/icons/callouts/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/15.png -------------------------------------------------------------------------------- /inst/extdata/ensembl-v75-gene-info.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/extdata/ensembl-v75-gene-info.csv.gz -------------------------------------------------------------------------------- /inst/extdata/exampleFacileDataSet/data.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/extdata/exampleFacileDataSet/data.h5 -------------------------------------------------------------------------------- /vignettes/images/icons/callout-border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callout-border.png -------------------------------------------------------------------------------- /inst/extdata/exampleFacileDataSet/data.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/extdata/exampleFacileDataSet/data.sqlite -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library("testthat") 2 | library("checkmate") 3 | library("FacileData") 4 | 
library("magrittr") 5 | library("reshape2") 6 | library("tidyr") 7 | library("dplyr") 8 | 9 | test_check("FacileData") 10 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 5% 9 | patch: 10 | default: 11 | target: auto 12 | threshold: 5% 13 | -------------------------------------------------------------------------------- /inst/extdata/exampleFacileDataSet/custom-annotation/README.txt: -------------------------------------------------------------------------------- 1 | This is a dummy file to ensure that this directory is maintained in the VCS. 2 | 3 | By default a FacileDataSet assumes that the custom annotation directory is a 4 | `custom-annotation` directory within the FacileDataSet data directory. 5 | -------------------------------------------------------------------------------- /TODO.Rmd: -------------------------------------------------------------------------------- 1 | * Do not depend on `FacileAtezo` artifacts in this package. 2 | - Create a slimmer, exemplar database, which FacileDb() connects to 3 | (largely for testing) 4 | - Change the name of the options in .onLoad 5 | - Change the sample-meta-definitions.yaml file to be appropriate for the 6 | new mini-test-db that FacileDb creates 7 | -------------------------------------------------------------------------------- /man/gene_info_tbl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/db-and-table-functions.R 3 | \name{gene_info_tbl} 4 | \alias{gene_info_tbl} 5 | \title{Mimics the old \code{gene_info} table.} 6 | \usage{ 7 | gene_info_tbl(x) 8 | } 9 | \description{ 10 | Mimics the old \code{gene_info} table.
11 | } 12 | -------------------------------------------------------------------------------- /man/set_class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{set_class} 4 | \alias{set_class} 5 | \title{Set the class of an object and return the object} 6 | \usage{ 7 | set_class(x, .class, ...) 8 | } 9 | \description{ 10 | Set the class of an object and return the object 11 | } 12 | -------------------------------------------------------------------------------- /man/assay_units.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{assay_units} 4 | \alias{assay_units} 5 | \title{Units of measure in an assay} 6 | \usage{ 7 | assay_units(x, assay_name, normalized = FALSE, abbreviate = FALSE, ...) 8 | } 9 | \value{ 10 | string 11 | } 12 | \description{ 13 | Units of measure in an assay 14 | } 15 | -------------------------------------------------------------------------------- /man/fetch_sample_statistics.default.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{fetch_sample_statistics.default} 4 | \alias{fetch_sample_statistics.default} 5 | \title{Issue #2} 6 | \usage{ 7 | \method{fetch_sample_statistics}{default}(x, samples = NULL, semi = TRUE, assay_name = NULL) 8 | } 9 | \description{ 10 | Issue #2 11 | } 12 | -------------------------------------------------------------------------------- /man/sample_stats_tbl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/db-and-table-functions.R 3 | \name{sample_stats_tbl} 4 | \alias{sample_stats_tbl} 5 
| \title{Mimics old sample_stats table} 6 | \usage{ 7 | sample_stats_tbl(x) 8 | } 9 | \description{ 10 | This function needs to be removed and the code that relies on 11 | sample_stats_tbl be updated. 12 | } 13 | -------------------------------------------------------------------------------- /man/exampleFacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/test-helpers.R 3 | \name{exampleFacileDataSet} 4 | \alias{exampleFacileDataSet} 5 | \title{Retrieves an example FacileDataSet} 6 | \usage{ 7 | exampleFacileDataSet() 8 | } 9 | \description{ 10 | A subset of the TCGA data from the BLCA and COAD indications is provided 11 | as a FacileDataSet. 12 | } 13 | -------------------------------------------------------------------------------- /man/feature_types.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{feature_types} 4 | \alias{feature_types} 5 | \title{Enumerate the types of feature stored in a FacileDataSet} 6 | \usage{ 7 | feature_types(x) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{FacileDataSet}} 11 | } 12 | \description{ 13 | Enumerate the types of feature stored in a FacileDataSet 14 | } 15 | -------------------------------------------------------------------------------- /man/executeSQL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sql.R 3 | \name{executeSQL} 4 | \alias{executeSQL} 5 | \title{Execute multiple queries against a database} 6 | \usage{ 7 | executeSQL(con, sql) 8 | } 9 | \arguments{ 10 | \item{con}{database handle} 11 | 12 | \item{sql}{list of charvecs (SQL statements)} 13 | } 14 | \description{ 15 | Execute multiple queries against a database 16 | } 17 
| -------------------------------------------------------------------------------- /man/sqlFromFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sql.R 3 | \name{sqlFromFile} 4 | \alias{sqlFromFile} 5 | \title{Utility function to send more than one sql command to the database} 6 | \usage{ 7 | sqlFromFile(file) 8 | } 9 | \arguments{ 10 | \item{file}{single character, name of file with SQL statements} 11 | } 12 | \description{ 13 | Copied from http://stackoverflow.com/questions/18914283 14 | } 15 | -------------------------------------------------------------------------------- /man/adata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.FacileDataSet.R 3 | \name{adata} 4 | \alias{adata} 5 | \title{Bioc-container specific assay data extraction functions} 6 | \usage{ 7 | adata(x, assay = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList} 11 | 12 | \item{...}{additional args, ignored for now} 13 | } 14 | \description{ 15 | Get assay matrix 16 | } 17 | -------------------------------------------------------------------------------- /man/nameit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{nameit} 4 | \alias{nameit} 5 | \title{Ensures that a vector has names for all elements if it has names for any} 6 | \usage{ 7 | nameit(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{an object with names} 11 | } 12 | \value{ 13 | \code{x} with all elements either being uniquely named, or NULL 14 | } 15 | \description{ 16 | If the vector is not named, it remains that way 17 | } 18 | -------------------------------------------------------------------------------- /tests/testthat/test-samples.R: -------------------------------------------------------------------------------- 1 | context("samples(FacileDataSet)") 2 | # Compares samples() against the (dataset, sample_id) rows read directly from the sample_info table. 3 | test_that("samples() is a facile_frame", { 4 | efds <- exampleFacileDataSet() 5 | # Both sides are collected and sorted by sample_id so that row order cannot affect the comparison. 6 | expected <- dplyr::tbl(efds$con, 'sample_info') %>% 7 | collect() %>% 8 | select(dataset, sample_id) %>% 9 | arrange(sample_id) 10 | samples. <- samples(efds) %>% 11 | collect() %>% 12 | arrange(sample_id) 13 | expect_equal(samples., expected, check.attributes = FALSE) # values only; attributes are deliberately not compared 14 | expect_s3_class(fds(samples.), "FacileDataSet") # NOTE(review): the title mentions facile_frame, but no class check is made on samples. itself -- confirm intent 15 | }) 16 | -------------------------------------------------------------------------------- /man/is.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{is.FacileDataSet} 4 | \alias{is.FacileDataSet} 5 | \title{Class and validity checker for FacileDataSet} 6 | \usage{ 7 | is.FacileDataSet(x) 8 | } 9 | \arguments{ 10 | \item{x}{object to test} 11 | } 12 | \value{ 13 | \code{TRUE}/\code{FALSE} indicating that \code{x} nominally "looks like" a 14 | \code{FacileDataSet} 15 | } 16 | \description{ 17 | Class and validity checker for FacileDataSet 18 | } 19 | -------------------------------------------------------------------------------- /man/pdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.FacileDataSet.R 3 | \name{pdata} 4 | \alias{pdata} 5 | \title{Bioc-container specific pData extraction functions} 6 | \usage{ 7
| pdata(x, covariate_metadata = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList} 11 | 12 | \item{...}{additional args, ignored for now} 13 | } 14 | \description{ 15 | This is an internal function, but exported so it is registered and found 16 | post R 4.0 17 | } 18 | -------------------------------------------------------------------------------- /man/fdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.FacileDataSet.R 3 | \name{fdata} 4 | \alias{fdata} 5 | \title{BioC-container specific fData extraction functions} 6 | \usage{ 7 | fdata(x, validate = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList} 11 | 12 | \item{validate}{single logical, check results} 13 | 14 | \item{...}{additional args (ignored for now)} 15 | } 16 | \description{ 17 | BioC-container specific fData extraction functions 18 | } 19 | -------------------------------------------------------------------------------- /man/assay_feature_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R 3 | \name{assay_feature_type} 4 | \alias{assay_feature_type} 5 | \title{Returns the feature_type for a given assay} 6 | \usage{ 7 | assay_feature_type(x, assay_name) 8 | } 9 | \arguments{ 10 | \item{x}{\code{FacileDataSet}} 11 | 12 | \item{assay_name}{the name of the assay} 13 | } 14 | \description{ 15 | The elements of the rows for a given assay all correspond to a particular 16 | feature space (ie. 
feature_type='entrez') 17 | } 18 | -------------------------------------------------------------------------------- /man/pdata_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.FacileDataSet.R 3 | \name{pdata_metadata} 4 | \alias{pdata_metadata} 5 | \title{Bioc-container specific pData extraction functions} 6 | \usage{ 7 | pdata_metadata(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList} 11 | 12 | \item{...}{additional args, ignored for now} 13 | } 14 | \description{ 15 | Get metadata on columns of sample info data.frame (label, etc.) for 16 | inclusion in metadata YAML. 17 | } 18 | -------------------------------------------------------------------------------- /man/primary_key.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/db-and-table-functions.R 3 | \name{primary_key} 4 | \alias{primary_key} 5 | \title{Query a table to identify its primary key(s)} 6 | \usage{ 7 | primary_key(x, table_name) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{FacileDataSet} or \code{SQLiteConnection}} 11 | 12 | \item{table_name}{the name of the table to query} 13 | } 14 | \value{ 15 | a character vector of primary keys 16 | } 17 | \description{ 18 | Query a table to identify its primary key(s) 19 | } 20 | -------------------------------------------------------------------------------- /man/with_feature_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{with_feature_info} 4 | \alias{with_feature_info} 5 | \title{Append feature information columns to (feature-rows)} 6 | \usage{ 7 | with_feature_info(x, covariates = NULL, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{a data.frame with feature descriptor columns (feature_id, feature_type)} 11 | } 12 | \value{ 13 | \code{x} fattened with the columns asked for 14 | } 15 | \description{ 16 | Append feature information columns to (feature-rows) 17 | } 18 | -------------------------------------------------------------------------------- /man/features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{features} 4 | \alias{features} 5 | \title{Returns a table of information about the features (from an assay, or ...)} 6 | \usage{ 7 | features(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a facile object} 11 | } 12 | \value{ 13 | a tibble containing feature_id, feature_type, and whatever other 14 | columns are appropriate given \code{x} 15 | } 16 | \description{ 17 | Returns a table of information about the features (from an assay, or ...) 18 | } 19 | -------------------------------------------------------------------------------- /man/labeled.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{name} 4 | \alias{name} 5 | \title{Labeled acts like an interface to reactive modules.} 6 | \usage{ 7 | name(x, ...) 8 | } 9 | \description{ 10 | Modules that implement this interface must return \code{label} and \code{name} reactive 11 | elements within them.
12 | } 13 | \details{ 14 | We use these when something (like a \code{assayFeatureSelect}) needs 15 | a "computer friendly" name for itself (\code{name()}), or a more human readable 16 | name (\code{label()}) 17 | } 18 | -------------------------------------------------------------------------------- /R/NSE-filter-features.R: -------------------------------------------------------------------------------- 1 | #' Filter the rows of the feature_info_tbl of a FacileDataSet. 2 | #' 3 | #' This feature is only really meant to be used 4 | #' interactively, and with extreme caution ... programmatically specifying 5 | #' column names in the feature table, for instance, does not work right now. 6 | #' 7 | #' TODO: Use tidyeval 8 | #' @return the filtered feature table, after being passed through \code{set_fds()} together with \code{x} 9 | #' @export 10 | #' @param x A \code{FacileDataSet} 11 | #' @param ... NSE clauses to use in \code{\link[dplyr]{filter}} expressions 12 | #' @family API 13 | filter_features.FacileDataSet <- function(x, ...) { 14 | feature_info_tbl(x) %>% filter(...) %>% set_fds(x) 15 | } 16 | -------------------------------------------------------------------------------- /man/hdf5fn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{hdf5fn} 4 | \alias{hdf5fn} 5 | \title{Get location of the FacileDataSet HDF5 file} 6 | \usage{ 7 | hdf5fn(x, mustWork = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{FacileDataSet} 11 | 12 | \item{mustWork}{single logical} 13 | } 14 | \value{ 15 | path to HDF5 file 16 | } 17 | \description{ 18 | Get location of the FacileDataSet HDF5 file 19 | } 20 | \seealso{ 21 | Other FacileDataSet: 22 | \code{\link{FacileDataSet}()}, 23 | \code{\link{dbfn}()}, 24 | \code{\link{meta_file}()} 25 | } 26 | \concept{FacileDataSet} 27 | -------------------------------------------------------------------------------- /man/has_feature_type.Rd:
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{has_feature_type} 4 | \alias{has_feature_type} 5 | \title{Test if a given feature type is stored in a FacileDataSet} 6 | \usage{ 7 | has_feature_type(x, feature_type) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{FacileDataSet}} 11 | 12 | \item{feature_type}{a character vector of potential feature types} 13 | } 14 | \value{ 15 | logical vector indicating whether or not a given \code{feature_type} 16 | is stored in \code{x} 17 | } 18 | \description{ 19 | Test if a given feature type is stored in a FacileDataSet 20 | } 21 | -------------------------------------------------------------------------------- /man/spread_covariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample-covariates.R 3 | \name{spread_covariates} 4 | \alias{spread_covariates} 5 | \title{Spreads the covariates returned from database into wide data.frame} 6 | \usage{ 7 | spread_covariates(x, .fds = fds(x), cov.def = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{output from \code{fetch_sample_covariates}} 11 | 12 | \item{.fds}{A \code{FacileDataSet} object} 13 | } 14 | \value{ 15 | a wide \code{tbl_df}-like object 16 | } 17 | \description{ 18 | Samples that did not have a value for a specific covariate are assigned to 19 | have NA. 
20 | } 21 | -------------------------------------------------------------------------------- /man/flog_level.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logging.R 3 | \name{flog_level} 4 | \alias{flog_level} 5 | \title{Retrieves the currently set logging level} 6 | \usage{ 7 | flog_level(namespace = NULL) 8 | } 9 | \arguments{ 10 | \item{namespace}{Package (or whoever) can provide a value here to set the 11 | level they want to listen to. If this is \code{NULL} (default), the top level 12 | \code{facile.log.level} value will be used.} 13 | } 14 | \value{ 15 | the logging level, as an integer (from \code{FacileData:::.flog_levels}) 16 | } 17 | \description{ 18 | Retrieves the currently set logging level 19 | } 20 | -------------------------------------------------------------------------------- /man/covariate_definitions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{covariate_definitions} 4 | \alias{covariate_definitions} 5 | \title{Get description of sample metadata columns} 6 | \usage{ 7 | covariate_definitions(x, as.list = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{FacileDataStore} 11 | 12 | \item{as.list}{single logical, return tibble or list} 13 | } 14 | \value{ 15 | meta information about the sample covariates in \code{x} 16 | } 17 | \description{ 18 | Descriptions of the sample covariates can be specified in a FacileDataSet's 19 | \code{meta.yaml} file. This function returns those.
20 | } 21 | -------------------------------------------------------------------------------- /tests/testthat/test-csurv.R: -------------------------------------------------------------------------------- 1 | context("Coercion among Surv, cSurv and character") 2 | 3 | test_that("We can convert among Surv, cSurv and character", { 4 | # Every round trip below starts from this one Surv object 5 | surv <- Surv(c(14, 12, 3), event = c(1, 0, 1)) 6 | 7 | # Surv -> character -> Surv 8 | expect_identical(as(as(surv, "character"), "Surv"), surv) 9 | 10 | # Surv -> cSurv -> Surv 11 | csurv <- as(surv, "cSurv") 12 | expect_identical(as(csurv, "Surv"), surv) 13 | 14 | # cSurv -> character, then back to both cSurv and Surv 15 | chr <- as(csurv, "character") 16 | expect_identical(as(chr, "cSurv"), csurv) 17 | expect_identical(as(chr, "Surv"), surv) 18 | }) 19 | -------------------------------------------------------------------------------- /man/fetch_sample_statistics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{fetch_sample_statistics} 4 | \alias{fetch_sample_statistics} 5 | \title{NOTE: fetch_sample_statistics -> \code{fetch_assay_covariates} 6 | Issue #2} 7 | \usage{ 8 | fetch_sample_statistics(x, samples = NULL, semi = TRUE, assay_name = NULL, ...)
9 | } 10 | \description{ 11 | NOTE: fetch_sample_statistics -> \code{fetch_assay_covariates} 12 | Issue #2 13 | } 14 | \seealso{ 15 | Other FacileInterface: 16 | \code{\link{facet_frame.FacileDataSet}()}, 17 | \code{\link{fetch_assay_score}()}, 18 | \code{\link{fetch_sample_covariates}()}, 19 | \code{\link{samples}()} 20 | } 21 | \concept{FacileInterface} 22 | -------------------------------------------------------------------------------- /inst/scripts/retrieve-parathyroidSE-gene-info.R: -------------------------------------------------------------------------------- 1 | # Retrieves gene-level annotations (ensembl id, HGNC symbol, biotype) for the 2 | # rows of parathyroidGenesSE from the feb2014 Ensembl archive via biomaRt and 3 | # writes them to csv files under inst/extdata. 4 | library(SummarizedExperiment) 5 | 6 | data("parathyroidGenesSE", package = "parathyroidSE") 7 | 8 | bm <- loadNamespace("biomaRt") 9 | mart <- bm$useMart( 10 | host = "feb2014.archive.ensembl.org", 11 | biomart = "ENSEMBL_MART_ENSEMBL", 12 | dataset = "hsapiens_gene_ensembl") 13 | mart.info <- bm$getBM( 14 | attributes = c("ensembl_gene_id", "hgnc_symbol", "gene_biotype"), 15 | filters = "ensembl_gene_id", 16 | values = rownames(parathyroidGenesSE), 17 | mart = mart) 18 | 19 | write.csv(mart.info, "inst/extdata/parathyroidSE-gene-info.csv", 20 | row.names=FALSE) 21 | # NOTE(review): the same parathyroidSE-derived table is also written under the 22 | # airway name; confirm the airway features are meant to share this annotation. 23 | write.csv(mart.info, "inst/extdata/airway-gene-info.csv", 24 | row.names=FALSE) 25 | -------------------------------------------------------------------------------- /man/samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{samples} 4 | \alias{samples} 5 | \title{Returns a facile_frame of samples from an object in the facilebio ecosystem.} 6 | \usage{ 7 | samples(x, ...) 8 | } 9 | \description{ 10 | \code{samples} can be extracted from a FacileDataStore itself, or the result of 11 | an analysis initiated from a FacileDataStore (like you'll find in the 12 | FacileAnalysis package).
13 | } 14 | \seealso{ 15 | Other FacileInterface: 16 | \code{\link{facet_frame.FacileDataSet}()}, 17 | \code{\link{fetch_assay_score}()}, 18 | \code{\link{fetch_sample_covariates}()}, 19 | \code{\link{fetch_sample_statistics}()} 20 | } 21 | \concept{FacileInterface} 22 | -------------------------------------------------------------------------------- /man/fetch_assay_score.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{fetch_assay_score} 4 | \alias{fetch_assay_score} 5 | \title{NOTE: is fetch_assay_score really necessary?} 6 | \usage{ 7 | fetch_assay_score( 8 | x, 9 | features, 10 | samples = NULL, 11 | assay_name = NULL, 12 | as.matrix = FALSE, 13 | ..., 14 | subset.threshold = 700 15 | ) 16 | } 17 | \description{ 18 | NOTE: is fetch_assay_score really necessary? 19 | } 20 | \seealso{ 21 | Other FacileInterface: 22 | \code{\link{facet_frame.FacileDataSet}()}, 23 | \code{\link{fetch_sample_covariates}()}, 24 | \code{\link{fetch_sample_statistics}()}, 25 | \code{\link{samples}()} 26 | } 27 | \concept{FacileInterface} 28 | -------------------------------------------------------------------------------- /man/feature_name_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R, R/features.R 3 | \name{assay_feature_name_map} 4 | \alias{assay_feature_name_map} 5 | \alias{feature_name_map} 6 | \title{Returns table of names and aliases for features.} 7 | \usage{ 8 | assay_feature_name_map(x, assay_name) 9 | 10 | feature_name_map(x, feature_type) 11 | } 12 | \arguments{ 13 | \item{x}{\code{FacileDataSet}} 14 | 15 | \item{assay_name}{the name of assay to get the feature map for.} 16 | 17 | \item{feature_type}{a character vector specifying the feature type} 18 | } 19 | \value{ 20 | a tibble with 
\code{feature_id, name, type} columns, where type 21 | is "primary" or "alias" 22 | } 23 | \description{ 24 | #dropme 25 | } 26 | -------------------------------------------------------------------------------- /man/dbfn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{dbfn} 4 | \alias{dbfn} 5 | \title{Get location of the FacileDataSet database} 6 | \usage{ 7 | dbfn(x, mustWork = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{FacileDataSet} 11 | 12 | \item{mustWork}{boolean, if \code{TRUE} (default), throws an error if the sqlite 13 | file does not exist. When \code{FALSE}, this returns the "expected" path to the 14 | sqlite file for \code{x}} 15 | } 16 | \value{ 17 | the filepath to the sqlite database 18 | } 19 | \description{ 20 | Get location of the FacileDataSet database 21 | } 22 | \seealso{ 23 | Other FacileDataSet: 24 | \code{\link{FacileDataSet}()}, 25 | \code{\link{hdf5fn}()}, 26 | \code{\link{meta_file}()} 27 | } 28 | \concept{FacileDataSet} 29 | -------------------------------------------------------------------------------- /man/eavdef_for_column.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{eavdef_for_column} 4 | \alias{eavdef_for_column} 5 | \title{Generate entity-attribute-value definition for a column in a data.frame} 6 | \usage{ 7 | eavdef_for_column(column, column_name) 8 | } 9 | \arguments{ 10 | \item{column}{a vector, e.g. a column out of a pdata} 11 | 12 | \item{column_name}{single character, name of the column} 13 | } 14 | \value{ 15 | a generic list-of-list definition column 16 | } 17 | \description{ 18 | Creates the minimal list-definition for a single column in a \code{pData} 19 | \code{data.frame}. This function is not exported on purpose.
Column descriptions 20 | will be taken from the "label" attribute of data.frames or the "metadata" list 21 | for DataFrames. 22 | } 23 | -------------------------------------------------------------------------------- /man/initializeFacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/construction.R 3 | \name{initializeFacileDataSet} 4 | \alias{initializeFacileDataSet} 5 | \title{Create an empty FacileDataSet} 6 | \usage{ 7 | initializeFacileDataSet(path, meta_file, page_size = 2^12, cache_size = 2e+05) 8 | } 9 | \arguments{ 10 | \item{path}{the directory to create which will house the 11 | \code{FacileDataSet}} 12 | 13 | \item{page_size, cache_size}{\code{pragma} values to setup the backend SQLite 14 | database} 15 | 16 | \item{covariate_definition}{the path to the covariate definition file} 17 | } 18 | \value{ 19 | invisibly returns the path to the successfully created datastore 20 | } 21 | \description{ 22 | This is a helper function that is currently only called from 23 | \code{as.FacileDataSet} 24 | } 25 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | schedule: 3 | # Only push the test coverage up on a schedule.
We can use the full 4 | # facilebio image for this for expediency 5 | - cron: '30 12 * * *' 6 | 7 | name: test-coverage 8 | 9 | jobs: 10 | test-coverage: 11 | runs-on: ubuntu-latest 12 | container: facilebio/facilebio 13 | env: 14 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 15 | NOT_CRAN: true 16 | CI: true 17 | 18 | steps: 19 | - name: Checkout Repository 20 | uses: actions/checkout@v4 21 | 22 | - name: Setup R 23 | # install-r is false: use the R that ships in the container image 24 | uses: r-lib/actions/setup-r@v2 25 | with: 26 | install-r: false 27 | 28 | - name: Test coverage 29 | run: covr::codecov(quiet = FALSE) 30 | shell: Rscript {0} 31 | -------------------------------------------------------------------------------- /man/validate_covariate_def_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{validate_covariate_def_list} 4 | \alias{validate_covariate_def_list} 5 | \title{Validates that a covariate definition list reasonably describes a data.frame.} 6 | \usage{ 7 | validate_covariate_def_list(x, pdata) 8 | } 9 | \arguments{ 10 | \item{x}{a covariate definition list-of-lists} 11 | 12 | \item{pdata}{a \code{data.frame}} 13 | } 14 | \description{ 15 | The covariates defined in \code{x} must be a subset of the columns in \code{pdata}. 16 | This method will throw an error if there is a covariate in \code{x} that does 17 | not have a matching column in \code{pdata}. 18 | } 19 | \details{ 20 | This function does not check if all columns in \code{pdata} have definitions in 21 | \code{x}.
22 | } 23 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | #' @import checkmate 2 | #' @import dplyr 3 | #' @import methods 4 | #' @importFrom utils read.csv 5 | "_PACKAGE" 6 | 7 | #' @importFrom broom tidy 8 | #' @export 9 | broom::tidy 10 | 11 | # Export oft-used dplyr stuff -------------------------------------------------- 12 | # Should we just put dplyr in Depends? 13 | 14 | #' @export 15 | dplyr::`%>%` 16 | 17 | #' @export 18 | dplyr::arrange 19 | 20 | #' @export 21 | dplyr::collect 22 | 23 | #' @export 24 | dplyr::distinct 25 | 26 | #' @export 27 | dplyr::filter 28 | 29 | #' @export 30 | dplyr::group_by 31 | 32 | #' @export 33 | dplyr::mutate 34 | 35 | #' @export 36 | dplyr::select 37 | 38 | #' @export 39 | dplyr::transmute 40 | 41 | #' @export 42 | dplyr::ungroup 43 | 44 | #' @export 45 | dplyr::left_join 46 | 47 | #' @export 48 | dplyr::inner_join 49 | 50 | #' @export 51 | dplyr::semi_join 52 | -------------------------------------------------------------------------------- /man/assay_sample_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{assay_sample_info} 4 | \alias{assay_sample_info} 5 | \title{Utility functions to get row and column indices of rnaseq hdf5 files.} 6 | \usage{ 7 | assay_sample_info(x, assay_name, samples = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{\code{FacileDataStore}} 11 | 12 | \item{assay_name}{the name of the assay} 13 | 14 | \item{samples}{a sample descriptor} 15 | } 16 | \value{ 17 | an updated version of \code{samples} decorated with hd5_index, 18 | scaling factors, etc. Note that rows in \code{samples} that do not appear 19 | in \code{assay_name} will be returned here with NA values for hd5_index and 20 | such.
21 | } 22 | \description{ 23 | This is called to get things like hdf5_index and scaling factors for 24 | the samples in a given assay. 25 | } 26 | -------------------------------------------------------------------------------- /man/assay_info_over_samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R 3 | \name{assay_info_over_samples} 4 | \alias{assay_info_over_samples} 5 | \title{Identify the number of each assay run across specific samples.} 6 | \usage{ 7 | assay_info_over_samples(x, samples = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{FacileDataSet} 11 | 12 | \item{samples}{sample descriptor} 13 | 14 | \item{with_count}{return the number of samples in \code{samples} that are 15 | assayed over each assay as a column in \code{return}} 16 | } 17 | \value{ 18 | rows from assay_info_tbl that correspond to the assays defined 19 | over the given samples. If no assays are defined over these samples, 20 | you're going to get an empty tibble. 21 | } 22 | \description{ 23 | The default assay is listed first, the rest of the order is undetermined. 
24 | } 25 | -------------------------------------------------------------------------------- /R/sql.R: -------------------------------------------------------------------------------- 1 | #' Read SQL statements from a file 2 | #' 3 | #' Reads \code{file}, strips \code{--} comments from each line, and splits the 4 | #' remaining text on \code{;} into individual statements. Blank statements are 5 | #' dropped. Copied from http://stackoverflow.com/questions/18914283 6 | #' 7 | #' The comment stripping and splitting are purely textual, so a \code{--} or 8 | #' \code{;} inside a quoted SQL string is treated like any other. 9 | #' 10 | #' @param file single character, name of file with SQL statements 11 | #' @return character vector with one SQL statement per element 12 | sqlFromFile <- function(file){ 13 | # Only base R is used below, so no additional package needs to be loaded. 14 | sql <- readLines(file) 15 | sql <- gsub("--.*$", "", sql) ## remove comments 16 | sql <- unlist(strsplit(paste(sql,collapse=" "),";")) 17 | sql <- sql[grep("^ *$", sql, invert=TRUE)] ## drop blank statements 18 | sql 19 | } 20 | 21 | #' Execute multiple queries against a database 22 | #' 23 | #' Each element of \code{sql} is passed to \code{\link[DBI]{dbExecute}} in turn. 24 | #' 25 | #' @importFrom DBI dbExecute 26 | #' @param con database handle 27 | #' @param sql list of charvecs (SQL statements) 28 | #' @return invisibly, the list of values returned by \code{dbExecute} 29 | executeSQL <- function(con, sql){ 30 | execsql <- function(sql, con) { 31 | # dbGetQuery(con,sql) 32 | dbExecute(con, sql) 33 | } 34 | invisible(lapply(sql, execsql, con)) 35 | } 36 | -------------------------------------------------------------------------------- /man/eav_encode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{eav_encode} 4 | \alias{eav_encode} 5 | \title{Encodes column(s) from \code{pData} into character values} 6 | \usage{ 7 | eav_encode(dat, covariate_def, varname) 8 | } 9 | \arguments{ 10 | \item{dat}{the vector of values to encode into an EAV table} 11 | 12 | \item{covariate_def}{the single-list-definition of this covariate} 13 | 14 | \item{varname}{the name of the attribute column in the eav table} 15 | } 16 | \value{ 17 | a four-column \code{data.frame} (dataset,sample_id,variable,value) 18 | with the encoded covariate into a single \code{value} column.
19 | } 20 | \description{ 21 | This function is not exported, and should only be called from within the 22 | \code{\link[=as.EAVtable]{as.EAVtable()}} function because we rely on validity checks that are 23 | happening there. 24 | } 25 | -------------------------------------------------------------------------------- /man/extract_transcribed_info_from_ensembl_gtf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_transcribed_info_from_ensembl_gtf.R 3 | \name{extract_transcribed_info_from_ensembl_gtf} 4 | \alias{extract_transcribed_info_from_ensembl_gtf} 5 | \title{Extract gene- and transcript-level information from an ENSEMBL gtf.} 6 | \usage{ 7 | extract_transcribed_info_from_ensembl_gtf( 8 | fn, 9 | gene_type = "gene_type", 10 | transcript_type = "transcript_type" 11 | ) 12 | } 13 | \arguments{ 14 | \item{fn}{the path to the ENSEMBL (or GENCODE) GTF} 15 | } 16 | \value{ 17 | a list of tibbles with \verb{$transcript_info} and \verb{$gene_info} elements 18 | } 19 | \description{ 20 | This was written for release_28 annotations. This is noted because some 21 | column names seem to have changed, ie. "gene_type" instead of 22 | "gene_biotype", etc. Let's see how consistent this is!
23 | } 24 | -------------------------------------------------------------------------------- /man/create_assay_feature_descriptor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R 3 | \name{create_assay_feature_descriptor} 4 | \alias{create_assay_feature_descriptor} 5 | \title{Creates a feature descriptor for interactive ease} 6 | \usage{ 7 | create_assay_feature_descriptor(x, features = NULL, assay_name = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{FacileDataSet} 11 | 12 | \item{features}{a character string of feature ids (requires assay_name) 13 | or a data.frame with feature_id column.} 14 | 15 | \item{assay_name}{the assay to get the featurespace from. if this is provided, 16 | it will trump an already existing assay_name column in \code{features}} 17 | } 18 | \value{ 19 | a feature descriptor with feature_id and assay_name, which can be 20 | used to absolutely find features 21 | } 22 | \description{ 23 | Creates a data.frame of features and assays they come from 24 | } 25 | -------------------------------------------------------------------------------- /man/check_facile_data_set.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{check_facile_data_set} 4 | \alias{check_facile_data_set} 5 | \alias{assert_facile_data_set} 6 | \alias{test_facile_data_set} 7 | \title{Check if argument is a FacileDataSet} 8 | \usage{ 9 | check_facile_data_set(x, ...) 10 | 11 | assert_facile_data_set(x, ..., .var.name = vname(x), add = NULL) 12 | 13 | test_facile_data_set(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{The object to check.} 17 | 18 | \item{...}{to be determined later} 19 | 20 | \item{.var.name}{Name of the checked object to print in assertions.
Defaults 21 | to the heuristic implemented in \code{\link[checkmate:vname]{checkmate::vname()}}.} 22 | 23 | \item{add}{An \code{\link[checkmate:AssertCollection]{checkmate::AssertCollection()}} object. Default is \code{NULL}.} 24 | } 25 | \description{ 26 | Check if argument is a FacileDataSet 27 | } 28 | -------------------------------------------------------------------------------- /man/conform_data_frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utilities.R 3 | \name{conform_data_frame} 4 | \alias{conform_data_frame} 5 | \title{Arranges the columns of one data.frame to another} 6 | \usage{ 7 | conform_data_frame(x, to) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{data.frame} that needs to be checked and conformed} 11 | 12 | \item{to}{the prototype \code{data.frame} that \code{x} needs to be aligned 13 | against.} 14 | } 15 | \value{ 16 | the \code{tibble} version of \code{x} that is arranged to look 17 | like \code{to}. 18 | } 19 | \description{ 20 | This function is primarily used to add data to the FacileDataSet's SQLite 21 | database. \code{x} is new data to add, and \code{to} is a table of 22 | the form that is expected in the database. We check that the columns of 23 | \code{x} are a superset of columns in \code{to} and the matching columns are 24 | all of the same class.
25 | } 26 | -------------------------------------------------------------------------------- /man/eav_encode_covariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{eav_encode_covariate} 4 | \alias{eav_encode_covariate} 5 | \title{Encodes column(s) from \code{pData} into character values} 6 | \usage{ 7 | eav_encode_covariate(dat, covariate_def, aname = "variable") 8 | } 9 | \arguments{ 10 | \item{covariate_def}{the single-list-definition of this covariate} 11 | 12 | \item{pdata}{the \code{pData} \code{data.frame}} 13 | 14 | \item{vname}{the name of the attribute column in the eav table} 15 | } 16 | \value{ 17 | a four-column \code{data.frame} (dataset,sample_id,variable,value) 18 | with the encoded covariate into a single \code{value} column. 19 | } 20 | \description{ 21 | This function is not exported, and should only be called from within the 22 | \code{\link[=as.EAVtable]{as.EAVtable()}} function because we rely on validity checks that are 23 | happening there. 24 | } 25 | -------------------------------------------------------------------------------- /man/organism.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{organism.FacileDataSet} 4 | \alias{organism.FacileDataSet} 5 | \title{Retrieves the organism the data is defined over} 6 | \usage{ 7 | \method{organism}{FacileDataSet}(x) 8 | } 9 | \value{ 10 | \code{"Homo sapiens"}, \code{"Mus musculus"}, etc. 11 | } 12 | \description{ 13 | A FacileDataStore is only expected to hold data for one organism. 
14 | } 15 | \seealso{ 16 | Other API: 17 | \code{\link{fetch_assay_score.FacileDataSet}()}, 18 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 19 | \code{\link{fetch_sample_covariates}()}, 20 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 21 | \code{\link{fetch_samples.FacileDataSet}()}, 22 | \code{\link{filter_features.FacileDataSet}()}, 23 | \code{\link{filter_samples.FacileDataSet}()}, 24 | \code{\link{samples.FacileDataSet}()} 25 | } 26 | \concept{API} 27 | -------------------------------------------------------------------------------- /man/covariate_meta_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{covariate_meta_info} 4 | \alias{covariate_meta_info} 5 | \title{Retrieve the meta information about a covariate for EAV decoding} 6 | \usage{ 7 | covariate_meta_info(covariate, .fds, covdefs = NULL) 8 | } 9 | \arguments{ 10 | \item{covariate}{the name of the covariate} 11 | 12 | \item{.fds}{the \code{FacileDataSet}} 13 | 14 | \item{covdefs}{The \code{covariate_definitions(.fds)} list} 15 | } 16 | \value{ 17 | a list of covariate information with the following elements: 18 | \verb{$name}, \verb{$type}, \verb{$class}, \verb{$description}, 19 | \verb{$label}, \verb{$is.factor}, (and maybe \verb{$levels}) 20 | } 21 | \description{ 22 | Mappings that define attribute-value encodings into R-native objects are 23 | stored in a \code{FacileDataSet}'s \code{meta.yaml} file, in the \code{sample_covariate} 24 | section. 
25 | } 26 | -------------------------------------------------------------------------------- /man/check_facile_data_store.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{check_facile_data_store} 4 | \alias{check_facile_data_store} 5 | \alias{assert_facile_data_store} 6 | \alias{test_facile_data_store} 7 | \title{Check if argument is a FacileDataStore} 8 | \usage{ 9 | check_facile_data_store(x, ...) 10 | 11 | assert_facile_data_store(x, ..., .var.name = vname(x), add = NULL) 12 | 13 | test_facile_data_store(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{The object to check.} 17 | 18 | \item{...}{to be determined later} 19 | 20 | \item{.var.name}{Name of the checked object to print in assertions. Defaults 21 | to the heuristic implemented in \code{\link[checkmate:vname]{checkmate::vname()}}.} 22 | 23 | \item{add}{An \code{\link[checkmate:AssertCollection]{checkmate::AssertCollection()}} object. 
Default is \code{NULL}.} 24 | } 25 | \description{ 26 | Check if argument is a FacileDataStore 27 | } 28 | -------------------------------------------------------------------------------- /man/test-helpers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/test-helpers.R 3 | \name{example_sample_covariates} 4 | \alias{example_sample_covariates} 5 | \alias{example_meta} 6 | \alias{example_sample_covariate_definitions} 7 | \title{Fetches exemplar data for unit testing} 8 | \usage{ 9 | example_sample_covariates() 10 | 11 | example_meta(file.path = FALSE) 12 | 13 | example_sample_covariate_definitions() 14 | } 15 | \arguments{ 16 | \item{file.path}{If \code{TRUE}, returns the path to the yaml file, otherwise 17 | returns the list-of-list meta definition.} 18 | } 19 | \value{ 20 | Either the list-of-list meta definition, or path to the \code{meta.yaml} 21 | file where these are defined. 22 | 23 | the list-of-list definitions for the example \code{pData} returned from 24 | \code{\link[=example_sample_covariates]{example_sample_covariates()}} 25 | } 26 | \description{ 27 | Fetches exemplar data for unit testing 28 | } 29 | -------------------------------------------------------------------------------- /man/ds_annot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.FacileDataSet.R 3 | \name{ds_annot} 4 | \alias{ds_annot} 5 | \title{Bioc-container specific data set annotation extraction functions} 6 | \usage{ 7 | ds_annot(x, meta = NULL, validate = FALSE, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList} 11 | 12 | \item{meta}{a list of description stuff for the dataset, this can act to 13 | override what's there, already} 14 | 15 | \item{validate}{single logical, check results} 16 | 17 | \item{...}{additional args (ignored for now)} 18 | } 19 | \description{ 20 | Takes dataset-level annotation as stored by each type. DGEList has 21 | no such slot, unfortunately, and thus gets the default. SE has a 22 | metadata slot and can provide url and description. eSet just has 23 | a character annotation and can provide a description. 24 | } 25 | \details{ 26 | This is an internal helper function. 27 | } 28 | -------------------------------------------------------------------------------- /man/append_facile_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/db-and-table-functions.R 3 | \name{append_facile_table} 4 | \alias{append_facile_table} 5 | \title{Adds rows to a table in a FacileDataSet} 6 | \usage{ 7 | append_facile_table(dat, x, table_name, warn_existing = FALSE) 8 | } 9 | \arguments{ 10 | \item{dat}{the \code{data.frame} of rows to add to the table, which must 11 | have a superset of columns present in the \code{table_name} that is being 12 | appended to} 13 | 14 | \item{x}{the \code{FacileDataSet}} 15 | 16 | \item{table_name}{the name of the table in \code{x} to add the rows of 17 | \code{dat} to.} 18 | } 19 | \value{ 20 | invisibly returns the conformed version of \code{dat}. 21 | } 22 | \description{ 23 | This function first checks the data in the target table \code{table_name} 24 | to ensure that rows in \code{dat} that exist in \code{table_name} (by 25 | checking the primary key) are not added.
26 | } 27 | -------------------------------------------------------------------------------- /man/parse_sample_criterion.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/samples.R 3 | \name{parse_sample_criterion} 4 | \alias{parse_sample_criterion} 5 | \title{Creates a filter expression to select samples based on value of a covariate} 6 | \usage{ 7 | parse_sample_criterion(variable, value) 8 | } 9 | \arguments{ 10 | \item{variable}{the name of the variable to look for in the sample_covariate 11 | \code{variable} column} 12 | 13 | \item{value}{\code{character} vector of values for the \code{variable} that 14 | you want your samples to have.} 15 | } 16 | \value{ 17 | a filter expression that selects samples by the value(s) of \code{variable} 18 | } 19 | \description{ 20 | This leverages dplyr's standard (vs non-standard) evaluation mojo. There is 21 | likely a cleaner way to do this, but to be honest I still find the 22 | \code{\link[lazyeval]{interp}} stuff rather confusing 23 | } 24 | \seealso{ 25 | \href{https://cran.r-project.org/web/packages/dplyr/vignettes/nse.html}{dplyr non-standard evaluation} 26 | } 27 | -------------------------------------------------------------------------------- /man/assemble_example_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assemble_example_dataset.R 3 | \name{assemble_example_dataset} 4 | \alias{assemble_example_dataset} 5 | \title{Assembles an example facile dataset to play with} 6 | \usage{ 7 | assemble_example_dataset( 8 | directory = tempdir(), 9 | name = "ExampleRnaFacileDataSet" 10 | ) 11 | } 12 | \arguments{ 13 | \item{directory}{The name of the parent directory to hold the dataset} 14 | 15 | \item{name}{A subdirectory within \code{directory} will be created using this 16 | name.} 17 | } 18 | \value{ 19 | The FacileDataSet object itself.
20 | } 21 | \description{ 22 | This combines the airway and parathyroidSE RNA-seq datasets into a single 23 | FacileDataSet. 24 | } 25 | \details{ 26 | The code here is extracted from the \code{FacileDataSet-assembly} vignette. Please 27 | read that for some of the why's and how's of the decisions made here when 28 | assembling datasets. 29 | } 30 | -------------------------------------------------------------------------------- /man/fds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{fds} 4 | \alias{fds} 5 | \alias{fds.FacileDataStore} 6 | \alias{fds.default} 7 | \alias{fds<-} 8 | \alias{fds<-.tbl} 9 | \alias{fds<-.data.frame} 10 | \alias{set_fds} 11 | \title{Get or set the FacileDataStore for an object} 12 | \usage{ 13 | fds(x, ...) 14 | 15 | \method{fds}{FacileDataStore}(x) 16 | 17 | \method{fds}{default}(x, ...) 18 | 19 | fds(x) <- value 20 | 21 | \method{fds}{tbl}(x) <- value 22 | 23 | \method{fds}{data.frame}(x) <- value 24 | 25 | set_fds(x, value) 26 | } 27 | \arguments{ 28 | \item{x}{the object} 29 | 30 | \item{value}{The \code{FacileDataStore} object} 31 | } 32 | \description{ 33 | FacileDataStores are passed along with most every object generated from 34 | functions in the facilebio universe. This makes it convenient to dig back 35 | into a large genomics objects to retrieve data from "slim" results, like 36 | a sample covariate data.frame. 
37 | } 38 | -------------------------------------------------------------------------------- /man/dot-level_biotypes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_transcribed_info_from_ensembl_gtf.R 3 | \name{.level_biotypes} 4 | \alias{.level_biotypes} 5 | \title{Utility function to "factorize" biotypes into an order we care about.} 6 | \usage{ 7 | .level_biotypes(x) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{character} vector of biotypes} 11 | } 12 | \value{ 13 | a factor version of \code{x}, with \code{levels(x)} in approximately the order 14 | we care about. 15 | } 16 | \description{ 17 | ENSEMBL GTFs provide biotype information for genes/transcripts. These are 18 | things like "3prime_overlapping_ncRNA", "antisense", ..., "protein_coding", 19 | etc. This function turns the "biotype"-vector \code{x} into a factor with levels 20 | in (roughly) the order we care to "unique"-ify these levels. Ie. if a gene 21 | has a "protein_coding" annotation, we will care to keep that one over one 22 | of its annotations which categorize it as a "processed_transcript" 23 | } 24 | -------------------------------------------------------------------------------- /man/samples.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{samples.FacileDataSet} 4 | \alias{samples.FacileDataSet} 5 | \title{Retrieves the sample identifiers for all samples in a FacileDataSet.} 6 | \usage{ 7 | \method{samples}{FacileDataSet}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{FacileDataSet}} 11 | } 12 | \value{ 13 | tibble of sample attributes 14 | } 15 | \description{ 16 | Sample identifiers are provided as \verb{dataset,sample_id tuples}. 
17 | } 18 | \seealso{ 19 | Other API: 20 | \code{\link{fetch_assay_score.FacileDataSet}()}, 21 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 22 | \code{\link{fetch_sample_covariates}()}, 23 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 24 | \code{\link{fetch_samples.FacileDataSet}()}, 25 | \code{\link{filter_features.FacileDataSet}()}, 26 | \code{\link{filter_samples.FacileDataSet}()}, 27 | \code{\link{organism.FacileDataSet}()} 28 | } 29 | \concept{API} 30 | -------------------------------------------------------------------------------- /tests/testthat/test-feature-info.R: -------------------------------------------------------------------------------- 1 | context("Feature Info") 2 | 3 | FDS <- exampleFacileDataSet() 4 | genes <- tibble( 5 | feature_id = c("800", "1009", "1289", "50509", "2191", "2335", "5159"), 6 | feature_type = "entrez") 7 | 8 | test_that("with_feature_info grabs the right goods", { 9 | finfo.all <- collect(fetch_feature_info(FDS, "entrez"), n = Inf) 10 | finfo <- with_feature_info(genes, .fds = FDS) 11 | 12 | expected <- left_join(genes, finfo.all, by = c("feature_id", "feature_type")) 13 | expect_equal(finfo, expected, check.attributes = FALSE) 14 | 15 | f2 <- with_feature_info(genes, c("name", "meta"), .fds = FDS) 16 | expect_equal(f2, select(expected, !!colnames(f2)), check.attributes = FALSE) 17 | }) 18 | 19 | test_that("with_feature_info can rename feature covariates", { 20 | expected <- genes %>% 21 | with_feature_info(c("name", "meta"), .fds = FDS) %>% 22 | rename(symbol = "name") 23 | 24 | res <- genes %>% 25 | with_feature_info(c(symbol = "name", "meta"), .fds = FDS) 26 | 27 | expect_equal(res, expected) 28 | }) 29 | -------------------------------------------------------------------------------- /tests/testthat/test-assay-normalization.R: -------------------------------------------------------------------------------- 1 | context("Normalizaiton of assay data") 2 | 3 | if (!exists("FDS")) FDS <- 
exampleFacileDataSet() 4 | 5 | samples <- FDS %>% 6 | filter_samples(stage == "III") %>% 7 | select(dataset, sample_id) 8 | 9 | genes <- c( 10 | PRF1='5551', 11 | GZMA='3001', 12 | CD274='29126', 13 | TIGIT='201633') 14 | 15 | features <- tibble(assay='rnaseq', feature_id=genes) 16 | 17 | test_that("Normalization of rnaseq data is equivalent to edgeR::cpm", { 18 | y <- edgeR::calcNormFactors(as.DGEList(samples)) 19 | cpms <- edgeR::cpm(y, log = TRUE, prior.count = 0.25)[genes,] 20 | 21 | # use the lib.size and norm.factors from this subset of data 22 | samples. <- samples %>% 23 | left_join(select(y$samples, sample_id, lib.size, norm.factors), 24 | by = "sample_id") 25 | 26 | normed <- fetch_assay_data(samples., genes, normalized = TRUE, 27 | prior.count = 0.25, as.matrix = TRUE) 28 | normed <- normed[rownames(cpms), colnames(cpms)] 29 | expect_equal(normed, cpms) 30 | }) 31 | 32 | -------------------------------------------------------------------------------- /man/as_facile_frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/facile_frame.R 3 | \name{as_facile_frame} 4 | \alias{as_facile_frame} 5 | \title{Converts a normal tibble/data.frame to a facile_frame} 6 | \usage{ 7 | as_facile_frame( 8 | x, 9 | datastore = fds(x), 10 | classes = NULL, 11 | ..., 12 | .valid_sample_check = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{a sample-like descriptor} 17 | 18 | \item{datastore}{the FacileDataStore tied to x} 19 | 20 | \item{classes}{more classes to append to the outgoing object. The 21 | \code{"facile_frame"} class entry is always the last one of the bunch.} 22 | 23 | \item{...}{dots} 24 | 25 | \item{.valid_sample_check}{If \code{TRUE} (default), will check if \code{x} is a valid 26 | subset of the FacileDataStore \code{.fds}. 
Internal functions may set this to 27 | \code{FALSE} to skip the check to (1) save time; and (2) avoid infinite 28 | recursion in the call to \code{assert_sample_subset}.} 29 | } 30 | \description{ 31 | Converts a normal tibble/data.frame to a facile_frame 32 | } 33 | -------------------------------------------------------------------------------- /man/fetch_assay_score.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R 3 | \name{fetch_assay_score.FacileDataSet} 4 | \alias{fetch_assay_score.FacileDataSet} 5 | \title{Helper function to get sample assay data from single or aggregate features} 6 | \usage{ 7 | \method{fetch_assay_score}{FacileDataSet}( 8 | x, 9 | features, 10 | samples = NULL, 11 | assay_name = NULL, 12 | as.matrix = FALSE, 13 | ..., 14 | subset.threshold = 700 15 | ) 16 | } 17 | \description{ 18 | Helper function to get sample assay data from single or aggregate features 19 | } 20 | \seealso{ 21 | Other API: 22 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 23 | \code{\link{fetch_sample_covariates}()}, 24 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 25 | \code{\link{fetch_samples.FacileDataSet}()}, 26 | \code{\link{filter_features.FacileDataSet}()}, 27 | \code{\link{filter_samples.FacileDataSet}()}, 28 | \code{\link{organism.FacileDataSet}()}, 29 | \code{\link{samples.FacileDataSet}()} 30 | } 31 | \concept{API} 32 | -------------------------------------------------------------------------------- /man/join_samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/samples.R 3 | \name{join_samples} 4 | \alias{join_samples} 5 | \title{Filters the samples down in a dataset to ones specified} 6 | \usage{ 7 | join_samples(x, samples = NULL, semi = FALSE, 
distinct.samples = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{likely a \code{tbl_sqlite} object, but a \code{tbl_df}-like 11 | object should work as well.} 12 | 13 | \item{samples}{a sample descriptor \code{tbl_df}-like object (likely a 14 | \code{tbl_sqlite} object) that has \code{"dataset"} and \code{"sample_id"} 15 | columns.} 16 | 17 | \item{semi}{if \code{TRUE}, approximates a semi-join on the \code{samples}, 18 | otherwise does an inner_join between \code{x} and \code{samples} 19 | (default \code{FALSE}).} 20 | } 21 | \value{ 22 | joined result between \code{x} and \code{samples} 23 | } 24 | \description{ 25 | Tables like \code{expression} and \code{sample_covariate} house different 26 | datapoints per sample, and we often want to only retrieve data points over 27 | a subset of samples. 28 | } 29 | -------------------------------------------------------------------------------- /man/append_facile_feature_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/construction.R 3 | \name{append_facile_feature_info} 4 | \alias{append_facile_feature_info} 5 | \title{Appends new features to \code{feature_info} table} 6 | \usage{ 7 | append_facile_feature_info( 8 | x, 9 | feature_info, 10 | type = feature_info$feature_type, 11 | warn_existing = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{x}{The \code{FacileDataSet}} 16 | 17 | \item{feature_info}{a table of new features that provides all columns 18 | in \code{feature_info_tbl(x)}} 19 | 20 | \item{type}{A way to override (or set) the \code{feature_type} column of the 21 | \code{feature_info} table} 22 | } 23 | \value{ 24 | invisibly returns an annotated version of the \code{feature_info} 25 | table with an \code{$added} column with \code{TRUE/FALSE} values for the 26 | features that were new (and added) to the repository or \code{FALSE} to 27 | indicate that they were already in the database. 
28 | } 29 | \description{ 30 | This function only adds features (feature_type, feature_id) that are not 31 | in the \code{feature_info} table already 32 | } 33 | -------------------------------------------------------------------------------- /man/assay_feature_info.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R 3 | \name{assay_feature_info.FacileDataSet} 4 | \alias{assay_feature_info.FacileDataSet} 5 | \title{Materializes a table with all feature information for a given assay.} 6 | \usage{ 7 | \method{assay_feature_info}{FacileDataSet}(x, assay_name, feature_ids = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{\code{FacileDataSet}} 11 | 12 | \item{assay_name}{the name of the assay} 13 | 14 | \item{feature_ids}{a character vector of feature_ids} 15 | } 16 | \value{ 17 | a \code{tbl_sqlite} result with the feature information for the 18 | features in a specified assay 19 | } 20 | \description{ 21 | DEBUG: This logic is unnecessarily complex because I make sure to collect 22 | all tables from the database as opposed to copying external tables in and 23 | doing an inner_join in the database. I'm doing this because we are getting 24 | name collisions on some of the temporary tables. We get errors like: 25 | Warning: Error in : Table pkdtpohpsu already exists. 26 | } 27 | \details{ 28 | This fetches the hdf5_index for the assays as well 29 | } 30 | -------------------------------------------------------------------------------- /man/assay_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{assay_info} 4 | \alias{assay_info} 5 | \title{Fetches assay meta information for the assays stored in a FacileDataStore} 6 | \usage{ 7 | assay_info(x, assay_name = NULL, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{A \code{FacileDataStore}} 11 | 12 | \item{assay_name}{optional name of the assay to get information for} 13 | } 14 | \value{ 15 | a tibble of meta information for the assays stored in \code{x}, with these 16 | columns: 17 | \itemize{ 18 | \item \verb{assay }: Name of the assay 19 | \item \verb{assay_type }: \code{"rnaseq"}, \code{"lognorm"}, etc. Look at 20 | \code{FacileData:::.assay.types} vector 21 | \item \verb{feature_type }: A string from \code{FacileData:::.feature.types}, ie. 22 | \code{"ensgid"}, \code{"entrez"}, \code{"custom"}, etc. 23 | \item \verb{description }: string description 24 | \item \verb{nfeatures }: number of features we have info for 25 | \item \verb{storage_mode }: \code{"integer"}, \code{"numeric"} 26 | } 27 | } 28 | \description{ 29 | Fetches assay meta information for the assays stored in a FacileDataStore 30 | } 31 | -------------------------------------------------------------------------------- /man/save_custom_sample_covariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample-covariates.R 3 | \name{save_custom_sample_covariates} 4 | \alias{save_custom_sample_covariates} 5 | \title{Saves custom sample covariates to a FacileDataSet} 6 | \usage{ 7 | save_custom_sample_covariates( 8 | x, 9 | annotation, 10 | name = NULL, 11 | class = "categorical", 12 | custom_key = Sys.getenv("USER"), 13 | file.prefix = "facile", 14 | sample_filter_critera = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{the \code{FacileDataSet}} 19 | 20 | \item{annotation}{the annotation table of covariate values to a 21 | sample-descriptor-like table} 22 | 23 | \item{name}{the variable name of the covariate} 24 | 25 | \item{custom_key}{the custom key (likely userid) for the annotation} 26 | 27 | \item{file.prefix}{Vincent uses this} 28 | 29 | \item{sample_filter_criteria}{optional list of filtering 
criteria that were 30 | used to drill down into the samples we have the \code{annotation} 31 | data.frame for 32 | TODO: Figure out how to encode sample_filter_criteria into serialized 33 | (JSON) annotation file} 34 | } 35 | \description{ 36 | Saves custom sample covariates to a FacileDataSet 37 | } 38 | -------------------------------------------------------------------------------- /man/filter_features.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/NSE-filter-features.R 3 | \name{filter_features.FacileDataSet} 4 | \alias{filter_features.FacileDataSet} 5 | \title{Filter against the sample_covariate_tbl as if it were wide.} 6 | \usage{ 7 | \method{filter_features}{FacileDataSet}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{FacileDataSet}} 11 | 12 | \item{...}{NSE clauses to use in \code{\link[dplyr]{filter}} expressions} 13 | } 14 | \description{ 15 | This feature is only really meant to be used 16 | interactively, and with extreme caution ... programmatically specifying 17 | column names in feature table, for instance, does not work right now. 
18 | } 19 | \details{ 20 | TODO: Use tidyeval 21 | } 22 | \seealso{ 23 | Other API: 24 | \code{\link{fetch_assay_score.FacileDataSet}()}, 25 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 26 | \code{\link{fetch_sample_covariates}()}, 27 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 28 | \code{\link{fetch_samples.FacileDataSet}()}, 29 | \code{\link{filter_samples.FacileDataSet}()}, 30 | \code{\link{organism.FacileDataSet}()}, 31 | \code{\link{samples.FacileDataSet}()} 32 | } 33 | \concept{API} 34 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{tidy} 7 | \alias{\%>\%} 8 | \alias{arrange} 9 | \alias{collect} 10 | \alias{distinct} 11 | \alias{filter} 12 | \alias{group_by} 13 | \alias{mutate} 14 | \alias{select} 15 | \alias{transmute} 16 | \alias{ungroup} 17 | \alias{left_join} 18 | \alias{inner_join} 19 | \alias{semi_join} 20 | \title{Objects exported from other packages} 21 | \keyword{internal} 22 | \description{ 23 | These objects are imported from other packages. Follow the links 24 | below to see their documentation. 
25 | 26 | \describe{ 27 | \item{broom}{\code{\link[broom:reexports]{tidy}}} 28 | 29 | \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}, \code{\link[dplyr]{arrange}}, \code{\link[dplyr:compute]{collect}}, \code{\link[dplyr]{distinct}}, \code{\link[dplyr]{filter}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr:mutate-joins]{inner_join}}, \code{\link[dplyr:mutate-joins]{left_join}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr]{select}}, \code{\link[dplyr:filter-joins]{semi_join}}, \code{\link[dplyr:mutate]{transmute}}, \code{\link[dplyr:group_by]{ungroup}}} 30 | }} 31 | 32 | -------------------------------------------------------------------------------- /man/cSurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/csurv.R 3 | \name{cSurv} 4 | \alias{cSurv} 5 | \alias{as_cSurv} 6 | \alias{as_Surv} 7 | \title{cSurv is a character representation of survival::surv ###} 8 | \usage{ 9 | as_cSurv(from) 10 | 11 | as_Surv(from) 12 | } 13 | \description{ 14 | cSurv serves as a more reliable way to use Surv objects as data.frame columns. A 15 | data.frame is supposed to be able to hold Surv columns. There are multiple special 16 | cases written into base for this. It seems the implementation is incomplete as 17 | subsetting the DF breaks the Surv object. cSurv cannot do anything but get subset 18 | and become a Surv again. In the FacileVerse we hold Surv objects as cSurv, which 19 | allows us to survive a round-trip through an EAV sample metadata table. Survival 20 | analyses can convert cSurv to Surv as needed. It is assumed that all Surv censoring 21 | is right-censored. 
22 | } 23 | \examples{ 24 | library(survival) 25 | x = Surv(c(14,12,3), event = c(1,0,1)) 26 | y = as(x,"cSurv") 27 | z = as(y, "Surv") 28 | x2 = as.character(x) 29 | z2 = as(x2, "Surv") 30 | a = as(x, "cSurv") 31 | b = as(a, "character") 32 | c = as(b, "cSurv") 33 | d = as(c, "Surv") 34 | } 35 | \concept{cSurv} 36 | -------------------------------------------------------------------------------- /man/as.EAVtable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{as.EAVtable} 4 | \alias{as.EAVtable} 5 | \title{Convert a \code{pData} data.frame to a melted EAV table} 6 | \usage{ 7 | as.EAVtable( 8 | x, 9 | ignore = c("dataset", "sample_id"), 10 | covariate_def = list(), 11 | na.rm = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{x}{a wide \code{pData} data.frame} 16 | 17 | \item{covariate_def}{passed to \code{\link[=eav_metadata_create]{eav_metadata_create()}} that is used to 18 | override default covariate definitions extracted from the columns of \code{x}} 19 | } 20 | \value{ 21 | a melted EAV table from \code{x} 22 | } 23 | \description{ 24 | Transforms a wide \code{pData} data.frame into a melted EAV table for use in 25 | a \code{FacileDataSet}. This function will also produce the list-of-list encodings 26 | that are generated from \code{\link[=eav_metadata_create]{eav_metadata_create()}} to do its thing as an 27 | attribute of the returned object. 28 | } 29 | \details{ 30 | If you want to provide custom definitions for the covariates in the EAVtable 31 | that are different than the ones generated in \code{\link[=eav_metadata_create]{eav_metadata_create()}}, then 32 | provide that definition list in the \code{covariate_def} parameter. 
33 | } 34 | -------------------------------------------------------------------------------- /tests/testthat/test-feature-types.R: -------------------------------------------------------------------------------- 1 | context("Features Types") 2 | 3 | test_that("Different classes of identifiers guessed correctly", { 4 | expected <- tribble( 5 | ~id_type, ~id, ~organism, 6 | "refseq", "NC_000023.11", "unknown", 7 | "refseq", "NC_000023.10", "unknown", 8 | "refseq", "NM_001306206.1", "unknown", 9 | "refseq", "NP_001293135.1", "unknown", 10 | "refseq", "NC_000023", "unknown", 11 | "refseq", "NM_001306206", "unknown", 12 | "ens_gene", "ENSG00000101811", "Homo sapiens", 13 | "ens_gene", "ENSMUSG00000030088", "Mus musculus", 14 | "ens_gene", "ENSMUSG00000030088.2", "Mus musculus", 15 | "ens_tx", "ENST00000415585.6", "Homo sapiens", 16 | "ens_tx", "ENSMUST00000113287.7", "Mus musculus", 17 | "ens_tx", "ENSMUST00000113287", "Mus musculus", 18 | "entrez", "85007", "unknown") 19 | 20 | res <- infer_feature_type(expected$id) 21 | expect_equal(res$id, expected$id) 22 | expect_equal(res$id_type, expected$id_type) 23 | # expect_equal(res$source_organism, expected$organism) 24 | }) 25 | -------------------------------------------------------------------------------- /man/fetch_samples.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/samples.R 3 | \name{fetch_samples.FacileDataSet} 4 | \alias{fetch_samples.FacileDataSet} 5 | \title{Fetches a sample descriptor that matches the filter criterion} 6 | \usage{ 7 | \method{fetch_samples}{FacileDataSet}(x, samples = NULL, assay = "rnaseq", ...) 
8 | } 9 | \arguments{ 10 | \item{x}{A \code{FacileDataRepository}} 11 | 12 | \item{...}{the NSE boolean filter criteria} 13 | } 14 | \value{ 15 | a facile sample descriptor 16 | } 17 | \description{ 18 | Use \code{...} as if this is a dplyr::filter call, and our 19 | sample_covariate_tbl was "wide". 20 | } 21 | \details{ 22 | This is experimental, so each "term" in the filter criteria should be 23 | just one boolean operation. Multiple terms passed into \code{...} will be 24 | "AND"ed together. 25 | } 26 | \seealso{ 27 | Other API: 28 | \code{\link{fetch_assay_score.FacileDataSet}()}, 29 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 30 | \code{\link{fetch_sample_covariates}()}, 31 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 32 | \code{\link{filter_features.FacileDataSet}()}, 33 | \code{\link{filter_samples.FacileDataSet}()}, 34 | \code{\link{organism.FacileDataSet}()}, 35 | \code{\link{samples.FacileDataSet}()} 36 | } 37 | \concept{API} 38 | -------------------------------------------------------------------------------- /man/fetch_custom_sample_covariates.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample-covariates.R 3 | \name{fetch_custom_sample_covariates.FacileDataSet} 4 | \alias{fetch_custom_sample_covariates.FacileDataSet} 5 | \title{Fetches custom (user) annotations for a given user prefix} 6 | \usage{ 7 | \method{fetch_custom_sample_covariates}{FacileDataSet}( 8 | x, 9 | samples = NULL, 10 | covariates = NULL, 11 | custom_key = Sys.getenv("USER"), 12 | with_source = FALSE, 13 | file.prefix = "facile", 14 | ... 
15 | ) 16 | } 17 | \arguments{ 18 | \item{samples}{the facile sample descriptor} 19 | 20 | \item{custom_key}{The key to use for the custom annotation} 21 | 22 | \item{fds}{The \code{FacileDataSet}} 23 | } 24 | \value{ 25 | covariate tbl 26 | } 27 | \description{ 28 | Fetches custom (user) annotations for a given user prefix 29 | } 30 | \seealso{ 31 | Other API: 32 | \code{\link{fetch_assay_score.FacileDataSet}()}, 33 | \code{\link{fetch_sample_covariates}()}, 34 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 35 | \code{\link{fetch_samples.FacileDataSet}()}, 36 | \code{\link{filter_features.FacileDataSet}()}, 37 | \code{\link{filter_samples.FacileDataSet}()}, 38 | \code{\link{organism.FacileDataSet}()}, 39 | \code{\link{samples.FacileDataSet}()} 40 | } 41 | \concept{API} 42 | -------------------------------------------------------------------------------- /man/infer_feature_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/feature-types.R 3 | \name{infer_feature_type} 4 | \alias{infer_feature_type} 5 | \title{Guesses the type of feature identifiers from a character vector.} 6 | \usage{ 7 | infer_feature_type(x, with_organism = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a character vector of ids} 11 | } 12 | \value{ 13 | data.frame with \code{id} (\code{x}) and \code{id_type}. If \code{with_organism = TRUE}, 14 | a third \code{organism} column is added with a guess for the organism. 15 | } 16 | \description{ 17 | We rely more on meta-information about our data types than "usual", and it's useful 18 | to know what types of identifiers we are using for different assays. This 19 | function tries to guess whether an identifier is an ensembl gene identifier, 20 | entrez id, etc. 21 | } 22 | \details{ 23 | A two-column data.frame is returned for id_type and organism. 
Organism 24 | is "unknown" for identifiers where this can't be inferred (like Refseq). 25 | 26 | If an identifier matches more than one id_type, the id_type is set to 27 | \code{"ambiguous"}. If the identifier doesn't match any guesses, then \code{"unknown"}. 28 | } 29 | \examples{ 30 | fids <- c("NC_000023", "ENSG00000101811", "ENSMUSG00000030088.2", "85007") 31 | infer_feature_type(fids) 32 | } 33 | -------------------------------------------------------------------------------- /man/check_categorical.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{check_categorical} 4 | \alias{check_categorical} 5 | \alias{assert_categorical} 6 | \alias{test_categorical} 7 | \title{Check to see if a vector is categorical (character or string)} 8 | \usage{ 9 | check_categorical( 10 | x, 11 | any.missing = TRUE, 12 | all.missing = TRUE, 13 | len = NULL, 14 | min.len = NULL, 15 | max.len = NULL, 16 | ... 17 | ) 18 | 19 | assert_categorical( 20 | x, 21 | any.missing = TRUE, 22 | all.missing = TRUE, 23 | len = NULL, 24 | min.len = NULL, 25 | max.len = NULL, 26 | ..., 27 | .var.name = vname(x), 28 | add = NULL 29 | ) 30 | 31 | test_categorical(x, ...) 32 | } 33 | \arguments{ 34 | \item{x}{a vector of things} 35 | 36 | \item{any.missing}{are vectors with missing values allowed? Default is \code{TRUE}} 37 | 38 | \item{all.missing}{are vectors with only missing values allowed? Default is \code{TRUE}} 39 | 40 | \item{len}{expected length of \code{x}. If provided, overrides \code{min.len} and 41 | \code{max.len}. 
Defaults to \code{NULL}.} 42 | 43 | \item{min.len}{minimum length for \code{x}} 44 | 45 | \item{max.len}{maximum length for \code{x}} 46 | 47 | \item{...}{dots} 48 | } 49 | \description{ 50 | Check to see if a vector is categorical (character or string) 51 | } 52 | -------------------------------------------------------------------------------- /man/eav_metadata_merge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{eav_metadata_merge} 4 | \alias{eav_metadata_merge} 5 | \title{Merge inferred and explicit covariate column metadata.} 6 | \usage{ 7 | eav_metadata_merge(default_def, covariate_def = list()) 8 | } 9 | \arguments{ 10 | \item{default_def}{A list of covariate-definition-lists, as would be returned 11 | from \code{\link[=eav_metadata_create]{eav_metadata_create()}}} 12 | 13 | \item{covariate_def}{list of additional covariate info, such as 'label'. The 14 | variables defined here (defined by \code{names(covariate_def)}) need not be 15 | identical to \code{names(default_def)}.} 16 | } 17 | \value{ 18 | list of column metadata lists 19 | } 20 | \description{ 21 | Takes a list of (perhaps) default sets of entity-attribute metadata, as would 22 | be generated from \code{eav_metadata_create(pData(eSet), covariate_def = NULL)}, 23 | and pulls out the sister custom-definitions from the \code{covariate_def} 24 | attribute definition list. 25 | } 26 | \details{ 27 | If the \code{covariate_def} list-of-lists has information for variables not 28 | found in \code{default_def}, ie. the definitions returned from 29 | \code{setdiff(names(covariate_def), names(default_def))} will be added to 30 | the object returned from this function. 
31 | } 32 | -------------------------------------------------------------------------------- /man/facet_frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R, R/api.R 3 | \name{facet_frame.FacileDataSet} 4 | \alias{facet_frame.FacileDataSet} 5 | \alias{facet_frame} 6 | \title{Retrieves grouping table for samples within a FacileDataSet.} 7 | \usage{ 8 | \method{facet_frame}{FacileDataSet}(x, name = "default", ...) 9 | 10 | facet_frame(x, name = "default", ...) 11 | } 12 | \arguments{ 13 | \item{x}{An object of a class implementing the FacileInterface} 14 | 15 | \item{name}{The specific facet (grouping) definition to return. Note that 16 | this parameter isn't yet used. Only one facet table was originally 17 | defined for each FacileDataSet, but we want to enable different facet 18 | definitions to be used in the future.} 19 | } 20 | \value{ 21 | A \code{tibble} that defines the \verb{dataset,sample_id} tuples that belong 22 | to each "facet" (group). 23 | } 24 | \description{ 25 | It is natural to define subgroups of samples within larger datasets. 26 | This function returns grouping definitions (which we call "facets") for 27 | a \code{FacileDataStore}. 
28 | } 29 | \seealso{ 30 | Other FacileInterface: 31 | \code{\link{fetch_assay_score}()}, 32 | \code{\link{fetch_sample_covariates}()}, 33 | \code{\link{fetch_sample_statistics}()}, 34 | \code{\link{samples}()} 35 | } 36 | \concept{FacileInterface} 37 | -------------------------------------------------------------------------------- /tests/testthat/test-FacileDataSet.R: -------------------------------------------------------------------------------- 1 | context("Basic FacileDataSet functions") 2 | 3 | FDS <- exampleFacileDataSet() 4 | 5 | test_that("Fetching various database tables from FacileDataSet", { 6 | sctable <- sample_covariate_tbl(FDS) 7 | expect_true(is(sctable, 'tbl')) 8 | 9 | sstable <- sample_stats_tbl(FDS) 10 | expect_true(is(sstable, 'tbl')) 11 | 12 | gitable <- gene_info_tbl(FDS) 13 | expect_true(is(gitable, 'tbl')) 14 | }) 15 | 16 | test_that("compound filter criteria == method chaining with filter_samples()", { 17 | s1 <- FDS %>% 18 | filter_samples(indication == "CRC", sex == "f") 19 | s2 <- FDS %>% 20 | filter_samples(indication == "CRC") %>% 21 | filter_samples(sex == "f") 22 | expect_setequal(s2$sample_id, s1$sample_id) 23 | }) 24 | 25 | test_that("filter_samples filters against dataset and sample_id columns", { 26 | all.samples <- FDS %>% 27 | samples() %>% 28 | with_sample_covariates() 29 | 30 | blca.f <- filter_samples(FDS, dataset == "BLCA", sex == "f") 31 | blca.e <- filter(all.samples, dataset == "BLCA", sex == "f") 32 | expect_setequal(blca.f$sample_id, blca.e$sample_id) 33 | 34 | some.ids <- sample(all.samples$sample_id, 5) 35 | some.f <- filter_samples(FDS, sample_id %in% some.ids) 36 | some.e <- filter(all.samples, sample_id %in% some.ids) 37 | expect_setequal(some.f$sample_id, some.e$sample_id) 38 | }) 39 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 
branches: 4 | - main 5 | paths: 6 | - 'R/**' 7 | - 'DESCRIPTION' 8 | - 'NAMESPACE' 9 | - 'src/**' 10 | - 'tests/**' 11 | pull_request: 12 | schedule: 13 | # Rerun checks daily, after facilebio/facilebio_base_extra is rebuilt. 14 | # This will catch failures due to changes in functionality of packages 15 | # we depend on. 16 | # facilebio_base_extra builds every day 4AM pacific time (11 AM UTC), so 17 | # we will build this an hour after that (the same time facilebio/facilebio) 18 | - cron: '0 12 * * *' 19 | 20 | name: R-CMD-check 21 | 22 | jobs: 23 | R-CMD-check: 24 | runs-on: ubuntu-latest 25 | container: facilebio/facilebio 26 | env: 27 | ACTIONS_ALLOW_UNSECURE_COMMANDS: true 28 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 29 | 30 | steps: 31 | - name: Checkout Repository 32 | uses: actions/checkout@v2 33 | 34 | - name: Setup R 35 | uses: r-lib/actions/setup-r@v1 36 | with: 37 | install-r: false 38 | 39 | - name: Install dependencies 40 | run: remotes::install_deps(dependencies = TRUE, upgrade = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Check 44 | run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") 45 | shell: Rscript {0} 46 | -------------------------------------------------------------------------------- /man/spread_assay_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assay-data.R 3 | \name{spread_assay_data} 4 | \alias{spread_assay_data} 5 | \title{Takes a result from fetch_expression and spreads out genes across columns} 6 | \usage{ 7 | spread_assay_data( 8 | x, 9 | assay_name, 10 | key = c("name", "feature_id"), 11 | value = c("cpm", "value", "count"), 12 | .fds = fds(x) 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{facile expression result from \code{fetch_expression}} 17 | 18 | \item{key}{the column from the long-form \code{fetch_expression} table 19 | to put in the columns of the outgoing data.frame that the 
values are 20 | "spread into"} 21 | 22 | \item{value}{the value column to spread into the \code{key} columns} 23 | 24 | \item{.fds}{the \code{FacileDataSet}} 25 | } 26 | \value{ 27 | a more stout \code{x} with the expression values spread across 28 | columns. 29 | } 30 | \description{ 31 | This is a convenience function, and will try to guess what you mean if you 32 | don't explicitly specify which columns to spread and what to call them. 33 | With that mind set, if we find a cpm and symbol column, we will use them 34 | because those are the thing you will likely want to use for exploratory 35 | data analysis if they're in the incoming dataset. If those columns aren't 36 | found, then we'll pick the feature_id and count column. 37 | } 38 | -------------------------------------------------------------------------------- /man/normalize_assay_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalize_assay_data.R 3 | \name{normalize_assay_data} 4 | \alias{normalize_assay_data} 5 | \title{Helper functions to normalize assay data into log2 space.} 6 | \usage{ 7 | normalize_assay_data( 8 | x, 9 | features, 10 | samples, 11 | batch = NULL, 12 | log = TRUE, 13 | prior.count = 0.1, 14 | main = NULL, 15 | verbose = FALSE, 16 | ..., 17 | .fds = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{A matrix of raw/unnormalized assay data retrieved from 22 | within the \code{fetch_assay_data()} itself.} 23 | 24 | \item{features}{a feature descriptor data.frame that includes the 25 | feature_id's of the rows in \code{x}, as well as the assay name/type they 26 | were pulled from. We assert that all features come from the same assay 27 | type, and the rows here match 1:1 the rows in \code{x}.} 28 | 29 | \item{samples}{a sample descriptor for the columns in \code{x}. 
#' Retrieves an example FacileDataSet
#'
#' A subset of the TCGA data from the BLCA and COAD indications is provided
#' as a FacileDataSet.
#'
#' @export
#' @return The object created by calling [FacileDataSet()] on the
#'   `extdata/exampleFacileDataSet` directory installed with this package.
exampleFacileDataSet <- function() {
  # mustWork = TRUE: system.file() returns "" for a missing resource, which
  # would otherwise be handed to FacileDataSet() and fail far less clearly.
  fn <- system.file("extdata", "exampleFacileDataSet", package = "FacileData",
                    mustWork = TRUE)
  FacileDataSet(fn)
}

#' Fetches exemplar data for unit testing
#'
#' @export
#' @rdname test-helpers
example_sample_covariates <- function() {
  # Error out right away if the fixture is not installed, instead of calling
  # readRDS("").
  pdat <- system.file("testdata", "test-sample-covariates.rds",
                      package = "FacileData", mustWork = TRUE)
  readRDS(pdat)
}

#' @export
#' @rdname test-helpers
#' @param file.path If `TRUE`, returns the path to the yaml file, otherwise
#'   returns the list-of-list meta definition.
#' @return Either the list-of-list meta definition, or path to the `meta.yaml`
#'   file where these are defined.
example_meta <- function(file.path=FALSE) {
  # Never hand back (or try to parse) an empty path when the yaml is missing.
  out <- system.file("testdata", "expected-meta.yaml",
                     package = "FacileData", mustWork = TRUE)
  # Anything other than a literal TRUE means "give me the parsed contents".
  if (!isTRUE(file.path)) {
    out <- yaml::read_yaml(out)
  }
  out
}

#' @export
#' @importFrom yaml read_yaml
#' @rdname test-helpers
#' @return the list-of-list definitions for the example `pData` returned from
#'   [example_sample_covariates()]
example_sample_covariate_definitions <- function() {
  # The covariate definitions are the `sample_covariate` entry of the parsed
  # meta definition.
  out <- example_meta(file.path=FALSE)
  out$sample_covariate
}
48 | 49 | - name: Deploy package 50 | run: pkgdown::deploy_to_branch(new_process = FALSE) 51 | shell: Rscript {0} 52 | -------------------------------------------------------------------------------- /man/meta-info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{meta_file} 4 | \alias{meta_file} 5 | \alias{meta_info} 6 | \alias{default_assay.FacileDataSet} 7 | \alias{dataset_definitions} 8 | \title{Path to the meta information YAML file} 9 | \usage{ 10 | meta_file(x) 11 | 12 | meta_info(x, fn = meta_file(x)) 13 | 14 | \method{default_assay}{FacileDataSet}(x) 15 | 16 | dataset_definitions(x, as.list = TRUE) 17 | } 18 | \arguments{ 19 | \item{x}{A \code{FacileDataSet}} 20 | 21 | \item{fn}{The path to the \code{meta.yaml} file.} 22 | 23 | \item{as.list}{boolean, if \code{TRUE} (default) returns a list, otherwise 24 | summarizes results into a tibble.} 25 | } 26 | \value{ 27 | The \code{meta.yaml} file parsed into a list-of-lists representation 28 | 29 | meta information about the datasets in \code{x} as a \code{list} or \code{tibble} 30 | } 31 | \description{ 32 | Lots of useful information is stored in a \code{FacileDataSet}'s \code{meta.yaml} file. 33 | This function returns all of that in a list-of-lists 34 | 35 | A \code{FacileDataSet} can contain assay data from different "datasets" (such 36 | as different cancer indications from the TCGA). This function returns 37 | description and URL information that describes these datasets in more detail, 38 | which is specified in the FacileDataSets \code{meta.yaml} file. 
39 | } 40 | \seealso{ 41 | Other FacileDataSet: 42 | \code{\link{FacileDataSet}()}, 43 | \code{\link{dbfn}()}, 44 | \code{\link{hdf5fn}()} 45 | } 46 | \concept{FacileDataSet} 47 | -------------------------------------------------------------------------------- /man/FacileData-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{package} 4 | \name{FacileData-package} 5 | \alias{FacileData} 6 | \alias{FacileData-package} 7 | \title{FacileData: A fluent API for accessing multi-assay high-throughput genomics data} 8 | \description{ 9 | Defines the "FacileData" API over multi-assay, high-throughput 10 | genomics data. The FacileData API is a fluent tidy-like grammar that 11 | facilitates exploratory data analysis. This package also defines a 12 | "FacileDataSet" class, which is a reference implementation of the "FacileData" 13 | API that uses SQLite and HDF5 files to store arbitrarily large datasets and 14 | provide fast and efficient access to arbitrary subsets of these data without 15 | loading it all into memory. 
16 | } 17 | \seealso{ 18 | Useful links: 19 | \itemize{ 20 | \item \url{https://github.com/facilebio/FacileData} 21 | \item Report bugs at \url{https://github.com/facilebio/FacileData/issues} 22 | } 23 | 24 | } 25 | \author{ 26 | \strong{Maintainer}: Steve Lianoglou \email{lianoglou@dnli.com} (\href{https://orcid.org/0000-0002-0924-1754}{ORCID}) 27 | 28 | Authors: 29 | \itemize{ 30 | \item Vincent Rouilly \email{rouilly.vincent@gene.com} 31 | \item Peter Haverty (@phaverty on github) 32 | } 33 | 34 | Other contributors: 35 | \itemize{ 36 | \item Jonathan Carrol \email{jono@jcarroll.com.au} [contributor] 37 | \item Denali Therapeutics (Coypright 2019) [copyright holder, funder] 38 | \item Genentech (Coypright 2016 - 2018) [copyright holder, funder] 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/fetch_sample_statistics.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample-info.R 3 | \name{fetch_sample_statistics.FacileDataSet} 4 | \alias{fetch_sample_statistics.FacileDataSet} 5 | \title{Fetch the sample statistics for sets of samples in the warehouse} 6 | \usage{ 7 | \method{fetch_sample_statistics}{FacileDataSet}( 8 | x, 9 | samples = NULL, 10 | semi = TRUE, 11 | assay_name = default_assay(x), 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{A \code{FacileDataSet} object} 17 | 18 | \item{samples}{a data.frame or tbl_sqlite that has dataset and sample_id 19 | columns} 20 | 21 | \item{semi}{use \code{semi_join}? I've found this to be slow sometimes in 22 | SQLite for some reason} 23 | 24 | \item{assay_name}{parameter added to keep old API same with new "unhinged" 25 | FacileDataSets.} 26 | } 27 | \value{ 28 | a tbl_df or tbl_sqlite result from the sample_stats table 29 | } 30 | \description{ 31 | NOTE: this function needs the axe. 
It has been changed to use the 32 | assay_sample_info_table, but the way we handle this with the new unhinged 33 | assay needs to change. 34 | } 35 | \seealso{ 36 | Other API: 37 | \code{\link{fetch_assay_score.FacileDataSet}()}, 38 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 39 | \code{\link{fetch_sample_covariates}()}, 40 | \code{\link{fetch_samples.FacileDataSet}()}, 41 | \code{\link{filter_features.FacileDataSet}()}, 42 | \code{\link{filter_samples.FacileDataSet}()}, 43 | \code{\link{organism.FacileDataSet}()}, 44 | \code{\link{samples.FacileDataSet}()} 45 | } 46 | \concept{API} 47 | -------------------------------------------------------------------------------- /inst/extdata/test/sample-meta-definitions.yaml: -------------------------------------------------------------------------------- 1 | sex: 2 | class: clinical 3 | type: categorical 4 | levels: ['m', 'f'] 5 | stage: 6 | class: clinical 7 | type: categorical 8 | sample_type: 9 | class: clinical 10 | type: categorical 11 | levels: ['normal', 'tumor'] 12 | indication: 13 | class: tumor_classification 14 | type: categorical 15 | subtype_expression: 16 | class: tumor_classification 17 | type: categorical 18 | subtype_breast_receptor_status: 19 | class: tumor_classification 20 | type: categorical 21 | subtype_microsatellite_instability: 22 | class: tumor_classification 23 | type: categorical 24 | subtype_crc_cms: 25 | class: tumor_classification 26 | type: categorical 27 | is_primary: 28 | class: tumor_classification 29 | type: categorical 30 | histology: 31 | class: tumor_classification 32 | type: categorical 33 | location: 34 | class: tumor_classification 35 | type: categorical 36 | cohort: 37 | class: clinical 38 | type: categorical 39 | smoking_status: 40 | class: clinical 41 | type: categorical 42 | has_lymph_met: 43 | class: clinical 44 | type: categorical 45 | has_liver_met: 46 | class: clinical 47 | type: categorical 48 | has_bone_met: 49 | class: clinical 50 | type: categorical 51 
| PFS: 52 | class: response 53 | type: right_censored 54 | OS: 55 | class: response 56 | type: right_censored 57 | BCOR: 58 | class: response 59 | type: categorical 60 | levels: ["CR", "PR", "SD", "PD", "NE"] 61 | IC: 62 | class: IHC 63 | type: categorical 64 | levels: ["IC0", "IC1", "IC2", "IC3"] 65 | TC: 66 | class: IHC 67 | type: categorical 68 | levels: ["TC0", "TC1", "TC2", "TC3"] 69 | -------------------------------------------------------------------------------- /man/summary.eav_covariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample-covariates.R 3 | \name{summary.eav_covariates} 4 | \alias{summary.eav_covariates} 5 | \title{Provides a summary table of sample covariates.} 6 | \usage{ 7 | \method{summary}{eav_covariates}(object, expanded = FALSE, droplevels = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{A sample covariate table, the likes returned from 11 | \code{\link[=fetch_sample_covariates]{fetch_sample_covariates()}}.} 12 | 13 | \item{expanded}{includes details (rows) for each covariate per level 14 | (or quantile), depending on the covariates \code{"class"} attribute.} 15 | } 16 | \value{ 17 | a tibble of summary sample covariate information with the following 18 | columns: 19 | \itemize{ 20 | \item \code{variable}: name of the variable 21 | \item \code{class}: class of variable (real, categorical) 22 | \item \code{nsamples}: the number of samples that have this variable defined 23 | \item \code{level}: the level (or quantile) of the covariate 24 | (included only when \code{expanded == TRUE}) 25 | \item \code{ninlevel}: the number of samples with this covariate value 26 | (included only when \code{expanded == TRUE}) 27 | } 28 | } 29 | \description{ 30 | Summarizes a set of sample covariates (returned from 31 | \code{\link[=fetch_sample_covariates]{fetch_sample_covariates()}}) at different granularities. 
32 | } 33 | \examples{ 34 | fds <- exampleFacileDataSet() 35 | covs <- fetch_sample_covariates(fds) 36 | smry <- summary(covs) 37 | details <- summary(covs, expanded = TRUE) 38 | catdeetz <- covs \%>\% 39 | filter(class == "categorical") \%>\% 40 | summary(expanded = TRUE) 41 | } 42 | -------------------------------------------------------------------------------- /man/cast_covariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{cast_covariate} 4 | \alias{cast_covariate} 5 | \title{Casts the character EAV values to their R-native defined types.} 6 | \usage{ 7 | cast_covariate(covariate, values, cov.def, .fds) 8 | } 9 | \arguments{ 10 | \item{covariate}{the name of the covariate} 11 | 12 | \item{values}{the covariate values (which is a \code{character}) as it is 13 | pulled from the database.} 14 | 15 | \item{cov.def}{the un-yamled covariate definitions, if missing we rely on 16 | pulling this out from the \code{FacileDataSet} object \code{.fds}} 17 | 18 | \item{.fds}{If \code{missing(cov.def)}, this is the \code{FacileDataSet} to 19 | get the covariate definitions from.} 20 | } 21 | \value{ 22 | values cast to appropriate type if a valid definition was found for 23 | \code{covariate}, otherwise values is returned "as is". Most of the time 24 | this is a single vector, but others it can be a data.frame (for 25 | \code{right_censored} data, for instance) 26 | } 27 | \description{ 28 | For most things, a single value will be returned from each cast, but in the 29 | case of "time_to_event" data, the value is expanded to a two column 30 | data.frame with a \verb{tte_} column for time to event, and an 31 | \verb{event_} column to indicate event (1) or right censored (2). 
#' Fetch the sample statistics for sets of samples in the warehouse
#'
#' NOTE: this function needs the axe. It has been changed to use the
#' assay_sample_info_table, but the way we handle this with the new unhinged
#' assay needs to change.
#'
#' Every call emits a warning announcing the planned removal of this function
#' (see Issue #2).
#'
#' @export
#' @param x A \code{FacileDataSet} object
#' @param samples a data.frame or tbl_sqlite that has dataset and sample_id
#'   columns. When \code{NULL} (default) the result is not restricted to a
#'   sample subset.
#' @param semi use \code{semi_join}? I've found this to be slow sometimes in
#'   SQLite for some reason. Passed on to \code{join_samples()}.
#' @param assay_name parameter added to keep old API same with new "unhinged"
#'   FacileDataSets. Must be a single string that appears in
#'   \code{assay_names(x)}.
#' @param ... not used by this method
#' @return a tbl_df or tbl_sqlite holding the rows of
#'   \code{assay_sample_info_tbl(x)} for \code{assay_name} (joined against
#'   \code{samples} when these are provided), with \code{x} attached via
#'   \code{set_fds()}.
#' @family API
fetch_sample_statistics.FacileDataSet <- function(x, samples = NULL,
                                                  semi = TRUE,
                                                  assay_name = default_assay(x),
                                                  ...) {
  # The pieces are pasted together by warning(); each one ends in a newline so
  # the notice, the issue reference and the URL land on separate lines.
  warning("`fetch_sample_statistics` will be removed from FacileData API\n",
          "See Issue #2\n",
          "https://github.com/denalitherapeutics/FacileData/issues/2",
          immediate. = TRUE)

  assert_string(assay_name)
  stopifnot(assay_name %in% assay_names(x))

  # Sample-level rows for the requested assay only
  ss <- assay_sample_info_tbl(x) %>%
    filter(assay == assay_name) %>%
    set_fds(x)

  if (is.null(samples)) {
    out <- ss
  } else {
    # TODO: Need to write unit tests here to exercise what we want to do with
    # these results when samples are provided
    samples <- assert_sample_subset(samples)
    out <- join_samples(ss, samples, semi)
  }

  # (Re)attach the FacileDataSet to whatever is being returned
  set_fds(out, x)
}
context("freplace_na")

# Package default used to replace NA's in categorical (character/factor) data
.def.categorical <- FacileData:::defaults.freplace_na$categorical

# Builds the 10-row fixture shared by every test below: a numeric column (a),
# a character column (b) and a factor column (c).
#
# stringsAsFactors = FALSE keeps `b` a character vector on every R version
# this package supports. DESCRIPTION declares R (>= 3.6.0), and before R 4.0.0
# data.frame() converts strings to factors by default, which would break the
# character comparisons made against `b`.
.na_test_frame <- function() {
  data.frame(
    a = rnorm(10),
    b = letters[1:10],
    c = factor(LETTERS[1:10]),
    stringsAsFactors = FALSE)
}

test_that("freplace_na handles factors", {
  data <- .na_test_frame()
  data[3, 2:3] <- NA

  r1 <- freplace_na(data)
  expect_true(all(complete.cases(r1)))
  # The replacement value must have been appended to the factor's levels
  checkmate::expect_factor(
    r1$c,
    levels = c(head(LETTERS, nrow(data)), .def.categorical))
})

test_that("freplace_na errors on numerics unless given explicit replacement", {
  data <- .na_test_frame()
  data[3, ] <- NA
  expect_error(freplace_na(data), "numerics.*number")

  r <- freplace_na(data, defaults = list(numeric = -1))
  expect_equal(r$a[3], -1)
  expect_equal(r$b[3], .def.categorical)
  expect_equal(as.character(r$c[3]), .def.categorical)
})

test_that("freplace_na handles custom values per column", {
  data <- .na_test_frame()
  data[3, 2:3] <- NA

  r <- freplace_na(data, list(b = "bee"))
  expect_equal(r$b[3], "bee")
  expect_equal(as.character(r$c[3]), .def.categorical)
})

test_that("freplace_na ignores specified columns", {
  data <- .na_test_frame()
  data[3, ] <- NA

  # Since a is numeric and has NA, this should error, but we explicitly ask to
  # skip the numeric column
  r <- freplace_na(data, list(b = "bee"), ignore = "a")
  checkmate::expect_scalar_na(r$a[3])
  expect_equal(r$b[3], "bee")
  expect_equal(as.character(r$c[3]), .def.categorical)
})
Note that unless specified otherwise 32 | (using the \code{replace} and \code{defaults} parameters), 33 | } 34 | \details{ 35 | Depending on the atomic type of the thing that NA's are being replaced with, 36 | a default value is assumed. These can be overriden by using the \code{defaults} 37 | parameter, or specifically by column (or list) names via the \code{replace} 38 | parameter. 39 | 40 | Missing values (NA's) come up often in FacileDataStores since we often use 41 | them to include data from multiple datasets, which induces "ragged" (sparse) 42 | covariate (pData) entries. In man 43 | } 44 | \examples{ 45 | data <- data.frame( 46 | a = rnorm(10), 47 | b = letters[1:10], 48 | c = factor(LETTERS[1:10])) 49 | data[3, ] <- NA 50 | r1 <- freplace_na(data, list(b = "bee"), ignore = "a") 51 | r2 <- freplace_na(data, list(b = "bee"), defaults = list(numeric = -Inf)) 52 | } 53 | -------------------------------------------------------------------------------- /man/simple-eav-decode-functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{eav_decode_real} 4 | \alias{eav_decode_real} 5 | \alias{eav_encode_real} 6 | \alias{eav_encode_logical} 7 | \alias{eav_decode_logical} 8 | \alias{eav_encode_cSurv} 9 | \alias{eav_decode_cSurv} 10 | \alias{eav_decode_categorical} 11 | \alias{eav_encode_categorical} 12 | \title{Entity-attribute-value decoding for real values.} 13 | \usage{ 14 | eav_decode_real(x, attrname = character(), def = list(), ...) 15 | 16 | eav_encode_real(x, ...) 17 | 18 | eav_encode_logical(x, ...) 19 | 20 | eav_decode_logical(x, attrname = character(), def = list(), ...) 21 | 22 | eav_encode_cSurv(x, ...) 23 | 24 | eav_decode_cSurv(x, attrname = character(), def = list(), ...) 25 | 26 | eav_decode_categorical( 27 | x, 28 | attrname = character(), 29 | def = list(), 30 | droplevels = TRUE, 31 | ... 
32 | ) 33 | 34 | eav_encode_categorical(x, ...) 35 | } 36 | \arguments{ 37 | \item{x}{the values column from the \code{EAV} table for this covariate} 38 | 39 | \item{attrname}{the name of "attribute" (covariate) in the EAV table.} 40 | 41 | \item{def}{the \code{covariate_definition} list for this covariate} 42 | } 43 | \value{ 44 | a \code{numeric} vector of \code{length(x)} 45 | } 46 | \description{ 47 | This is a simple function to handle converting numeric values in the EAV 48 | table to numeric data in R. 49 | 50 | This is essentially a pass through-function for categorical/character 51 | values in the EAV table. If the \code{def} list contains a \code{levels} entry, then 52 | the returned value is converted to a factor, with the levels in the order 53 | as defined in \code{def$levels}. If more levels appear in \code{x} than exist in 54 | \code{def$levels} they are appended to the end of the factor levels in 55 | alphabetical order. If more levels are defined in \code{def$levels} than appear 56 | in \code{x}, they are by default dropped, set \code{droplevels = FALSE} to keep them. 
# Using FacileTCGADataSet to create some testdata
#
# Writes `test-sample-covariates.rds` into the current working directory. The
# random draws below are seeded, so the order of the statements that call
# sample_n() / sample() must not be changed.

# The dplyr verbs used below (filter, group_by, sample_n, ungroup, mutate,
# select) are attached explicitly so the script does not rely on another
# package attaching dplyr for it. dplyr is attached first so that anything the
# facile packages define keeps precedence on the search path.
library(dplyr)
library(FacileTCGADataSet)
library(magrittr)
tcga <- FacileTCGADataSet()

## let's get some 20 samples
## (5 are drawn from every dataset / sample_type combination)
set.seed(0xBEEF)
bsamples.all <- tcga %>%
  filter_samples(indication %in% c("BLCA", "BRCA")) %>%
  with_sample_covariates %>%
  filter(sample_type != 'tumor_metastatic')

bsamples <- bsamples.all %>%
  group_by(dataset, sample_type) %>%
  sample_n(5) %>%
  ungroup %>%
  set_fds(tcga)

# pData for testing entity-attribute-value encodings
# Create a covariate pData object with non-default factor levels to test
scovs <- bsamples %>%
  mutate(stage = factor(stage),
         sex = factor(sex, c("male", "female"))) %>%
  select(dataset, sample_id, stage, sex, age, sample_type,
         subtype_molecular_bladder, subtype_receptor_breast,
         tte_OS, event_OS)

# Let's fill the categorical variables with all levels, even though our sampling
# can't possibly do that.

# Strip a trailing a/b sub-stage suffix, then re-level over stage i .. iv
scovs %<>% mutate(stage = sub("[ab]$", "", stage))
scovs %<>% mutate(stage = factor(stage, paste("stage", c("i", "ii", "iii", "iv"))))

# Tumor samples of each dataset get a randomly assigned subtype; every other
# sample gets NA for that subtype.
is.blca.tumor <- with(bsamples, dataset == "BLCA" & sample_type == "tumor")
is.brca.tumor <- with(bsamples, dataset == "BRCA" & sample_type == "tumor")
blca.sub.lvls <- c("luminal", "basal")
brca.sub.lvls <- c("ER+/PR+", "Her2+", "TNBC")
scovs %<>%
  mutate(subtype_molecular_bladder = ifelse(
    is.blca.tumor,
    sample(blca.sub.lvls, sum(is.blca.tumor), replace = TRUE), NA))
scovs %<>%
  mutate(subtype_receptor_breast = ifelse(
    is.brca.tumor,
    sample(brca.sub.lvls, sum(is.brca.tumor), replace = TRUE), NA))
scovs %<>%
  mutate(
    # keep subtype_molecular_bladder as just a character
    # subtype_molecular_bladder = factor(subtype_molecular_bladder, blca.sub.lvls),
    subtype_receptor_breast = factor(subtype_receptor_breast, brca.sub.lvls))
saveRDS(scovs, "test-sample-covariates.rds")
The FacileData API is a fluent tidy-like grammar that 17 | facilitates exploratory data analysis. This package also defines a 18 | "FacileDataSet" class, which is a reference implementation of the "FacileData" 19 | API that uses SQLite and HDF5 files to store arbitrarily large datasets and 20 | provide fast and efficient access to arbitrary subsets of these data without 21 | loading it all into memory. 22 | URL: https://github.com/facilebio/FacileData 23 | BugReports: https://github.com/facilebio/FacileData/issues 24 | Depends: 25 | R (>= 3.6.0), 26 | Imports: 27 | broom, 28 | checkmate (>= 1.8.5), 29 | crayon, 30 | DBI, 31 | data.table, 32 | dbplyr (>= 1.4.0), 33 | dplyr (>= 1.0.0), 34 | glue, 35 | edgeR, 36 | jsonlite, 37 | lazyeval, 38 | limma, 39 | methods, 40 | sparrow, 41 | reshape2, 42 | rhdf5, 43 | RSQLite, 44 | survival, 45 | utils, 46 | yaml 47 | Suggests: 48 | airway, 49 | Biobase, 50 | knitr, 51 | parathyroidSE, 52 | rmarkdown, 53 | roxygen2 (>= 6.1.0), 54 | stringr, 55 | S4Vectors, 56 | SummarizedExperiment, 57 | testthat (>= 1.0.2), 58 | tidyr 59 | Remotes: 60 | lianos/sparrow 61 | biocViews: Infrastructure, DataRepresentation 62 | RoxygenNote: 7.1.1 63 | Roxygen: list(markdown = TRUE) 64 | License: Apache License (>= 2.0) 65 | Encoding: UTF-8 66 | -------------------------------------------------------------------------------- /man/facilitate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{facilitate} 4 | \alias{facilitate} 5 | \title{Converts an arbitrary object into one that works in the facile ecosystem.} 6 | \usage{ 7 | facilitate(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{A non-facile object that we want to bring into the facile ecosystem} 11 | 12 | \item{...}{we're going to need a lot of flexibility in the implementation of 13 | this function for different types of analyses} 14 | } 15 | \value{ 16 | A facile-subclass of \code{x} that can take advantage of the interactive 17 | facile ecosystem. 18 | } 19 | \description{ 20 | There will be many times when the particular analysis you want to conduct 21 | is not well supported in the facileverse. In this case, we will endeavor 22 | to implement ways for you to take these results and bring them back into 23 | the facile ecosystem so that you can benefit from the interactivity provided 24 | therein. 25 | } 26 | \details{ 27 | We'll want to define \code{facilitate()} over a wide variety of objects. For 28 | instance: 29 | \itemize{ 30 | \item \code{facilitate(a_DGElist)} would convert an \code{\link[edgeR:DGEList]{edgeR::DGEList()}} object into 31 | a \code{FacileDGEList}, which is just the same DGEList that implements the 32 | FacileData API. This is a work in progress and will be implemented in the 33 | FacileBioc package. 34 | \item You might perform a differential expression analysis using a 35 | standard limma pipeline, but you'll want to be able to drop this result 36 | into the facile ecosystem provided in the FacileAnalysis package. 37 | The particulars of this \code{facilitate()} implementation would be defined in 38 | the FacileAnalysis package, and might look something like this:\preformatted{fit <- eBayes(lmFit(elist, design)) 39 | limma.res <- topTable(fit, coef = "something", n = Inf) 40 | facile.res <- facilitate(elist, fit, limma.res) 41 | } 42 | } 43 | 44 | It's not clear how well we'll be able to do this, or if this is even 45 | the right way to do it, but we'll need to do something.
46 | } 47 | \seealso{ 48 | https://github.com/facilebio/FacileBiocData 49 | } 50 | -------------------------------------------------------------------------------- /man/assertions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{assert_sample_subset} 4 | \alias{assert_sample_subset} 5 | \alias{check_sample_subset} 6 | \alias{test_sample_subset} 7 | \alias{assert_facet_descriptor} 8 | \alias{is_facet_descriptor} 9 | \alias{assert_assay_feature_descriptor} 10 | \alias{is_assay_feature_descriptor} 11 | \alias{assert_expression_result} 12 | \alias{is_expression_result} 13 | \alias{assert_sample_statistics} 14 | \alias{is_sample_statistics} 15 | \alias{assert_sample_covariates} 16 | \alias{is_sample_covariates} 17 | \alias{assert_columns} 18 | \alias{has_columns} 19 | \alias{assert_covariate_definitions} 20 | \alias{is_covariate_definitions} 21 | \title{Check to see that samples are referenced correctly} 22 | \usage{ 23 | assert_sample_subset(x, fds = NULL, ..., .var.name = vname(x), add = NULL) 24 | 25 | check_sample_subset(x, fds = NULL, ...) 26 | 27 | test_sample_subset(x, fds = NULL, ...) 28 | 29 | assert_facet_descriptor(x) 30 | 31 | is_facet_descriptor(x) 32 | 33 | assert_assay_feature_descriptor(x, .fds = NULL) 34 | 35 | is_assay_feature_descriptor(x, .fds = NULL) 36 | 37 | assert_expression_result(x) 38 | 39 | is_expression_result(x) 40 | 41 | assert_sample_statistics(x) 42 | 43 | is_sample_statistics(x) 44 | 45 | assert_sample_covariates(x) 46 | 47 | is_sample_covariates(x) 48 | 49 | assert_columns(x, req.cols) 50 | 51 | has_columns(x, req.cols, warn = TRUE) 52 | 53 | assert_covariate_definitions(x, required = NULL) 54 | 55 | is_covariate_definitions(x, required = NULL) 56 | } 57 | \description{ 58 | Samples have compound keys: dataset,sample_id. 
If we want to index into 59 | them, we can either: 60 | } 61 | \details{ 62 | \enumerate{ 63 | \item pass a data.frame around with dataset and sample_id columns 64 | \item pass a "loaded up" tbl_sqlite over the sample_covariate table which 65 | has your filters of interest set 66 | } 67 | } 68 | \section{assay_feature_descriptor}{ 69 | 70 | If .fds is provided, it must be a \code{FacileDataSet} and these functions 71 | will check to ensure that the \code{x[['assay']]} is a valid assay element 72 | in \code{.fds} 73 | } 74 | 75 | -------------------------------------------------------------------------------- /man/eav-right-censor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entity-attribute-value.R 3 | \name{eav_encode_right_censored} 4 | \alias{eav_encode_right_censored} 5 | \alias{eav_decode_right_censored} 6 | \title{Entity-attribute-value encodings for survival data.} 7 | \usage{ 8 | eav_encode_right_censored(time, event, sas.encoding = FALSE, ...) 9 | 10 | eav_decode_right_censored( 11 | x, 12 | attrname = character(), 13 | def = list(), 14 | suffix = attrname, 15 | sas.encoding = FALSE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{time}{\code{numeric} time to event} 21 | 22 | \item{event}{0/1 vector encoded in the "R sense". "1" is an event, "0" is 23 | right censored.} 24 | 25 | \item{sas.encoding}{Is the 'event' vector "SAS encoded"? In the SAS world, 26 | 1 means censored, and 0 is event. This is \code{FALSE} by default.} 27 | 28 | \item{x}{the time to event} 29 | 30 | \item{def}{the covariate definition for this variable} 31 | 32 | \item{suffix}{adds \verb{_} to the \code{tte} and \code{event} columns of the 33 | outgoing \code{data.frame}} 34 | } 35 | \value{ 36 | returns a numeric vector that combines time-to-event and censoring 37 | info (sign of the value).
38 | 39 | two column \code{data.frame} with \verb{tte(_SUFFIX)?} and \verb{event(_SUFFIX)?} 40 | columns. 41 | } 42 | \description{ 43 | Entity-attribute-value encodings for survival data. 44 | } 45 | \details{ 46 | Encoding of survival data in R requires two columns, one to store 47 | the time-to-event and another to indicate if there was an "event" at stored 48 | time, or if it was censored. A \code{FacileDataSet} stores these two \code{pData} 49 | columns into one "value" column in its entity-attribute-value 50 | \code{sample_covariate} table. 51 | 52 | The \code{encode_right_censored} function takes the time-to-event and censoring 53 | vectors and encodes them into a single signed time-to-event numeric value. 54 | Positive values indicate an event, and negative value are censored. 55 | 56 | The \code{decode_right_censored} function re-instantiates the two-column R-native 57 | storage of this data. 58 | } 59 | \seealso{ 60 | \code{\link[=eav_metadata_create]{eav_metadata_create()}} 61 | } 62 | -------------------------------------------------------------------------------- /man/filter_samples.FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/NSE-filter-samples.R 3 | \name{filter_samples.FacileDataSet} 4 | \alias{filter_samples.FacileDataSet} 5 | \title{Filter against the sample_covariate_tbl EAV table as if it were wide.} 6 | \usage{ 7 | \method{filter_samples}{FacileDataSet}( 8 | x, 9 | ..., 10 | samples. 
= samples(x), 11 | custom_key = Sys.getenv("USER"), 12 | with_covariates = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{A \code{FacileDataSet}} 17 | 18 | \item{...}{NSE clauses to use in \code{\link[dplyr:filter]{dplyr::filter()}} expressions} 19 | } 20 | \value{ 21 | a sample-descriptor \code{data.frame} that includes the dataset,sample_id 22 | pairs that match the virtual \code{filter(covariates, ...)} clause executed here. 23 | } 24 | \description{ 25 | This allows the user to query the \code{FacileDataSet} as if it were a wide 26 | \code{pData} \code{data.frame} of all its covariates. 27 | } 28 | \details{ 29 | This feature is only really meant to be 30 | used interactively, and with extreme caution ... programmatically specifying 31 | the covariates, for instance, does not work right now. 32 | 33 | TODO: Implement using \code{tidyeval} 34 | } 35 | \examples{ 36 | fds <- exampleFacileDataSet() 37 | 38 | # To identify all samples that are of "CMS3" or "CMS4" subtype 39 | # (stored in the "subtype_crc_cms" covariate): 40 | crc.34 <- filter_samples(fds, subtype_crc_cms \%in\% c("CMS3", "CMS4")) 41 | eav.query <- fds \%>\% 42 | fetch_sample_covariates(covariates = "subtype_crc_cms") \%>\% 43 | filter(value \%in\% c("CMS3", "CMS4")) \%>\% 44 | collect() 45 | setequal(crc.34$sample_id, eav.query$sample_id) 46 | 47 | # You can keep filtering a filtered dataset 48 | crc.34.male <- filter_samples(crc.34, sex == "m") 49 | } 50 | \seealso{ 51 | Other API: 52 | \code{\link{fetch_assay_score.FacileDataSet}()}, 53 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 54 | \code{\link{fetch_sample_covariates}()}, 55 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 56 | \code{\link{fetch_samples.FacileDataSet}()}, 57 | \code{\link{filter_features.FacileDataSet}()}, 58 | \code{\link{organism.FacileDataSet}()}, 59 | \code{\link{samples.FacileDataSet}()} 60 | } 61 | \concept{API} 62 |
-------------------------------------------------------------------------------- /vignettes/custom.css: -------------------------------------------------------------------------------- 1 | /* ------------------ callout boxes ----------------------------------------- */ 2 | /* let's take desiree's approach and merge with the multiGSEA style */ 3 | div.caution, div.demo, div.download, div.note, div.tip, div.warning, div.wip { 4 | margin: 10px 10px 10px 0px; 5 | min-height: 55px; 6 | padding: 2px 40px 2px 85px; 7 | background-position: 5px 5px, 68px 0px; 8 | background-repeat: no-repeat, repeat-y; 9 | width: 90%; 10 | } 11 | 12 | div.caution { 13 | background-image: url("images/icons/caution.png"), url('images/icons/callout-border.png'); 14 | } 15 | 16 | div.demo { 17 | background-image: url("images/icons/example.png"), url('images/icons/callout-border.png'); 18 | } 19 | 20 | div.download { 21 | background-image: url("images/icons/download.png"), url('images/icons/callout-border.png'); 22 | } 23 | 24 | div.note { 25 | background-image: url("images/icons/note.png"), url('images/icons/callout-border.png'); 26 | } 27 | 28 | div.tip { 29 | background-image: url("images/icons/tip.png"), url('images/icons/callout-border.png'); 30 | } 31 | 32 | div.warning { 33 | background-image: url("images/icons/warning.png"), url('images/icons/callout-border.png'); 34 | } 35 | 36 | div.wip { 37 | background-image: url("images/icons/wip.png"), url('images/icons/callout-border.png'); 38 | } 39 | 40 | /* http://desiree.rbind.io/post/2019/making-tip-boxes-with-bookdown-and-rmarkdown/ */ 41 | 42 | /* 43 | div.caution, div.demo, div.download, div.note, div.tip, div.wip { 44 | padding: 1em; 45 | margin: 1em 0; 46 | padding-left: 100px; 47 | background-size: 70px; 48 | background-repeat: no-repeat; 49 | background-position: 15px center; 50 | min-height: 120px; 51 | color: #1f5386; 52 | background-color: #bed3ec; 53 | border: solid 5px #dfedff; 54 | } 55 | 56 | div.caution { 57 | 
#' Guesses the type of feature identifiers from a character vector.
#'
#' We rely on meta-information about our data types more than "usual", and it's
#' useful to know what types of identifiers we are using for different assays.
#' This function tries to guess whether an identifier is an ensembl gene
#' identifier, ensembl transcript identifier, refseq id, or entrez id.
#'
#' Each identifier is tested against one regular expression per id type:
#'
#' * If it matches exactly one, that type is reported.
#' * If it matches more than one, the `id_type` is set to `"ambiguous"`.
#' * If it matches none (this includes `NA`), the `id_type` is `"unknown"`.
#'
#' A warning is emitted when any identifier is ambiguous or unknown, and (when
#' `with_organism = TRUE`) when the organism of any identifier can't be
#' inferred. The organism can only be inferred for human and mouse ensembl
#' identifiers; it is `"unknown"` for everything else (like Refseq).
#'
#' @export
#' @param x a character vector of ids. Vectors of length zero and one are
#'   supported.
#' @param with_organism if `TRUE`, a `source_organism` column with a guess for
#'   the organism is added to the result. Defaults to `FALSE`.
#' @param ... ignored
#' @return a tibble with one row per element of `x` and columns `id` (`x`) and
#'   `id_type`. If `with_organism = TRUE`, a third `source_organism` column is
#'   added.
#' @examples
#' fids <- c("NC_000023", "ENSG00000101811", "ENSMUSG00000030088.2", "85007")
#' infer_feature_type(fids)
infer_feature_type <- function(x, with_organism = FALSE, ...) {
  regex <- list(
    ens_gene = "^ENS[A-Z]*G\\d+(\\.\\d+)?$",
    ens_tx   = "^ENS[A-Z]*?T\\d+(\\.\\d+)?$",
    refseq   = "^[NXW][CGMRP]_\\d+(\\.\\d+)?$",
    entrez   = "^\\d+$")

  # One logical column per id type. cbind() always yields a matrix, even when
  # `x` has zero or one elements; sapply() would simplify those cases to a
  # list / plain vector and break the row-wise logic below.
  bool <- do.call(cbind, lapply(regex, grepl, x))
  nmatch <- rowSums(bool)

  # Rows with a single hit keep the name of the (only) matching column, rows
  # without a hit stay "unknown", rows with several hits are "ambiguous".
  type <- rep("unknown", length(x))
  for (id_type in colnames(bool)) {
    type[bool[, id_type]] <- id_type
  }
  type[nmatch > 1L] <- "ambiguous"

  is.bad <- type %in% c("ambiguous", "unknown")
  if (any(is.bad)) {
    warning(sum(is.bad), " identifiers were either ambiguous or unknown",
            immediate. = TRUE)
  }

  out <- tibble(
    id = x,
    id_type = type)

  if (with_organism) {
    is.ens <- grepl("^ens_", out[["id_type"]])
    # Human ensembl ids have no species infix (ENSG.../ENST...), mouse ids
    # carry "MUS" (ENSMUSG.../ENSMUST...).
    ens <- sub("^ENS", "", out[["id"]])
    is.human <- is.ens & grepl("^[TG]\\d+", ens)
    is.mouse <- is.ens & grepl("MUS[TG]\\d+", ens)
    out[["source_organism"]] <- ifelse(is.human, "Homo sapiens", "unknown")
    out[["source_organism"]] <- ifelse(is.mouse, "Mus musculus", out[["source_organism"]])
    unk <- out[["source_organism"]] == "unknown"
    if (any(unk)) {
      warning(sum(unk), " identifiers could not be matched to an organism",
              immediate. = TRUE)
    }
  }

  out
}
#' @rdname cSurv
#' @family cSurv
#' @export
as_Surv <- function(from) {
  ns <- tryCatch(loadNamespace("survival"), error = function(e) NULL)
  if (is.null(ns)) stop("survival package required")

  from <- as.character(from) # Both check type and drop attributes
  # Every non-missing entry must end in a digit, optionally followed by any
  # mix of "+" (the censoring marker) and spaces.
  stopifnot(all(is.na(from) | grepl("\\d[\\+ ]*$", from)))

  # Look for the censoring marker anywhere in the trailing "+"/space run, so
  # that padded values such as "12+ " are read as censored. This is the same
  # run that is validated above and stripped from the time value below.
  marker <- sub("^.*\\d", "", from)
  status <- ifelse(grepl("+", marker, fixed = TRUE), 0, 1)
  status[is.na(from)] <- NA # missing entries stay missing

  ns$Surv(as.numeric(gsub("[\\+ ]*$", "", from)), status)
}
as_cSurv(as_Surv(from)) 80 | } 81 | ) 82 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # Cleanup Branch 2 | 3 | This includes changes that: 4 | 5 | 1. Lays some groundwork down for factoring out the "FacileData API" from the 6 | FacileDataSet. 7 | 2. Largely fills out documentation needed to placate `R CMD check` [WIP] 8 | 3. Fills out some vignettes [WIP] 9 | 4. pkgdown [not started] 10 | 5. Addresses unit test failures introduced by code changes in this branch [not started] 11 | 12 | ## Introduction of FacileDataStore 13 | 14 | Introduced a `FacileDataStore` "abstract class" in anticipation of refactoring 15 | out the "FacileData API" into a top-level `FacileData` package. 16 | 17 | The idea is that: 18 | 19 | 1. Any object that implements the "FacileData API" must include 20 | `"FacileDataStore"` in its class hierarchy (at the root(?)). For instance, 21 | `class(exampleFacileDataSet())` returns 22 | `"ExampleFacileTCGADataSet" "FacileDataSet" "FacileDataStore"` 23 | 24 | 2. The S3 methods that will be factored out of this package to define the 25 | "FacileData API" will effectively use `*.FacileDataStore` as their 26 | "default" methods. `*.default` FacileData API S3 methods should either 27 | (i) not be defined; or (ii) throw an error. 28 | 29 | Note that the names of the to-be-factored-out base package name ("FacileData", 30 | here) and "FacileData API" are up for discussion. I'm just using them here 31 | as placeholders to reference the concept we are all working towards. 32 | 33 | Random notes in this orbit: 34 | 35 | * I added `@family API` roxygen tags to methods that I (loosely) think should 36 | make up the "FacileData API". 
#' Stop unless every requested package can be loaded
#'
#' Walks over `pkg` in order and raises an error naming the first package
#' whose namespace cannot be loaded.
#'
#' @noRd
#' @param pkg A character vector of package names to check
#' @param quietly forwarded to [requireNamespace()], defaults to `TRUE`
#' @param ... passed into [requireNamespace()]
reqpkg <- function(pkg, quietly = TRUE, ...) {
  assert_character(pkg)
  for (pkg_name in pkg) {
    available <- requireNamespace(pkg_name, ..., quietly = quietly)
    if (available) next
    stop("'", pkg_name, "' package required, please install it.", call. = FALSE)
  }
}
#' Ensures that a vector has names for all elements if it has names for any
#'
#' If the vector is not named at all, it is returned untouched. Otherwise
#' every element whose name is missing (either the empty string or `NA`) is
#' named after its own value, and all names are then passed through
#' [make.names()] with `unique = TRUE`, so they end up syntactically valid and
#' unique.
#'
#' @export
#' @param x an object with names
#' @param ... ignored
#' @return `x` unchanged if it has no names, otherwise `x` with every element
#'   uniquely named.
nameit <- function(x, ...) {
  nms <- names(x)
  if (is.null(nms)) return(x)

  # nchar() returns NA for NA names, which would leak NA into the logical
  # subscript below; treat NA names as missing explicitly instead.
  noname <- is.na(nms) | !nzchar(nms)
  nms[noname] <- x[noname]

  names(x) <- make.names(nms, unique = TRUE)
  x
}
no-repeat, repeat-y; 44 | width: 90%; 45 | } 46 | 47 | div.important { 48 | margin: 2px 10px 10px 0px; 49 | min-height: 55px; 50 | padding: 2px 10px 2px 85px; 51 | background-position: 5px 5px, 68px 0px; 52 | background-image: url('articles/images/icons/important.png'), url('articles/images/icons/callout-border.png'); 53 | background-repeat: no-repeat, repeat-y; 54 | width: 90%; 55 | } 56 | 57 | div.example { 58 | margin: 2px 10px 10px 0px; 59 | min-height: 55px; 60 | padding: 2px 10px 2px 85px; 61 | background-position: 5px 5px, 68px 0px; 62 | background-image: url('articles/images/icons/example.png'), url('articles/images/icons/callout-border.png'); 63 | background-repeat: no-repeat, repeat-y; 64 | width: 90%; 65 | } 66 | 67 | div.download { 68 | margin: 2px 10px 10px 0px; 69 | min-height: 55px; 70 | padding: 2px 10px 2px 85px; 71 | background-position: 5px 5px, 68px 0px; 72 | background-image: url('articles/images/icons/download.png'), url('articles/images/icons/callout-border.png'); 73 | background-repeat: no-repeat, repeat-y; 74 | width: 90%; 75 | } 76 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # .onLoad <- function(libname, pkgname) { 2 | # impl.prefix <- 'ftest' 3 | # ## This package serves as an "abstract implementation" to a FacileDb database. 4 | # ## The packages that implement access to a FacileWareHouse should define 5 | # ## the following options: 6 | # ## 7 | # ## - *.datapath 8 | # ## - *.dbpath 9 | # ## - *.covdef 10 | # ## - *.cachedir 11 | # ## 12 | # ## Since this FacileRepo package should not be tied to a specific FacileRepo 13 | # ## "implementation", we set the impl.prefex to be "ftest". 
This means that 14 | # ## this setup will create (or reuse) the following global options 15 | # ## 16 | # ## - ftest.datapath 17 | # ## - ftest.dbpath 18 | # ## - ftest.covdef 19 | # ## - ftest.cachedir 20 | # 21 | # ## Default database this will point to is configured so everything works 22 | # ## when this package is loaded (deployed) on rescomp 23 | # ## TODO: Update this database path to a test db instead of Atezo 24 | # 25 | # ## Until we create a test database and distribute within the package, I'm 26 | # ## affraid we can't avoid explicity defining the *.datapath. This is because 27 | # ## the unit tests are run in a "clean" (R --vanilla) environment which doesn't 28 | # ## load the stuff in your .Rprofile 29 | # ## dpath <- system.file('extdata', 'test', package='FacileData') 30 | # if (dir.exists('/gne')) { 31 | # dpath <- '/gne/home/lianogls/workspace/data/facile/test' 32 | # } else { 33 | # dpath <- '~/workspace/data/facile/test' 34 | # } 35 | # db.name <- 'TcgaDb-test.sqlite' 36 | # dpath <- getOption(sprintf('%s.datapath', impl.prefix), dpath) 37 | # 38 | # pkg.opts <- list( 39 | # datapath=dpath, 40 | # dbpath=file.path(dpath, db.name), 41 | # cachedir=file.path(dpath, 'cache'), 42 | # covdef=file.path(dpath, 'sample-meta-definitions.yaml')) 43 | # names(pkg.opts) <- sprintf('%s.%s', impl.prefix, names(pkg.opts)) 44 | # 45 | # ## We only set these options if they aren't already set in the global options 46 | # ## The developers should set the appropriate options in the ~/.Rprofile 47 | # opts <- options() 48 | # toset <- !(names(pkg.opts) %in% names(opts)) 49 | # if (any(toset)) { 50 | # options(pkg.opts[toset]) 51 | # } 52 | # 53 | # ## Check options 54 | # db.path <- getOption(paste0(impl.prefix, '.dbpath')) 55 | # if (!file.exists(db.path)) { 56 | # msg <- paste0( 57 | # "Default path to faciledb is not a valid file: ", db.path, "\n", 58 | # "Set options('facile.datapath') before loading the facilewarehouse ", 59 | # "package to a valid path to the 
SQLite database to skip this message.\n", 60 | # "A good place to do this for your local work is in your ~/.Rprofile") 61 | # ## warning(msg, immediate.=TRUE) 62 | # } 63 | # 64 | # invisible() 65 | # } 66 | -------------------------------------------------------------------------------- /inst/testdata/expected-meta.yaml: -------------------------------------------------------------------------------- 1 | name: TestFacileDataSet 2 | organism: Homo sapiens 3 | default_assay: rnaseq 4 | datasets: 5 | BLCA: 6 | url: https://portal.gdc.cancer.gov/projects/TCGA-BLCA 7 | description: Bladder urothelial carcinoma 8 | BRCA: 9 | url: https://portal.gdc.cancer.gov/projects/TCGA-BRCA 10 | description: Breast invasive carcinoma 11 | sample_covariates: 12 | stage: 13 | class: categorical 14 | levels: ["stage i", "stage ii", "stage iii", "stage iv"] 15 | description: Cancer staging classification (I-IV) 16 | label: Pathological Tumor Staging 17 | # colnames: stage 18 | arguments: 19 | x: stage 20 | type: clinical 21 | sex: 22 | class: categorical 23 | levels: ['male', 'female'] # reversed factor 24 | description: "chrX:chrY ratio" 25 | label: Sex 26 | # colnames: sex 27 | arguments: 28 | x: sex 29 | type: clinical 30 | age: 31 | class: real 32 | description: Age of patient in years 33 | label: Age (years) 34 | # colnames: age 35 | arguments: 36 | x: age 37 | type: clinical 38 | subtype_molecular_bladder: 39 | class: categorical 40 | # levels: ["luminal", "basal"] # for test, we don't specify this is factor 41 | description: > 42 | The luminal/basal subtyping scheme in bladder, as defined by Damrauer et al. 43 | (doi:10.1073/pnas.1318376111). For a larger umbrella review 44 | of the diversity of bladder subtypes, you can refer to doi:10.1038/nrc3817. 
45 | label: Bladder cancer subtype (luminal/basal) 46 | # colnames: subtype_molecular_bladder 47 | arguments: 48 | x: subtype_molecular_bladder 49 | type: tumor_classification 50 | subtype_receptor_breast: 51 | class: categorical 52 | levels: ["ER+/PR+", "Her2+", "TNBC"] 53 | description: > 54 | Breast cancer classification based on amplification and/or deletion of 55 | different receptors. 56 | label: Breast cancer subtype (receptor status) 57 | # colnames: subtype_receptor_breast 58 | arguments: 59 | x: subtype_receptor_breast 60 | type: tumor_classification 61 | sample_type: 62 | class: categorical 63 | levels: ['normal', 'tumor', 'tumor_metastatic'] 64 | description: Whether sample comes from a tumor or adjacent normal 65 | label: Sample type (tumor/normal) 66 | # colnames: sample_type 67 | arguments: 68 | x: sample_type 69 | type: clinical 70 | OS: 71 | class: right_censored 72 | arguments: 73 | time: tte_OS 74 | event: event_OS 75 | label: Overall survival 76 | type: clinical 77 | description: > 78 | Overall Survival in days or months. The units still need to be standardized 79 | across trials. 
# pdata_metadata() should turn the "label" attribute of an ExpressionSet's
# sample-level data.frame into one `description` entry per covariate.
test_that("We can get pdata metadata", {
  # Skip (rather than error) when an optional package is not installed.
  skip_if_not_installed("Biobase")
  skip_if_not_installed("survival")

  sinfo <- data.frame(
    a = 1:4,
    b = survival::Surv(1:4, c(1, 1, 0, 1)),
    stringsAsFactors = FALSE)
  rownames(sinfo) <- letters[1:4]
  attr(sinfo, "label") <- c(a = "a is a", b = "b is b")

  vals <- matrix(1:16, ncol = 4, dimnames = list(LETTERS[1:4], letters[1:4]))
  es <- Biobase::ExpressionSet(vals, Biobase::AnnotatedDataFrame(sinfo))

  expect_identical(
    FacileData::pdata_metadata(es),
    list(a = list(description = "a is a"),
         b = list(description = "b is b"))
  )
})
inner_join( 50 | mutate(tsamples.new, source = "test"), 51 | mutate(tsamples.exp, source = "orig"), 52 | by = c("dataset", "sample_id")) 53 | expect_equal(nrow(tsamples.new), nrow(res)) 54 | expect_equal(nrow(tsamples.exp), nrow(res)) 55 | 56 | # expect factor levels are the same 57 | stage.new <- with_sample_covariates(tsamples.new, "stage") 58 | stage.exp <- with_sample_covariates(tsamples.exp, "stage") 59 | expect_factor(stage.new[["stage"]]) 60 | expect_equal(levels(stage.new[["stage"]]), levels(stage.exp[["stage"]])) 61 | stage.res <- inner_join(stage.new, stage.exp, 62 | by = c("dataset", "sample_id"), 63 | suffix = c(".new", ".exp")) 64 | expect_equal(nrow(stage.new), nrow(stage.exp)) 65 | expect_equal(nrow(stage.new), nrow(stage.res)) 66 | expect_equal(stage.res[["stage.new"]], stage.res[["stage.exp"]]) 67 | }) 68 | -------------------------------------------------------------------------------- /inst/extdata/exampleFacileDataSet/meta.yaml: -------------------------------------------------------------------------------- 1 | name: ExampleFacileTCGADataSet 2 | organism: Homo sapiens 3 | default_assay: rnaseq 4 | datasets: 5 | BLCA: 6 | url: https://portal.gdc.cancer.gov/projects/TCGA-BLCA 7 | description: Bladder urothelial carcinoma 8 | COAD: 9 | url: https://portal.gdc.cancer.gov/projects/TCGA-COAD 10 | description: Colon adenocarcinoma 11 | sample_covariates: 12 | indication: 13 | type: tumor_classification 14 | class: categorical 15 | description: High level indication of patient's cancer type 16 | label: Cancer Indication 17 | OS: 18 | type: response 19 | class: right_censored 20 | description: > 21 | Overall Survival in days or months. The units still need to be standardized 22 | across trials. 23 | label: Overall survival 24 | PFS: 25 | type: response 26 | class: right_censored 27 | description: > 28 | Progression Free Survival. The units still need to be standardized 29 | across trials. 
30 | label: Progression free survival 31 | sample_type: 32 | type: clinical 33 | class: categorical 34 | levels: ['normal', 'tumor'] 35 | description: Whether sample comes from a tumor or adjacent normal 36 | label: Sample type (tumor/normal) 37 | sex: 38 | type: clinical 39 | class: categorical 40 | levels: ['m', 'f'] 41 | description: In the "ratio between chrX:chrY" sense. 42 | label: Sex 43 | stage: 44 | type: clinical 45 | class: categorical 46 | levels: ["I", "II", "III", "IV"] 47 | description: Cancer staging classification (I-IV) 48 | label: Cancer stage 49 | subtype_molecular: 50 | type: tumor_classification 51 | class: categorical 52 | description: > 53 | The luminal/basal subtyping scheme in bladder, as defined by Damrauer et al. 54 | (doi:10.1073/pnas.1318376111). For a larger umbrella review 55 | of the diversity of bladder subtypes, you can refer to doi:10.1038/nrc3817. 56 | label: Bladder cancer subtype (luminal/basal) 57 | subtype_tcga: 58 | type: tumor_classification 59 | class: categorical 60 | description: > 61 | Expression based subtypes of bladder cancer (I-IV) as described in the 62 | TCGA bladder paper (doi:10.1038/nature12965). For a larger umbrella review 63 | of the diversity of bladder subtypes, you can refer to doi:10.1038/nrc3817. 64 | label: Bladder cancer subtype (TCGA) 65 | subtype_crc_cms: 66 | type: tumor_classification 67 | class: categorical 68 | description: > 69 | The consensus molecular subtypes of CRC. 70 | label: Consensus Molecular CRC Subtypes (CMS1-4) 71 | levels: ['CMS1', 'CMS2', 'CMS3', 'CMS4'] 72 | subtype_microsatellite_instability: 73 | type: tumor_classification 74 | class: categorical 75 | description: > 76 | Microsatellite instability (MSI) status of the tumor. 
77 | label: Microsatellite instability status (MSI-hi vs. MSI-lo/MSS) 78 | levels: ['MSI-hi', 'MSI-lo/MSS'] 79 | 80 | 81 | -------------------------------------------------------------------------------- /man/flog.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logging.R 3 | \name{flog} 4 | \alias{flog} 5 | \title{Generates a logging message using glue and crayon, with some bells/whistles.} 6 | \usage{ 7 | flog( 8 | ..., 9 | level = "info", 10 | ns = NULL, 11 | session = NULL, 12 | file = stderr(), 13 | sep = "", 14 | fill = FALSE, 15 | labels = NULL, 16 | append = FALSE, 17 | newline = !is.null(file) 18 | ) 19 | } 20 | \arguments{ 21 | \item{...}{the string elements to pass into \code{\link[glue:glue]{glue::glue()}}} 22 | 23 | \item{level}{the "firing level" of this message. Defaults to "info"} 24 | 25 | \item{ns}{(namespace) if included, then the message checks the 26 | namespace-specific logging priority} 27 | 28 | \item{session, file, sep, fill, labels, append}{sent to \code{\link[base:cat]{base::cat()}}} 29 | 30 | \item{newline}{If \code{TRUE}, appends a \verb{\\\\n} to the message. By default, this 31 | is \code{TRUE} when \code{file} is not \code{NULL}.} 32 | } 33 | \value{ 34 | invisibly returns the text generated in the logging message. 35 | } 36 | \description{ 37 | Like other logging approaches, each message created with this function is 38 | assigned a \code{level} (priority). If the current logging level, which is 39 | returned from a call to \code{flog_level} (ostensibly determined by the value 40 | of the \code{"facile.log.level(.*?)"} option) is less than or equal to the level of 41 | this message, then the message will be generated and sent to \code{file}. 42 | You can include a \code{namespace} for the message to provide a namespace-specific 43 | level/priority hierarchy. 
44 | } 45 | \details{ 46 | Convenience wrapper functions are provided for each logging level, ie. 47 | call \code{fwarn("message")} instead of \code{flog("message", level = "warn")}. Also, each facile* package provides its own \code{flog()} function which sets the namespace \code{ns} parameter to default to a package-specific namespace so you 48 | can control logging at the different package level. 49 | } 50 | \section{Logging Levels}{ 51 | 52 | 53 | Logging levels are\preformatted{.flog_levels <- c("all" = 0, "trace" = 1, "debug" = 2, "info" = 3, 54 | "warn" = 4, "error" = 5, "fatal" = 6) 55 | } 56 | } 57 | 58 | \section{crayon}{ 59 | 60 | Glue lets you put crayon functions in \code{{}} to stylize output. For instance, 61 | you can make "bold and red" the color red and also bold, like so:\preformatted{flog("This is \{red\}\{bold\}bold and red\{reset\}, right?") 62 | } 63 | 64 | Nice! It might be more convenient if we could make it a bit more terse, 65 | as shown below, but that might happen at another time.\preformatted{flog("This is rb`bold and red`, right?") 66 | } 67 | 68 | Colors: 69 | \itemize{ 70 | \item b: blue 71 | \item c: cyan 72 | \item g: green 73 | \item k: black 74 | \item m: magenta 75 | \item r: red 76 | \item y: yellow 77 | } 78 | 79 | Styles: 80 | \itemize{ 81 | \item i: italic 82 | \item s: strong (bold) 83 | \item S: strikethrough 84 | \item u: underline 85 | } 86 | } 87 | 88 | -------------------------------------------------------------------------------- /tests/testthat/test-EAV.R: -------------------------------------------------------------------------------- 1 | context("EAV Manipulation") 2 | 3 | efds. <- exampleFacileDataSet() 4 | pdata. <- efds. %>% 5 | # sex and stage are factors 6 | fetch_sample_covariates(covariates = c("sex", "stage")) %>% 7 | spread_covariates() %>% 8 | # add some other data types: numeric and character 9 | mutate(age = sample(20:70, nrow(.)), 10 | category = sample(letters, nrow(.), replace = TRUE)) 11 | emeta. 
<- local({ 12 | fn <- system.file("extdata", "exampleFacileDataSet", "meta.yaml", 13 | package = "FacileData", mustWork = TRUE) 14 | defined <- yaml::yaml.load_file(fn)$sample_covariates 15 | defined <- defined[names(defined) %in% colnames(pdata.)] 16 | c(defined, list( 17 | age = list(type = "atype", class = "real", description = "happy bday"), 18 | category = list(type = "atype", class = "categorical", description = "x"))) 19 | }) 20 | 21 | test_that("deafult metadata creation from data.frame", { 22 | ignore.cols <- c("dataset", "sample_id") 23 | 24 | covdefs <- eav_metadata_create(pdata., ignore = ignore.cols) 25 | expected.cols <- setdiff(names(pdata.), ignore.cols) 26 | expect_setequal(names(covdefs), expected.cols) 27 | 28 | # check that inferred covariate definitions have the required "slots", ie. 29 | # arguments, label, class, type 30 | for (cname in expected.cols) { 31 | vals <- pdata.[[cname]] 32 | expected <- emeta.[[cname]] 33 | inferred <- covdefs[[cname]] 34 | if (is.character(vals) || is.factor(vals)) { 35 | expect_equal(inferred[["class"]], "categorical", info = cname) 36 | } else if (is.numeric(vals)) { 37 | expect_equal(inferred[["class"]], "real", info = cname) 38 | } 39 | if (is.factor(vals)) { 40 | expect_equal(inferred[["levels"]], levels(vals), info = cname) 41 | } 42 | } 43 | }) 44 | 45 | test_that("custom definition supersede inferred EAV defs from data.frame", { 46 | covariate_def <- list( 47 | # Change order of levels and description in sex factor covariate 48 | sex = list( 49 | levels = rev(levels(pdata.[["sex"]])), 50 | description = "x:y chromosome ratio"), 51 | age = list( 52 | description = "years of life", 53 | type = "random")) 54 | 55 | defaults <- eav_metadata_create(pdata.) 
56 | custom <- eav_metadata_create(pdata., covariate_def = covariate_def) 57 | 58 | # Check reversed levels of `sex` covariate 59 | expect_character(custom$sex$levels) 60 | expect_equal(custom$sex$levels, rev(defaults$sex$levels)) 61 | 62 | # Ensure that specified entries were overriden, and others left alone. 63 | for (cname in names(defaults)) { 64 | dvals <- defaults[[cname]] 65 | cvals <- custom[[cname]] 66 | assert_list(dvals) 67 | assert_list(cvals) 68 | assert_subset(names(dvals), names(cvals)) 69 | for (attrib in names(dvals)) { 70 | override <- covariate_def[[cname]][[attrib]] 71 | expected <- if (!is.null(override)) override else dvals[[attrib]] 72 | expect_equal(cvals[[attrib]], expected, 73 | info = paste(cname, attrib, sep = ":")) 74 | } 75 | } 76 | }) 77 | -------------------------------------------------------------------------------- /tests/testthat/test-assay-data.R: -------------------------------------------------------------------------------- 1 | context("Fetching assay level data") 2 | 3 | if (!exists("FDS")) FDS <- exampleFacileDataSet() 4 | 5 | samples <- FDS %>% 6 | filter_samples(stage == "III") %>% 7 | select(dataset, sample_id) 8 | 9 | genes <- c( 10 | PRF1='5551', 11 | GZMA='3001', 12 | CD274='29126', 13 | TIGIT='201633') 14 | 15 | features <- tibble(assay='rnaseq', feature_id=genes) 16 | 17 | test_that("fetch_assay_data limits samples correctly", { 18 | s.df <- collect(samples, n=Inf) 19 | 20 | e.sqlite <- fetch_assay_data(FDS, genes, samples) %>% collect(n=Inf) 21 | e.df <- fetch_assay_data(FDS, genes, s.df) %>% collect(n=Inf) 22 | 23 | ## results are same from tbl_df and tbl_sqlite `samples` parameter 24 | expect_equal(e.sqlite, e.df) 25 | 26 | ## samples limited correcly 27 | expect_true(setequal(paste0(e.df$dataset, e.df$sample_id), 28 | paste0(s.df$dataset, s.df$sample_id))) 29 | 30 | }) 31 | 32 | test_that("spreading data works with_assay_data", { 33 | expected <- FDS %>% 34 | fetch_assay_data(genes, samples, normalized = TRUE) 
%>% 35 | select(dataset, sample_id, feature_name, value) %>% 36 | tidyr::spread(feature_name, value) 37 | result <- samples %>% 38 | with_assay_data(genes, normalized = TRUE, .fds = FDS) %>% 39 | collect 40 | 41 | expect_equal(result, expected, check.attributes = FALSE) 42 | }) 43 | 44 | test_that("fetch_assay_data(..., aggregate = TRUE) provides scores", { 45 | scores <- FDS %>% 46 | fetch_assay_data(features, samples, normalized = TRUE, aggregate = TRUE) %>% 47 | arrange(sample_id, feature_name) %>% 48 | select(dataset, sample_id, feature_id, symbol=feature_name, value) %>% 49 | mutate(samid=paste(dataset, sample_id, sep="__")) 50 | 51 | dat <- FDS %>% 52 | fetch_assay_data(features, samples, normalized = TRUE, as.matrix = TRUE) 53 | ewm <- sparrow::eigenWeightedMean(dat)$score[scores$samid] 54 | expect_equal(scores$value, unname(ewm)) 55 | 56 | # test with_assay_data 57 | with.scores <- scores %>% 58 | distinct(dataset, sample_id) %>% 59 | with_assay_data(features, aggregate = TRUE) 60 | 61 | expect_equal(with.scores$aggregated, scores$value) 62 | }) 63 | 64 | # test_that("fetch_assay_data handles missing entries for requested samples", { 65 | # ## When we have multiple assays for an FDS, we can use a valid sample 66 | # ## descriptor to retrieve data, but the requested assay may not have data 67 | # ## for all requested samples, we need to handle this. 
68 | # root <- rprojroot::find_root(rprojroot::is_r_package) 69 | # devtools::load_all(root) 70 | # tcga <- FacileDataSet('~/workspace/data/facile/FacileDataSets/FacileTCGADataSet-2017-03-25') 71 | # 72 | # library(reshape2) 73 | # samples <- sample_info_tbl(tcga) %>% 74 | # filter(dataset == 'BRCA') %>% 75 | # collect 76 | # 77 | # genes <- c(TIGIT='201633', CD274='29126') 78 | # rnaseq <- tcga %>% 79 | # fetch_assay_data(genes, samples, 'rnaseq', normalized=TRUE) 80 | # 81 | # ## don't have agilent data for all brca samples 82 | # agilent <- tcga %>% 83 | # fetch_assay_data(genes, samples, 'agilent', normalized=TRUE) 84 | # 85 | # }) 86 | -------------------------------------------------------------------------------- /tests/testthat/test-biocbox.R: -------------------------------------------------------------------------------- 1 | context("biocbox") 2 | 3 | if (!exists("FDS")) FDS <- exampleFacileDataSet() 4 | 5 | samples <- sample_covariate_tbl(FDS) %>% 6 | filter(variable == 'stage' & value == 'III') %>% 7 | select(dataset, sample_id) %>% 8 | collect() 9 | genes <- local({ 10 | out <- c("800", "1009", "1289", "50509", "2191", "2335", "5159") 11 | feature_info_tbl(FDS) %>% 12 | filter(feature_id %in% out) %>% 13 | collect() %>% 14 | pull(feature_id) 15 | }) 16 | 17 | # boxes and their associated packages 18 | box.info <- FacileData:::.biocboxes %>% 19 | select(class, package) %>% 20 | distinct() 21 | 22 | test_that("fetch_assay_data results converted to biocboxes", { 23 | scovs <- samples %>% 24 | with_sample_covariates() %>% 25 | as.data.frame() 26 | rownames(scovs) <- paste(scovs$dataset, scovs$sample_id, sep = "__") 27 | 28 | e <- fetch_assay_data(FDS, genes, samples, as.matrix = TRUE) 29 | e <- e[, rownames(scovs)] 30 | 31 | for (i in seq(nrow(box.info))) { 32 | class <- box.info[["class"]][i] 33 | package <- box.info[["package"]][i] 34 | 35 | rnaseq.compat <- is.element( 36 | "rnaseq", 37 | filter(FacileData:::.biocboxes, .data$class == 
.env$class)$assay_type) 38 | 39 | if (rnaseq.compat) { 40 | bb <- biocbox(samples, features = genes, class = class) 41 | } else { 42 | bb <- expect_warning({ 43 | biocbox(samples, features = genes, class = class) 44 | }, "not compatible.*assay_type", info = class) 45 | } 46 | 47 | expect_is(bb, class, info = class) 48 | checkmate::expect_set_equal(rownames(bb), genes, info = class) 49 | checkmate::expect_set_equal(colnames(bb), colnames(e), info = class) 50 | 51 | bb <- bb[rownames(e), colnames(e)] 52 | 53 | # Check assay data is same 54 | expect_equal(adata(bb), e, check.attributes = FALSE, 55 | info = class) 56 | 57 | # Check that sample covariates are the same 58 | pdat <- as.data.frame(pdata(bb)) 59 | checkmate::expect_subset(colnames(scovs), colnames(pdat), info = class) 60 | expect_equal(pdat[, colnames(scovs)], scovs, info = class, 61 | check.attributes = FALSE) 62 | } 63 | }) 64 | 65 | test_that("biocbox appends custom covariates from input sample table", { 66 | custom.covs <- samples %>% 67 | mutate(var1 = rnorm(nrow(samples)), var2 = sample(letters, nrow(samples))) 68 | 69 | bb <- biocbox(custom.covs, features = genes) 70 | 71 | cmp <- inner_join(custom.covs, bb$samples, by = c("dataset", "sample_id")) 72 | expect_equal(nrow(cmp), nrow(custom.covs)) 73 | expect_equal(cmp$var1.x, cmp$var1.y) 74 | expect_equal(cmp$var2.x, cmp$var2.y) 75 | }) 76 | 77 | 78 | test_that("biocbox appends custom covariates from sample_covariates param", { 79 | custom.covs <- samples %>% 80 | mutate(var1 = rnorm(nrow(samples)), var2 = sample(letters, nrow(samples))) 81 | 82 | bb <- biocbox(select(custom.covs, dataset, sample_id), 83 | features = genes, 84 | sample_covariates = custom.covs) 85 | 86 | cmp <- inner_join(custom.covs, bb$samples, by = c("dataset", "sample_id")) 87 | expect_equal(nrow(cmp), nrow(custom.covs)) 88 | expect_equal(cmp$var1.x, cmp$var1.y) 89 | expect_equal(cmp$var2.x, cmp$var2.y) 90 | }) 91 | 
-------------------------------------------------------------------------------- /man/addFacileAssaySet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/construction.R 3 | \name{addFacileAssaySet} 4 | \alias{addFacileAssaySet} 5 | \title{Adds new set of assay data for all samples in a FacileDataSet} 6 | \usage{ 7 | addFacileAssaySet( 8 | x, 9 | datasets, 10 | facile_assay_name, 11 | facile_assay_type = .assay.types, 12 | facile_feature_type = .feature.types, 13 | facile_assay_description = NULL, 14 | facile_feature_info, 15 | storage_mode = .storage.modes, 16 | chunk_rows = 5000, 17 | chunk_cols = "ncol", 18 | chunk_compression = 4, 19 | assay_name = NULL, 20 | warn_existing = FALSE 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{The \code{FacileDataSet}} 25 | 26 | \item{datasets}{list of \code{ExpressionSet}, \code{SummarizedExperiment}, or 27 | \code{DGEList}s that have the new assay data across all of the datasets in \code{x}.} 28 | 29 | \item{facile_assay_name}{the name of the assay in the source dataset object} 30 | 31 | \item{facile_assay_type}{string indicating the assay_type ('rnaseq', 32 | 'affymetrix', etc.)} 33 | 34 | \item{facile_feature_type}{a string indicating the universe the features in 35 | this assay refer to, i.e. "entrez", "ensgid", "enstid", etc.} 36 | 37 | \item{facile_feature_info}{a \code{data.frame} with the required \code{feature_info} 38 | columns that describe the features in this assay. 
Please refer to the 39 | "Features" section of the \code{FacileDataSet} vignette for a more complete 40 | description.} 41 | 42 | \item{storage_mode}{either \code{"integer"} or \code{"numeric"}, maps to the 43 | \code{storage.mode} parameter in \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}}} 44 | 45 | \item{chunk_rows}{the first entry in the \code{chunk} parameter in 46 | \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}} (\code{integer})} 47 | 48 | \item{chunk_cols}{the second entry in the \code{chunk} parameter in 49 | \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}}. If this is \code{"ncol"}, it is set to the number 50 | of columns in each of the internal dataset matrices being added.} 51 | 52 | \item{chunk_compression}{the \code{level} parameter in \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}}} 53 | 54 | \item{assay_name}{the assay name in the data containers provided in the 55 | \code{datasets} list.} 56 | 57 | \item{facile_assay_description}{a string that allows the caller to provide 58 | a "freeform" description of the assay (platform, protocol, whatever).} 59 | } 60 | \value{ 61 | a \code{tibble} subset of \code{facile_feature_info} that indicates the \emph{new} 62 | features that were added to the internal \code{feature_info_tbl}. 63 | } 64 | \description{ 65 | Once a FacileDataSet has been created and initialized, either via a 66 | low-level call to \code{\link[=initializeFacileDataSet]{initializeFacileDataSet()}}, or a call to 67 | \code{\link[=as.FacileDataSet]{as.FacileDataSet()}} over a list of BiocAssayContainers, you can add more 68 | assays (i.e. RNA-seq, microarray, etc.) to the FacileDataSet using this 69 | function. 70 | } 71 | \details{ 72 | Note that you cannot add assay data piecemeal. 
That is to say, you can not call 73 | this function once to add copynumber data 74 | (addFacileAssaySet(..., facile_assay_type = "cnv") to a subset of samples 75 | and later call this function again to add copynumber to the rest of the 76 | samples. The function will throw an error if 77 | facile_assay_type \%in\% assay_names(x) == TRUE. 78 | } 79 | -------------------------------------------------------------------------------- /tests/testthat/test-entity-attribute-value.R: -------------------------------------------------------------------------------- 1 | context("Entity-Attribute-Value conversions") 2 | 3 | # Checks the yaml encoding for the variable is as expected by using the 4 | # yaml encoding from "testdata/expected-meta.yaml" matches the encoding 5 | # that was programmatically generated 6 | # 7 | # @param x the recoded covariate list 8 | # @param expected the covariate list from `expected-meta.yaml` 9 | validate_eav_recode <- function(x, expected, varname) { 10 | expect_is(x, "list", info = varname) 11 | expect_is(expected, "list", info = varname) 12 | expect_equal(x$colnames, expected$colnames, info = varname) 13 | expect_equal(x$class, expected$class, info = varname) 14 | if (!is.null(expected$levels)) { 15 | expect_true(is.character(x$levels), info = varname) 16 | expect_equal(x$levels, expected$levels, info = varname) 17 | } else { 18 | expect_true(is.null(x$levels), info = varname) 19 | } 20 | } 21 | 22 | test_that("pData -> meta.yaml covariate encoding works (simple & compound)", { 23 | # Trying to recode the survival stuff isn't included in this test 24 | pdat <- example_sample_covariates() 25 | elol <- example_sample_covariate_definitions() 26 | 27 | # define covariate_def(-inition) for the compound OS facile covariate: 28 | covdef <- list( 29 | OS=list( 30 | arguments=c(time="tte_OS", event="event_OS"), 31 | class="right_censored", 32 | label="Overall survival", 33 | type="clinical", 34 | description="Overall Survival in months" 35 | )) 36 | 37 
| lol <- eav_metadata_create(pdat, covariate_def = covdef) 38 | fn <- tempfile() 39 | yaml::write_yaml(lol, fn) 40 | relol <- yaml::read_yaml(fn) 41 | 42 | # Explicitly test that the tte_OS and event_OS columns from `pDat` were 43 | # compounded into the OS covariate. 44 | # Reference the "Encoding Survival Covariates" section in the 45 | # `?eav_metadata_create` helpf file for what the expected behavior of how this 46 | # compounded, multi-column-to-single-value mapping should work. 47 | compounded <- c("tte_OS", "event_OS") 48 | expect_true(all(compounded %in% names(pdat))) # in pData 49 | expect_true(!any(c("tte_OS", "event_OS") %in% names(relol))) # not in yaml 50 | expect_true(setequal(relol$OS$arguments, compounded)) # names of columns saved for posterity 51 | 52 | # ensure that variables from encoded yaml file match test meta.yaml file 53 | expect_true(setequal(names(relol), names(lol))) 54 | 55 | # encodings match 56 | for (varname in names(lol)) { 57 | validate_eav_recode(relol[[varname]], elol[[varname]], varname) 58 | } 59 | }) 60 | 61 | test_that("Successful EAV creation of data.frame with a Surv object column", { 62 | # pData with Surv 63 | df <- data.frame( 64 | dataset = "foo", 65 | sample_id = letters[1:3], 66 | x = survival::Surv(1:3, c(0,1,0)), 67 | y = 4:6, 68 | stringsAsFactors = FALSE) 69 | 70 | expected <- df %>% 71 | mutate(x = as.character(x)) %>% 72 | tidyr::gather("variable", "value", -dataset, -sample_id) %>% 73 | mutate(class = ifelse(variable == "x", "cSurv", "real"), 74 | type = "general") %>% 75 | as_tibble() 76 | 77 | long <- as.EAVtable(df) 78 | expect_equal(long, expected, check.attributes = FALSE) 79 | }) 80 | 81 | test_that("basic encoding and decoding of EAV columns works", { 82 | # survival::Surv 83 | foo <- Surv(1:3, c(0,1,0)) 84 | x <- as(foo, "cSurv") 85 | y <- eav_encode_cSurv(x) 86 | y1 <- c("1+","2","3+") 87 | attr(y1, "eavclass") <- "cSurv" 88 | expect_identical(y, y1) 89 | 90 | z <- eav_decode_cSurv(y) 91 | 
expect_identical(x, z) 92 | }) 93 | -------------------------------------------------------------------------------- /R/replace_na.R: -------------------------------------------------------------------------------- 1 | defaults.freplace_na <- list( 2 | numeric = "error", 3 | categorical = "NA.") 4 | 5 | #' Replaces NA's with specified values. 6 | #' 7 | #' Some of the downstream uses of a FacileDataStore can throw problems when NA's 8 | #' are found in data or covariates, so we often want to fill in NA's with 9 | #' non-NA markers of missing values. Note that unless specified otherwise 10 | #' (using the `replace` and `defaults` parameters), NA's in numeric data raise an error and NA's in categorical data are replaced with `"NA."`. 11 | #' 12 | #' Depending on the atomic type of the thing that NA's are being replaced in, 13 | #' a default value is assumed. These can be overridden by using the `defaults` 14 | #' parameter, or specifically by column (or list) names via the `replace` 15 | #' parameter. 16 | #' 17 | #' Missing values (NA's) come up often in FacileDataStores since we often use 18 | #' them to include data from multiple datasets, which induces "ragged" (sparse) 19 | #' covariate (pData) entries. In many cases these NA's need to be replaced before the data are used downstream. 20 | #' 21 | #' @export 22 | #' @param data the thing that has NA's in it (a data.frame or vector) 23 | #' @param replace a named list of elements to use for custom replacement values 24 | #' @param defaults if named elements in `data` do not appear in `replace`, you 25 | #' can provide default values for categories of parameters (ie. 26 | #' `"categorical"` or `"numeric"`), otherwise 27 | #' FacileData:::defaults.freplace_na will be used. 
28 | #' @return an NA-replaced version of `data` 29 | #' @examples 30 | #' data <- data.frame( 31 | #' a = rnorm(10), 32 | #' b = letters[1:10], 33 | #' c = factor(LETTERS[1:10])) 34 | #' data[3, ] <- NA 35 | #' r1 <- freplace_na(data, list(b = "bee"), ignore = "a") 36 | #' r2 <- freplace_na(data, list(b = "bee"), defaults = list(numeric = -Inf)) 37 | freplace_na <- function(data, replace = list(), defaults = list(), 38 | ignore = character(), ...) { 39 | UseMethod("freplace_na") 40 | } 41 | 42 | #' @export 43 | freplace_na.default <- function(data, replace = NULL, defaults = list(), 44 | ignore = character(), ...) { 45 | # ignore not used here 46 | stopifnot(is.atomic(data)) 47 | isna <- is.na(data) 48 | if (!any(isna)) return(data) 49 | 50 | if (is.null(defaults)) defaults <- list() 51 | assert_list(defaults, names = "unique") 52 | defaults <- c(defaults, defaults.freplace_na) 53 | defaults <- defaults[!duplicated(names(defaults))] 54 | 55 | if (is.null(replace)) { 56 | if (is.numeric(data)) { 57 | replace <- defaults[["numeric"]] 58 | } else { 59 | replace <- defaults[["categorical"]] 60 | } 61 | } 62 | 63 | stopifnot(is.atomic(replace) && length(replace) == 1L) 64 | 65 | if (is.numeric(data)) { 66 | if (!test_number(replace)) { 67 | stop("Can't replace numerics yet with anything but a number") 68 | } 69 | } else { 70 | replace <- as.character(replace) 71 | if (is.factor(data)) { 72 | if (!is.element(replace, levels(data))) { 73 | levels(data) <- c(levels(data), replace) 74 | } 75 | } else { 76 | data <- as.character(data) 77 | } 78 | } 79 | 80 | data[isna] <- replace 81 | data 82 | } 83 | 84 | #' @export 85 | #' @method freplace_na data.frame 86 | freplace_na.data.frame <- function(data, replace = list(), defaults = list(), 87 | ignore = character(), ...) 
{ 88 | assert_character(ignore, null.ok = TRUE) 89 | for (cname in setdiff(colnames(data), ignore)) { 90 | vals <- data[[cname]] 91 | rep.val <- replace[[cname]] 92 | if (!identical(rep.val, "skip")) { 93 | data[[cname]] <- freplace_na(vals, replace[[cname]], defaults = defaults) 94 | } 95 | } 96 | data 97 | } 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # FacileData 5 | 6 | 7 | 8 | [![R build 9 | status](https://github.com/facilebio/FacileData/workflows/R-CMD-check/badge.svg)](https://github.com/facilebio/FacileData/actions) 10 | ![pkgdown](https://github.com/facilebio/FacileData/workflows/pkgdown/badge.svg) 11 | [![Project 12 | Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 13 | [![Lifecycle: 14 | Maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) 15 | [![codecov](https://codecov.io/gh/facilebio/FacileData/branch/master/graph/badge.svg)](https://codecov.io/gh/facilebio/FacileData) 16 | 17 | 18 | The `FacileData` package was written to facilitate easier analysis of 19 | large, multi-assay high-throughput genomics datasets. To this end, the 20 | `FacileData` package provides two things: 21 | 22 | 1. A *FacileData Access API* that defines a fluent interface over 23 | multi-assay genomics datasets that fits into the 24 | [tidyverse](https://www.tidyverse.org/). This enables analysts to 25 | more naturally query and retrieve data for general exploratory data 26 | analysis; and 27 | 2. A reference implementation of a datastore that implements the 28 | *FacileData Access API* called a *FacileDataSet*. The 29 | `FacileDataSet` provides efficient storage and retrieval of 30 | arbitrarily large high-throughput genomics datasets. 
For example, a 31 | single `FacileDataSet` can be used to store *all* of the RNA-seq, 32 | microarray, RPPA, etc. data from [The Cancer Genome 33 | Atlas](https://cancergenome.nih.gov/). This singular `FacileDataSet` 34 | allows analysts easy access to arbitrary subsets of these data 35 | without having to load all of it into memory. 36 | 37 | # Installation 38 | 39 | The FacileData suite of packages is only available from GitHub for now. 40 | You will want to install three `FacileData*` packages to appreciate 41 | its utility: 42 | 43 | ``` r 44 | # install.packages("devtools") 45 | devtools::install_github("facilebio/FacileData") 46 | ``` 47 | 48 | # Example Usage 49 | 50 | As a teaser, we’ll show how to plot HER2 copy number vs expression 51 | across the TCGA bladder and breast indications (“BLCA” and “BRCA”) using 52 | a `FacileDataSet`. 53 | 54 | ``` r 55 | library(ggplot2) 56 | library(FacileData) 57 | library(FacileTCGADataSet) 58 | tcga <- FacileTCGADataSet() 59 | 60 | features <- filter_features(tcga, name == "ERBB2") 61 | 62 | fdat <- tcga %>% 63 | filter_samples(indication %in% c("BLCA", "BRCA")) %>% 64 | with_assay_data(features, assay_name = "rnaseq", normalized = TRUE) %>% 65 | with_assay_data(features, assay_name = "cnv_score") %>% 66 | with_sample_covariates(c("indication", "sex")) 67 | 68 | ggplot(fdat, aes(cnv_score_ERBB2, ERBB2, color = sex)) + 69 | geom_point() + 70 | facet_wrap(~ indication) 71 | ``` 72 | 73 | 74 | 75 | Let’s compare how you might do the same using data stored in a 76 | `SummarizedExperiment` named `se.all` that stores RNA-seq (raw and 77 | normalized) and copy number data. 
78 | 79 | ``` r 80 | # load / get `se.all` from somewhere 81 | fidx <- which(mcols(se.all)$name == "ERBB2") 82 | se <- se.all[, se.all$indication %in% c("BLCA", "BRCA")] 83 | 84 | sdat <- data.frame( 85 | ERBB2 = assay(se, "rnaseq_norm")[fidx,], 86 | cnv_score_ERBB2 = assay(se, "cnv_score")[fidx,], 87 | sex = se$sex, 88 | indication = se$indication) 89 | 90 | ggplot(sdat, aes(cnv_score_ERBB2, ERBB2, color=sex)) + 91 | geom_point() + 92 | facet_wrap(~ indication) 93 | ``` 94 | 95 | TODO: Show same analysis using MultiAssayExperiment 96 | -------------------------------------------------------------------------------- /man/sample-covariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R, R/sample-covariates.R 3 | \name{fetch_sample_covariates} 4 | \alias{fetch_sample_covariates} 5 | \alias{with_sample_covariates} 6 | \alias{fetch_sample_covariates.FacileDataSet} 7 | \alias{fetch_sample_covariates.facile_frame} 8 | \title{Appends covariate columns to a query result} 9 | \usage{ 10 | fetch_sample_covariates( 11 | x, 12 | samples = NULL, 13 | covariates = NULL, 14 | custom_key = Sys.getenv("USER"), 15 | with_source = FALSE, 16 | ... 17 | ) 18 | 19 | with_sample_covariates( 20 | x, 21 | covariates = NULL, 22 | na.rm = FALSE, 23 | custom_key = Sys.getenv("USER"), 24 | .fds = NULL, 25 | ... 26 | ) 27 | 28 | \method{fetch_sample_covariates}{FacileDataSet}( 29 | x, 30 | samples = NULL, 31 | covariates = NULL, 32 | custom_key = Sys.getenv("USER"), 33 | with_source = FALSE, 34 | ... 35 | ) 36 | 37 | \method{fetch_sample_covariates}{facile_frame}( 38 | x, 39 | samples = NULL, 40 | covariates = NULL, 41 | custom_key = Sys.getenv("USER"), 42 | with_source = FALSE, 43 | ... 
44 | ) 45 | } 46 | \arguments{ 47 | \item{x}{a \code{FacileDataSet} connection} 48 | 49 | \item{samples}{a samples descriptor \code{tbl_*}} 50 | 51 | \item{covariates}{character vector of covariate names} 52 | 53 | \item{custom_key}{The key to use to fetch more custom annotations over 54 | the given samples} 55 | 56 | \item{na.rm}{if \code{TRUE}, filters outgoing result such that only rows 57 | with nonNA values for the \code{covariates} specified here will be 58 | returned. Default: \code{FALSE}. Note that this will not check columns 59 | not specified in \code{covariates} for NA-ness.} 60 | 61 | \item{.fds}{A \code{FacileDataSet} object} 62 | } 63 | \value{ 64 | The facile \code{x} object, annotated with the specified covariates. 65 | 66 | rows from the \code{sample_covariate} table 67 | } 68 | \description{ 69 | Note that this function will force the collection of \code{x} 70 | } 71 | \seealso{ 72 | Other FacileInterface: 73 | \code{\link{facet_frame.FacileDataSet}()}, 74 | \code{\link{fetch_assay_score}()}, 75 | \code{\link{fetch_sample_statistics}()}, 76 | \code{\link{samples}()} 77 | 78 | Other API: 79 | \code{\link{fetch_assay_score.FacileDataSet}()}, 80 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 81 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 82 | \code{\link{fetch_samples.FacileDataSet}()}, 83 | \code{\link{filter_features.FacileDataSet}()}, 84 | \code{\link{filter_samples.FacileDataSet}()}, 85 | \code{\link{organism.FacileDataSet}()}, 86 | \code{\link{samples.FacileDataSet}()} 87 | 88 | Other FacileInterface: 89 | \code{\link{facet_frame.FacileDataSet}()}, 90 | \code{\link{fetch_assay_score}()}, 91 | \code{\link{fetch_sample_statistics}()}, 92 | \code{\link{samples}()} 93 | 94 | Other API: 95 | \code{\link{fetch_assay_score.FacileDataSet}()}, 96 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 97 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 98 | \code{\link{fetch_samples.FacileDataSet}()}, 99 | 
\code{\link{filter_features.FacileDataSet}()}, 100 | \code{\link{filter_samples.FacileDataSet}()}, 101 | \code{\link{organism.FacileDataSet}()}, 102 | \code{\link{samples.FacileDataSet}()} 103 | 104 | Other API: 105 | \code{\link{fetch_assay_score.FacileDataSet}()}, 106 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()}, 107 | \code{\link{fetch_sample_statistics.FacileDataSet}()}, 108 | \code{\link{fetch_samples.FacileDataSet}()}, 109 | \code{\link{filter_features.FacileDataSet}()}, 110 | \code{\link{filter_samples.FacileDataSet}()}, 111 | \code{\link{organism.FacileDataSet}()}, 112 | \code{\link{samples.FacileDataSet}()} 113 | } 114 | \concept{API} 115 | \concept{FacileInterface} 116 | -------------------------------------------------------------------------------- /man/remove_batch_effect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_batch_effect.R 3 | \name{remove_batch_effect} 4 | \alias{remove_batch_effect} 5 | \title{Regress out confounding variables from a data matrix.} 6 | \usage{ 7 | remove_batch_effect( 8 | x, 9 | sample_info, 10 | batch = NULL, 11 | main = NULL, 12 | maintain.rowmeans = FALSE, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{A matrix of values that needs to be corrected} 18 | 19 | \item{sample_info}{a data.frame of covariate information for the data in \code{x}. 20 | The rows of \code{sample_info} are assumed to match the columns of \code{x}. This 21 | data.frame should have the covariates named in \code{batch} and \code{main} to use 22 | for the correction. If \code{sample_info} is a \code{facile_frame}, we will endeavor 23 | to pull any covariate named in \code{batch} and \code{main} that do not already 24 | appear in the columns of \code{sample_info}. Unlike limma's removeBatchEffect, 25 | we do not try to fish out the covariate values from anywhere in the 26 | "ether". 
They \emph{must} be found in this data.frame.} 27 | 28 | \item{batch}{The column names in \code{sample_info} that specify the batch 29 | covariates in the data that will be regressed out.} 30 | 31 | \item{main}{The name of a covariate in \code{sample_info} that contains a known 32 | covariate that describes the "effect" of an experiment that should not 33 | be regressed out. Please refer to the Details section for more information.} 34 | } 35 | \value{ 36 | a corrected version of the data matrix \code{x}. 37 | } 38 | \description{ 39 | Data \code{x} is assumed to be log-like, and this function provides a simplified 40 | interface to \code{\link[limma:removeBatchEffect]{limma::removeBatchEffect()}}. The \code{batch} parameter replaces 41 | \code{batch}, \code{batch2}, and \code{covariates}. The \code{design} parameter is replaced with 42 | \code{main}. This function is mostly for use within the 43 | \code{fetch_assay_data(..., normalized = TRUE, batch = 'something')} pipeline, 44 | but refactored out here for general re-use. 45 | } 46 | \details{ 47 | The \code{batch} and \code{main} parameters must be characters that will either 48 | reference already existing columns in the \code{sample_info}, or be covariates 49 | that can be retrieved from a FacileDataStore that is attached to the 50 | sample_info facile_frame. 51 | 52 | We'll use these parameters to build a model.matrix with main and batch 53 | effect and follow the use of \code{removeBatchEffect} as outlined in the post 54 | linked to below to pull the design matrix apart and call the function with 55 | the corresponding \code{design} and \code{covariates} parameters: 56 | 57 | https://support.bioconductor.org/p/83286/#83287 58 | 59 | Setting the \code{batch.scale} parameter to \code{TRUE} (the default), ensures that 60 | the \code{rowMeans} of the returned data matrix are the same as the original 61 | dataset.
62 | } 63 | \section{Missing values in batch covariates}{ 64 | 65 | It can be that some of the levels of the \code{batch} and \code{main} covariates 66 | are missing \code{NA}. When these covariates are categorical, all missing values 67 | will be replaced with a dummy value using the logic from \code{\link[=freplace_na]{freplace_na()}} 68 | 69 | If numeric covariates are missing, then this will throw an error. 70 | } 71 | 72 | \examples{ 73 | # We'll materialize a data matrix and sample_info table from the 74 | # exampleFacileDataSet, then correct the data matrix. 75 | efds <- exampleFacileDataSet() 76 | sample.info <- efds \%>\% 77 | filter_samples(indication == "CRC") \%>\% 78 | with_sample_covariates() 79 | m <- fetch_assay_data(sample.info, normalized = TRUE, as.matrix = TRUE) 80 | m.rmsex <- remove_batch_effect(m, sample.info, "sex") 81 | 82 | # this functionality is called internally from fetch_assay_data to make 83 | # your life easy from within the facile ecosystem itself 84 | m2 <- fetch_assay_data(sample.info, normalized = TRUE, 85 | batch = "sex", as.matrix = TRUE) 86 | all.equal(m.rmsex, m2) 87 | } 88 | \seealso{ 89 | \code{\link[=fetch_assay_data]{fetch_assay_data()}} when \code{batch = "something"} 90 | } 91 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | echo = TRUE, 11 | message = FALSE, 12 | error = FALSE, 13 | comment = "#>", 14 | fig.path = "man/figures/" 15 | ) 16 | ``` 17 | 18 | # FacileData 19 | 20 | 21 | [![R build status](https://github.com/facilebio/FacileData/workflows/R-CMD-check/badge.svg)](https://github.com/facilebio/FacileData/actions) 22 | ![pkgdown](https://github.com/facilebio/FacileData/workflows/pkgdown/badge.svg) 23 | [![Project 
Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 24 | [![Lifecycle: Maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing) 25 | [![codecov](https://codecov.io/gh/facilebio/FacileData/branch/master/graph/badge.svg)](https://codecov.io/gh/facilebio/FacileData) 26 | 27 | 28 | The `FacileData` package was written to facilitate easier analysis of large, 29 | multi-assay high-throughput genomics datasets. To this end, the `FacileData` 30 | package provides two things: 31 | 32 | 1. A *FacileData Access API* that defines a fluent interface over multi-assay 33 | genomics datasets that fits into the [tidyverse][tidyverse]. This enables 34 | analysts to more naturally query and retrieve data for general exploratory 35 | data analysis; and 36 | 2. A reference implementation of a datastore that implements the 37 | *FacileData Access API* called a *FacileDataSet*. The `FacileDataSet` 38 | provides efficient storage and retrieval of arbitrarily large high-throughput 39 | genomics datasets. For example, a single `FacileDataSet` can be used to store 40 | *all* of the RNA-seq, microarray, RPPA, etc. data from 41 | [The Cancer Genome Atlas][tcga]. This singular `FacileDataSet` allows 42 | analysts easy access to arbitrary subsets of these data without having to 43 | load all of it into memory. 44 | 45 | [tcga]: https://cancergenome.nih.gov/ 46 | [tidyverse]: https://www.tidyverse.org/ 47 | 48 | # Installation 49 | 50 | The FacileData suite of packages is only available from GitHub for now.
You 51 | will want to install three `FacileData*` packages to appreciate its utility: 52 | 53 | ```{r gh-installation, eval = FALSE} 54 | # install.packages("devtools") 55 | devtools::install_github("facilebio/FacileData") 56 | ``` 57 | 58 | # Example Usage 59 | 60 | As a teaser, we'll show how to plot HER2 copy number vs expression across the 61 | TCGA bladder and breast indications ("BLCA" and "BRCA") using a `FacileDataSet`. 62 | 63 | ```{r her2-cnv-expression, eval = FALSE} 64 | library(ggplot2) 65 | library(FacileData) 66 | library(FacileTCGADataSet) 67 | tcga <- FacileTCGADataSet() 68 | 69 | features <- filter_features(tcga, name == "ERBB2") 70 | 71 | fdat <- tcga %>% 72 | filter_samples(indication %in% c("BLCA", "BRCA")) %>% 73 | with_assay_data(features, assay_name = "rnaseq", normalized = TRUE) %>% 74 | with_assay_data(features, assay_name = "cnv_score") %>% 75 | with_sample_covariates(c("indication", "sex")) 76 | 77 | ggplot(fdat, aes(cnv_score_ERBB2, ERBB2, color = sex)) + 78 | geom_point() + 79 | facet_wrap(~ indication) 80 | ``` 81 | 82 | 83 | 84 | Let's compare how you might do the same using data stored in a 85 | `SummarizedExperiment` named `se.all` that stores RNA-seq (raw and normalized) 86 | and copy number data.
87 | 88 | ```{r example-sumexp, eval = FALSE} 89 | # load / get `se.all` from somewhere 90 | fidx <- which(mcols(se.all)$name == "ERBB2") 91 | se <- se.all[, se.all$indication %in% c("BLCA", "BRCA")] 92 | 93 | sdat <- data.frame( 94 | ERBB2 = assay(se, "rnaseq_norm")[fidx,], 95 | cnv_score_ERBB2 = assay(se, "cnv_score")[fidx,], 96 | sex = se$sex, 97 | indication = se$indication) 98 | 99 | ggplot(sdat, aes(cnv_score_ERBB2, ERBB2, color=sex)) + 100 | geom_point() + 101 | facet_wrap(~ indication) 102 | ``` 103 | 104 | TODO: Show same analysis using MultiAssayExperiment 105 | -------------------------------------------------------------------------------- /tests/testthat/test-bioc-assay-containers.R: -------------------------------------------------------------------------------- 1 | context("Exercise as.DGEList (deprecated in favor of biocbox())") 2 | 3 | if (!exists("FDS")) FDS <- exampleFacileDataSet() 4 | # Shared fixtures: the stage III samples and a handful of feature ids. 5 | samples <- sample_covariate_tbl(FDS) %>% 6 | dplyr::filter(variable == 'stage' & value == 'III') %>% 7 | dplyr::select(dataset, sample_id) 8 | genes <- local({ 9 | out <- c("800", "1009", "1289", "50509", "2191", "2335", "5159") 10 | feature_info_tbl(FDS) %>% 11 | dplyr::filter(feature_id %in% out) %>% 12 | dplyr::collect() %>% 13 | dplyr::pull(feature_id) 14 | }) 15 | 16 | test_that("fetch_assay_data results converted to DGEList", { 17 | e <- fetch_assay_data(FDS, genes, samples) 18 | y <- as.DGEList(e) 19 | expect_is(y, 'DGEList') 20 | 21 | ## check samples 22 | expect_is(y$samples, 'data.frame') 23 | expect_true(setequal(y$samples$sample_id, collect(samples)$sample_id)) 24 | expect_type(y$samples$norm.factors, 'double') 25 | expect_type(y$samples$lib.size, 'double') 26 | expect_type(y$samples$dataset, 'character') 27 | expect_type(y$samples$sample_id, 'character') 28 | 29 | expect_is(y$genes, 'data.frame') 30 | expect_type(y$genes$feature_id, 'character') 31 | expect_true(setequal(y$genes$feature_id, genes)) 32 | expect_type(y$genes$symbol, 'character') 33
| 34 | ## Check that counts match up in DGEList as they would from raw matrix fetch 35 | m <- fetch_assay_data(FDS, genes, samples, normalized = FALSE, as.matrix = TRUE) 36 | expect_true(setequal(rownames(m), rownames(y))) 37 | expect_true(setequal(colnames(m), colnames(y))) 38 | expect_equal(m[rownames(y), colnames(y)], y$counts) 39 | }) 40 | 41 | test_that("as.DGEList appends custom covariate table correctly", { 42 | custom.covs <- c("sex", "subtype_molecular") 43 | with.covs <- with_sample_covariates(samples, custom.covs) 44 | 45 | y.ref <- as.DGEList(with.covs) 46 | y.test <- as.DGEList(with.covs, covariates = with.covs) 47 | 48 | expect_equal(dim(y.test), dim(y.ref)) 49 | expect_equal(colnames(y.test), colnames(y.ref)) 50 | 51 | expected.scols <- c( 52 | "group", "lib.size", "norm.factors", "dataset", "sample_id", "samid", 53 | custom.covs) 54 | 55 | expect_set_equal(colnames(y.test$samples), expected.scols) 56 | 57 | for (cov in custom.covs) { 58 | expect_equal(y.test$samples[[cov]], y.ref$samples[[cov]], 59 | info = paste("sample <-> custom covariate match:", cov)) 60 | } 61 | }) 62 | 63 | test_that("as.DGEList with custom lib.size and norm.factors works", { 64 | set.seed(100) 65 | y.all <- as.DGEList(samples) 66 | y.some <- y.all[sample(nrow(y.all), 1000),, keep.lib.sizes = FALSE] 67 | y.some <- edgeR::calcNormFactors(y.some) 68 | 69 | expect_true(all(y.all$samples$lib.size > y.some$samples$lib.size)) 70 | expect_false(any(y.all$samples$norm.factors == y.some$samples$norm.factors)) 71 | 72 | # cpm calculation with stored libsize and normfactors 73 | cpms.orig <- fetch_assay_data(samples, features = rownames(y.some), 74 | as.matrix = TRUE, normalized = TRUE, log = TRUE, 75 | prior.count = 2) 76 | 77 | # Add custom lib.size and norm.factors to sample facile_frame 78 | samples.
<- samples %>% 79 | collect() %>% 80 | left_join(select(y.some$samples, sample_id, lib.size, norm.factors), 81 | by = "sample_id") 82 | 83 | cpms.f <- fetch_assay_data(samples., features = rownames(y.some), 84 | as.matrix = TRUE, normalized = TRUE, log = TRUE, 85 | prior.count = 2) 86 | expect_equal(rownames(cpms.orig), rownames(cpms.f)) 87 | expect_equal(colnames(cpms.orig), colnames(cpms.f)) 88 | 89 | # This is an explicit test to make sure that the differences in the CPMs are 90 | # not zero 91 | mean.diff.orig <- mean(abs(cpms.f - cpms.orig)) 92 | expect_gt(mean.diff.orig, 0) 93 | 94 | cpms.e1 <- edgeR::cpm(y.some, log = TRUE, prior.count = 2) 95 | expect_setequal(rownames(cpms.f), rownames(cpms.e1)) 96 | expect_setequal(colnames(cpms.f), colnames(cpms.e1)) 97 | cpms.f <- cpms.f[rownames(cpms.e1), colnames(cpms.e1)] 98 | mean.diff.new <- mean(abs(cpms.f - cpms.e1)) 99 | expect_equal(mean.diff.new, 0) 100 | }) 101 | -------------------------------------------------------------------------------- /R/NSE-filter-samples.R: -------------------------------------------------------------------------------- 1 | #' Filter against the sample_covariate_tbl EAV table as if it were wide. 2 | #' 3 | #' This allows the user to query the `FacileDataSet` as if it were a wide 4 | #' `pData` `data.frame` of all its covariates. 5 | #' 6 | #' This feature is only really meant to be 7 | #' used interactively, and with extreme caution ... programmatically specifying 8 | #' the covariates, for instance, does not work right now. 9 | #' 10 | #' TODO: Implement using `tidyeval` 11 | #' 12 | #' @export 13 | #' @family API 14 | #' 15 | #' @param x A `FacileDataSet` 16 | #' @param ... NSE clauses to use in [dplyr::filter()] expressions 17 | #' @return a sample-descriptor `data.frame` that includes the dataset,sample_id 18 | #' pairs that match the virtual `filter(covariates, ...)` clause executed here.
19 | #' 20 | #' @examples 21 | #' fds <- exampleFacileDataSet() 22 | #' 23 | #' # To identify all samples that are of "CMS3" or "CMS4" subtype 24 | #' # (stored in the "subtype_crc_cms" covariate): 25 | #' crc.34 <- filter_samples(fds, subtype_crc_cms %in% c("CMS3", "CMS4")) 26 | #' eav.query <- fds %>% 27 | #' fetch_sample_covariates(covariates = "subtype_crc_cms") %>% 28 | #' filter(value %in% c("CMS3", "CMS4")) %>% 29 | #' collect() 30 | #' setequal(crc.34$sample_id, eav.query$sample_id) 31 | #' 32 | #' # You can keep filtering a filtered dataset 33 | #' crc.34.male <- filter_samples(crc.34, sex == "m") 34 | filter_samples.FacileDataSet <- function(x, ..., samples. = samples(x), 35 | custom_key = Sys.getenv("USER"), 36 | with_covariates = FALSE) { 37 | # Evaluate `samples.` (default: `samples(x)`) now, before it is validated 38 | # and handed to the helper that assembles the wide covariate table. 39 | 40 | force(samples.) 41 | assert_sample_subset(samples.) 42 | 43 | cov.table <- .create_wide_covariate_table(x, samples., ..., 44 | custom_key = custom_key) 45 | out <- filter(cov.table, ...) 46 | if (!with_covariates) { 47 | out <- select(out, dataset, sample_id) 48 | } 49 | if (nrow(out) == 0L) { 50 | warning("All samples have been filtered out", immediate. = TRUE) 51 | } 52 | as_facile_frame(out, x) 53 | } 54 | 55 | #' @noRd 56 | #' @export 57 | filter_samples.facile_frame <- function(x, ..., 58 | custom_key = Sys.getenv("USER"), 59 | with_covariates = FALSE) { 60 | .fds <- assert_facile_data_store(fds(x)) 61 | assert_sample_subset(x) 62 | filter_samples(.fds, ..., samples. = x, custom_key = custom_key, 63 | with_covariates = with_covariates) 64 | } 65 | 66 | #' @noRd 67 | #' @importFrom lazyeval lazy_dots 68 | .create_wide_covariate_table <- function(x, samples, ..., 69 | custom_key = Sys.getenv("USER")) { 70 | assert_facile_data_store(x) 71 | assert_sample_subset(samples) 72 | # Long-form covariate rows for `samples`; narrowed below, then spread wide. 73 | out <- fetch_sample_covariates(x, samples = samples, custom_key = custom_key) 74 | dots <- lazy_dots(...)
75 | qvars <- .parse_filter_vars(x, dots) 76 | 77 | # TODO: check if any of the query variables are dataset or sample_id, then 78 | # filter `out` on the dataset or sample_id columns, THEN play with the 79 | # other sample covariates (sc) 80 | pk.vars <- intersect(qvars, c("dataset", "sample_id")) 81 | # if (length(pk.vars)) { 82 | # out <- filter(out, pk.part.of.query) 83 | # } 84 | # Keep only the rows of the covariates that the query references. 85 | sc.vars <- setdiff(qvars, c("dataset", "sample_id")) 86 | if (length(sc.vars)) { 87 | out <- filter(out, variable %in% !!sc.vars) 88 | } 89 | out %>% 90 | spread_covariates() %>% 91 | distinct(dataset, sample_id, .keep_all = TRUE) 92 | } 93 | 94 | #' @noRd 95 | #' @importFrom lazyeval auto_name 96 | .parse_filter_vars <- function(x, dots) { 97 | assert_facile_data_store(x) 98 | stopifnot(is(dots, 'lazy_dots')) 99 | # Candidate names: every covariate stored in `x` plus the two key columns. 100 | all.vars <- sample_covariate_tbl(x) %>% 101 | distinct(variable) %>% 102 | collect(n=Inf) 103 | all.vars <- c(all.vars$variable, "dataset", "sample_id") 104 | # A name is a hit when it occurs literally (not as a regex) in any expression. 105 | dot.exprs <- names(auto_name(dots)) 106 | hits <- vapply(all.vars, function(var) any(grepl(var, dot.exprs, fixed = TRUE)), logical(1)) 107 | out <- names(hits)[hits] 108 | if (length(out) == 0) { 109 | stop("No sample covariates found in query: ", 110 | paste(dot.exprs, collapse=';')) 111 | } 112 | out 113 | } 114 | -------------------------------------------------------------------------------- /man/FacileDataSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FacileDataSet.R 3 | \name{FacileDataSet} 4 | \alias{FacileDataSet} 5 | \title{Instantiates a FacileDataSet object from disk.} 6 | \usage{ 7 | FacileDataSet( 8 | path, 9 | data.fn = NULL, 10 | sqlite.fn = NULL, 11 | hdf5.fn = NULL, 12 | meta.fn = NULL, 13 | anno.dir = NULL, 14 | cache_size = 80000, 15 | db.loc = c("reference", "temporary", "memory"), 16 | ...
17 | ) 18 | } 19 | \arguments{ 20 | \item{path}{The path to the FacileData repository} 21 | 22 | \item{data.fn}{A custom path to the database (probably don't mess with this)} 23 | 24 | \item{sqlite.fn}{name of SQLite data file in FacileDataSet} 25 | 26 | \item{hdf5.fn}{name of HDF5 data file in FacileDataSet} 27 | 28 | \item{meta.fn}{name of metadata YAML data file in FacileDataSet} 29 | 30 | \item{anno.dir}{A directory to house custom annotations/sample covariates} 31 | 32 | \item{cache_size}{A custom parameter for the SQLite database} 33 | 34 | \item{db.loc}{single character, location for the data} 35 | 36 | \item{...}{other args to pass down, not used at the moment} 37 | 38 | \item{covdef.fn}{A custom path to the yaml file that has covariate mapping info} 39 | } 40 | \value{ 41 | a \code{FacileDataSet} object 42 | } 43 | \description{ 44 | The \code{FacileDataSet} is a reference data storage implementation that 45 | implements the \strong{FacileData Access API}. It facilitates the storage and 46 | retrieval of large amounts of data by leveraging a SQLite database to store 47 | sample- and feature-level metadata ("\code{pData}" and "\code{fData}"), and an HDF5 48 | file to store all of the dense assay (matrix) data (gene counts, microarray 49 | intensities, etc.) run over the samples. 50 | } 51 | \details{ 52 | A \code{FacileDataSet} is materialized on disk by a well-structured directory, 53 | which minimally includes the following items: 54 | \enumerate{ 55 | \item A \code{data.sqlite} SQLite database that stores feature and sample metadata 56 | \item A \code{data.h5} HDF5 file that stores a multitude of dense assay matrices that 57 | are generated from the assays performed on the samples in the 58 | \code{FacileDataSet}. 59 | \item A \code{meta.yaml} file that contains information about the \code{FacileDataSet}. 60 | To better understand the structure and contents of this file, you can 61 | refer to the following: 62 | a.
The included \code{testdata/expected-meta.yaml} file, which is an 63 | exemplar file for \code{\link[=exampleFacileDataSet]{exampleFacileDataSet()}}. 64 | b. The help file provided by the \code{\link[=eav_metadata_create]{eav_metadata_create()}} function, which 65 | describes in greater detail how we track a dataset's sample-level 66 | covariates (aka, "pData" in the Bioconductor world). 67 | In the meantime, a short description of the entries found in the 68 | \code{meta.yaml} file is provided here: 69 | \itemize{ 70 | \item \code{name}: the name of the dataset (i.e. \code{"FacileTCGADataSet"}) 71 | \item \code{organism}: \code{"Homo sapiens"}, \code{"Mus musculus"}, etc. 72 | \item \code{default_assay}: the name of the assay to use by default if none is 73 | specified in calls to \code{\link[=fetch_assay_data]{fetch_assay_data()}}, \code{\link[=with_assay_data]{with_assay_data()}}, etc. 74 | (kind of like how \code{"exprs"} is the default assay used when working with 75 | a \code{Biobase::ExpressionSet}) 76 | \item \code{datasets}: a section that enumerates the datasets included internally. 77 | The datasets are further enumerated. 78 | \item \code{sample_covariates}: a section that enumerates the covariates that 79 | are tracked over the samples inside the \code{FacileDataSet} (i.e. a mapping 80 | of the \code{pData} for the samples). Reference \code{\link[=eav_metadata_create]{eav_metadata_create()}} 81 | for more information. 82 | } 83 | \item A \code{custom-annotation} directory, which stores custom \code{sample_covariate} 84 | (aka "pData") information that analysts can identify and describe during 85 | the course of an analysis, or even add from external sources. Although 86 | this directory is required in the directory structure of a valid 87 | \code{FacileDataSet}, the \code{FacileDataSet()} constructor can be called with 88 | a custom \code{anno.dir} parameter so that custom annotations are stored 89 | elsewhere.
90 | } 91 | } 92 | \examples{ 93 | fn <- system.file("extdata", "exampleFacileDataSet", package = "FacileData") 94 | fds <- FacileDataSet(fn) 95 | } 96 | \seealso{ 97 | Other FacileDataSet: 98 | \code{\link{dbfn}()}, 99 | \code{\link{hdf5fn}()}, 100 | \code{\link{meta_file}()} 101 | } 102 | \concept{FacileDataSet} 103 | -------------------------------------------------------------------------------- /man/fetch_assay_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api.R, R/assay-data.R 3 | \name{fetch_assay_data} 4 | \alias{fetch_assay_data} 5 | \alias{with_assay_data.facile_frame} 6 | \title{Fetch assay data from single assay of choice} 7 | \usage{ 8 | fetch_assay_data( 9 | x, 10 | features, 11 | samples = NULL, 12 | assay_name = default_assay(x), 13 | normalized = FALSE, 14 | batch = NULL, 15 | main = NULL, 16 | as.matrix = FALSE, 17 | ..., 18 | subset.threshold = 700, 19 | aggregate = FALSE, 20 | aggregate.by = "ewm", 21 | verbose = FALSE 22 | ) 23 | 24 | \method{with_assay_data}{facile_frame}( 25 | x, 26 | features, 27 | assay_name = NULL, 28 | normalized = TRUE, 29 | aggregate = FALSE, 30 | aggregate.by = "ewm", 31 | spread = TRUE, 32 | with_assay_name = FALSE, 33 | ..., 34 | verbose = FALSE, 35 | .fds = fds(x) 36 | ) 37 | } 38 | \arguments{ 39 | \item{x}{A \code{FacileDataStore} object, or \code{facile_frame}} 40 | 41 | \item{features}{a feature descriptor (data.frame with assay and feature_id 42 | columns)} 43 | 44 | \item{samples}{a samples descriptor} 45 | 46 | \item{assay_name}{the name of the assay to fetch data from. Defaults to the 47 | value of \code{\link[=default_assay]{default_assay()}} for \code{x}. Must be a subset of \code{assay_names(x)}.} 48 | 49 | \item{normalized}{return normalized or raw data values, defaults to \code{FALSE}.
50 | This is only really "functional" for \code{assay_type = "rnaseq"} types 51 | of assays, where the normalized data is log2(CPM). These values can 52 | be tweaked with \code{log = (TRUE|FALSE)} and \code{prior.count} parameters, which 53 | can be passed down internally to (eventually) \code{\link[edgeR:cpm]{edgeR::cpm()}}.} 54 | 55 | \item{batch}{The column names in \code{sample_info} that specify the batch 56 | covariates in the data that will be regressed out.} 57 | 58 | \item{main}{The name of a covariate in \code{sample_info} that contains a known 59 | covariate that describes the "effect" of an experiment that should not 60 | be regressed out. Please refer to the Details section for more information.} 61 | 62 | \item{as.matrix}{by default, the data is returned in a long-form tbl-like 63 | result. If set to \code{TRUE}, the data is returned as a matrix.} 64 | 65 | \item{...}{parameters to pass to normalization methods} 66 | 67 | \item{subset.threshold}{sometimes fetching all the genes is faster than 68 | trying to subset. We have to figure out why that is, but I've previously 69 | tested random features of different lengths, and around 700 features was 70 | the elbow.} 71 | 72 | \item{aggregate.by}{do you want individual level results or geneset 73 | scores? Use 'ewm' for eigenWeightedMean, and that's all.} 74 | 75 | \item{.fds}{A \code{FacileDataSet} object} 76 | 77 | \item{feature_ids}{character vector of feature_ids} 78 | 79 | \item{with_symbols}{Do you want gene symbols returned, too?} 80 | } 81 | \value{ 82 | A \code{tibble} (lazy or not) with assay data. 83 | 84 | a tbl-like result 85 | } 86 | \description{ 87 | The \verb{(fetch|with)_assay_data} functions are some of the main workhorse 88 | functions of the facile ecosystem. These calls enable you to retrieve 89 | raw and normalized assay data from a FacileData container. 90 | } 91 | \details{ 92 | \code{fetch_assay_data(x, ...)} will return the data in long form.
93 | \code{with_assay_data(x, ...)} is most typically used when you already have 94 | a dataset \code{x} (a \code{facile_frame}) that you want to decorate with more assay 95 | data. The assay data asked for will be appended on to \code{x} in wide format. 96 | Because \code{fetch} is (most often) used at a lower level of granularity, 97 | \code{normalized} is by default set to \code{FALSE}, while it is set to \code{TRUE} in 98 | \code{with_assay_data}. 99 | } 100 | \section{Removing Batch Effects}{ 101 | 102 | When normalized data is returned, we assume these data are log-like, and you 103 | have the option to regress out batch effects using our 104 | \code{\link[=remove_batch_effect]{remove_batch_effect()}} wrapper to \code{\link[limma:removeBatchEffect]{limma::removeBatchEffect()}}. 105 | } 106 | 107 | \examples{ 108 | samples <- exampleFacileDataSet() \%>\% 109 | filter_samples(indication == "BLCA", sample_type == "tumor") 110 | features <- c(PRF1='5551', GZMA='3001', CD274='29126') 111 | dat <- with_assay_data(samples, features, normalized = TRUE, batch = "sex") 112 | dat <- with_assay_data(samples, features, normalized = TRUE, 113 | batch = c("sex", "stage")) 114 | dat <- with_assay_data(samples, features, normalized = TRUE, 115 | batch = c("sex", "stage"), main = "sample_type") 116 | } 117 | -------------------------------------------------------------------------------- /R/assemble_example_dataset.R: -------------------------------------------------------------------------------- 1 | #' Assembles an example facile dataset to play with 2 | #' 3 | #' This combines the airway and parathyroidSE RNA-seq datasets into a single 4 | #' FacileDataSet. 5 | #' 6 | #' The code here is extracted from the `FacileDataSet-assembly` vignette. Please 7 | #' read that for some of the why's and how's of the decisions made here when 8 | #' assembling datasets.
9 | #' 10 | #' @export 11 | #' @param directory The name of the parent directory to hold the dataset 12 | #' @param name A subdirectory within `directory` will be created using this 13 | #' name. It is an error if this path already exists. 14 | #' @return The FacileDataSet object itself. 15 | assemble_example_dataset <- function(directory = tempdir(), 16 | name = "ExampleRnaFacileDataSet") { 17 | assert_directory_exists(directory, "w") 18 | full.path <- file.path(directory, name) 19 | if (file.exists(full.path)) { 20 | stop("The output directory already exists, remove it to recreate the ", 21 | "dataset:\n ", full.path) 22 | } 23 | message("Assembling dataset into: ", full.path) 24 | # Load the Bioconductor namespaces; their functions are called via ns$ / ns4$. 25 | ns <- tryCatch(loadNamespace("SummarizedExperiment"), error = function(e) NULL) 26 | if (is.null(ns)) stop("SummarizedExperiment required") 27 | ns4 <- tryCatch(loadNamespace("S4Vectors"), error = function(e) NULL) 28 | if (is.null(ns4)) stop("S4Vectors required") 29 | 30 | # Load Data .................................................................. 31 | dat.env <- new.env() 32 | tryCatch({ 33 | data("airway", package = "airway", envir = dat.env) 34 | }, error = function(e) stop("The airway package is required")) 35 | tryCatch({ 36 | data("parathyroidGenesSE", package = "parathyroidSE", envir = dat.env) 37 | }, error = function(e) stop("The parathyroidSE package is required")) 38 | 39 | # Munge colData ..............................................................
40 | se.airway <- dat.env[["airway"]] 41 | cd.airway <- local({ 42 | cd <- ns$colData(dat.env[["airway"]]) %>% 43 | as.data.frame() %>% 44 | transmute( 45 | sample_type = "cell_line", 46 | cell_line = cell, 47 | treatment = ifelse(dex == "untrt", "control", "dex")) %>% 48 | ns4$DataFrame() 49 | rownames(cd) <- colnames(se.airway) 50 | cd 51 | }) 52 | se.airway <- ns$`colData<-`(se.airway, value = cd.airway) 53 | 54 | se.parathyroid <- dat.env[["parathyroidGenesSE"]] 55 | cd.parathyroid <- local({ 56 | cd <- ns$colData(dat.env[["parathyroidGenesSE"]]) %>% 57 | as.data.frame() %>% 58 | transmute( 59 | sample_type = "primary", 60 | subject_id = paste0("patient_", patient), 61 | treatment = tolower(as.character(treatment)), 62 | time = paste0("hrs", sub("h$", "", time))) %>% 63 | ns4$DataFrame() 64 | rownames(cd) <- colnames(se.parathyroid) 65 | cd 66 | }) 67 | se.parathyroid <- ns$`colData<-`(se.parathyroid, value = cd.parathyroid) 68 | 69 | # Munge rowData .............................................................. 70 | mart.info <- local({ 71 | fn <- system.file("extdata", "ensembl-v75-gene-info.csv.gz", 72 | package = "FacileData") 73 | con <- gzfile(fn, "rt") 74 | on.exit(close.connection(con)) 75 | read.csv(con, stringsAsFactors = FALSE) 76 | }) 77 | # Keep one annotation row per gene that is present in both datasets. 78 | shared.ids <- intersect(rownames(se.airway), rownames(se.parathyroid)) 79 | gene.info <- mart.info %>% 80 | transmute(feature_id = ensembl_gene_id, 81 | feature_type = "ensgid", 82 | name = hgnc_symbol, 83 | meta = gene_biotype, 84 | source = "Ensembl_v75") %>% 85 | filter(feature_id %in% shared.ids) %>% 86 | distinct(feature_id, .keep_all = TRUE) 87 | # ns4$DataFrame() 88 | rownames(gene.info) <- gene.info[["feature_id"]] 89 | 90 | # The data are assembled into DGELists (rather than kept as 91 | # SummarizedExperiments) because subsetting a SummarizedExperiment with `[` 92 | # did not work here when the package is only loaded via loadNamespace(). 93 | # NOTE(review): presumably an S4 dispatch issue with an unattached package; 94 | # confirm before reviving the commented-out attempt below.
95 | 96 | # se.subfn <- selectMethod("[", c("SummarizedExperiment", "ANY", "ANY")) 97 | 98 | # se.airway <- se.airway[rownames(gene.info),] 99 | # se.airway <- se.subfn(se.airway, rownames(gene.info)) 100 | # se.airway <- ns$`rowData<-`(se.airway, value = gene.info) 101 | # 102 | # # se.parathyroid <- se.parathyroid[rownames(gene.info),] 103 | # se.parathyroid <- se.subfn(se.parathyroid, rownames(gene.info)) 104 | # se.parathyroid <- ns$`rowData<-`(se.parathyroid, value = gene.info) 105 | # 106 | # dat.all <- list(airway = se.airway, parathyroid = se.parathyroid) 107 | 108 | # gene.info <- ns4$as.data.frame(gene.info) 109 | # Build one DGEList per dataset, with counts restricted to the rows of gene.info. 110 | y.airway <- edgeR::DGEList( 111 | counts = ns$assay(se.airway)[rownames(gene.info), ], 112 | samples = ns4$as.data.frame.DataTable(ns$colData(se.airway)), 113 | genes = gene.info) 114 | 115 | y.para <- edgeR::DGEList( 116 | counts = ns$assay(se.parathyroid)[rownames(gene.info), ], 117 | samples = ns4$as.data.frame.DataTable(ns$colData(se.parathyroid)), 118 | genes = gene.info) 119 | 120 | dat.all <- list(airway = y.airway, parathyroid = y.para) 121 | 122 | xfds <- as.FacileDataSet( 123 | dat.all, 124 | path = full.path, 125 | dataset_name = name, 126 | assay_name = "gene_counts", 127 | assay_description = "Gene counts provided by Bioconductor data packages", 128 | assay_type = "rnaseq", 129 | organism = "Homo sapiens") 130 | 131 | xfds 132 | } 133 | -------------------------------------------------------------------------------- /man/as.BiocContainer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.BiocAssayContainers.R 3 | \name{as.DGEList} 4 | \alias{as.DGEList} 5 | \alias{as.DGEList.data.frame} 6 | \alias{as.DGEList.tbl} 7 | \alias{as.DGEList.facile_frame} 8 | \alias{as.DGEList.FacileDataSet} 9 | \alias{as.ExpressionSet} 10 | \alias{as.ExpressionSet.data.frame} 11 | \alias{as.ExpressionSet.FacileDataSet} 12 |
\alias{as.SummarizedExperiment} 13 | \alias{as.SummarizedExperiment.data.frame} 14 | \alias{as.SummarizedExperiment.FacileDataSet} 15 | \title{Converts a "facile result" to a traditional Bioconductor assay container.} 16 | \usage{ 17 | as.DGEList(x, ...) 18 | 19 | \method{as.DGEList}{data.frame}( 20 | x, 21 | covariates = TRUE, 22 | feature_ids = NULL, 23 | assay_name = NULL, 24 | .fds = NULL, 25 | custom_key = Sys.getenv("USER"), 26 | ... 27 | ) 28 | 29 | \method{as.DGEList}{tbl}( 30 | x, 31 | covariates = TRUE, 32 | feature_ids = NULL, 33 | assay_name = NULL, 34 | .fds = NULL, 35 | custom_key = Sys.getenv("USER"), 36 | ... 37 | ) 38 | 39 | \method{as.DGEList}{facile_frame}( 40 | x, 41 | covariates = TRUE, 42 | feature_ids = NULL, 43 | assay_name = NULL, 44 | custom_key = Sys.getenv("USER"), 45 | ... 46 | ) 47 | 48 | \method{as.DGEList}{FacileDataSet}( 49 | x, 50 | covariates = TRUE, 51 | feature_ids = NULL, 52 | assay_name = NULL, 53 | custom_key = Sys.getenv("USER"), 54 | ... 55 | ) 56 | 57 | as.ExpressionSet(x, ...) 58 | 59 | \method{as.ExpressionSet}{data.frame}( 60 | x, 61 | covariates = TRUE, 62 | feature_ids = NULL, 63 | assay_name = default_assay(.fds), 64 | .fds = fds(x), 65 | custom_key = Sys.getenv("USER"), 66 | ... 67 | ) 68 | 69 | \method{as.ExpressionSet}{FacileDataSet}( 70 | x, 71 | covariates = TRUE, 72 | feature_ids = NULL, 73 | assay_name = default_assay(.fds), 74 | .fds = fds(x), 75 | custom_key = Sys.getenv("USER"), 76 | ... 77 | ) 78 | 79 | as.SummarizedExperiment(x, ...) 80 | 81 | \method{as.SummarizedExperiment}{data.frame}( 82 | x, 83 | covariates = TRUE, 84 | feature_ids = NULL, 85 | assay_name = default_assay(.fds), 86 | .fds = fds(x), 87 | custom_key = Sys.getenv("USER"), 88 | ... 89 | ) 90 | 91 | \method{as.SummarizedExperiment}{FacileDataSet}( 92 | x, 93 | covariates = TRUE, 94 | feature_ids = NULL, 95 | assay_name = default_assay(.fds), 96 | .fds = fds(x), 97 | custom_key = Sys.getenv("USER"), 98 | ... 
99 | ) 100 | } 101 | \arguments{ 102 | \item{x}{a facile expression-like result} 103 | 104 | \item{covariates}{The covariates the user wants to add to the $samples of 105 | the DGEList. This can take the following forms: 106 | \itemize{ 107 | \item \code{TRUE}: All covariates are retrieved from the \code{FacileDataSet} 108 | \item \code{FALSE}: TODO: Better handle FALSE 109 | \item \code{character}: A vector of covariate names to fetch from the 110 | \code{FacileDataSet}. Must be elements of \code{names(sample_definitions(x))} 111 | \item \code{data.frame}: A wide covariate table (dataset, sample_id, covariates ...) 112 | This may be external covariates for samples not available within 113 | \code{x} (yet), ie. a table of covariates provided by a third party. 114 | \item \code{NULL}: do not decorate with \emph{any} covariates. 115 | }} 116 | 117 | \item{feature_ids}{the features to get expression for (if not specified 118 | in \code{x} descriptor). These correspond to the elements found in the 119 | \code{feature_info_tbl(x)$feature_id} column.} 120 | 121 | \item{assay_name}{the name of the assay matrix to use when populating the 122 | default assay matrix of the bioconductor container (the \verb{$counts} 123 | matrix of a \code{DGEList}, the \code{exprs()} of an \code{ExpressionSet}, etc.). 124 | The default value is the entry provided by \code{\link[=default_assay]{default_assay()}}} 125 | 126 | \item{.fds}{The \code{FacileDataSet} that \code{x} was retrieved from} 127 | 128 | \item{custom_key}{the custom key to use to fetch custom annotations from 129 | \code{.fds}} 130 | } 131 | \value{ 132 | the appropriate bioconductor assay container, ie. an \code{edgeR::DGEList} 133 | for \code{as.DGEList}, a \code{Biobase::ExpressionSet} for \code{as.ExpressionSet}, or 134 | a \code{SummarizedExperiment::SummarizedExperiment} for 135 | \code{as.SummarizedExperiment}. 
136 | 137 | a \code{\link[Biobase]{ExpressionSet}} 138 | 139 | a \code{\link[SummarizedExperiment]{SummarizedExperiment}} 140 | } 141 | \description{ 142 | An entire \code{FacileDataSet} or a subset of it can be converted into 143 | bioconductor-standard assay containers, like a \code{SummarizedExperiment}, 144 | \code{DGEList}, or \code{ExpressionSet} "at any time" using various \code{as.XXX} functions, 145 | like \code{as.DGEList(...)}. 146 | } 147 | \details{ 148 | We use the term "facile object" to refer to either the entirety of a 149 | \code{FacileDataStore} or any sample-descriptor that specifies subsets of the 150 | data, e.g. where \code{fds(x)} returns a \code{FacileDataStore}. See examples for 151 | specifics. 152 | 153 | Note that the order in which the samples and features are materialized into the 154 | expression container is not guaranteed. 155 | } 156 | \examples{ 157 | fds <- exampleFacileDataSet() 158 | 159 | # Retrieve DGEList of gene expression for all samples 160 | y.all <- as.DGEList(fds) # gene expression of all samples 161 | 162 | # Retrieve data for only 3 genes 163 | # Suppose we only wanted female samples in our DGEList 164 | y.fem <- fds \%>\% 165 | filter_samples(sex == "f") \%>\% 166 | as.DGEList() # or `as.ExpressionSet()` 167 | } 168 | --------------------------------------------------------------------------------