├── .github
    ├── .gitignore
    └── workflows
    │   ├── test-coverage.yaml
    │   ├── R-CMD-check.yaml
    │   └── pkgdown.yaml
├── .gitignore
├── .Rbuildignore
├── vignettes
    ├── images
    │   └── icons
    │   │   ├── tip.png
    │   │   ├── up.png
    │   │   ├── wip.png
    │   │   ├── home.png
    │   │   ├── next.png
    │   │   ├── note.png
    │   │   ├── prev.png
    │   │   ├── caution.png
    │   │   ├── download.png
    │   │   ├── example.png
    │   │   ├── warning.png
    │   │   ├── wip-full.png
    │   │   ├── INFO.txt
    │   │   ├── callouts
    │   │       ├── 1.png
    │   │       ├── 2.png
    │   │       ├── 3.png
    │   │       ├── 4.png
    │   │       ├── 5.png
    │   │       ├── 6.png
    │   │       ├── 7.png
    │   │       ├── 8.png
    │   │       ├── 9.png
    │   │       ├── 10.png
    │   │       ├── 11.png
    │   │       ├── 12.png
    │   │       ├── 13.png
    │   │       ├── 14.png
    │   │       └── 15.png
    │   │   ├── important.png
    │   │   └── callout-border.png
    └── custom.css
├── inst
    ├── presentations
    │   └── facile-overview.key
    ├── testdata
    │   ├── test-sample-covariates.rds
    │   ├── generate-TCGA-tesdata.R
    │   └── expected-meta.yaml
    ├── extdata
    │   ├── ensembl-v75-gene-info.csv.gz
    │   ├── exampleFacileDataSet
    │   │   ├── data.h5
    │   │   ├── data.sqlite
    │   │   ├── custom-annotation
    │   │   │   └── README.txt
    │   │   └── meta.yaml
    │   └── test
    │   │   └── sample-meta-definitions.yaml
    └── scripts
    │   └── retrieve-parathyroidSE-gene-info.R
├── man
    ├── figures
    │   └── her2_cnv_vs_expression.png
    ├── gene_info_tbl.Rd
    ├── set_class.Rd
    ├── assay_units.Rd
    ├── fetch_sample_statistics.default.Rd
    ├── sample_stats_tbl.Rd
    ├── exampleFacileDataSet.Rd
    ├── feature_types.Rd
    ├── executeSQL.Rd
    ├── sqlFromFile.Rd
    ├── adata.Rd
    ├── nameit.Rd
    ├── is.FacileDataSet.Rd
    ├── pdata.Rd
    ├── fdata.Rd
    ├── assay_feature_type.Rd
    ├── pdata_metadata.Rd
    ├── primary_key.Rd
    ├── with_feature_info.Rd
    ├── features.Rd
    ├── labeled.Rd
    ├── hdf5fn.Rd
    ├── has_feature_type.Rd
    ├── spread_covariates.Rd
    ├── flog_level.Rd
    ├── covariate_definitions.Rd
    ├── fetch_sample_statistics.Rd
    ├── samples.Rd
    ├── fetch_assay_score.Rd
    ├── feature_name_map.Rd
    ├── dbfn.Rd
    ├── eavdef_for_column.Rd
    ├── initializeFacileDataSet.Rd
    ├── validate_covariate_def_list.Rd
    ├── assay_sample_info.Rd
    ├── assay_info_over_samples.Rd
    ├── eav_encode.Rd
    ├── extract_transcribed_info_from_ensembl_gtf.Rd
    ├── create_assay_feature_descriptor.Rd
    ├── check_facile_data_set.Rd
    ├── conform_data_frame.Rd
    ├── eav_encode_covariate.Rd
    ├── organism.FacileDataSet.Rd
    ├── covariate_meta_info.Rd
    ├── check_facile_data_store.Rd
    ├── test-helpers.Rd
    ├── ds_annot.Rd
    ├── append_facile_table.Rd
    ├── parse_sample_criterion.Rd
    ├── assemble_example_dataset.Rd
    ├── fds.Rd
    ├── dot-level_biotypes.Rd
    ├── samples.FacileDataSet.Rd
    ├── as_facile_frame.Rd
    ├── fetch_assay_score.FacileDataSet.Rd
    ├── join_samples.Rd
    ├── append_facile_feature_info.Rd
    ├── assay_feature_info.FacileDataSet.Rd
    ├── assay_info.Rd
    ├── save_custom_sample_covariates.Rd
    ├── filter_features.FacileDataSet.Rd
    ├── reexports.Rd
    ├── cSurv.Rd
    ├── as.EAVtable.Rd
    ├── fetch_samples.FacileDataSet.Rd
    ├── fetch_custom_sample_covariates.FacileDataSet.Rd
    ├── infer_feature_type.Rd
    ├── check_categorical.Rd
    ├── eav_metadata_merge.Rd
    ├── facet_frame.Rd
    ├── spread_assay_data.Rd
    ├── normalize_assay_data.Rd
    ├── meta-info.Rd
    ├── FacileData-package.Rd
    ├── fetch_sample_statistics.FacileDataSet.Rd
    ├── summary.eav_covariates.Rd
    ├── cast_covariate.Rd
    ├── biocbox.Rd
    ├── freplace_na.Rd
    ├── simple-eav-decode-functions.Rd
    ├── facilitate.Rd
    ├── assertions.Rd
    ├── eav-right-censor.Rd
    ├── filter_samples.FacileDataSet.Rd
    ├── flog.Rd
    ├── addFacileAssaySet.Rd
    ├── sample-covariates.Rd
    ├── remove_batch_effect.Rd
    ├── FacileDataSet.Rd
    ├── fetch_assay_data.Rd
    └── as.BiocContainer.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-samples.R
    │   ├── test-csurv.R
    │   ├── test-feature-info.R
    │   ├── test-assay-normalization.R
    │   ├── test-feature-types.R
    │   ├── test-FacileDataSet.R
    │   ├── test-replace_na.R
    │   ├── test-as.FacileDataSet.R
    │   ├── test-EAV.R
    │   ├── test-assay-data.R
    │   ├── test-biocbox.R
    │   ├── test-entity-attribute-value.R
    │   └── test-bioc-assay-containers.R
├── codecov.yml
├── TODO.Rmd
├── R
    ├── NSE-filter-features.R
    ├── package.R
    ├── sql.R
    ├── test-helpers.R
    ├── sample-info.R
    ├── feature-types.R
    ├── csurv.R
    ├── utilities.R
    ├── zzz.R
    ├── replace_na.R
    ├── NSE-filter-samples.R
    └── assemble_example_dataset.R
├── DESCRIPTION
├── NEWS.md
├── pkgdown
    └── extra.css
├── README.md
└── README.Rmd


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^docs$
4 | ^\.github$
5 | 


--------------------------------------------------------------------------------
/vignettes/images/icons/tip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/tip.png


--------------------------------------------------------------------------------
/vignettes/images/icons/up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/up.png


--------------------------------------------------------------------------------
/vignettes/images/icons/wip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/wip.png


--------------------------------------------------------------------------------
/vignettes/images/icons/home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/home.png


--------------------------------------------------------------------------------
/vignettes/images/icons/next.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/next.png


--------------------------------------------------------------------------------
/vignettes/images/icons/note.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/note.png


--------------------------------------------------------------------------------
/vignettes/images/icons/prev.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/prev.png


--------------------------------------------------------------------------------
/vignettes/images/icons/caution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/caution.png


--------------------------------------------------------------------------------
/vignettes/images/icons/download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/download.png


--------------------------------------------------------------------------------
/vignettes/images/icons/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/example.png


--------------------------------------------------------------------------------
/vignettes/images/icons/warning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/warning.png


--------------------------------------------------------------------------------
/vignettes/images/icons/wip-full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/wip-full.png


--------------------------------------------------------------------------------
/vignettes/images/icons/INFO.txt:
--------------------------------------------------------------------------------
1 | These icons were taken from asciidoc:
2 | 
3 |   http://www.methods.co.nz/asciidoc/images/icons/
4 | 


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/1.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/2.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/3.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/4.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/5.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/6.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/7.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/8.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/9.png


--------------------------------------------------------------------------------
/vignettes/images/icons/important.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/important.png


--------------------------------------------------------------------------------
/inst/presentations/facile-overview.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/presentations/facile-overview.key


--------------------------------------------------------------------------------
/inst/testdata/test-sample-covariates.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/testdata/test-sample-covariates.rds


--------------------------------------------------------------------------------
/man/figures/her2_cnv_vs_expression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/man/figures/her2_cnv_vs_expression.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/10.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/11.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/12.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/13.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/14.png


--------------------------------------------------------------------------------
/vignettes/images/icons/callouts/15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callouts/15.png


--------------------------------------------------------------------------------
/inst/extdata/ensembl-v75-gene-info.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/extdata/ensembl-v75-gene-info.csv.gz


--------------------------------------------------------------------------------
/inst/extdata/exampleFacileDataSet/data.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/extdata/exampleFacileDataSet/data.h5


--------------------------------------------------------------------------------
/vignettes/images/icons/callout-border.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/vignettes/images/icons/callout-border.png


--------------------------------------------------------------------------------
/inst/extdata/exampleFacileDataSet/data.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonocarroll/FacileData/main/inst/extdata/exampleFacileDataSet/data.sqlite


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | library("testthat")
 2 | library("checkmate")
 3 | library("FacileData")
 4 | library("magrittr")
 5 | library("reshape2")
 6 | library("tidyr")
 7 | library("dplyr")
 8 | # Run the testthat suite for the FacileData package.
 9 | test_check("FacileData")
10 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 5%
 9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 5%
13 | 


--------------------------------------------------------------------------------
/inst/extdata/exampleFacileDataSet/custom-annotation/README.txt:
--------------------------------------------------------------------------------
1 | This is a dummy file to ensure that this directory is maintained in the VCS.
2 | 
3 | By default a FacileDataSet assumes that the custom annotation directory is a
4 | `custom-annotation` directory within the FacileDataSet data directory.
5 | 


--------------------------------------------------------------------------------
/TODO.Rmd:
--------------------------------------------------------------------------------
1 | * Do not depend on `FacileAtezo` artifacts in this package.
2 |   - Create a slimmer, exemplar database, which FacileDb() connects to
3 |     (largely for testing)
4 |   - Change the name of the options in .onLoad
5 |   - Change the sample-meta-definitions.yaml file to be appropriate for the
6 |     new mini-test-db that FacileDb creates
7 | 


--------------------------------------------------------------------------------
/man/gene_info_tbl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/db-and-table-functions.R
 3 | \name{gene_info_tbl}
 4 | \alias{gene_info_tbl}
 5 | \title{Mimics the old \code{gene_info} table.}
 6 | \usage{
 7 | gene_info_tbl(x)
 8 | }
 9 | \description{
10 | Mimics the old \code{gene_info} table.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/set_class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utilities.R
 3 | \name{set_class}
 4 | \alias{set_class}
 5 | \title{Set the class of an object and return the object}
 6 | \usage{
 7 | set_class(x, .class, ...)
 8 | }
 9 | \description{
10 | Set the class of an object and return the object
11 | }
12 | 


--------------------------------------------------------------------------------
/man/assay_units.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{assay_units}
 4 | \alias{assay_units}
 5 | \title{Units of measure in an assay}
 6 | \usage{
 7 | assay_units(x, assay_name, normalized = FALSE, abbreviate = FALSE, ...)
 8 | }
 9 | \value{
10 | string
11 | }
12 | \description{
13 | Units of measure in an assay
14 | }
15 | 


--------------------------------------------------------------------------------
/man/fetch_sample_statistics.default.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{fetch_sample_statistics.default}
 4 | \alias{fetch_sample_statistics.default}
 5 | \title{Issue #2}
 6 | \usage{
 7 | \method{fetch_sample_statistics}{default}(x, samples = NULL, semi = TRUE, assay_name = NULL)
 8 | }
 9 | \description{
10 | Issue #2
11 | }
12 | 


--------------------------------------------------------------------------------
/man/sample_stats_tbl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/db-and-table-functions.R
 3 | \name{sample_stats_tbl}
 4 | \alias{sample_stats_tbl}
 5 | \title{Mimics old sample_stats table}
 6 | \usage{
 7 | sample_stats_tbl(x)
 8 | }
 9 | \description{
10 | This function needs to be removed and the code that relies on
11 | sample_stats_tbl be updated.
12 | }
13 | 


--------------------------------------------------------------------------------
/man/exampleFacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/test-helpers.R
 3 | \name{exampleFacileDataSet}
 4 | \alias{exampleFacileDataSet}
 5 | \title{Retrieves an example FacileDataSet}
 6 | \usage{
 7 | exampleFacileDataSet()
 8 | }
 9 | \description{
10 | A subset of the TCGA data from the BLCA and COAD indications is provided
11 | as a FacileDataSet.
12 | }
13 | 


--------------------------------------------------------------------------------
/man/feature_types.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{feature_types}
 4 | \alias{feature_types}
 5 | \title{Enumerate the types of feature stored in a FacileDataSet}
 6 | \usage{
 7 | feature_types(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{FacileDataSet}}
11 | }
12 | \description{
13 | Enumerate the types of feature stored in a FacileDataSet
14 | }
15 | 


--------------------------------------------------------------------------------
/man/executeSQL.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sql.R
 3 | \name{executeSQL}
 4 | \alias{executeSQL}
 5 | \title{Execute multiple queries against a database}
 6 | \usage{
 7 | executeSQL(con, sql)
 8 | }
 9 | \arguments{
10 | \item{con}{database handle}
11 | 
12 | \item{sql}{list of charvecs (SQL statements)}
13 | }
14 | \description{
15 | Execute multiple queries against a database
16 | }
17 | 


--------------------------------------------------------------------------------
/man/sqlFromFile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sql.R
 3 | \name{sqlFromFile}
 4 | \alias{sqlFromFile}
 5 | \title{Utility function to send more than one sql command to the database}
 6 | \usage{
 7 | sqlFromFile(file)
 8 | }
 9 | \arguments{
10 | \item{file}{single character, name of file with SQL statements}
11 | }
12 | \description{
13 | Copied from http://stackoverflow.com/questions/18914283
14 | }
15 | 


--------------------------------------------------------------------------------
/man/adata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as.FacileDataSet.R
 3 | \name{adata}
 4 | \alias{adata}
 5 | \title{Bioc-container specific assay data extraction functions}
 6 | \usage{
 7 | adata(x, assay = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList}
11 | 
12 | \item{...}{additional args, ignored for now}
13 | }
14 | \description{
15 | Get assay matrix
16 | }
17 | 


--------------------------------------------------------------------------------
/man/nameit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utilities.R
 3 | \name{nameit}
 4 | \alias{nameit}
 5 | \title{Ensures that a vector has names for all elements if it has names for any}
 6 | \usage{
 7 | nameit(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{an object with names}
11 | }
12 | \value{
13 | \code{x} with all elements either being uniquely named, or NULL
14 | }
15 | \description{
16 | If the vector is not named, it remains that way
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-samples.R:
--------------------------------------------------------------------------------
 1 | context("samples(FacileDataSet)")
 2 | 
 3 | test_that("samples() is a facile_frame", {
 4 |   efds <- exampleFacileDataSet()
 5 |   # Reference rows read directly from the sample_info table, by sample_id.
 6 |   info <- collect(dplyr::tbl(efds$con, 'sample_info'))
 7 |   expected <- arrange(select(info, dataset, sample_id), sample_id)
 8 | 
 9 |   # samples() should yield the same rows once collected and ordered.
10 |   observed <- arrange(collect(samples(efds)), sample_id)
11 |   expect_equal(observed, expected, check.attributes = FALSE)
12 | 
13 |   # fds() on the collected result must return a FacileDataSet.
14 |   expect_s3_class(fds(observed), "FacileDataSet")
15 | })
16 | 


--------------------------------------------------------------------------------
/man/is.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R
 3 | \name{is.FacileDataSet}
 4 | \alias{is.FacileDataSet}
 5 | \title{Class and validity checker for FacileDataSet}
 6 | \usage{
 7 | is.FacileDataSet(x)
 8 | }
 9 | \arguments{
10 | \item{x}{object to test}
11 | }
12 | \value{
13 | \code{TRUE}/\code{FALSE} indicating that \code{x} nominally "looks like" a
14 | \code{FacileDataSet}
15 | }
16 | \description{
17 | Class and validity checker for FacileDataSet
18 | }
19 | 


--------------------------------------------------------------------------------
/man/pdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as.FacileDataSet.R
 3 | \name{pdata}
 4 | \alias{pdata}
 5 | \title{Bioc-container specific pData extraction functions}
 6 | \usage{
 7 | pdata(x, covariate_metadata = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList}
11 | 
12 | \item{...}{additional args, ignored for now}
13 | }
14 | \description{
15 | This is an internal function, but exported so it is registered and found
16 | post R 4.0
17 | }
18 | 


--------------------------------------------------------------------------------
/man/fdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as.FacileDataSet.R
 3 | \name{fdata}
 4 | \alias{fdata}
 5 | \title{BioC-container specific fData extraction functions}
 6 | \usage{
 7 | fdata(x, validate = FALSE, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList}
11 | 
12 | \item{validate}{single logical, check results}
13 | 
14 | \item{...}{additional args (ignored for now)}
15 | }
16 | \description{
17 | BioC-container specific fData extraction functions
18 | }
19 | 


--------------------------------------------------------------------------------
/man/assay_feature_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R
 3 | \name{assay_feature_type}
 4 | \alias{assay_feature_type}
 5 | \title{Returns the feature_type for a given assay}
 6 | \usage{
 7 | assay_feature_type(x, assay_name)
 8 | }
 9 | \arguments{
10 | \item{x}{\code{FacileDataSet}}
11 | 
12 | \item{assay_name}{the name of the assay}
13 | }
14 | \description{
15 | The elements of the rows for a given assay all correspond to a particular
16 | feature space (ie. feature_type='entrez')
17 | }
18 | 


--------------------------------------------------------------------------------
/man/pdata_metadata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as.FacileDataSet.R
 3 | \name{pdata_metadata}
 4 | \alias{pdata_metadata}
 5 | \title{Bioc-container specific pData extraction functions}
 6 | \usage{
 7 | pdata_metadata(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList}
11 | 
12 | \item{...}{additional args, ignored for now}
13 | }
14 | \description{
15 | Get metadata on columns of sample info data.frame (label, etc.) for
16 | inclusion in metadata YAML.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/primary_key.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/db-and-table-functions.R
 3 | \name{primary_key}
 4 | \alias{primary_key}
 5 | \title{Query a table to identify its primary key(s)}
 6 | \usage{
 7 | primary_key(x, table_name)
 8 | }
 9 | \arguments{
10 | \item{x}{a \code{FacileDataSet} or \code{SQLiteConnection}}
11 | 
12 | \item{table_name}{the name of the table to query}
13 | }
14 | \value{
15 | a character vector of primary keys
16 | }
17 | \description{
18 | Query a table to identify its primary key(s)
19 | }
20 | 


--------------------------------------------------------------------------------
/man/with_feature_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{with_feature_info}
 4 | \alias{with_feature_info}
 5 | \title{Append feature information columns to (feature-rows)}
 6 | \usage{
 7 | with_feature_info(x, covariates = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{a data.frame with feature descriptor columns (feature_id, feature_type)}
11 | }
12 | \value{
13 | \code{x} fattened with the columns asked for
14 | }
15 | \description{
16 | Append feature information columns to (feature-rows)
17 | }
18 | 


--------------------------------------------------------------------------------
/man/features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{features}
 4 | \alias{features}
 5 | \title{Returns a table of information about the features (from an assay, or ...)}
 6 | \usage{
 7 | features(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{a facile object}
11 | }
12 | \value{
13 | a tibble containing feature_id, feature_type, and whatever other
14 | columns are appropriate given \code{x}
15 | }
16 | \description{
17 | Returns a table of information about the features (from an assay, or ...)
18 | }
19 | 


--------------------------------------------------------------------------------
/man/labeled.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{name}
 4 | \alias{name}
 5 | \title{Labeled acts like interface to reactive modules.}
 6 | \usage{
 7 | name(x, ...)
 8 | }
 9 | \description{
10 | Modules that implement this interface must return \code{label} and \code{name} reactive
11 | elements within them.
12 | }
13 | \details{
14 | We use these when something (like a \code{assayFeatureSelect}) needs
15 | a "computer friendly" name for itself (\code{name()}), or a more human readable
16 | name (\code{label()})
17 | }
18 | 


--------------------------------------------------------------------------------
/R/NSE-filter-features.R:
--------------------------------------------------------------------------------
 1 | #' Filter the features of a FacileDataSet with dplyr-style expressions
 2 | #'
 3 | #' Expressions in \code{...} are evaluated against \code{feature_info_tbl(x)}.
 4 | #' Only meant for interactive use, and with extreme caution ... programmatically
 5 | #' specifying column names, for instance, does not work right now.
 6 | #'
 7 | #' TODO: Use tidyeval
 8 | #'
 9 | #' @export
10 | #' @param x A \code{FacileDataSet}
11 | #' @param ... NSE clauses to use in \code{\link[dplyr]{filter}} expressions
12 | #' @family API
13 | filter_features.FacileDataSet <- function(x, ...) {
14 |   set_fds(filter(feature_info_tbl(x), ...), x)
15 | }
16 | 


--------------------------------------------------------------------------------
/man/hdf5fn.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R
 3 | \name{hdf5fn}
 4 | \alias{hdf5fn}
 5 | \title{Get location of the FacileDataSet HDF5 file}
 6 | \usage{
 7 | hdf5fn(x, mustWork = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{FacileDataSet}
11 | 
12 | \item{mustWork}{single logical}
13 | }
14 | \value{
15 | path to HDF5 file
16 | }
17 | \description{
18 | Get location of the FacileDataSet HDF5 file
19 | }
20 | \seealso{
21 | Other FacileDataSet: 
22 | \code{\link{FacileDataSet}()},
23 | \code{\link{dbfn}()},
24 | \code{\link{meta_file}()}
25 | }
26 | \concept{FacileDataSet}
27 | 


--------------------------------------------------------------------------------
/man/has_feature_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{has_feature_type}
 4 | \alias{has_feature_type}
 5 | \title{Test if a given feature type is stored in a FacileDataSet}
 6 | \usage{
 7 | has_feature_type(x, feature_type)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{FacileDataSet}}
11 | 
12 | \item{feature_type}{a character vector of potential feature types}
13 | }
14 | \value{
15 | logical vector indicating whether or not a given \code{feature_type}
16 | is stored in \code{x}
17 | }
18 | \description{
19 | Test if a given feature type is stored in a FacileDataSet
20 | }
21 | 


--------------------------------------------------------------------------------
/man/spread_covariates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample-covariates.R
 3 | \name{spread_covariates}
 4 | \alias{spread_covariates}
 5 | \title{Spreads the covariates returned from database into wide data.frame}
 6 | \usage{
 7 | spread_covariates(x, .fds = fds(x), cov.def = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{output from \code{fetch_sample_covariates}}
11 | 
12 | \item{.fds}{A \code{FacileDataSet} object}
13 | }
14 | \value{
15 | a wide \code{tbl_df}-like object
16 | }
17 | \description{
18 | Samples that did not have a value for a specific covariate are assigned to
19 | have NA.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/flog_level.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/logging.R
 3 | \name{flog_level}
 4 | \alias{flog_level}
 5 | \title{Retrieves the currently set logging level}
 6 | \usage{
 7 | flog_level(namespace = NULL)
 8 | }
 9 | \arguments{
10 | \item{namespace}{Package (or whoever) can provide a value here to set the
11 | level they want to listen to. If this is \code{NULL} (default), the top level
12 | \code{facile.log.level} value will be used.}
13 | }
14 | \value{
15 | the logging level, as an integer (from \code{FacileData:::.flog_levels})
16 | }
17 | \description{
18 | Retrieves the currently set logging level
19 | }
20 | 


--------------------------------------------------------------------------------
/man/covariate_definitions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{covariate_definitions}
 4 | \alias{covariate_definitions}
 5 | \title{Get description of sample metadata columns}
 6 | \usage{
 7 | covariate_definitions(x, as.list = TRUE, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{FacileDataStore}
11 | 
12 | \item{as.list}{single logical, return tibble or list}
13 | }
14 | \value{
15 | meta information about the sample covariates in \code{x}
16 | }
17 | \description{
18 | Descriptions of the sample covariates can be specified in a FacileDataSet's
19 | \code{meta.yaml} file. This function returns those.
20 | }
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-csurv.R:
--------------------------------------------------------------------------------
 1 | context("Coercion among Surv, cSurv and character")
 2 | 
 3 | # Round-trips a small Surv vector through each of the other representations.
 4 | test_that("We can convert among Surv, cSurv and character", {
 5 |   # Surv -> character -> Surv
 6 |   surv1 <- Surv(c(14, 12, 3), event = c(1, 0, 1))
 7 |   expect_identical(surv1, as(as(surv1, "character"), "Surv"))
 8 | 
 9 |   # Surv -> cSurv -> Surv
10 |   surv2 <- Surv(c(14, 12, 3), event = c(1, 0, 1))
11 |   expect_identical(surv2, as(as(surv2, "cSurv"), "Surv"))
12 | 
13 |   # Surv -> cSurv -> character, then back to each of the other classes
14 |   surv3 <- Surv(c(14, 12, 3), event = c(1, 0, 1))
15 |   csurv3 <- as(surv3, "cSurv")
16 |   chr3 <- as(csurv3, "character")
17 |   expect_identical(as(chr3, "cSurv"), csurv3)
18 |   expect_identical(as(csurv3, "Surv"), surv3)
19 |   expect_identical(as(chr3, "Surv"), surv3)
20 |   expect_identical(as(chr3, "cSurv"), csurv3)
21 | })
22 | 


--------------------------------------------------------------------------------
/man/fetch_sample_statistics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{fetch_sample_statistics}
 4 | \alias{fetch_sample_statistics}
 5 | \title{NOTE: fetch_sample_statistics -> \code{fetch_assay_covariates}
 6 | Issue #2}
 7 | \usage{
 8 | fetch_sample_statistics(x, samples = NULL, semi = TRUE, assay_name = NULL, ...)
 9 | }
10 | \description{
11 | NOTE: fetch_sample_statistics -> \code{fetch_assay_covariates}
12 | Issue #2
13 | }
14 | \seealso{
15 | Other FacileInterface: 
16 | \code{\link{facet_frame.FacileDataSet}()},
17 | \code{\link{fetch_assay_score}()},
18 | \code{\link{fetch_sample_covariates}()},
19 | \code{\link{samples}()}
20 | }
21 | \concept{FacileInterface}
22 | 


--------------------------------------------------------------------------------
/inst/scripts/retrieve-parathyroidSE-gene-info.R:
--------------------------------------------------------------------------------
 1 | # Saves Feb 2014 Ensembl annotations for the genes in parathyroidGenesSE.
 2 | library(SummarizedExperiment)
 3 | data("parathyroidGenesSE", package = "parathyroidSE")
 4 | 
 5 | biomart <- loadNamespace("biomaRt")
 6 | ensembl <- biomart$useMart(
 7 |   biomart = "ENSEMBL_MART_ENSEMBL",
 8 |   dataset = "hsapiens_gene_ensembl",
 9 |   host = "feb2014.archive.ensembl.org")
10 | gene.info <- biomart$getBM(
11 |   mart = ensembl,
12 |   attributes = c("ensembl_gene_id", "hgnc_symbol", "gene_biotype"),
13 |   filters = "ensembl_gene_id",
14 |   values = rownames(parathyroidGenesSE))
15 | 
16 | # NOTE: the identical table is written out under both file names
17 | out.files <- c("inst/extdata/parathyroidSE-gene-info.csv",
18 |                "inst/extdata/airway-gene-info.csv")
19 | for (out.fn in out.files) write.csv(gene.info, out.fn, row.names = FALSE)
20 | 


--------------------------------------------------------------------------------
/man/samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{samples}
 4 | \alias{samples}
 5 | \title{Returns a facile_frame of samples from an object in the facilebio ecosystem.}
 6 | \usage{
 7 | samples(x, ...)
 8 | }
 9 | \description{
10 | \code{samples} can be extracted from a FacileDataStore itself, or the result of
11 | an analysis initiated from a FacileDataStore (like you'll find in the
12 | FacileAnalysis package).
13 | }
14 | \seealso{
15 | Other FacileInterface: 
16 | \code{\link{facet_frame.FacileDataSet}()},
17 | \code{\link{fetch_assay_score}()},
18 | \code{\link{fetch_sample_covariates}()},
19 | \code{\link{fetch_sample_statistics}()}
20 | }
21 | \concept{FacileInterface}
22 | 


--------------------------------------------------------------------------------
/man/fetch_assay_score.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{fetch_assay_score}
 4 | \alias{fetch_assay_score}
 5 | \title{NOTE: is fetch_assay_score really necessary?}
 6 | \usage{
 7 | fetch_assay_score(
 8 |   x,
 9 |   features,
10 |   samples = NULL,
11 |   assay_name = NULL,
12 |   as.matrix = FALSE,
13 |   ...,
14 |   subset.threshold = 700
15 | )
16 | }
17 | \description{
18 | NOTE: is fetch_assay_score really necessary?
19 | }
20 | \seealso{
21 | Other FacileInterface: 
22 | \code{\link{facet_frame.FacileDataSet}()},
23 | \code{\link{fetch_sample_covariates}()},
24 | \code{\link{fetch_sample_statistics}()},
25 | \code{\link{samples}()}
26 | }
27 | \concept{FacileInterface}
28 | 


--------------------------------------------------------------------------------
/man/feature_name_map.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R, R/features.R
 3 | \name{assay_feature_name_map}
 4 | \alias{assay_feature_name_map}
 5 | \alias{feature_name_map}
 6 | \title{Returns table of names and aliases for features.}
 7 | \usage{
 8 | assay_feature_name_map(x, assay_name)
 9 | 
10 | feature_name_map(x, feature_type)
11 | }
12 | \arguments{
13 | \item{x}{\code{FacileDataSet}}
14 | 
15 | \item{assay_name}{the name of assay to get the feature map for.}
16 | 
17 | \item{feature_type}{a character vector specifying the feature type}
18 | }
19 | \value{
20 | a tibble with \code{feature_id, name, type} columns, where type
21 | is "primary" or "alias"
22 | }
23 | \description{
24 | #dropme
25 | }
26 | 


--------------------------------------------------------------------------------
/man/dbfn.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R
 3 | \name{dbfn}
 4 | \alias{dbfn}
 5 | \title{Get location of the FacileDataSet database}
 6 | \usage{
 7 | dbfn(x, mustWork = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{FacileDataSet}
11 | 
12 | \item{mustWork}{boolean, if \code{TRUE} (default), throws an error if the sqlite
13 | file does not exist. When \code{FALSE}, this returns the "expected" path to the
14 | sqlite file for \code{x}}
15 | }
16 | \value{
17 | the filepath to the sqlite database
18 | }
19 | \description{
20 | Get location of the FacileDataSet database
21 | }
22 | \seealso{
23 | Other FacileDataSet: 
24 | \code{\link{FacileDataSet}()},
25 | \code{\link{hdf5fn}()},
26 | \code{\link{meta_file}()}
27 | }
28 | \concept{FacileDataSet}
29 | 


--------------------------------------------------------------------------------
/man/eavdef_for_column.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{eavdef_for_column}
 4 | \alias{eavdef_for_column}
 5 | \title{Generate entity-attribute-value definition for a column in a data.frame}
 6 | \usage{
 7 | eavdef_for_column(column, column_name)
 8 | }
 9 | \arguments{
10 | \item{column}{a vector, e.g. a column out of a pdata}
11 | 
12 | \item{column_name}{single character, name of the column}
13 | }
14 | \value{
15 | a generic list-of-list definition column
16 | }
17 | \description{
18 | Creates the minimal list-definition for a single column in a \code{pData}
19 | \code{data.frame}. This function is not exported on purpose. Column descriptions
20 | will be taken from the "label" attribute of data.frames or the "metadata" list
21 | for DataFrames.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/initializeFacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/construction.R
 3 | \name{initializeFacileDataSet}
 4 | \alias{initializeFacileDataSet}
 5 | \title{Create an empty FacileDataSet}
 6 | \usage{
 7 | initializeFacileDataSet(path, meta_file, page_size = 2^12, cache_size = 2e+05)
 8 | }
 9 | \arguments{
10 | \item{path}{the directory to create which will house the
11 | \code{FacileDataSet}}
12 | 
13 | \item{page_size, cache_size}{\code{pragma} values to setup the backend SQLite
14 | database}
15 | 
16 | \item{covariate_definition}{the path to the covariate definition file}
17 | }
18 | \value{
19 | invisibly returns the path to the successfully created datastore
20 | }
21 | \description{
22 | This is a helper function that is currently only called from
23 | \code{as.FacileDataSet}
24 | }
25 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   schedule:
 3 |     # Only push the test coverage up on a schedule. We can use the full
 4 |     # facilebio image for this for expediency
 5 |     - cron: '30 12 * * *'  # every day at 12:30 (GitHub evaluates schedules in UTC)
 6 | 
 7 | name: test-coverage
 8 | 
 9 | jobs:
10 |   test-coverage:
11 |     runs-on: ubuntu-latest
12 |     container: facilebio/facilebio
13 |     env:
14 |       ACTIONS_ALLOW_UNSECURE_COMMANDS: true  # NOTE(review): re-enables deprecated set-env/add-path commands; confirm still required
15 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
16 |       NOT_CRAN: true
17 |       CI: true
18 | 
19 |     steps:
20 |       - name: Checkout Repository
21 |         uses: actions/checkout@v2
22 | 
23 |       - name: Setup R
24 |         uses: r-lib/actions/setup-r@v1
25 |         with:
26 |           install-r: false  # presumably R already ships in the container image -- confirm
27 | 
28 |       - name: Test coverage
29 |         run: covr::codecov(quiet = FALSE)
30 |         shell: Rscript {0}  # the run text above is executed as R code
31 | 


--------------------------------------------------------------------------------
/man/validate_covariate_def_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{validate_covariate_def_list}
 4 | \alias{validate_covariate_def_list}
 5 | \title{Validates that a covariate definition list reasonably describes a data.frame.}
 6 | \usage{
 7 | validate_covariate_def_list(x, pdata)
 8 | }
 9 | \arguments{
10 | \item{x}{a covariate definition list-of-lists}
11 | 
12 | \item{pdata}{a \code{data.frame}}
13 | }
14 | \description{
15 | The covariates defined in \code{x} must be a subset of the columns in \code{pdata}.
16 | This method will throw an error if there is a covariate in \code{x} that does
17 | not have a matching column in \code{pdata}.
18 | }
19 | \details{
20 | This function does not check if all columns in \code{pdata} have definitions in
21 | \code{x}.
22 | }
23 | 


--------------------------------------------------------------------------------
/R/package.R:
--------------------------------------------------------------------------------
 1 | #' @import checkmate
 2 | #' @import dplyr
 3 | #' @import methods
 4 | #' @importFrom utils read.csv
 5 | "_PACKAGE"
 6 | 
 7 | #' @importFrom broom tidy
 8 | #' @export
 9 | broom::tidy
10 | 
11 | # Re-export oft-used dplyr stuff -----------------------------------------------
12 | # Should we just put dplyr in Depends?
13 | 
14 | #' @export
15 | dplyr::`%>%`
16 | 
17 | #' @export
18 | dplyr::arrange
19 | 
20 | #' @export
21 | dplyr::collect
22 | 
23 | #' @export
24 | dplyr::distinct
25 | 
26 | #' @export
27 | dplyr::filter
28 | 
29 | #' @export
30 | dplyr::group_by
31 | 
32 | #' @export
33 | dplyr::mutate
34 | 
35 | #' @export
36 | dplyr::select
37 | 
38 | #' @export
39 | dplyr::transmute
40 | 
41 | #' @export
42 | dplyr::ungroup
43 | 
44 | #' @export
45 | dplyr::left_join
46 | 
47 | #' @export
48 | dplyr::inner_join
49 | 
50 | #' @export
51 | dplyr::semi_join
52 | 


--------------------------------------------------------------------------------
/man/assay_sample_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{assay_sample_info}
 4 | \alias{assay_sample_info}
 5 | \title{Utility functions to get row and column indices of rnaseq hdf5 files.}
 6 | \usage{
 7 | assay_sample_info(x, assay_name, samples = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{\code{FacileDataStore}}
11 | 
12 | \item{assay_name}{the name of the assay}
13 | 
14 | \item{samples}{a sample descriptor}
15 | }
16 | \value{
17 | an updated version of \code{samples} decorated with hd5_index,
18 | scaling factors, etc. Note that rows in \code{samples} that do not appear
19 | in \code{assay_name} will be returned here with NA values for hd5_index and
20 | such.
21 | }
22 | \description{
23 | This is called to get things like hdf5_index and scaling factors for
24 | the samples in a given assay.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/assay_info_over_samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R
 3 | \name{assay_info_over_samples}
 4 | \alias{assay_info_over_samples}
 5 | \title{Identify the number of each assay run across specific samples.}
 6 | \usage{
 7 | assay_info_over_samples(x, samples = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{FacileDataSet}
11 | 
12 | \item{samples}{sample descriptor}
13 | 
14 | \item{with_count}{return the number of samples in \code{samples} that are
15 | assayed over each assay as a column in \code{return}}
16 | }
17 | \value{
18 | rows from assay_info_tbl that correspond to the assays defined
19 | over the given samples. If no assays are defined over these samples,
20 | you're going to get an empty tibble.
21 | }
22 | \description{
23 | The default assay is listed first, the rest of the order is undetermined.
24 | }
25 | 


--------------------------------------------------------------------------------
/R/sql.R:
--------------------------------------------------------------------------------
 1 | #' Read a file of SQL statements and split it into individual commands
 2 | #'
 3 | #' `--` comments are stripped, lines are joined, and the text is split on `;`.
 4 | #' Adapted from http://stackoverflow.com/questions/18914283
 5 | #'
 6 | #' @param file single character, name of file with SQL statements
 7 | #' @return character vector of SQL statements, one per element
 8 | sqlFromFile <- function(file) {
 9 |   sql <- gsub("--.*$", "", readLines(file)) ## remove comments
10 |   sql <- unlist(strsplit(paste(sql, collapse = " "), ";"))
11 |   # drop fragments that are empty or all whitespace (tabs too, not only spaces)
12 |   sql[!grepl("^[[:space:]]*$", sql)]
13 | }
14 | 
15 | #' Run a series of SQL statements against a database connection
16 | #'
17 | #' Each element of \code{sql} is handed to \code{dbExecute} in turn.
18 | #'
19 | #' @importFrom DBI dbExecute
20 | #' @param con database handle
21 | #' @param sql list of charvecs (SQL statements)
22 | #' @return invisibly, the list of values returned by each \code{dbExecute} call
23 | executeSQL <- function(con, sql) {
24 |   results <- lapply(sql, function(statement) dbExecute(con, statement))
25 |   invisible(results)
26 | }
27 | 


--------------------------------------------------------------------------------
/man/eav_encode.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{eav_encode}
 4 | \alias{eav_encode}
 5 | \title{Encodes column(s) from \code{pData} into character values}
 6 | \usage{
 7 | eav_encode(dat, covariate_def, varname)
 8 | }
 9 | \arguments{
10 | \item{dat}{the vector of values to encode into an EAV table}
11 | 
12 | \item{covariate_def}{the single-list-definition of this covariate}
13 | 
14 | \item{vname}{the name of the attribute column in the eav table}
15 | }
16 | \value{
17 | a four-column \code{data.frame} (dataset,sample_id,variable,value)
18 | with the encoded covariate into a single \code{value} column.
19 | }
20 | \description{
21 | This function is not exported, and should only be called from within the
22 | \code{\link[=as.EAVtable]{as.EAVtable()}} function because we rely on validity checks that are
23 | happening there.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/extract_transcribed_info_from_ensembl_gtf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract_transcribed_info_from_ensembl_gtf.R
 3 | \name{extract_transcribed_info_from_ensembl_gtf}
 4 | \alias{extract_transcribed_info_from_ensembl_gtf}
 5 | \title{Extract gene- and transcript-level information from an ENSEMBL gtf.}
 6 | \usage{
 7 | extract_transcribed_info_from_ensembl_gtf(
 8 |   fn,
 9 |   gene_type = "gene_type",
10 |   transcript_type = "transcript_type"
11 | )
12 | }
13 | \arguments{
14 | \item{fn}{the path to the ENSEMBL (or GENCODE) GTF}
15 | }
16 | \value{
17 | a list of tibbles with \verb{$transcript_info} and \verb{$gene_info} elements
18 | }
19 | \description{
20 | This was written for release_28 annotations. This is noted because some
21 | column names seem to have changed, i.e. "gene_type" instead of
22 | "gene_biotype", etc. Let's see how consistent this is!
23 | }
24 | 


--------------------------------------------------------------------------------
/man/create_assay_feature_descriptor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R
 3 | \name{create_assay_feature_descriptor}
 4 | \alias{create_assay_feature_descriptor}
 5 | \title{Creates a feature descriptor for interactive ease}
 6 | \usage{
 7 | create_assay_feature_descriptor(x, features = NULL, assay_name = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{FacileDataSet}
11 | 
12 | \item{features}{a character string of feature ids (requires assay_name)
13 | or a data.frame with feature_id column.}
14 | 
15 | \item{assay_name}{the assay to get the featurespace from. if this is provided,
16 | it will trump an already existing assay_name column in \code{features}}
17 | }
18 | \value{
19 | a feature descriptor with feature_id and assay_name, which can be
20 | used to absolutely find features
21 | }
22 | \description{
23 | Creates a data.frame of features and assays they come from
24 | }
25 | 


--------------------------------------------------------------------------------
/man/check_facile_data_set.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validation.R
 3 | \name{check_facile_data_set}
 4 | \alias{check_facile_data_set}
 5 | \alias{assert_facile_data_set}
 6 | \alias{test_facile_data_set}
 7 | \title{Check if argument is a FacileDataSet}
 8 | \usage{
 9 | check_facile_data_set(x, ...)
10 | 
11 | assert_facile_data_set(x, ..., .var.name = vname(x), add = NULL)
12 | 
13 | test_facile_data_set(x, ...)
14 | }
15 | \arguments{
16 | \item{x}{The object to check.}
17 | 
18 | \item{...}{to be determined later}
19 | 
20 | \item{.var.name}{Name of the checked object to print in assertions. Defaults
21 | to the heuristic implemented in \code{\link[checkmate:vname]{checkmate::vname()}}.}
22 | 
23 | \item{add}{An \code{\link[checkmate:AssertCollection]{checkmate::AssertCollection()}} object. Default is \code{NULL}.}
24 | }
25 | \description{
26 | Check if argument is a FacileDataSet
27 | }
28 | 


--------------------------------------------------------------------------------
/man/conform_data_frame.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utilities.R
 3 | \name{conform_data_frame}
 4 | \alias{conform_data_frame}
 5 | \title{Arranges the columns of one data.frame to another}
 6 | \usage{
 7 | conform_data_frame(x, to)
 8 | }
 9 | \arguments{
10 | \item{x}{a \code{data.frame} that needs to be checked and conformed}
11 | 
12 | \item{to}{the prototype \code{data.frame} that \code{x} needs to be aligned
13 | against.}
14 | }
15 | \value{
16 | the \code{tibble} version of \code{x} that is arranged to look
17 | like \code{to}.
18 | }
19 | \description{
20 | This function is primarily used to add data to the FacileDataSet's SQLite
21 | database. \code{x} is new data to add, and \code{to} is the a table of
22 | the form that is expected in the database. We check that the columns of
23 | \code{x} are a superset of columns in \code{x} and the matching columns are
24 | all of the same class.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/eav_encode_covariate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{eav_encode_covariate}
 4 | \alias{eav_encode_covariate}
 5 | \title{Encodes column(s) from \code{pData} into character values}
 6 | \usage{
 7 | eav_encode_covariate(dat, covariate_def, aname = "variable")
 8 | }
 9 | \arguments{
10 | \item{covariate_def}{the single-list-definition of this covariate}
11 | 
12 | \item{pdata}{the \code{pData} \code{data.frame}}
13 | 
14 | \item{vname}{the name of the attribute column in the eav table}
15 | }
16 | \value{
17 | a four-column \code{data.frame} (dataset,sample_id,variable,value)
18 | with the encoded covariate into a single \code{value} column.
19 | }
20 | \description{
21 | This function is not exported, and should only be called from within the
22 | \code{\link[=as.EAVtable]{as.EAVtable()}} function because we rely on validity checks that are
23 | happening there.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/organism.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R
 3 | \name{organism.FacileDataSet}
 4 | \alias{organism.FacileDataSet}
 5 | \title{Retrieves the organism the data is defined over}
 6 | \usage{
 7 | \method{organism}{FacileDataSet}(x)
 8 | }
 9 | \value{
10 | \code{"Homo sapiens"}, \code{"Mus musculus"}, etc.
11 | }
12 | \description{
13 | A FacileDataStore is only expected to hold data for one organism.
14 | }
15 | \seealso{
16 | Other API: 
17 | \code{\link{fetch_assay_score.FacileDataSet}()},
18 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
19 | \code{\link{fetch_sample_covariates}()},
20 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
21 | \code{\link{fetch_samples.FacileDataSet}()},
22 | \code{\link{filter_features.FacileDataSet}()},
23 | \code{\link{filter_samples.FacileDataSet}()},
24 | \code{\link{samples.FacileDataSet}()}
25 | }
26 | \concept{API}
27 | 


--------------------------------------------------------------------------------
/man/covariate_meta_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{covariate_meta_info}
 4 | \alias{covariate_meta_info}
 5 | \title{Retrieve the meta information about a covariate for EAV decoding}
 6 | \usage{
 7 | covariate_meta_info(covariate, .fds, covdefs = NULL)
 8 | }
 9 | \arguments{
10 | \item{covariate}{the name of the covariate}
11 | 
12 | \item{.fds}{the \code{FacileDataSet}}
13 | 
14 | \item{covdefs}{The \code{covariate_definitions(.fds)} list}
15 | }
16 | \value{
17 | a list of covariate information with the following elements:
18 | \verb{$name}, \verb{$type}, \verb{$class}, \verb{$description},
19 | \verb{$label}, \verb{$is.factor}, (and maybe \verb{$levels})
20 | }
21 | \description{
22 | Mappings that define attribute-value encodings into R-native objects are
23 | stored in a \code{FacileDataSet}'s \code{meta.yaml} file, in the \code{sample_covariate}
24 | section.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/check_facile_data_store.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validation.R
 3 | \name{check_facile_data_store}
 4 | \alias{check_facile_data_store}
 5 | \alias{assert_facile_data_store}
 6 | \alias{test_facile_data_store}
 7 | \title{Check if argument is a FacileDataStore}
 8 | \usage{
 9 | check_facile_data_store(x, ...)
10 | 
11 | assert_facile_data_store(x, ..., .var.name = vname(x), add = NULL)
12 | 
13 | test_facile_data_store(x, ...)
14 | }
15 | \arguments{
16 | \item{x}{The object to check.}
17 | 
18 | \item{...}{to be determined later}
19 | 
20 | \item{.var.name}{Name of the checked object to print in assertions. Defaults
21 | to the heuristic implemented in \code{\link[checkmate:vname]{checkmate::vname()}}.}
22 | 
23 | \item{add}{An \code{\link[checkmate:AssertCollection]{checkmate::AssertCollection()}} object. Default is \code{NULL}.}
24 | }
25 | \description{
26 | Check if argument is a FacileDataStore
27 | }
28 | 


--------------------------------------------------------------------------------
/man/test-helpers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/test-helpers.R
 3 | \name{example_sample_covariates}
 4 | \alias{example_sample_covariates}
 5 | \alias{example_meta}
 6 | \alias{example_sample_covariate_definitions}
 7 | \title{Fetches exemplar data for unit testing}
 8 | \usage{
 9 | example_sample_covariates()
10 | 
11 | example_meta(file.path = FALSE)
12 | 
13 | example_sample_covariate_definitions()
14 | }
15 | \arguments{
16 | \item{file.path}{If \code{TRUE}, returns the path to the yaml file, otherwise
17 | returns the list-of-list meta definition.}
18 | }
19 | \value{
20 | Either the list-of-list meta definition, or path to the \code{meta.yaml}
21 | file where these are defined.
22 | 
23 | the list-of-list definitions for the example \code{pData} returned from
24 | \code{\link[=example_sample_covariates]{example_sample_covariates()}}
25 | }
26 | \description{
27 | Fetches exemplar data for unit testing
28 | }
29 | 


--------------------------------------------------------------------------------
/man/ds_annot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as.FacileDataSet.R
 3 | \name{ds_annot}
 4 | \alias{ds_annot}
 5 | \title{Bioc-container specific data set annotation extraction functions}
 6 | \usage{
 7 | ds_annot(x, meta = NULL, validate = FALSE, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{SummarizedExperiment, ExpressionSet or DGEList}
11 | 
12 | \item{meta}{a list of description stuff for the dataset, this can act to
13 | override what's there, already}
14 | 
15 | \item{validate}{single logical, check results}
16 | 
17 | \item{...}{additional args (ignored for now)}
18 | }
19 | \description{
20 | Takes dataset-level annotation as stored by each type. DGEList has
21 | no such slot, unfortunately, and thus gets the default. SE has a
22 | metadata slot and can provide url and description. eSet just has
23 | a character annotation and can provide a description.
24 | }
25 | \details{
26 | This is an internal helper function.
27 | }
28 | 


--------------------------------------------------------------------------------
/man/append_facile_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/db-and-table-functions.R
 3 | \name{append_facile_table}
 4 | \alias{append_facile_table}
 5 | \title{Adds rows to a table in a FacileDataSet}
 6 | \usage{
 7 | append_facile_table(dat, x, table_name, warn_existing = FALSE)
 8 | }
 9 | \arguments{
10 | \item{dat}{the \code{data.frame} of rows to add to the table, which must
11 | have a superset of columns present in the \code{table_name} that is being
12 | appended to}
13 | 
14 | \item{x}{the \code{FacileDataSet}}
15 | 
16 | \item{table_name}{the name of the table in \code{x} to add the rows of
17 | \code{dat} to.}
18 | }
19 | \value{
20 | invisibly returns the conformed version of \code{dat}.
21 | }
22 | \description{
23 | This function first checks the data in the target table \code{table_name}
24 | to ensure that rows in \code{dat} that exist in \code{table_name} (by
25 | checking the primary key) are not added.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/parse_sample_criterion.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/samples.R
 3 | \name{parse_sample_criterion}
 4 | \alias{parse_sample_criterion}
 5 | \title{Creates a filter expression to select samples based on value of a covariate}
 6 | \usage{
 7 | parse_sample_criterion(variable, value)
 8 | }
 9 | \arguments{
10 | \item{variable}{the name of the variable to look for in the sample_covariate
11 | \code{variable} column}
12 | 
13 | \item{value}{\code{character} vector of values for the \code{variable} that
14 | you want your samples to have.}
15 | }
16 | \value{
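17 | a filter expression that selects the samples whose \code{variable} covariate takes one of the given \code{value}s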
18 | }
19 | \description{
20 | This leverages dplyr's standard (vs non-standard) evaluation mojo. There is
21 | likely a cleaner way to do this, but to be honest I still find the
22 | \code{\link[lazyeval]{interp}} stuff rather confusing
23 | }
24 | \seealso{
25 | \href{https://cran.r-project.org/web/packages/dplyr/vignettes/nse.html}{dplyr non-standard evaluation}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/assemble_example_dataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assemble_example_dataset.R
 3 | \name{assemble_example_dataset}
 4 | \alias{assemble_example_dataset}
 5 | \title{Assembles an example facile dataset to play with}
 6 | \usage{
 7 | assemble_example_dataset(
 8 |   directory = tempdir(),
 9 |   name = "ExampleRnaFacileDataSet"
10 | )
11 | }
12 | \arguments{
13 | \item{directory}{The name of the parent directory to hold the dataset}
14 | 
15 | \item{name}{A subdirectory within \code{directory} will be created using this
16 | name.}
17 | }
18 | \value{
19 | The FacileDataSet object itself.
20 | }
21 | \description{
22 | This combines the airway and parathyroidSE RNA-seq datasets into a single
23 | FacileDataSet.
24 | }
25 | \details{
26 | The code here is extracted from the \code{FacileDataSet-assembly} vignette. Please
27 | read that for some of the why's and how's of the decisions made here when
28 | assembling datasets.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/fds.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{fds}
 4 | \alias{fds}
 5 | \alias{fds.FacileDataStore}
 6 | \alias{fds.default}
 7 | \alias{fds<-}
 8 | \alias{fds<-.tbl}
 9 | \alias{fds<-.data.frame}
10 | \alias{set_fds}
11 | \title{Get or set the FacileDataStore for an object}
12 | \usage{
13 | fds(x, ...)
14 | 
15 | \method{fds}{FacileDataStore}(x)
16 | 
17 | \method{fds}{default}(x, ...)
18 | 
19 | fds(x) <- value
20 | 
21 | \method{fds}{tbl}(x) <- value
22 | 
23 | \method{fds}{data.frame}(x) <- value
24 | 
25 | set_fds(x, value)
26 | }
27 | \arguments{
28 | \item{x}{the object}
29 | 
30 | \item{value}{The \code{FacileDataStore} object}
31 | }
32 | \description{
33 | FacileDataStores are passed along with most every object generated from
34 | functions in the facilebio universe. This makes it convenient to dig back
35 | into a large genomics object to retrieve data from "slim" results, like
36 | a sample covariate data.frame.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/dot-level_biotypes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract_transcribed_info_from_ensembl_gtf.R
 3 | \name{.level_biotypes}
 4 | \alias{.level_biotypes}
 5 | \title{Utility function to "factorize" biotypes into an order we care about.}
 6 | \usage{
 7 | .level_biotypes(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a \code{character} vector of biotypes}
11 | }
12 | \value{
13 | a factor version of \code{x}, with \code{levels(x)} in approximately the order
14 | we care about.
15 | }
16 | \description{
17 | ENSEMBL GTFs provide biotype information for genes/transcripts. These are
18 | things like "3prime_overlapping_ncRNA", "antisense", ..., "protein_coding",
19 | etc. This function turns the "biotype"-vector \code{x} into a factor with levels
20 | in (roughly) the order we care to "unique"-ify these levels. Ie. if a gene
21 | has a "protein_coding" annotation, we will care to keep that one over one
22 | of its annotations which categorize it as a "processed_transcript"
23 | }
24 | 


--------------------------------------------------------------------------------
/man/samples.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R
 3 | \name{samples.FacileDataSet}
 4 | \alias{samples.FacileDataSet}
 5 | \title{Retrieves the sample identifiers for all samples in a FacileDataSet.}
 6 | \usage{
 7 | \method{samples}{FacileDataSet}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{a \code{FacileDataSet}}
11 | }
12 | \value{
13 | tibble of sample attributes
14 | }
15 | \description{
16 | Sample identifiers are provided as \verb{dataset,sample_id} tuples.
17 | }
18 | \seealso{
19 | Other API: 
20 | \code{\link{fetch_assay_score.FacileDataSet}()},
21 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
22 | \code{\link{fetch_sample_covariates}()},
23 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
24 | \code{\link{fetch_samples.FacileDataSet}()},
25 | \code{\link{filter_features.FacileDataSet}()},
26 | \code{\link{filter_samples.FacileDataSet}()},
27 | \code{\link{organism.FacileDataSet}()}
28 | }
29 | \concept{API}
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-feature-info.R:
--------------------------------------------------------------------------------
 1 | context("Feature Info")
 2 | 
 3 | FDS <- exampleFacileDataSet()  # example dataset used by every test in this file
 4 | genes <- tibble(  # query table: one row per feature, keyed by (feature_id, feature_type)
 5 |   feature_id = c("800", "1009", "1289", "50509", "2191", "2335", "5159"),
 6 |   feature_type = "entrez")
 7 | 
 8 | test_that("with_feature_info grabs the right goods", {
 9 |   finfo.all <- collect(fetch_feature_info(FDS, "entrez"), n = Inf)  # n = Inf: collect without a row limit
10 |   finfo <- with_feature_info(genes, .fds = FDS)
11 | 
12 |   expected <- left_join(genes, finfo.all, by = c("feature_id", "feature_type"))  # reference result assembled by hand
13 |   expect_equal(finfo, expected, check.attributes = FALSE)
14 | 
15 |   f2 <- with_feature_info(genes, c("name", "meta"), .fds = FDS)  # ask for two covariates only
16 |   expect_equal(f2, select(expected, !!colnames(f2)), check.attributes = FALSE)  # compare against the same columns of the reference
17 | })
18 | 
19 | test_that("with_feature_info can rename feature covariates", {
20 |   expected <- genes %>%  # fetch under the original name, then rename explicitly
21 |     with_feature_info(c("name", "meta"), .fds = FDS) %>%
22 |     rename(symbol = "name")
23 | 
24 |   res <- genes %>%  # a named element (symbol = "name") should yield the same renamed column
25 |     with_feature_info(c(symbol = "name", "meta"), .fds = FDS)
26 | 
27 |   expect_equal(res, expected)
28 | })
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-assay-normalization.R:
--------------------------------------------------------------------------------
 1 | context("Normalizaiton of assay data")  # NOTE(review): label misspells "Normalization"
 2 | 
 3 | if (!exists("FDS")) FDS <- exampleFacileDataSet()  # only build the example dataset when FDS is not already defined
 4 | 
 5 | samples <- FDS %>%  # sample descriptor (dataset, sample_id) restricted to stage == "III"
 6 |   filter_samples(stage == "III") %>%
 7 |   select(dataset, sample_id)
 8 | 
 9 | genes <- c(  # named character vector: names look like gene symbols, values are feature ids (presumably Entrez -- confirm)
10 |   PRF1='5551',
11 |   GZMA='3001',
12 |   CD274='29126',
13 |   TIGIT='201633')
14 | 
15 | features <- tibble(assay='rnaseq', feature_id=genes)  # NOTE(review): not referenced by the test below
16 | 
17 | test_that("Normalization of rnaseq data is equivalent to edgeR::cpm", {
18 |   y <- edgeR::calcNormFactors(as.DGEList(samples))  # DGEList for the selected samples, with norm factors computed
19 |   cpms <- edgeR::cpm(y, log = TRUE, prior.count = 0.25)[genes,]  # reference log-CPM values for the genes of interest
20 | 
21 |   # use the lib.size and norm.factors from this subset of data
22 |   samples. <- samples %>%
23 |     left_join(select(y$samples, sample_id, lib.size, norm.factors),
24 |               by = "sample_id")
25 | 
26 |   normed <- fetch_assay_data(samples., genes, normalized = TRUE,  # same prior.count as the edgeR reference above
27 |                              prior.count = 0.25, as.matrix = TRUE)
28 |   normed <- normed[rownames(cpms), colnames(cpms)]  # align row/column order with the reference matrix
29 |   expect_equal(normed, cpms)
30 | })
31 | 
32 | 


--------------------------------------------------------------------------------
/man/as_facile_frame.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/facile_frame.R
 3 | \name{as_facile_frame}
 4 | \alias{as_facile_frame}
 5 | \title{Converts a normal tibble/data.frame to a facile_frame}
 6 | \usage{
 7 | as_facile_frame(
 8 |   x,
 9 |   datastore = fds(x),
10 |   classes = NULL,
11 |   ...,
12 |   .valid_sample_check = TRUE
13 | )
14 | }
15 | \arguments{
16 | \item{x}{a sample-like descriptor}
17 | 
18 | \item{datastore}{the FacileDataStore tied to x}
19 | 
20 | \item{classes}{more classes to append to the outgoing object. The
21 | \code{"facile_frame"} class entry is always the last one of the bunch.}
22 | 
23 | \item{...}{dots}
24 | 
25 | \item{.valid_sample_check}{If \code{TRUE} (default), will check if \code{x} is a valid
26 | subset of the FacileDataStore \code{datastore}. Internal functions may set this to
27 | \code{FALSE} to skip the check to (1) save time; and (2) avoid infinite
28 | recursion in the call to \code{assert_sample_subset}.}
29 | }
30 | \description{
31 | Converts a normal tibble/data.frame to a facile_frame
32 | }
33 | 


--------------------------------------------------------------------------------
/man/fetch_assay_score.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R
 3 | \name{fetch_assay_score.FacileDataSet}
 4 | \alias{fetch_assay_score.FacileDataSet}
 5 | \title{Helper function to get sample assay data from single or aggregate features}
 6 | \usage{
 7 | \method{fetch_assay_score}{FacileDataSet}(
 8 |   x,
 9 |   features,
10 |   samples = NULL,
11 |   assay_name = NULL,
12 |   as.matrix = FALSE,
13 |   ...,
14 |   subset.threshold = 700
15 | )
16 | }
17 | \description{
18 | Helper function to get sample assay data from single or aggregate features
19 | }
20 | \seealso{
21 | Other API: 
22 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
23 | \code{\link{fetch_sample_covariates}()},
24 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
25 | \code{\link{fetch_samples.FacileDataSet}()},
26 | \code{\link{filter_features.FacileDataSet}()},
27 | \code{\link{filter_samples.FacileDataSet}()},
28 | \code{\link{organism.FacileDataSet}()},
29 | \code{\link{samples.FacileDataSet}()}
30 | }
31 | \concept{API}
32 | 


--------------------------------------------------------------------------------
/man/join_samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/samples.R
 3 | \name{join_samples}
 4 | \alias{join_samples}
 5 | \title{Filters the samples down in a dataset to ones specified}
 6 | \usage{
 7 | join_samples(x, samples = NULL, semi = FALSE, distinct.samples = FALSE)
 8 | }
 9 | \arguments{
10 | \item{x}{likely a \code{tbl_sqlite} object, but a \code{tbl_df}-like
11 | object should work as well.}
12 | 
13 | \item{samples}{a sample descriptor \code{tbl_df}-like object (likely a
14 | \code{tbl_sqlite} object) that has \code{"dataset"} and \code{"sample_id"}
15 | columns.}
16 | 
17 | \item{semi}{if \code{TRUE}, approximates a semi-join on the \code{samples},
18 | otherwise does an inner_join between \code{x} and \code{samples}
19 | (default \code{FALSE}).}
20 | }
21 | \value{
22 | joined result between \code{x} and \code{samples}
23 | }
24 | \description{
25 | Tables like \code{expression} and \code{sample_covariate} house different
26 | datapoints per sample, and we often want to only retrieve data points over
27 | a subset of samples.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/append_facile_feature_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/construction.R
 3 | \name{append_facile_feature_info}
 4 | \alias{append_facile_feature_info}
 5 | \title{Appends new features to \code{feature_info} table}
 6 | \usage{
 7 | append_facile_feature_info(
 8 |   x,
 9 |   feature_info,
10 |   type = feature_info$feature_type,
11 |   warn_existing = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{x}{The \code{FacileDataSet}}
16 | 
17 | \item{feature_info}{a table of new features that provides all columns
18 | in \code{feature_info_tbl(x)}}
19 | 
20 | \item{type}{A way to override (or set) the \code{feature_type} column of the
21 | \code{feature_info} table}
22 | }
23 | \value{
24 | invisibly returns an annotated version of the \code{feature_info}
25 | table with an \code{$added} column that is \code{TRUE} for the
26 | features that were new (and added) to the repository, or \code{FALSE} to
27 | indicate that they were already in the database.
28 | }
29 | \description{
30 | This function only adds features (feature_type, feature_id) that are not
31 | in the \code{feature_info} table already
32 | }
33 | 


--------------------------------------------------------------------------------
/man/assay_feature_info.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R
 3 | \name{assay_feature_info.FacileDataSet}
 4 | \alias{assay_feature_info.FacileDataSet}
 5 | \title{Materializes a table with all feature information for a given assay.}
 6 | \usage{
 7 | \method{assay_feature_info}{FacileDataSet}(x, assay_name, feature_ids = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{\code{FacileDataSet}}
11 | 
12 | \item{assay_name}{the name of the assay}
13 | 
14 | \item{feature_ids}{a character vector of feature_ids}
15 | }
16 | \value{
17 | a \code{tbl_sqlite} result with the feature information for the
18 | features in a specified assay
19 | }
20 | \description{
21 | DEBUG: This logic is unnecessarily complex because I make sure to collect
22 | all tables from the database as opposed to copying external tables in and
23 | doing an inner_join in the database. I'm doing this because we are getting
24 | name collisions on some of the temporary tables. We get errors like:
25 | Warning: Error in : Table pkdtpohpsu already exists.
26 | }
27 | \details{
28 | This fetches the hdf5_index for the assays as well
29 | }
30 | 


--------------------------------------------------------------------------------
/man/assay_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{assay_info}
 4 | \alias{assay_info}
 5 | \title{Fetches assay meta information for the assays stored in a FacileDataStore}
 6 | \usage{
 7 | assay_info(x, assay_name = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{FacileDataStore}}
11 | 
12 | \item{assay_name}{optional name of the assay to get information for}
13 | }
14 | \value{
15 | a tibble of meta information for the assays stored in \code{x}, with these
16 | columns:
17 | \itemize{
18 | \item \verb{assay <chr>}: Name of the assay
19 | \item \verb{assay_type <chr>}: \code{"rnaseq"}, \code{"lognorm"}, etc. Look at
20 | \code{FacileData:::.assay.types} vector
21 | \item \verb{feature_type <chr>}: A string from \code{FacileData:::.feature.types}, ie.
22 | \code{"ensgid"}, \code{"entrez"}, \code{"custom"}, etc.
23 | \item \verb{description <chr>}: string description
24 | \item \verb{nfeatures <int>}: number of features we have info for
25 | \item \verb{storage_mode <chr>}: \code{"integer"}, \code{"numeric"}
26 | }
27 | }
28 | \description{
29 | Fetches assay meta information for the assays stored in a FacileDataStore
30 | }
31 | 


--------------------------------------------------------------------------------
/man/save_custom_sample_covariates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample-covariates.R
 3 | \name{save_custom_sample_covariates}
 4 | \alias{save_custom_sample_covariates}
 5 | \title{Saves custom sample covariates to a FacileDataSet}
 6 | \usage{
 7 | save_custom_sample_covariates(
 8 |   x,
 9 |   annotation,
10 |   name = NULL,
11 |   class = "categorical",
12 |   custom_key = Sys.getenv("USER"),
13 |   file.prefix = "facile",
14 |   sample_filter_critera = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{x}{the \code{FacileDataSet}}
19 | 
20 | \item{annotation}{the annotation table of covariate values to a
21 | sample-descriptor-like table}
22 | 
23 | \item{name}{the variable name of the covariate}
24 | 
25 | \item{custom_key}{the custom key (likely userid) for the annotation}
26 | 
27 | \item{file.prefix}{Vincent uses this}
28 | 
29 | \item{sample_filter_criteria}{optional list of filtering criteria that were
30 | used to drill down into the samples we have the \code{annotation}
31 | data.frame for
32 | TODO: Figure out how to encode sample_filter_criteria into serialized
33 | (JSON) annotation file}
34 | }
35 | \description{
36 | Saves custom sample covariates to a FacileDataSet
37 | }
38 | 


--------------------------------------------------------------------------------
/man/filter_features.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/NSE-filter-features.R
 3 | \name{filter_features.FacileDataSet}
 4 | \alias{filter_features.FacileDataSet}
 5 | \title{Filter against the sample_covariate_tbl as if it were wide.}
 6 | \usage{
 7 | \method{filter_features}{FacileDataSet}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{FacileDataSet}}
11 | 
12 | \item{...}{NSE clauses to use in \code{\link[dplyr]{filter}} expressions}
13 | }
14 | \description{
15 | This feature is only really meant to be used
16 | interactively, and with extreme caution ... programmatically specifying
17 | column names in feature table, for instance, does not work right now.
18 | }
19 | \details{
20 | TODO: Use tidyeval
21 | }
22 | \seealso{
23 | Other API: 
24 | \code{\link{fetch_assay_score.FacileDataSet}()},
25 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
26 | \code{\link{fetch_sample_covariates}()},
27 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
28 | \code{\link{fetch_samples.FacileDataSet}()},
29 | \code{\link{filter_samples.FacileDataSet}()},
30 | \code{\link{organism.FacileDataSet}()},
31 | \code{\link{samples.FacileDataSet}()}
32 | }
33 | \concept{API}
34 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{tidy}
 7 | \alias{\%>\%}
 8 | \alias{arrange}
 9 | \alias{collect}
10 | \alias{distinct}
11 | \alias{filter}
12 | \alias{group_by}
13 | \alias{mutate}
14 | \alias{select}
15 | \alias{transmute}
16 | \alias{ungroup}
17 | \alias{left_join}
18 | \alias{inner_join}
19 | \alias{semi_join}
20 | \title{Objects exported from other packages}
21 | \keyword{internal}
22 | \description{
23 | These objects are imported from other packages. Follow the links
24 | below to see their documentation.
25 | 
26 | \describe{
27 |   \item{broom}{\code{\link[broom:reexports]{tidy}}}
28 | 
29 |   \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}, \code{\link[dplyr]{arrange}}, \code{\link[dplyr:compute]{collect}}, \code{\link[dplyr]{distinct}}, \code{\link[dplyr]{filter}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr:mutate-joins]{inner_join}}, \code{\link[dplyr:mutate-joins]{left_join}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr]{select}}, \code{\link[dplyr:filter-joins]{semi_join}}, \code{\link[dplyr:mutate]{transmute}}, \code{\link[dplyr:group_by]{ungroup}}}
30 | }}
31 | 
32 | 


--------------------------------------------------------------------------------
/man/cSurv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/csurv.R
 3 | \name{cSurv}
 4 | \alias{cSurv}
 5 | \alias{as_cSurv}
 6 | \alias{as_Surv}
 7 | \title{cSurv is a character representation of survival::Surv}
 8 | \usage{
 9 | as_cSurv(from)
10 | 
11 | as_Surv(from)
12 | }
13 | \description{
14 | cSurv serves as a more reliable way to use Surv objects as data.frame columns. A
15 | data.frame is supposed to be able to hold Surv columns. There are multiple special
16 | cases written into base for this. It seems the implementation is incomplete as
17 | subsetting the DF breaks the Surv object. cSurv cannot do anything but get subset
18 | and become a Surv again. In the FacileVerse we hold Surv objects as cSurv, which
19 | allows us to survive a round-trip through an EAV sample metadata table. Survival
20 | analyses can convert cSurv to Surv as needed. It is assumed that all Surv censoring
21 | is right-censored.
22 | }
23 | \examples{
24 | library(survival)
25 | x = Surv(c(14,12,3), event = c(1,0,1))
26 | y = as(x,"cSurv")
27 | z = as(y, "Surv")
28 | x2 = as.character(x)
29 | z2 = as(x2, "Surv")
30 | a = as(x, "cSurv")
31 | b = as(a, "character")
32 | c = as(b, "cSurv")
33 | d = as(c, "Surv")
34 | }
35 | \concept{cSurv}
36 | 


--------------------------------------------------------------------------------
/man/as.EAVtable.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{as.EAVtable}
 4 | \alias{as.EAVtable}
 5 | \title{Convert a \code{pData} data.frame to a melted EAV table}
 6 | \usage{
 7 | as.EAVtable(
 8 |   x,
 9 |   ignore = c("dataset", "sample_id"),
10 |   covariate_def = list(),
11 |   na.rm = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{x}{a wide \code{pData} data.frame}
16 | 
17 | \item{covariate_def}{passed to \code{\link[=eav_metadata_create]{eav_metadata_create()}} that is used to
18 | override default covariate definitions extracted from the columns of \code{x}}
19 | }
20 | \value{
21 | a melted EAV table from \code{x}
22 | }
23 | \description{
24 | Transforms a wide \code{pData} data.frame into a melted EAV table for use in
25 | a \code{FacileDataSet}. This function will also produce the list-of-list encodings
26 | that are generated from \code{\link[=eav_metadata_create]{eav_metadata_create()}} to do its thing as an
27 | attribute of the returned object.
28 | }
29 | \details{
30 | If you want to provide custom definitions for the covariates in the EAVtable
31 | that are different than the ones generated in \code{\link[=eav_metadata_create]{eav_metadata_create()}}, then
32 | provide that definition list in the \code{covariate_def} parameter.
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-feature-types.R:
--------------------------------------------------------------------------------
 1 | context("Features Types")
 2 | 
 3 | test_that("Different classes of identifiers guessed correctly", {
 4 |   expected <- tribble(  # one row per identifier, with the id_type infer_feature_type() should report
 5 |     ~id_type,      ~id,                             ~organism,
 6 |     "refseq",      "NC_000023.11",                  "unknown",
 7 |     "refseq",      "NC_000023.10",                  "unknown",
 8 |     "refseq",      "NM_001306206.1",                "unknown",
 9 |     "refseq",      "NP_001293135.1",                "unknown",
10 |     "refseq",      "NC_000023",                     "unknown",
11 |     "refseq",      "NM_001306206",                  "unknown",
12 |     "ens_gene",    "ENSG00000101811",               "Homo sapiens",
13 |     "ens_gene",    "ENSMUSG00000030088",            "Mus musculus",
14 |     "ens_gene",    "ENSMUSG00000030088.2",          "Mus musculus",
15 |     "ens_tx",      "ENST00000415585.6",             "Homo sapiens",
16 |     "ens_tx",      "ENSMUST00000113287.7",          "Mus musculus",
17 |     "ens_tx",      "ENSMUST00000113287",            "Mus musculus",
18 |     "entrez",      "85007",                         "unknown")
19 | 
20 |   res <- infer_feature_type(expected$id)
21 |   expect_equal(res$id, expected$id)  # ids must come back in input order
22 |   expect_equal(res$id_type, expected$id_type)
23 |   # expect_equal(res$source_organism, expected$organism)  # NOTE(review): organism assertion is disabled, so the ~organism column above is currently unchecked
24 | })
25 | 


--------------------------------------------------------------------------------
/man/fetch_samples.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/samples.R
 3 | \name{fetch_samples.FacileDataSet}
 4 | \alias{fetch_samples.FacileDataSet}
 5 | \title{Fetches a sample descriptor that matches the filter criterion}
 6 | \usage{
 7 | \method{fetch_samples}{FacileDataSet}(x, samples = NULL, assay = "rnaseq", ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{FacileDataSet}}
11 | 
12 | \item{...}{the NSE boolean filter criteria}
13 | }
14 | \value{
15 | a facile sample descriptor
16 | }
17 | \description{
18 | Use \code{...} as if this is a dplyr::filter call, and our
19 | sample_covariate_tbl was "wide".
20 | }
21 | \details{
22 | This is experimental, so each "term" in the filter criteria should be
23 | just one boolean operation. Multiple terms passed into \code{...} will be
24 | "AND"ed together.
25 | }
26 | \seealso{
27 | Other API: 
28 | \code{\link{fetch_assay_score.FacileDataSet}()},
29 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
30 | \code{\link{fetch_sample_covariates}()},
31 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
32 | \code{\link{filter_features.FacileDataSet}()},
33 | \code{\link{filter_samples.FacileDataSet}()},
34 | \code{\link{organism.FacileDataSet}()},
35 | \code{\link{samples.FacileDataSet}()}
36 | }
37 | \concept{API}
38 | 


--------------------------------------------------------------------------------
/man/fetch_custom_sample_covariates.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample-covariates.R
 3 | \name{fetch_custom_sample_covariates.FacileDataSet}
 4 | \alias{fetch_custom_sample_covariates.FacileDataSet}
 5 | \title{Fetches custom (user) annotations for a given user prefix}
 6 | \usage{
 7 | \method{fetch_custom_sample_covariates}{FacileDataSet}(
 8 |   x,
 9 |   samples = NULL,
10 |   covariates = NULL,
11 |   custom_key = Sys.getenv("USER"),
12 |   with_source = FALSE,
13 |   file.prefix = "facile",
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{samples}{the facile sample descriptor}
19 | 
20 | \item{custom_key}{The key to use for the custom annotation}
21 | 
22 | \item{x}{The \code{FacileDataSet}}
23 | }
24 | \value{
25 | covariate tbl
26 | }
27 | \description{
28 | Fetches custom (user) annotations for a given user prefix
29 | }
30 | \seealso{
31 | Other API: 
32 | \code{\link{fetch_assay_score.FacileDataSet}()},
33 | \code{\link{fetch_sample_covariates}()},
34 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
35 | \code{\link{fetch_samples.FacileDataSet}()},
36 | \code{\link{filter_features.FacileDataSet}()},
37 | \code{\link{filter_samples.FacileDataSet}()},
38 | \code{\link{organism.FacileDataSet}()},
39 | \code{\link{samples.FacileDataSet}()}
40 | }
41 | \concept{API}
42 | 


--------------------------------------------------------------------------------
/man/infer_feature_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/feature-types.R
 3 | \name{infer_feature_type}
 4 | \alias{infer_feature_type}
 5 | \title{Guesses the type of feature identifiers from a character vector.}
 6 | \usage{
 7 | infer_feature_type(x, with_organism = FALSE, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{a character vector of ids}
11 | }
12 | \value{
13 | data.frame with \code{id} (\code{x}) and \code{id_type}. If \code{with_organism = TRUE},
14 | a third \code{organism} column is added with a guess for the organism.
15 | }
16 | \description{
17 | We rely more on meta-information about our data types than "usual", and it's useful
18 | to know what types of identifiers we are using for different assays. This
19 | function tries to guess whether an identifier is an ensembl gene identifier,
20 | entrez id, etc.
21 | }
22 | \details{
23 | A two-column data.frame is returned for id_type and organism. Organism
24 | is "unknown" for identifiers where this can't be inferred (like Refseq).
25 | 
26 | If an identifier matches more than one id_type, the id_type is set to
27 | \code{"ambiguous"}. If the identifier doesn't match any guesses, then \code{"unknown"}.
28 | }
29 | \examples{
30 | fids <- c("NC_000023", "ENSG00000101811", "ENSMUSG00000030088.2", "85007")
31 | infer_feature_type(fids)
32 | }
33 | 


--------------------------------------------------------------------------------
/man/check_categorical.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validation.R
 3 | \name{check_categorical}
 4 | \alias{check_categorical}
 5 | \alias{assert_categorical}
 6 | \alias{test_categorical}
 7 | \title{Check to see if a vector is categorical (character or string)}
 8 | \usage{
 9 | check_categorical(
10 |   x,
11 |   any.missing = TRUE,
12 |   all.missing = TRUE,
13 |   len = NULL,
14 |   min.len = NULL,
15 |   max.len = NULL,
16 |   ...
17 | )
18 | 
19 | assert_categorical(
20 |   x,
21 |   any.missing = TRUE,
22 |   all.missing = TRUE,
23 |   len = NULL,
24 |   min.len = NULL,
25 |   max.len = NULL,
26 |   ...,
27 |   .var.name = vname(x),
28 |   add = NULL
29 | )
30 | 
31 | test_categorical(x, ...)
32 | }
33 | \arguments{
34 | \item{x}{a vector of things}
35 | 
36 | \item{any.missing}{are vectors with missing values allowed? Default is \code{TRUE}}
37 | 
38 | \item{all.missing}{are vectors with only missing values allowed? Default is \code{TRUE}}
39 | 
40 | \item{len}{expected length of \code{x}. If provided, overrides \code{min.len} and
41 | \code{max.len}. Defaults to \code{NULL}.}
42 | 
43 | \item{min.len}{minimum length for \code{x}}
44 | 
45 | \item{max.len}{maximum length for \code{x}}
46 | 
47 | \item{...}{dots}
48 | }
49 | \description{
50 | Check to see if a vector is categorical (character or string)
51 | }
52 | 


--------------------------------------------------------------------------------
/man/eav_metadata_merge.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{eav_metadata_merge}
 4 | \alias{eav_metadata_merge}
 5 | \title{Merge inferred and explicit covariate column metadata.}
 6 | \usage{
 7 | eav_metadata_merge(default_def, covariate_def = list())
 8 | }
 9 | \arguments{
10 | \item{default_def}{A list of covariate-definition-lists, as would be returned
11 | from \code{\link[=eav_metadata_create]{eav_metadata_create()}}}
12 | 
13 | \item{covariate_def}{list of additional covariate info, such as 'label'. The
14 | variables defined here (defined by \code{names(covariate_def)}) need not be
15 | identical to \code{names(default_def)}.}
16 | }
17 | \value{
18 | list of column metadata lists
19 | }
20 | \description{
21 | Takes a list of (perhaps) default sets of entity-attribute metadata, as would
22 | be generated from \code{eav_metadata_create(pData(eSet), covariate_def = NULL)},
23 | and pulls out the sister custom-definitions from the \code{covariate_def}
24 | attribute definition list.
25 | }
26 | \details{
27 | If the \code{covariate_def} list-of-lists has information for variables not
28 | found in \code{default_def}, ie. the definitions returned from
29 | \code{setdiff(names(covariate_def), names(default_def))} will be added to
30 | the object returned from this function.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/facet_frame.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R, R/api.R
 3 | \name{facet_frame.FacileDataSet}
 4 | \alias{facet_frame.FacileDataSet}
 5 | \alias{facet_frame}
 6 | \title{Retrieves grouping table for samples within a FacileDataSet.}
 7 | \usage{
 8 | \method{facet_frame}{FacileDataSet}(x, name = "default", ...)
 9 | 
10 | facet_frame(x, name = "default", ...)
11 | }
12 | \arguments{
13 | \item{x}{An object of a class implementing the FacileInterface}
14 | 
15 | \item{name}{The specific facet (grouping) definition to return. Note that
16 | this parameter isn't yet used. Only one facet table was originally
17 | defined for each FacileDataSet, but we want to enable different facet
18 | definitions to be used in the future.}
19 | }
20 | \value{
21 | A \code{tibble} that defines the \verb{dataset,sample_id} tuples that belong
22 | to each "facet" (group).
23 | }
24 | \description{
25 | It is natural to define subgroups of samples within larger datasets.
26 | This function returns grouping definitions (which we call "facets") for
27 | a \code{FacileDataStore}.
28 | }
29 | \seealso{
30 | Other FacileInterface: 
31 | \code{\link{fetch_assay_score}()},
32 | \code{\link{fetch_sample_covariates}()},
33 | \code{\link{fetch_sample_statistics}()},
34 | \code{\link{samples}()}
35 | }
36 | \concept{FacileInterface}
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test-FacileDataSet.R:
--------------------------------------------------------------------------------
 1 | context("Basic FacileDataSet functions")
 2 | 
 3 | # Shared fixture: the tests in this file all query the example dataset.
 4 | FDS <- exampleFacileDataSet()
 5 | test_that("Fetching various database tables from FacileDataSet", {
 6 |   # Each table accessor is expected to return an object that is a `tbl`.
 7 |   covariates <- sample_covariate_tbl(FDS)
 8 |   expect_true(is(covariates, "tbl"))
 9 |   sample_stats <- sample_stats_tbl(FDS)
10 |   expect_true(is(sample_stats, "tbl"))
11 | 
12 |   genes <- gene_info_tbl(FDS)
13 |   expect_true(is(genes, "tbl"))
14 | })
15 | 
16 | test_that("compound filter criteria == method chaining with filter_samples()", {
17 |   # Both criteria supplied in a single filter_samples() call ...
18 |   compound <- filter_samples(FDS, indication == "CRC", sex == "f")
19 |   # ... versus one criterion per call, applied one after the other.
20 |   crc_only <- filter_samples(FDS, indication == "CRC")
21 |   chained <- filter_samples(crc_only, sex == "f")
22 |   expect_setequal(chained$sample_id, compound$sample_id)
23 | })
24 | 
25 | test_that("filter_samples filters against dataset and sample_id columns", {
26 |   # Reference table: all samples with their covariates, filtered with filter().
27 |   reference <- with_sample_covariates(samples(FDS))
28 | 
29 |   # `dataset` can be used as a criterion next to a sample covariate.
30 |   observed <- filter_samples(FDS, dataset == "BLCA", sex == "f")
31 |   expected <- filter(reference, dataset == "BLCA", sex == "f")
32 |   expect_setequal(observed$sample_id, expected$sample_id)
33 | 
34 |   picked_ids <- sample(reference$sample_id, 5)
35 |   observed <- filter_samples(FDS, sample_id %in% picked_ids)
36 |   expected <- filter(reference, sample_id %in% picked_ids)
37 |   expect_setequal(observed$sample_id, expected$sample_id)
38 | })
39 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |     paths:
 6 |       - 'R/**'
 7 |       - 'DESCRIPTION'
 8 |       - 'NAMESPACE'
 9 |       - 'src/**'
10 |       - 'tests/**'
11 |   pull_request:
12 |   schedule:
13 |     # Rerun checks daily, after facilebio/facilebio_base_extra is rebuilt.
14 |     # This will catch failures due to changes in functionality of packages
15 |     # we depend on.
16 |     # facilebio_base_extra builds every day at 4AM pacific time (11 AM UTC), so
17 |     # we will build this an hour after that (the same time as facilebio/facilebio)
18 |     - cron: '0 12 * * *'
19 | 
20 | name: R-CMD-check
21 | 
22 | jobs:
23 |   R-CMD-check:
24 |     runs-on: ubuntu-latest
25 |     container: facilebio/facilebio
26 |     env:
27 |       ACTIONS_ALLOW_UNSECURE_COMMANDS: true
28 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
29 | 
30 |     steps:
31 |       - name: Checkout Repository
32 |         uses: actions/checkout@v2
33 | 
34 |       - name: Setup R
35 |         uses: r-lib/actions/setup-r@v1
36 |         with:
37 |           install-r: false
38 | 
39 |       - name: Install dependencies
40 |         run: remotes::install_deps(dependencies = TRUE, upgrade = FALSE)
41 |         shell: Rscript {0}
42 | 
43 |       - name: Check
44 |         run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
45 |         shell: Rscript {0}
46 | 


--------------------------------------------------------------------------------
/man/spread_assay_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/assay-data.R
 3 | \name{spread_assay_data}
 4 | \alias{spread_assay_data}
 5 | \title{Takes a result from fetch_expression and spreads out genes across columns}
 6 | \usage{
 7 | spread_assay_data(
 8 |   x,
 9 |   assay_name,
10 |   key = c("name", "feature_id"),
11 |   value = c("cpm", "value", "count"),
12 |   .fds = fds(x)
13 | )
14 | }
15 | \arguments{
16 | \item{x}{facile expression result from \code{fetch_expression}}
17 | 
18 | \item{key}{the column from the long-form \code{fetch_expression} table
19 | to put in the columns of the outgoing data.frame that the values are
20 | "spread into"}
21 | 
22 | \item{value}{the value column to spread into the \code{key} columns}
23 | 
24 | \item{.fds}{the \code{FacileDataSet}}
25 | }
26 | \value{
27 | a more stout \code{x} with the expression values spread across
28 | columns.
29 | }
30 | \description{
31 | This is a convenience function, and will try to guess what you mean if you
32 | don't explicitly specify which columns to spread and what to call them.
33 | With that mind set, if we find a cpm and symbol column, we will use them
34 | because those are the thing you will likely want to use for exploratory
35 | data analysis if they're in the incoming dataset. If those columns aren't
36 | found, then we'll pick the feature_id and count column.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/normalize_assay_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalize_assay_data.R
 3 | \name{normalize_assay_data}
 4 | \alias{normalize_assay_data}
 5 | \title{Helper functions to normalize assay data into log2 space.}
 6 | \usage{
 7 | normalize_assay_data(
 8 |   x,
 9 |   features,
10 |   samples,
11 |   batch = NULL,
12 |   log = TRUE,
13 |   prior.count = 0.1,
14 |   main = NULL,
15 |   verbose = FALSE,
16 |   ...,
17 |   .fds = NULL
18 | )
19 | }
20 | \arguments{
21 | \item{x}{A matrix of raw/unnormalized assay data retrieved from
22 | within the \code{fetch_assay_data()} itself.}
23 | 
24 | \item{features}{a feature descriptor data.frame that includes the
25 | feature_id's of the rows in \code{x}, as well as the assay name/type they
26 | were pulled from. We assert that all features come from the same assay
27 | type, and the rows here match 1:1 the rows in \code{x}.}
28 | 
29 | \item{samples}{a sample descriptor for the columns in \code{x}. Rows here
30 | should match columns in \code{x} 1:1.}
31 | 
32 | \item{batch, main}{parameters sent to \code{\link[=remove_batch_effects]{remove_batch_effects()}} after}
33 | }
34 | \description{
35 | This is defined for the assay_types defined within this package. If you are
36 | writing a package to handle new types of data, you need to define a
37 | \code{normalize_assay_matrix.ASSAY_TYPE} function. This is experimental.
38 | }
39 | 


--------------------------------------------------------------------------------
/R/test-helpers.R:
--------------------------------------------------------------------------------
 1 | #' Load the example FacileDataSet that ships with this package
 2 | #'
 3 | #' The bundled dataset is a subset of the TCGA data from the BLCA and COAD
 4 | #' indications, installed under `extdata/exampleFacileDataSet`.
 5 | #'
 6 | #' @export
 7 | exampleFacileDataSet <- function() {
 8 |   FacileDataSet(
 9 |     system.file("extdata", "exampleFacileDataSet", package = "FacileData"))
10 | }
11 | 
12 | #' Example data used for unit testing
13 | #'
14 | #' @export
15 | #' @rdname test-helpers
16 | example_sample_covariates <- function() {
17 |   readRDS(system.file(
18 |     "testdata", "test-sample-covariates.rds", package = "FacileData"
19 |   ))
20 | }
21 | 
22 | #' @export
23 | #' @rdname test-helpers
24 | #' @param file.path When `TRUE`, the path to the yaml file is returned instead
25 | #'   of its parsed contents (the default).
26 | #' @return The parsed list-of-list meta definition, or, when `file.path = TRUE`,
27 | #'   the path to the yaml file where it is defined.
28 | example_meta <- function(file.path=FALSE) {
29 |   yaml_path <- system.file("testdata", "expected-meta.yaml",
30 |                            package = "FacileData")
31 |   if (isTRUE(file.path)) {
32 |     return(yaml_path)
33 |   }
34 |   yaml::read_yaml(yaml_path)
35 | }
36 | 
37 | #' @export
38 | #' @importFrom yaml read_yaml
39 | #' @rdname test-helpers
40 | #' @return the `sample_covariate` list-of-list definitions for the example
41 | #'   `pData` returned from [example_sample_covariates()]
42 | example_sample_covariate_definitions <- function() {
43 |   # Only the `sample_covariate` entry of the parsed meta file is returned.
44 |   example_meta(file.path = FALSE)$sample_covariate
45 | }
46 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches: main
 4 |     paths:
 5 |       - 'vignettes/**'
 6 |       - 'man/**'
 7 |       - 'README.**'
 8 |   schedule:
 9 |   # Rerun pkgdown after the (expected) time the package is rebuilt each day.
10 |   # Let's give it 30 mins
11 |   - cron: '30 12 * * *'
12 | 
13 | name: pkgdown
14 | 
15 | jobs:
16 |   pkgdown:
17 |     runs-on: ubuntu-latest
18 |     container: facilebio/facilebio
19 |     env:
20 |       ACTIONS_ALLOW_UNSECURE_COMMANDS: true
21 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
22 |       NOT_CRAN: true
23 |       CI: true
24 |       # For some reason I need to put these
25 |       GIT_AUTHOR_NAME: Steve Lianoglou
26 |       GIT_AUTHOR_EMAIL: slianoglou@gmail.com
27 |       GIT_COMMITTER_NAME: Steve Lianoglou
28 |       GIT_COMMITTER_EMAIL: slianoglou@gmail.com
29 | 
30 |     steps:
31 |       - name: Checkout Repository
32 |         uses: actions/checkout@v2
33 | 
34 |       - name: Setup R
35 |         uses: r-lib/actions/setup-r@v1
36 |         with:
37 |           install-r: false
38 | 
39 |       # pandoc is already installed in the docker container
40 |       # - uses: r-lib/actions/setup-pandoc@master
41 | 
42 |       - name: Install dependencies
43 |         run: remotes::install_deps(dependencies = TRUE, upgrade = FALSE)
44 |         shell: Rscript {0}
45 | 
46 |       - name: Install package
47 |         run: R CMD INSTALL .
48 | 
49 |       - name: Deploy package
50 |         run: pkgdown::deploy_to_branch(new_process = FALSE)
51 |         shell: Rscript {0}
52 | 


--------------------------------------------------------------------------------
/man/meta-info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/FacileDataSet.R
 3 | \name{meta_file}
 4 | \alias{meta_file}
 5 | \alias{meta_info}
 6 | \alias{default_assay.FacileDataSet}
 7 | \alias{dataset_definitions}
 8 | \title{Path to the meta information YAML file}
 9 | \usage{
10 | meta_file(x)
11 | 
12 | meta_info(x, fn = meta_file(x))
13 | 
14 | \method{default_assay}{FacileDataSet}(x)
15 | 
16 | dataset_definitions(x, as.list = TRUE)
17 | }
18 | \arguments{
19 | \item{x}{A \code{FacileDataSet}}
20 | 
21 | \item{fn}{The path to the \code{meta.yaml} file.}
22 | 
23 | \item{as.list}{boolean, if \code{TRUE} (default) returns a list, otherwise
24 | summarizes results into a tibble.}
25 | }
26 | \value{
27 | The \code{meta.yaml} file parsed into a list-of-lists representation
28 | 
29 | meta information about the datasets in \code{x} as a \code{list} or \code{tibble}
30 | }
31 | \description{
32 | Lots of useful information is stored in a \code{FacileDataSet}'s \code{meta.yaml} file.
33 | This function returns all of that in a list-of-lists
34 | 
35 | A \code{FacileDataSet} can contain assay data from different "datasets" (such
36 | as different cancer indications from the TCGA). This function returns
37 | description and URL information that describes these datasets in more detail,
38 | which is specified in the FacileDataSet's \code{meta.yaml} file.
39 | }
40 | \seealso{
41 | Other FacileDataSet: 
42 | \code{\link{FacileDataSet}()},
43 | \code{\link{dbfn}()},
44 | \code{\link{hdf5fn}()}
45 | }
46 | \concept{FacileDataSet}
47 | 


--------------------------------------------------------------------------------
/man/FacileData-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \docType{package}
 4 | \name{FacileData-package}
 5 | \alias{FacileData}
 6 | \alias{FacileData-package}
 7 | \title{FacileData: A fluent API for accessing multi-assay high-throughput genomics data}
 8 | \description{
 9 | Defines the "FacileData" API over multi-assay, high-throughput
10 |   genomics data. The FacileData API is a fluent tidy-like grammar that
11 |   facilitates exploratory data analysis. This package also defines a
12 |   "FacileDataSet" class, which is a reference implementation of the "FacileData"
13 |   API that uses SQLite and HDF5 files to store arbitrarily large datasets and
14 |   provide fast and efficient access to arbitrary subsets of these data without
15 |   loading it all into memory.
16 | }
17 | \seealso{
18 | Useful links:
19 | \itemize{
20 |   \item \url{https://github.com/facilebio/FacileData}
21 |   \item Report bugs at \url{https://github.com/facilebio/FacileData/issues}
22 | }
23 | 
24 | }
25 | \author{
26 | \strong{Maintainer}: Steve Lianoglou \email{lianoglou@dnli.com} (\href{https://orcid.org/0000-0002-0924-1754}{ORCID})
27 | 
28 | Authors:
29 | \itemize{
30 |   \item Vincent Rouilly \email{rouilly.vincent@gene.com}
31 |   \item Peter Haverty (@phaverty on github)
32 | }
33 | 
34 | Other contributors:
35 | \itemize{
36 |   \item Jonathan Carrol \email{jono@jcarroll.com.au} [contributor]
37 |   \item Denali Therapeutics (Copyright 2019) [copyright holder, funder]
38 |   \item Genentech (Copyright 2016 - 2018) [copyright holder, funder]
39 | }
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/man/fetch_sample_statistics.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample-info.R
 3 | \name{fetch_sample_statistics.FacileDataSet}
 4 | \alias{fetch_sample_statistics.FacileDataSet}
 5 | \title{Fetch the sample statistics for sets of samples in the warehouse}
 6 | \usage{
 7 | \method{fetch_sample_statistics}{FacileDataSet}(
 8 |   x,
 9 |   samples = NULL,
10 |   semi = TRUE,
11 |   assay_name = default_assay(x),
12 |   ...
13 | )
14 | }
15 | \arguments{
16 | \item{x}{A \code{FacileDataSet} object}
17 | 
18 | \item{samples}{a data.frame or tbl_sqlite that has dataset and sample_id
19 | columns}
20 | 
21 | \item{semi}{use \code{semi_join}? I've found this to be slow sometimes in
22 | SQLite for some reason}
23 | 
24 | \item{assay_name}{parameter added to keep old API same with new "unhinged"
25 | FacileDataSets.}
26 | }
27 | \value{
28 | a tbl_df or tbl_sqlite result from the sample_stats table
29 | }
30 | \description{
31 | NOTE: this function needs the axe. It has been changed to use the
32 | assay_sample_info_table, but the way we handle this with the new unhinged
33 | assay needs to change.
34 | }
35 | \seealso{
36 | Other API: 
37 | \code{\link{fetch_assay_score.FacileDataSet}()},
38 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
39 | \code{\link{fetch_sample_covariates}()},
40 | \code{\link{fetch_samples.FacileDataSet}()},
41 | \code{\link{filter_features.FacileDataSet}()},
42 | \code{\link{filter_samples.FacileDataSet}()},
43 | \code{\link{organism.FacileDataSet}()},
44 | \code{\link{samples.FacileDataSet}()}
45 | }
46 | \concept{API}
47 | 


--------------------------------------------------------------------------------
/inst/extdata/test/sample-meta-definitions.yaml:
--------------------------------------------------------------------------------
 1 | sex:
 2 |   class: clinical
 3 |   type: categorical
 4 |   levels: ['m', 'f']
 5 | stage:
 6 |   class: clinical
 7 |   type: categorical
 8 | sample_type:
 9 |   class: clinical
10 |   type: categorical
11 |   levels: ['normal', 'tumor']
12 | indication:
13 |   class: tumor_classification
14 |   type: categorical
15 | subtype_expression:
16 |   class: tumor_classification
17 |   type: categorical
18 | subtype_breast_receptor_status:
19 |   class: tumor_classification
20 |   type: categorical
21 | subtype_microsatellite_instability:
22 |   class: tumor_classification
23 |   type: categorical
24 | subtype_crc_cms:
25 |   class: tumor_classification
26 |   type: categorical
27 | is_primary:
28 |   class: tumor_classification
29 |   type: categorical
30 | histology:
31 |   class: tumor_classification
32 |   type: categorical
33 | location:
34 |   class: tumor_classification
35 |   type: categorical
36 | cohort:
37 |   class: clinical
38 |   type: categorical
39 | smoking_status:
40 |   class: clinical
41 |   type: categorical
42 | has_lymph_met:
43 |   class: clinical
44 |   type: categorical
45 | has_liver_met:
46 |   class: clinical
47 |   type: categorical
48 | has_bone_met:
49 |   class: clinical
50 |   type: categorical
51 | PFS:
52 |   class: response
53 |   type: right_censored
54 | OS:
55 |   class: response
56 |   type: right_censored
57 | BCOR:
58 |   class: response
59 |   type: categorical
60 |   levels: ["CR", "PR", "SD", "PD", "NE"]
61 | IC:
62 |   class: IHC
63 |   type: categorical
64 |   levels: ["IC0", "IC1", "IC2", "IC3"]
65 | TC:
66 |   class: IHC
67 |   type: categorical
68 |   levels: ["TC0", "TC1", "TC2", "TC3"]
69 | 


--------------------------------------------------------------------------------
/man/summary.eav_covariates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample-covariates.R
 3 | \name{summary.eav_covariates}
 4 | \alias{summary.eav_covariates}
 5 | \title{Provides a summary table of sample covariates.}
 6 | \usage{
 7 | \method{summary}{eav_covariates}(object, expanded = FALSE, droplevels = TRUE, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{A sample covariate table, the likes returned from
11 | \code{\link[=fetch_sample_covariates]{fetch_sample_covariates()}}.}
12 | 
13 | \item{expanded}{includes details (rows) for each covariate per level
14 | (or quantile), depending on the covariates \code{"class"} attribute.}
15 | }
16 | \value{
17 | a tibble of summary sample covariate information with the following
18 | columns:
19 | \itemize{
20 | \item \code{variable}: name of the variable
21 | \item \code{class}: class of variable (real, categorical)
22 | \item \code{nsamples}: the number of samples that have this variable defined
23 | \item \code{level}: the level (or quantile) of the covariate
24 | (included only when \code{expanded == TRUE})
25 | \item \code{ninlevel}: the number of samples with this covariate value
26 | (included only when \code{expanded == TRUE})
27 | }
28 | }
29 | \description{
30 | Summarizes a set of sample covariates (returned from
31 | \code{\link[=fetch_sample_covariates]{fetch_sample_covariates()}}) at different granularities.
32 | }
33 | \examples{
34 | fds <- exampleFacileDataSet()
35 | covs <- fetch_sample_covariates(fds)
36 | smry <- summary(covs)
37 | details <- summary(covs, expanded = TRUE)
38 | catdeetz <- covs \%>\%
39 |   filter(class == "categorical") \%>\%
40 |   summary(expanded = TRUE)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/cast_covariate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{cast_covariate}
 4 | \alias{cast_covariate}
 5 | \title{Casts the character EAV values to their R-native defined types.}
 6 | \usage{
 7 | cast_covariate(covariate, values, cov.def, .fds)
 8 | }
 9 | \arguments{
10 | \item{covariate}{the name of the covariate}
11 | 
12 | \item{values}{the covariate values (which is a \code{character}) as it is
13 | pulled from the database.}
14 | 
15 | \item{cov.def}{the un-yamled covariate definitions, if missing we rely on
16 | pulling this out from the \code{FacileDataSet} object \code{.fds}}
17 | 
18 | \item{.fds}{If \code{missing(cov.def)}, this is the \code{FacileDataSet} to
19 | get the covariate definitions from.}
20 | }
21 | \value{
22 | values cast to appropriate type if a valid definition was found for
23 | \code{covariate}, otherwise values is returned "as is". Most of the time
24 | this is a single vector, but others it can be a data.frame (for
25 | \code{right_censored} data, for instance)
26 | }
27 | \description{
28 | For most things, a single value will be returned from each cast, but in the
29 | case of "time_to_event" data, the value is expanded to a two column
30 | data.frame with a \verb{tte_<covariate>} column for time to event, and an
31 | \verb{event_<covariate>} column to indicate event (1) or right censored (2).
32 | }
33 | \details{
34 | The mechanics of how values in the \code{sample_covariate} table are converted
35 | into R objects are handled by the information stored in the
36 | \code{FacileDataSet}'s \code{meta.yaml} file.
37 | }
38 | \seealso{
39 | \code{\link[=covariate_meta_info]{covariate_meta_info()}}, \code{\link[=covariate_definitions]{covariate_definitions()}}
40 | }
41 | 


--------------------------------------------------------------------------------
/R/sample-info.R:
--------------------------------------------------------------------------------
 1 | #' Fetch the sample statistics for sets of samples in the warehouse
 2 | #'
 3 | #' NOTE: this function needs the axe. It has been changed to use the
 4 | #' assay_sample_info_table, but the way we handle this with the new unhinged
 5 | #' assay needs to change. A warning announcing its removal is emitted on use.
 6 | #'
 7 | #' @export
 8 | #' @param x A \code{FacileDataSet} object
 9 | #' @param samples a data.frame or tbl_sqlite that has dataset and sample_id
10 | #'   columns. If \code{NULL} (default), no sample filtering is applied.
11 | #' @param semi use \code{semi_join}? I've found this to be slow sometimes in
12 | #'   SQLite for some reason
13 | #' @param assay_name parameter added to keep old API same with new "unhinged"
14 | #'   FacileDataSets. Must be one of \code{assay_names(x)}.
15 | #' @return a tbl_df or tbl_sqlite result from the sample_stats table
16 | #' @family API
17 | fetch_sample_statistics.FacileDataSet <- function(x, samples = NULL,
18 |                                                   semi = TRUE,
19 |                                                   assay_name = default_assay(x),
20 |                                                   ...) {
21 |   warning("`fetch_sample_statistics` will be removed from FacileData API\n",
22 |           "See Issue #2\n",
23 |           "https://github.com/denalitherapeutics/FacileData/issues/2",
24 |           immediate. = TRUE)
25 | 
26 |   assert_string(assay_name)
27 |   stopifnot(assay_name %in% assay_names(x))
28 | 
29 |   ss <- assay_sample_info_tbl(x) %>%
30 |     filter(assay == assay_name) %>%
31 |     set_fds(x)
32 | 
33 |   if (is.null(samples)) {
34 |     out <- ss
35 |   } else {
36 |     # TODO: Need to write unit tests here to exercise what we want to do with
37 |     #       these results when samples are provided
38 |     samples <- assert_sample_subset(samples)
39 |     out <- join_samples(ss, samples, semi)
40 |   }
41 | 
42 |   set_fds(out, x)
43 | }
44 | 


--------------------------------------------------------------------------------
/man/biocbox.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R, R/biocbox.R
 3 | \name{biocbox}
 4 | \alias{biocbox}
 5 | \alias{biocbox.facile_frame}
 6 | \title{Materialize a Bioconductor assay container from some facile object.}
 7 | \usage{
 8 | biocbox(x, ...)
 9 | 
10 | \method{biocbox}{facile_frame}(
11 |   x,
12 |   class = NULL,
13 |   assay_name = NULL,
14 |   features = NULL,
15 |   sample_covariates = NULL,
16 |   feature_covariates = NULL,
17 |   normalized = FALSE,
18 |   with_fds = FALSE,
19 |   custom_key = Sys.getenv("USER"),
20 |   ...
21 | )
22 | }
23 | \arguments{
24 | \item{x}{A facile object}
25 | 
26 | \item{sample_covariates}{If \code{NULL} (default), all sample covariates will
27 | be included over samples in x. If a data.frame, we will treat the
28 | extra columns as custom covariates, and include them in the outgoing
29 | box, along with the internal ones.}
30 | }
31 | \description{
32 | Most often, this will be from some facile_frame to create a Bioconductor
33 | assay container object, but this function can be overloaded for other
34 | purposes.
35 | }
36 | \details{
37 | The FacileAnalysis package, for example, uses this function to materialize
38 | bioconductor objects of different flavors from different analysis results,
39 | ie. a DGEList, or perhaps a limma fit object, etc.
40 | }
41 | \section{facile_frame}{
42 | 
43 | We can materialize a Bioconductor data container for a given assay over a set
44 | of samples.
45 | 
46 | There is a default bioc class provided for different assay types, however
47 | the class type can be overridden by the \code{class} parameter. This function
48 | simply puts the assay data requested into the container. There is no
49 | special functionality that happens downstream of that (for instance,
50 | DGEList lib.size calculated from the data that made its way into the DGEList)
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-replace_na.R:
--------------------------------------------------------------------------------
 1 | context("freplace_na")
 2 | 
 3 | # Value the package substitutes for NA's in categorical columns by default.
 4 | .def.categorical <- FacileData:::defaults.freplace_na$categorical
 5 | test_that("freplace_na handles factors", {
 6 |   df <- data.frame(
 7 |     a = rnorm(10),
 8 |     b = letters[1:10],
 9 |     c = factor(LETTERS[1:10]))
10 |   df[3, c("b", "c")] <- NA
11 | 
12 |   filled <- freplace_na(df)
13 |   expect_true(all(complete.cases(filled)))
14 |   # Factor levels should be the original ones plus the replacement value.
15 |   want_levels <- c(LETTERS[seq_len(nrow(df))], .def.categorical)
16 |   checkmate::expect_factor(filled$c, levels = want_levels)
17 | })
18 | 
19 | test_that("freplace_na errors on numerics unless given explicit replacement", {
20 |   df <- data.frame(
21 |     a = rnorm(10),
22 |     b = letters[1:10],
23 |     c = factor(LETTERS[1:10]))
24 |   df[3, ] <- NA
25 |   # Without an explicit numeric default, the NA in `a` is expected to raise.
26 |   expect_error(freplace_na(df), "numerics.*number")
27 |   filled <- freplace_na(df, defaults = list(numeric = -1))
28 |   expect_equal(filled$a[3], -1)
29 |   expect_equal(filled$b[3], .def.categorical)
30 |   expect_equal(as.character(filled$c[3]), .def.categorical)
31 | })
32 | 
33 | test_that("freplace_na handles custom values per column", {
34 |   df <- data.frame(
35 |     a = rnorm(10),
36 |     b = letters[1:10],
37 |     c = factor(LETTERS[1:10]))
38 |   df[3, c("b", "c")] <- NA
39 |   # `b` gets its own replacement; `c` should fall back to the default value.
40 |   filled <- freplace_na(df, list(b = "bee"))
41 |   expect_equal(filled$b[3], "bee")
42 |   expect_equal(as.character(filled$c[3]), .def.categorical)
43 | })
44 | 
45 | test_that("freplace_na ignores specified columns", {
46 |   df <- data.frame(
47 |     a = rnorm(10),
48 |     b = letters[1:10],
49 |     c = factor(LETTERS[1:10]))
50 |   df[3, ] <- NA
51 | 
52 |   # The NA in numeric column `a` would normally trigger an error; listing `a`
53 |   # in `ignore` asks freplace_na to skip that column.
54 |   filled <- freplace_na(df, list(b = "bee"), ignore = "a")
55 |   checkmate::expect_scalar_na(filled$a[3])
56 |   expect_equal(filled$b[3], "bee")
57 |   expect_equal(as.character(filled$c[3]), .def.categorical)
58 | })
59 | 


--------------------------------------------------------------------------------
/man/freplace_na.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/replace_na.R
 3 | \name{freplace_na}
 4 | \alias{freplace_na}
 5 | \title{Replaces NA's with specified values.}
 6 | \usage{
 7 | freplace_na(
 8 |   data,
 9 |   replace = list(),
10 |   defaults = list(),
11 |   ignore = character(),
12 |   ...
13 | )
14 | }
15 | \arguments{
16 | \item{data}{the thing that has NA's in it (a data.frame or vector)}
17 | 
18 | \item{replace}{a named list of elements to use for custom replacement values}
19 | 
20 | \item{defaults}{if named elements in \code{data} do not appear in \code{replace}, you
21 | can provide default values for categories of parameters (ie.
22 | \code{"categorical"} or \code{"numeric"}), otherwise
23 | FacileData:::defaults.freplace_na will be used.}
24 | }
25 | \value{
26 | an NA-replaced version of \code{data}
27 | }
28 | \description{
29 | Some of the downstream uses of a FacileDataStore can throw problems when NA's
30 | are found in data or covariates, so we often want to fill in NA's with
31 | non-NA markers of missing values. Note that unless specified otherwise
32 | (using the \code{replace} and \code{defaults} parameters),
33 | }
34 | \details{
35 | Depending on the atomic type of the thing that NA's are being replaced with,
36 | a default value is assumed. These can be overridden by using the \code{defaults}
37 | parameter, or specifically by column (or list) names via the \code{replace}
38 | parameter.
39 | 
40 | Missing values (NA's) come up often in FacileDataStores since we often use
41 | them to include data from multiple datasets, which induces "ragged" (sparse)
42 | covariate (pData) entries. In man
43 | }
44 | \examples{
45 | data <- data.frame(
46 |   a = rnorm(10),
47 |   b = letters[1:10],
48 |   c = factor(LETTERS[1:10]))
49 | data[3, ] <- NA
50 | r1 <- freplace_na(data, list(b = "bee"), ignore = "a")
51 | r2 <- freplace_na(data, list(b = "bee"), defaults = list(numeric = -Inf))
52 | }
53 | 


--------------------------------------------------------------------------------
/man/simple-eav-decode-functions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{eav_decode_real}
 4 | \alias{eav_decode_real}
 5 | \alias{eav_encode_real}
 6 | \alias{eav_encode_logical}
 7 | \alias{eav_decode_logical}
 8 | \alias{eav_encode_cSurv}
 9 | \alias{eav_decode_cSurv}
10 | \alias{eav_decode_categorical}
11 | \alias{eav_encode_categorical}
12 | \title{Entity-attribute-value decoding for real values.}
13 | \usage{
14 | eav_decode_real(x, attrname = character(), def = list(), ...)
15 | 
16 | eav_encode_real(x, ...)
17 | 
18 | eav_encode_logical(x, ...)
19 | 
20 | eav_decode_logical(x, attrname = character(), def = list(), ...)
21 | 
22 | eav_encode_cSurv(x, ...)
23 | 
24 | eav_decode_cSurv(x, attrname = character(), def = list(), ...)
25 | 
26 | eav_decode_categorical(
27 |   x,
28 |   attrname = character(),
29 |   def = list(),
30 |   droplevels = TRUE,
31 |   ...
32 | )
33 | 
34 | eav_encode_categorical(x, ...)
35 | }
36 | \arguments{
37 | \item{x}{the values column from the \code{EAV} table for this covariate}
38 | 
39 | \item{attrname}{the name of "attribute" (covariate) in the EAV table.}
40 | 
41 | \item{def}{the \code{covariate_definition} list for this covariate}
42 | }
43 | \value{
44 | a \code{numeric} vector of \code{length(x)}
45 | }
46 | \description{
47 | This is a simple function to handle converting numeric values in the EAV
48 | table to numeric data in R.
49 | 
50 | This is essentially a pass-through function for categorical/character
51 | values in the EAV table. If the \code{def} list contains a \code{levels} entry, then
52 | the returned value is converted to a factor, with the levels in the order
53 | as defined in \code{def$levels}. If more levels appear in \code{x} than exist in
54 | \code{def$levels} they are appended to the end of the factor levels in
55 | alphabetical order. If more levels are defined in \code{def$levels} than appear
56 | in \code{x}, they are by default dropped, set \code{droplevels = FALSE} to keep them.
57 | }
58 | 


--------------------------------------------------------------------------------
/inst/testdata/generate-TCGA-tesdata.R:
--------------------------------------------------------------------------------
 1 | # Using FacileTCGADataSet to create some testdata
 2 | library(FacileTCGADataSet)
 3 | library(magrittr)
 4 | tcga <- FacileTCGADataSet()
 5 | 
 6 | ## let's get some 20 samples
 7 | set.seed(0xBEEF)
 8 | bsamples.all <- tcga %>%
 9 |   filter_samples(indication %in% c("BLCA", "BRCA")) %>%
10 |   with_sample_covariates %>%
11 |   filter(sample_type != 'tumor_metastatic')
12 | 
13 | bsamples <- bsamples.all %>%
14 |   group_by(dataset, sample_type) %>%
15 |   sample_n(5) %>%
16 |   ungroup %>%
17 |   set_fds(tcga)
18 | 
19 | # pData for testing entity-attribute-value encodings
20 | # Create a covariate pData object with non-default factor levels to test
21 | scovs <- bsamples %>%
22 |   mutate(stage = factor(stage),
23 |          sex = factor(sex, c("male", "female"))) %>%
24 |   select(dataset, sample_id, stage, sex, age, sample_type,
25 |          subtype_molecular_bladder, subtype_receptor_breast,
26 |          tte_OS, event_OS)
27 | 
28 | # Let's fill the categorical variables with all levels, even though our sampling
29 | # can't possibly do that.
30 | scovs %<>% mutate(stage = sub("[ab]$", "", stage))
31 | scovs %<>% mutate(stage = factor(stage, paste("stage", c("i", "ii", "iii", "iv"))))
32 | is.blca.tumor <- with(bsamples, dataset == "BLCA" & sample_type == "tumor")
33 | is.brca.tumor <- with(bsamples, dataset == "BRCA" & sample_type == "tumor")
34 | blca.sub.lvls <- c("luminal", "basal")
35 | brca.sub.lvls <- c("ER+/PR+", "Her2+", "TNBC")
36 | scovs %<>%
37 |   mutate(subtype_molecular_bladder = ifelse(
38 |     is.blca.tumor,
39 |     sample(blca.sub.lvls, sum(is.blca.tumor), replace = TRUE), NA))
40 | scovs %<>%
41 |   mutate(subtype_receptor_breast = ifelse(
42 |     is.brca.tumor,
43 |     sample(brca.sub.lvls, sum(is.brca.tumor), replace = TRUE), NA))
44 | scovs %<>%
45 |   mutate(
46 |     # keep subtype_molecular_bladder as just a character
47 |     # subtype_molecular_bladder = factor(subtype_molecular_bladder, blca.sub.lvls),
48 |     subtype_receptor_breast = factor(subtype_receptor_breast, brca.sub.lvls))
49 | saveRDS(scovs, "test-sample-covariates.rds")
50 | 
51 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: FacileData
 2 | Type: Package
 3 | Title: A fluent API for accessing multi-assay high-throughput genomics data
 4 | Version: 0.98.1
 5 | Authors@R: c(
 6 |     person("Steve", "Lianoglou", , "lianoglou@dnli.com", c("aut", "cre"),
 7 |       comment = c(ORCID = "0000-0002-0924-1754")),
 8 |   person("Vincent", "Rouilly", , "rouilly.vincent@gene.com", role = "aut"),
 9 |   person("Peter", "Haverty", , comment = "@phaverty on github", role = "aut"),
10 |   person("Jonathan", "Carroll", , "jono@jcarroll.com.au", role = "ctb"),
11 |   person("Denali Therapeutics", role = c("cph", "fnd"),
12 |          comment = "Copyright 2019"),
13 |   person("Genentech", role = c("cph", "fnd"),
14 |          comment = "Copyright 2016 - 2018"))
15 | Description: Defines the "FacileData" API over multi-assay, high-throughput
16 |   genomics data. The FacileData API is a fluent tidy-like grammar that
17 |   facilitates exploratory data analysis. This package also defines a
18 |   "FacileDataSet" class, which is a reference implementation of the "FacileData"
19 |   API that uses SQLite and HDF5 files to store arbitrarily large datasets and
20 |   provide fast and efficient access to arbitrary subsets of these data without
21 |   loading it all into memory.
22 | URL: https://github.com/facilebio/FacileData
23 | BugReports: https://github.com/facilebio/FacileData/issues
24 | Depends:
25 |   R (>= 3.6.0),
26 | Imports:
27 |   broom,
28 |   checkmate (>= 1.8.5),
29 |   crayon,
30 |   DBI,
31 |   data.table,
32 |   dbplyr (>= 1.4.0),
33 |   dplyr (>= 1.0.0),
34 |   glue,
35 |   edgeR,
36 |   jsonlite,
37 |   lazyeval,
38 |   limma,
39 |   methods,
40 |   sparrow,
41 |   reshape2,
42 |   rhdf5,
43 |   RSQLite,
44 |   survival,
45 |   utils,
46 |   yaml
47 | Suggests:
48 |   airway,
49 |   Biobase,
50 |   knitr,
51 |   parathyroidSE,
52 |   rmarkdown,
53 |   roxygen2 (>= 6.1.0),
54 |   stringr,
55 |   S4Vectors,
56 |   SummarizedExperiment,
57 |   testthat (>= 1.0.2),
58 |   tidyr
59 | Remotes:
60 |   lianos/sparrow
61 | biocViews: Infrastructure, DataRepresentation
62 | RoxygenNote: 7.1.1
63 | Roxygen: list(markdown = TRUE)
64 | License: Apache License (>= 2.0)
65 | Encoding: UTF-8
66 | 


--------------------------------------------------------------------------------
/man/facilitate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/api.R
 3 | \name{facilitate}
 4 | \alias{facilitate}
 5 | \title{Converts an arbitrary object into one that works in the facile ecosystem.}
 6 | \usage{
 7 | facilitate(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{A non-facile object that we want to bring into the facile ecosystem}
11 | 
12 | \item{...}{we're going to need a lot of flexibility in the implementation of
13 | this function for different types of analyses}
14 | }
15 | \value{
16 | A facile-subclass of \code{x} that can take advantage of the interactive
17 | facile ecosystem.
18 | }
19 | \description{
20 | There will be many times when the particular analysis you want to conduct
21 | is not well supported in the facileverse. In this case, we will endeavor
22 | to implement ways for you to take these results and bring them back into
23 | the facile ecosystem so that you can benefit from the interactivity provided
24 | therein.
25 | }
26 | \details{
27 | We'll want to define \code{facilitate()} over a wide variety of objects. For
28 | instance:
29 | \itemize{
30 | \item \code{facilitate(a_DGElist)} would convert an \code{\link[edgeR:DGEList]{edgeR::DGEList()}} object into
31 | a \code{FacileDGEList}, which is just the same DGEList that implements the
32 | FacileData API. This is a work in progress and will be implemented in the
33 | FacileBioc package.
34 | \item You might perform a differential expression analysis using a
35 | standard limma pipeline, but you'll want to be able to drop this result
36 | into the facile ecosystem provided in the FacileAnalysis package.
37 | The particulars of this \code{facilitate()} implementation would be defined in
38 | the FacileAnalysis package, and might look something like this:\preformatted{fit <- eBayes(lmFit(elist, design))
39 | limma.res <- topTable(fit, coef = "something", n = Inf)
40 | facile.res <- facilitate(elist, fit, limma.res)
41 | }
42 | }
43 | 
44 | It's not clear how well we'll be able to do this, or if this is even
45 | the right way to do it, but we'll need to do something.
46 | }
47 | \seealso{
48 | https://github.com/facilebio/FacileBiocData
49 | }
50 | 


--------------------------------------------------------------------------------
/man/assertions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validation.R
 3 | \name{assert_sample_subset}
 4 | \alias{assert_sample_subset}
 5 | \alias{check_sample_subset}
 6 | \alias{test_sample_subset}
 7 | \alias{assert_facet_descriptor}
 8 | \alias{is_facet_descriptor}
 9 | \alias{assert_assay_feature_descriptor}
10 | \alias{is_assay_feature_descriptor}
11 | \alias{assert_expression_result}
12 | \alias{is_expression_result}
13 | \alias{assert_sample_statistics}
14 | \alias{is_sample_statistics}
15 | \alias{assert_sample_covariates}
16 | \alias{is_sample_covariates}
17 | \alias{assert_columns}
18 | \alias{has_columns}
19 | \alias{assert_covariate_definitions}
20 | \alias{is_covariate_definitions}
21 | \title{Check to see that samples are referenced correctly}
22 | \usage{
23 | assert_sample_subset(x, fds = NULL, ..., .var.name = vname(x), add = NULL)
24 | 
25 | check_sample_subset(x, fds = NULL, ...)
26 | 
27 | test_sample_subset(x, fds = NULL, ...)
28 | 
29 | assert_facet_descriptor(x)
30 | 
31 | is_facet_descriptor(x)
32 | 
33 | assert_assay_feature_descriptor(x, .fds = NULL)
34 | 
35 | is_assay_feature_descriptor(x, .fds = NULL)
36 | 
37 | assert_expression_result(x)
38 | 
39 | is_expression_result(x)
40 | 
41 | assert_sample_statistics(x)
42 | 
43 | is_sample_statistics(x)
44 | 
45 | assert_sample_covariates(x)
46 | 
47 | is_sample_covariates(x)
48 | 
49 | assert_columns(x, req.cols)
50 | 
51 | has_columns(x, req.cols, warn = TRUE)
52 | 
53 | assert_covariate_definitions(x, required = NULL)
54 | 
55 | is_covariate_definitions(x, required = NULL)
56 | }
57 | \description{
58 | Samples have compound keys: dataset,sample_id. If we want to index into
59 | them, we can either:
60 | }
61 | \details{
62 | \enumerate{
63 | \item pass a data.frame around with dataset and sample_id columns
64 | \item pass a "loaded up" tbl_sqlite over the sample_covariate table which
65 | has your filters of interest set
66 | }
67 | }
68 | \section{assay_feature_descriptor}{
69 | 
70 | If .fds is provided, it must be a \code{FacileDataSet} and these functions
71 | will check to ensure that the \code{x[['assay']]} is a valid assay element
72 | in \code{.fds}
73 | }
74 | 
75 | 


--------------------------------------------------------------------------------
/man/eav-right-censor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entity-attribute-value.R
 3 | \name{eav_encode_right_censored}
 4 | \alias{eav_encode_right_censored}
 5 | \alias{eav_decode_right_censored}
 6 | \title{Entity-attribute-value encodings for survival data.}
 7 | \usage{
 8 | eav_encode_right_censored(time, event, sas.encoding = FALSE, ...)
 9 | 
10 | eav_decode_right_censored(
11 |   x,
12 |   attrname = character(),
13 |   def = list(),
14 |   suffix = attrname,
15 |   sas.encoding = FALSE,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{time}{\code{numeric} time to event}
21 | 
22 | \item{event}{0/1 vector encoded in the "R sense". "1" is an event, "0" is
23 | right censored.}
24 | 
25 | \item{sas.encoding}{Is the 'event' vector "SAS encoded"? In the SAS world,
26 | 1 means censored, and 0 is event. This is \code{FALSE} by default.}
27 | 
28 | \item{x}{the time to event}
29 | 
30 | \item{def}{the covariate definition for this variable}
31 | 
32 | \item{suffix}{adds \verb{_<suffix>} to the \code{tte} and \code{event} columns of the
33 | outgoing \code{data.frame}}
34 | }
35 | \value{
36 | returns a numeric vector that combines time-to-event and censoring
37 | info (sign of the value).
38 | 
39 | two column \code{data.frame} with \verb{tte(_SUFFIX)?} and \verb{event(_SUFFIX)?}
40 | columns.
41 | }
42 | \description{
43 | Entity-attribute-value encodings for survival data.
44 | }
45 | \details{
46 | Encoding of survival data in R requires two columns, one to store
47 | the time-to-event and another to indicate if there was an "event" at stored
48 | time, or if it was censored. A \code{FacileDataSet} stores these two \code{pData}
49 | columns into one "value" column in its entity-attribute-value
50 | \code{sample_covariate} table.
51 | 
52 | The \code{encode_right_censored} function takes the time-to-event and censoring
53 | vectors and encodes them into a single signed time-to-event numeric value.
54 | Positive values indicate an event, and negative values are censored.
55 | 
56 | The \code{decode_right_censored} function re-instantiates the two-column R-native
57 | storage of this data.
58 | }
59 | \seealso{
60 | \code{\link[=eav_metadata_create]{eav_metadata_create()}}
61 | }
62 | 


--------------------------------------------------------------------------------
/man/filter_samples.FacileDataSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/NSE-filter-samples.R
 3 | \name{filter_samples.FacileDataSet}
 4 | \alias{filter_samples.FacileDataSet}
 5 | \title{Filter against the sample_covariate_tbl EAV table as if it were wide.}
 6 | \usage{
 7 | \method{filter_samples}{FacileDataSet}(
 8 |   x,
 9 |   ...,
10 |   samples. = samples(x),
11 |   custom_key = Sys.getenv("USER"),
12 |   with_covariates = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{x}{A \code{FacileDataSet}}
17 | 
18 | \item{...}{NSE clauses to use in \code{\link[dplyr:filter]{dplyr::filter()}} expressions}
19 | }
20 | \value{
21 | a sample-descriptor \code{data.frame} that includes the dataset,sample_id
22 | pairs that match the virtual \code{filter(covariates, ...)} clause executed here.
23 | }
24 | \description{
25 | This allows the user to query the \code{FacileDataSet} as if it were a wide
26 | \code{pData} \code{data.frame} of all its covariates.
27 | }
28 | \details{
29 | This feature is only really meant to be
30 | used interactively, and with extreme caution ... programmatically specifying
31 | the covariates, for instance, does not work right now.
32 | 
33 | TODO: Implement using \code{tidyeval}
34 | }
35 | \examples{
36 | fds <- exampleFacileDataSet()
37 | 
38 | # To identify all samples that are of "CMS3" or "CMS4" subtype
39 | # (stored in the "subtype_crc_cms" covariate):
40 | crc.34 <- filter_samples(fds, subtype_crc_cms \%in\% c("CMS3", "CMS4"))
41 | eav.query <- fds \%>\%
42 |   fetch_sample_covariates(covariates = "subtype_crc_cms") \%>\%
43 |   filter(value \%in\% c("CMS3", "CMS4")) \%>\%
44 |   collect()
45 | setequal(crc.34$sample_id, eav.query$sample_id)
46 | 
47 | # You can keep filtering a filtered dataset
48 | crc.34.male <- filter_samples(crc.34, sex == "m")
49 | }
50 | \seealso{
51 | Other API: 
52 | \code{\link{fetch_assay_score.FacileDataSet}()},
53 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
54 | \code{\link{fetch_sample_covariates}()},
55 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
56 | \code{\link{fetch_samples.FacileDataSet}()},
57 | \code{\link{filter_features.FacileDataSet}()},
58 | \code{\link{organism.FacileDataSet}()},
59 | \code{\link{samples.FacileDataSet}()}
60 | }
61 | \concept{API}
62 | 


--------------------------------------------------------------------------------
/vignettes/custom.css:
--------------------------------------------------------------------------------
 1 | /* ------------------ callout boxes ----------------------------------------- */
 2 | /* let's take desiree's approach and merge with the multiGSEA style           */
 3 | div.caution, div.demo, div.download, div.note, div.tip, div.warning, div.wip {
 4 |   margin: 10px 10px 10px 0px;
 5 |   min-height: 55px;
 6 |   padding: 2px 40px 2px 85px;
 7 |   background-position: 5px 5px, 68px 0px;
 8 |   background-repeat: no-repeat, repeat-y;
 9 |   width: 90%;
10 | }
11 | 
12 | div.caution {
13 |   background-image: url("images/icons/caution.png"), url('images/icons/callout-border.png');
14 | }
15 | 
16 | div.demo {
17 |   background-image: url("images/icons/example.png"), url('images/icons/callout-border.png');
18 | }
19 | 
20 | div.download {
21 |   background-image: url("images/icons/download.png"), url('images/icons/callout-border.png');
22 | }
23 | 
24 | div.note {
25 |   background-image: url("images/icons/note.png"), url('images/icons/callout-border.png');
26 | }
27 | 
28 | div.tip {
29 |  background-image: url("images/icons/tip.png"), url('images/icons/callout-border.png');
30 | }
31 | 
32 | div.warning {
33 |   background-image: url("images/icons/warning.png"), url('images/icons/callout-border.png');
34 | }
35 | 
36 | div.wip {
37 |   background-image: url("images/icons/wip.png"), url('images/icons/callout-border.png');
38 | }
39 | 
40 | /* http://desiree.rbind.io/post/2019/making-tip-boxes-with-bookdown-and-rmarkdown/ */
41 | 
42 | /*
43 | div.caution, div.demo, div.download, div.note, div.tip, div.wip {
44 |   padding: 1em;
45 |   margin: 1em 0;
46 |   padding-left: 100px;
47 |   background-size: 70px;
48 |   background-repeat: no-repeat;
49 |   background-position: 15px center;
50 |   min-height: 120px;
51 |   color: #1f5386;
52 |   background-color: #bed3ec;
53 |   border: solid 5px #dfedff;
54 | }
55 | 
56 | div.caution {
57 |   background-image: url("images/icons/caution.png");
58 | }
59 | 
60 | div.demo {
61 |   background-image: url("images/icons/example.png");
62 | }
63 | 
64 | div.download {
65 |   background-image: url("images/icons/download.png");
66 | }
67 | 
68 | div.note {
69 |   background-image: url("images/icons/note.png");
70 | }
71 | 
72 | div.tip {
73 |  background-image: url("images/icons/tip.png");
74 | }
75 | 
76 | div.warning {
77 |   background-image: url("images/icons/warning.png");
78 | }
79 | 
80 | div.wip {
81 |   background-image: url("images/icons/wip.png");
82 | }
83 | */
84 | 


--------------------------------------------------------------------------------
/R/feature-types.R:
--------------------------------------------------------------------------------
 1 | #' Guesses the type of feature identifiers from a character vector.
 2 | #'
 3 | #' We rely on meta-information about our data types more than "usual", and it's
 4 | #' useful to know what types of identifiers we are using for different assays.
 5 | #' This function tries to guess whether an identifier is an ensembl gene
 6 | #' identifier, ensembl transcript identifier, refseq accession, or entrez id.
 7 | #'
 8 | #' If an identifier matches more than one id_type, the id_type is set to
 9 | #' `"ambiguous"`. If the identifier doesn't match any guesses, then `"unknown"`.
10 | #'
11 | #' @export
12 | #' @param x a character vector of ids
13 | #' @param with_organism if `TRUE`, also guess the organism from ensembl ids
14 | #' @param ... not used
15 | #' @return a tibble with `id` (`x`) and `id_type`. If `with_organism = TRUE`,
16 | #'   a third `source_organism` column is added: `"Homo sapiens"`,
17 | #'   `"Mus musculus"`, or `"unknown"` when it can't be inferred (e.g. refseq).
18 | #' @examples
19 | #' fids <- c("NC_000023", "ENSG00000101811", "ENSMUSG00000030088.2", "85007")
20 | #' infer_feature_type(fids)
21 | infer_feature_type <- function(x, with_organism = FALSE, ...) {
22 |   regex <- list(
23 |     ens_gene = "^ENS[A-Z]*G\\d+(\\.\\d+)?$",
24 |     ens_tx   = "^ENS[A-Z]*?T\\d+(\\.\\d+)?$",
25 |     refseq   = "^[NXW][CGMRP]_\\d+(\\.\\d+)?$",
26 |     entrez   = "^\\d+$")
27 | 
28 |   # explicit matrix: sapply() degrades to a vector/list for length-1/empty x
29 |   bool <- matrix(unlist(lapply(regex, grepl, x), use.names = FALSE),
30 |                  nrow = length(x), ncol = length(regex))
31 |   nmatch <- rowSums(bool)
32 |   type <- rep("unknown", length(x))
33 |   type[nmatch > 1L] <- "ambiguous"
34 |   for (i in seq_along(regex)) type[nmatch == 1L & bool[, i]] <- names(regex)[i]
35 | 
36 |   is.bad <- type %in% c("ambiguous", "unknown")
37 |   if (any(is.bad)) {
38 |     warning(sum(is.bad), " identifiers were either ambiguous or unknown",
39 |             immediate. = TRUE)
40 |   }
41 | 
42 |   out <- tibble(id = x, id_type = type)
43 | 
44 |   if (with_organism) {
45 |     is.ens <- grepl("^ens_", out[["id_type"]])
46 |     ens <- sub("^ENS", "", out[["id"]])
47 |     is.human <- is.ens & grepl("^[TG]\\d+", ens)
48 |     is.mouse <- is.ens & grepl("MUS[TG]\\d+", ens)
49 |     out[["source_organism"]] <- ifelse(is.human, "Homo sapiens", "unknown")
50 |     out[["source_organism"]] <- ifelse(is.mouse, "Mus musculus", out[["source_organism"]])
51 |     unk <- out[["source_organism"]] == "unknown"
52 |     if (any(unk)) {
53 |       warning(sum(unk), " identifiers could not be matched to an organism",
54 |               immediate. = TRUE)
55 |     }
56 |   }
57 | 
58 |   out
59 | }
60 | 


--------------------------------------------------------------------------------
/R/csurv.R:
--------------------------------------------------------------------------------
 1 | ################################################################################
 2 | # Make a type called cSurv that is a character representation of survival::surv
 3 | ################################################################################
 4 | 
 5 | #' @name cSurv
 6 | #' @title cSurv is a character representation of survival::Surv
 7 | #'
 8 | #' @description cSurv serves as a more reliable way to use Surv objects as data.frame columns. A
 9 | #' data.frame is supposed to be able to hold Surv columns. There are multiple special
10 | #' cases written into base for this. It seems the implementation is incomplete as
11 | #' subsetting the DF breaks the Surv object. cSurv cannot do anything but get subset
12 | #' and become a Surv again. In the FacileVerse we hold Surv objects as cSurv, which
13 | #' allows us to survive a round-trip through an EAV sample metadata table. Survival
14 | #' analyses can convert cSurv to Surv as needed. It is assumed that all Surv censoring
15 | #' is right-censored.
16 | #' @importFrom survival Surv
17 | #' @examples
18 | #' library(survival)
19 | #' x = Surv(c(14,12,3), event = c(1,0,1))
20 | #' y = as(x,"cSurv")
21 | #' z = as(y, "Surv")
22 | #' x2 = as.character(x)
23 | #' z2 = as(x2, "Surv")
24 | #' a = as(x, "cSurv")
25 | #' b = as(a, "character")
26 | #' c = as(b, "cSurv")
27 | #' d = as(c, "Surv")
28 | NULL
29 | 
30 | setOldClass("Surv")
31 | setOldClass("cSurv")
32 | 
33 | #' @rdname cSurv
34 | #' @family cSurv
35 | #' @export
36 | as_cSurv <- function(from) {
37 |   structure(as.character(from), class = "cSurv")
38 | }
39 | 
40 | #' @rdname cSurv
41 | #' @family cSurv
42 | #' @export
43 | as_Surv <- function(from) {
44 |   ns <- tryCatch(loadNamespace("survival"), error = function(e) NULL)
45 |   if (is.null(ns)) stop("survival package required")
46 |   from <- as.character(from) # Both check type and drop attributes
47 |   stopifnot(all(is.na(from) | grepl("\\d[\\+ ]*$", from)))
48 |   # censored when a "+" sits in the trailing "+"/space run (eg. "12+ "); NA kept
49 |   status <- ifelse(is.na(from), NA, ifelse(grepl("\\+ *$", from), 0, 1))
50 |   ns$Surv(as.numeric(gsub("[\\+ ]*$", "", from)), status)
51 | }
52 | 
53 | #' @family cSurv
54 | setAs(
55 |   from = "Surv",
56 |   to = "cSurv",
57 |   def = as_cSurv
58 | )
59 | 
60 | #' @family cSurv
61 | setAs(
62 |   from = "cSurv",
63 |   to = "Surv",
64 |   def = as_Surv
65 | )
66 | 
67 | #' @family cSurv
68 | setAs(
69 |   from = "character",
70 |   to = "Surv",
71 |   def = as_Surv
72 | )
73 | 
74 | #' @family cSurv
75 | setAs(
76 |   from = "character",
77 |   to = "cSurv",
78 |   def = function(from) {
79 |     as_cSurv(as_Surv(from))
80 |   }
81 | )
82 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # Cleanup Branch
 2 | 
 3 | This includes changes that:
 4 | 
 5 | 1. Lays some groundwork down for factoring out the "FacileData API" from the
 6 |    FacileDataSet.
 7 | 2. Largely fills out documentation needed to placate `R CMD check` [WIP]
 8 | 3. Fills out some vignettes [WIP]
 9 | 4. pkgdown [not started]
10 | 5. Addresses unit test failures introduced by code changes in this branch [not started]
11 |   
12 | ## Introduction of FacileDataStore
13 | 
14 | Introduced a `FacileDataStore` "abstract class" in anticipation of refactoring
15 | out the "FacileData API" into a top-level `FacileData` package.
16 | 
17 | The idea is that:
18 |   
19 |   1. Any object that implements the "FacileData API" must include
20 |      `"FacileDataStore"` in its class hierarchy (at the root(?)). For instance,
21 |      `class(exampleFacileDataSet())` returns
22 |      `"ExampleFacileTCGADataSet" "FacileDataSet" "FacileDataStore"`
23 |   
24 |   2. The S3 methods that will be factored out of this package to define the
25 |      "FacileData API" will effectively use `*.FacileDataStore` as their
26 |      "default" methods. `*.default` FacileData API S3 methods should either
27 |      (i) not be defined; or (ii) throw an error.
28 |   
29 | Note that the names of the to-be-factored-out base package name ("FacileData",
30 | here) and "FacileData API" are up for discussion. I'm just using them here
31 | as placeholders to reference the concept we are all working towards.
32 | 
33 | Random notes in this orbit:
34 | 
35 | * I added `@family API` roxygen tags to methods that I (loosely) think should
36 |   make up the "FacileData API". I'm pretty sure
37 | 
38 | * `@family API` methods should probably `assert_facile_data_store()` instead 
39 |   of `assert_facile_data_set()`
40 | 
41 | ## FacileDataSet validity checking
42 | 
43 | This is currently split across many functions:
44 | 
45 |   * `(assert|check|test)_facile_data_set()` are [checkmate][checkmate]-esque
46 |   * The `FacileDataSet()` constructor calls `validate.facile.dirs()` which
47 |     is a workhorse of a function.
48 |   * `is.FacileDataSet()` now delegates to `assert_facile_data_set()`
49 | 
50 | ASK: Should `check_facile_data_set()` do all of the checking that
51 | `validate.facile.dirs()` does? The downside is that `assert_facile_data_set()`
52 | is likely called a lot, and `validate.facile.dirs()` may be doing too much
53 | all of the time?
54 | 
55 | [checkmate]: https://CRAN.R-project.org/package=checkmate
56 | 
57 | ## Minor Changes
58 | 
59 | * Enables roxygen markdown parsing as default in DESCRIPTION.
60 | * Most `##'` documentation blocks are changed to `#'`
61 | 
62 | 


--------------------------------------------------------------------------------
/R/utilities.R:
--------------------------------------------------------------------------------
 1 | #' Stops with an informative error unless all requested packages are available
 2 | #'
 3 | #' @noRd
 4 | #' @param pkg A character vector naming the packages that must be loadable
 5 | #' @param quietly forwarded to [requireNamespace()], defaults to `TRUE`
 6 | #' @param ... passed into [requireNamespace()]
 7 | reqpkg <- function(pkg, quietly = TRUE, ...) {
 8 |   assert_character(pkg)
 9 |   for (pkgname in pkg) {
10 |     found <- requireNamespace(pkgname, ..., quietly = quietly)
11 |     if (found) next
12 |     stop("'", pkgname, "' package required, please install it.", call. = FALSE)
13 |   }
14 | }
15 | 
16 | 
17 | #' Arranges the columns of one data.frame to another
18 | #'
19 | #' This function is primarily used to add data to the FacileDataSet's SQLite
20 | #' database. \code{x} is new data to add, and \code{to} is a table of
21 | #' the form that is expected in the database. We check that the columns of
22 | #' \code{x} are a superset of columns in \code{to} and the matching columns
23 | #' are all of the same class (only the first class of each is compared).
24 | #'
25 | #' @export
26 | #' @param x a \code{data.frame} that needs to be checked and conformed
27 | #' @param to the prototype \code{data.frame} that \code{x} needs to be aligned
28 | #'   against.
29 | #' @return the \code{tibble} version of \code{x}, restricted to the columns of
30 | #'   \code{to} and arranged in the same order.
31 | conform_data_frame <- function(x, to) {
32 |   stopifnot(is.data.frame(x))
33 |   to <- suppressWarnings(collect(to, n=1L)) # one row is enough for a prototype
34 |   stopifnot(is.data.frame(to))
35 |   assert_columns(x, colnames(to))
36 |   for (cname in colnames(to)) {
37 |     if (!cname %in% colnames(x)) {
38 |       stop("Expected column not found in target data.frame: ", cname)
39 |     }
40 |     p.class <- class(to[[cname]])[1L]
41 |     x.class <- class(x[[cname]])[1L]
42 |     if (p.class != x.class) {
43 |       stop("Expected class `", p.class, "` for column '", cname, "', but got ",
44 |            "`", x.class, "` instead")
45 |     }
46 |   }
47 |   x <- as_tibble(x)
48 |   x[, colnames(to)]
49 | }
50 | 
51 | #' Prepend class(es) to the class vector of an object and return the object
52 | #'
53 | #' @export
54 | set_class <- function(x, .class, ...) { # `...` is accepted but not used
55 |   assert_character(.class)
56 |   class(x) <- unique(c(.class, class(x))) # .class goes first; duplicates dropped
57 |   x
58 | }
59 | 
60 | #' Ensures that a vector has names for all elements if it has names for any
61 | #'
62 | #' Unnamed input is returned as is; empty or `NA` names are filled from values.
63 | #' @export
64 | #' @param x an object with names
65 | #' @return `x`; if it had names, they are now complete, syntactic and unique
66 | nameit <- function(x, ...) { # `...` is accepted but not used
67 |   if (is.null(names(x))) return(x)
68 |   noname <- is.na(names(x)) | !nzchar(names(x)) # nchar() is NA for NA names
69 |   names(x)[noname] <- x[noname]
70 |   names(x) <- make.names(names(x), unique = TRUE)
71 |   x
72 | }
73 | 


--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
 1 | /* This version of the css file is different from vignettes because this css
 2 |  * file needs to link into the articles folder for the image assets
 3 |  */
 4 | 
 5 | /************************ Callouts ********************************************/
 6 | /* Too bad you can't inherit in CSS: change one of these, change them all */
 7 | div.tip {
 8 |   margin: 2px 10px 10px 0px;
 9 |   min-height: 55px;
10 |   padding: 2px 10px 2px 85px;
11 |   background-position: 5px 5px, 68px 0px;
12 |   background-image: url('articles/images/icons/tip.png'), url('articles/images/icons/callout-border.png');
13 |   background-repeat: no-repeat, repeat-y;
14 |   width: 90%;
15 | }
16 | 
17 | div.note {
18 |   margin: 2px 10px 10px 0px;
19 |   min-height: 55px;
20 |   padding: 2px 10px 2px 85px;
21 |   background-position: 5px 5px, 68px 0px;
22 |   background-image: url('articles/images/icons/note.png'), url('articles/images/icons/callout-border.png');
23 |   background-repeat: no-repeat, repeat-y;
24 |   width: 90%;
25 | }
26 | 
27 | div.caution {
28 |   margin: 2px 10px 10px 0px;
29 |   min-height: 55px;
30 |   padding: 2px 10px 2px 85px;
31 |   background-position: 5px 5px, 68px 0px;
32 |   background-image: url('articles/images/icons/caution.png'), url('articles/images/icons/callout-border.png');
33 |   background-repeat: no-repeat, repeat-y;
34 |   width: 90%;
35 | }
36 | 
37 | div.warning {
38 |   margin: 2px 10px 10px 0px;
39 |   min-height: 55px;
40 |   padding: 2px 10px 2px 85px;
41 |   background-position: 5px 5px, 68px 0px;
42 |   background-image: url('articles/images/icons/warning.png'), url('articles/images/icons/callout-border.png');
43 |   background-repeat: no-repeat, repeat-y;
44 |   width: 90%;
45 | }
46 | 
47 | div.important {
48 |   margin: 2px 10px 10px 0px;
49 |   min-height: 55px;
50 |   padding: 2px 10px 2px 85px;
51 |   background-position: 5px 5px, 68px 0px;
52 |   background-image: url('articles/images/icons/important.png'), url('articles/images/icons/callout-border.png');
53 |   background-repeat: no-repeat, repeat-y;
54 |   width: 90%;
55 | }
56 | 
57 | div.example {
58 |   margin: 2px 10px 10px 0px;
59 |   min-height: 55px;
60 |   padding: 2px 10px 2px 85px;
61 |   background-position: 5px 5px, 68px 0px;
62 |   background-image: url('articles/images/icons/example.png'), url('articles/images/icons/callout-border.png');
63 |   background-repeat: no-repeat, repeat-y;
64 |   width: 90%;
65 | }
66 | 
67 | div.download {
68 |   margin: 2px 10px 10px 0px;
69 |   min-height: 55px;
70 |   padding: 2px 10px 2px 85px;
71 |   background-position: 5px 5px, 68px 0px;
72 |   background-image: url('articles/images/icons/download.png'), url('articles/images/icons/callout-border.png');
73 |   background-repeat: no-repeat, repeat-y;
74 |   width: 90%;
75 | }
76 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | # .onLoad <- function(libname, pkgname) {
 2 | #   impl.prefix <- 'ftest'
 3 | #   ## This package serves as an "abstract implementation" to a FacileDb database.
 4 | #   ## The packages that implement access to a FacileWareHouse should define
 5 | #   ## the following options:
 6 | #   ##
 7 | #   ##   - *.datapath
 8 | #   ##   - *.dbpath
 9 | #   ##   - *.covdef
10 | #   ##   - *.cachedir
11 | #   ##
12 | #   ## Since this FacileRepo package should not be tied to a specific FacileRepo
13 | #   ## "implementation", we set the impl.prefix to be "ftest". This means that
14 | #   ## this setup will create (or reuse) the following global options
15 | #   ##
16 | #   ##   - ftest.datapath
17 | #   ##   - ftest.dbpath
18 | #   ##   - ftest.covdef
19 | #   ##   - ftest.cachedir
20 | #
21 | #   ## Default database this will point to is configured so everything works
22 | #   ## when this package is loaded (deployed) on rescomp
23 | #   ## TODO: Update this database path to a test db instead of Atezo
24 | #
25 | #   ## Until we create a test database and distribute within the package, I'm
26 | #   ## afraid we can't avoid explicitly defining the *.datapath. This is because
27 | #   ## the unit tests are run in a "clean" (R --vanilla) environment which doesn't
28 | #   ## load the stuff in your .Rprofile
29 | #   ## dpath <- system.file('extdata', 'test', package='FacileData')
30 | #   if (dir.exists('/gne')) {
31 | #     dpath <- '/gne/home/lianogls/workspace/data/facile/test'
32 | #   } else {
33 | #     dpath <- '~/workspace/data/facile/test'
34 | #   }
35 | #   db.name <- 'TcgaDb-test.sqlite'
36 | #   dpath <- getOption(sprintf('%s.datapath', impl.prefix), dpath)
37 | #
38 | #   pkg.opts <- list(
39 | #     datapath=dpath,
40 | #     dbpath=file.path(dpath, db.name),
41 | #     cachedir=file.path(dpath, 'cache'),
42 | #     covdef=file.path(dpath, 'sample-meta-definitions.yaml'))
43 | #   names(pkg.opts) <- sprintf('%s.%s', impl.prefix, names(pkg.opts))
44 | #
45 | #   ## We only set these options if they aren't already set in the global options
46 | #   ## The developers should set the appropriate options in the ~/.Rprofile
47 | #   opts <- options()
48 | #   toset <- !(names(pkg.opts) %in% names(opts))
49 | #   if (any(toset)) {
50 | #     options(pkg.opts[toset])
51 | #   }
52 | #
53 | #   ## Check options
54 | #   db.path <- getOption(paste0(impl.prefix, '.dbpath'))
55 | #   if (!file.exists(db.path)) {
56 | #     msg <- paste0(
57 | #       "Default path to faciledb is not a valid file: ", db.path, "\n",
58 | #       "Set options('facile.datapath') before loading the facilewarehouse ",
59 | #       "package to a valid path to the SQLite database to skip this message.\n",
60 | #       "A good place to do this for your local work is in your ~/.Rprofile")
61 | #     ## warning(msg, immediate.=TRUE)
62 | #   }
63 | #
64 | #   invisible()
65 | # }
66 | 


--------------------------------------------------------------------------------
/inst/testdata/expected-meta.yaml:
--------------------------------------------------------------------------------
 1 | name: TestFacileDataSet
 2 | organism: Homo sapiens
 3 | default_assay: rnaseq
 4 | datasets:
 5 |   BLCA:
 6 |     url: https://portal.gdc.cancer.gov/projects/TCGA-BLCA
 7 |     description: Bladder urothelial carcinoma
 8 |   BRCA:
 9 |     url: https://portal.gdc.cancer.gov/projects/TCGA-BRCA
10 |     description: Breast invasive carcinoma
11 | sample_covariates:
12 |   stage:
13 |     class: categorical
14 |     levels: ["stage i", "stage ii", "stage iii", "stage iv"]
15 |     description: Cancer staging classification (I-IV)
16 |     label: Pathological Tumor Staging
17 |     # colnames: stage
18 |     arguments:
19 |       x: stage
20 |     type: clinical
21 |   sex:
22 |     class: categorical
23 |     levels: ['male', 'female'] # reversed factor
24 |     description: "chrX:chrY ratio"
25 |     label: Sex
26 |     # colnames: sex
27 |     arguments:
28 |       x: sex
29 |     type: clinical
30 |   age:
31 |     class: real
32 |     description: Age of patient in years
33 |     label: Age (years)
34 |     # colnames: age
35 |     arguments:
36 |       x: age
37 |     type: clinical
38 |   subtype_molecular_bladder:
39 |     class: categorical
40 |     # levels: ["luminal", "basal"] # for test, we don't specify this is factor
41 |     description: >
42 |       The luminal/basal subtyping scheme in bladder, as defined by Damrauer et al.
43 |       (doi:10.1073/pnas.1318376111). For a larger umbrella review
44 |       of the diversity of bladder subtypes, you can refer to doi:10.1038/nrc3817.
45 |     label: Bladder cancer subtype (luminal/basal)
46 |     # colnames: subtype_molecular_bladder
47 |     arguments:
48 |       x: subtype_molecular_bladder
49 |     type: tumor_classification
50 |   subtype_receptor_breast:
51 |     class: categorical
52 |     levels: ["ER+/PR+", "Her2+", "TNBC"]
53 |     description: >
54 |       Breast cancer classification based on amplification and/or deletion of
55 |       different receptors.
56 |     label: Breast cancer subtype (receptor status)
57 |     # colnames: subtype_receptor_breast
58 |     arguments:
59 |       x: subtype_receptor_breast
60 |     type: tumor_classification
61 |   sample_type:
62 |     class: categorical
63 |     levels: ['normal', 'tumor', 'tumor_metastatic']
64 |     description: Whether sample comes from a tumor or adjacent normal
65 |     label: Sample type (tumor/normal)
66 |     # colnames: sample_type
67 |     arguments:
68 |       x: sample_type
69 |     type: clinical
70 |   OS:
71 |     class: right_censored
72 |     arguments:
73 |       time: tte_OS
74 |       event: event_OS
75 |     label: Overall survival
76 |     type: clinical
77 |     description: >
78 |       Overall Survival in days or months. The units still need to be standardized
79 |       across trials.
80 |     # colnames: ["tte_OS", "event_OS"]
81 |     # argnames: ["time", "event"]
82 | 


--------------------------------------------------------------------------------
/tests/testthat/test-as.FacileDataSet.R:
--------------------------------------------------------------------------------
 1 | context("as.FacileDataSet")
 2 | 
 3 | test_that("We can get pdata metadata", {
 4 |   stopifnot(requireNamespace("Biobase", quietly = TRUE))
 5 |   stopifnot(requireNamespace("survival", quietly = TRUE))
 6 |   sinfo = data.frame(a = 1:4,
 7 |                      b = survival::Surv(1:4, c(1,1,0,1)),
 8 |                      stringsAsFactors = FALSE
 9 |   )
10 |   rownames(sinfo) = letters[1:4]
11 |   attr(sinfo, "label") = c(a = "a is a", b = "b is b")
12 |   vals = matrix(1:16, ncol = 4, dimnames = list(LETTERS[1:4], letters[1:4]))
13 |   es = Biobase::ExpressionSet(vals, Biobase::AnnotatedDataFrame(sinfo))
14 | 
15 |   expect_identical(
16 |     FacileData::pdata_metadata(es),
17 |     list(a = list(description = "a is a"),
18 |          b = list(description = "b is b"))
19 |   )
20 | })
21 | 
22 | test_that("exampleFacileDataSet -> DGELists -> as.FacileDataSet", {
23 |   efds <- exampleFacileDataSet()
24 |   dsets <- sample_info_tbl(efds) %>%
25 |     distinct(dataset) %>%
26 |     pull(dataset)
27 |   dlists <- sapply(dsets, function(dset) {
28 |     y <- sample_info_tbl(efds) %>%
29 |       filter(dataset == dset) %>%
30 |       as.DGEList()
31 |     y$samples <- transform(y$samples, group = NULL, samid = NULL)
32 |     y$genes <- rename(y$genes, name = "symbol")
33 |     colnames(y) <- sub(".*?__", "", colnames(y))
34 |     y
35 |   }, simplify = FALSE)
36 | 
37 |   outdir <- tempfile(pattern = "TestFacileDataSet")
38 | 
39 |   tfds <- as.FacileDataSet(dlists, outdir,
40 |                            dataset_name = "TestFacileDataSet",
41 |                            assay_name = "rnaseq",
42 |                            assay_type = "rnaseq",
43 |                            source_assay = "counts",
44 |                            organism = organism(efds))
45 | 
46 |   # test tumor samples are equivalent
47 |   tsamples.new <- filter_samples(tfds, sample_type == "tumor")
48 |   tsamples.exp <- filter_samples(efds, sample_type == "tumor")
49 |   res <- inner_join(
50 |     mutate(tsamples.new, source = "test"),
51 |     mutate(tsamples.exp, source = "orig"),
52 |     by = c("dataset", "sample_id"))
53 |   expect_equal(nrow(tsamples.new), nrow(res))
54 |   expect_equal(nrow(tsamples.exp), nrow(res))
55 | 
56 |   # expect factor levels are the same
57 |   stage.new <- with_sample_covariates(tsamples.new, "stage")
58 |   stage.exp <- with_sample_covariates(tsamples.exp, "stage")
59 |   expect_factor(stage.new[["stage"]])
60 |   expect_equal(levels(stage.new[["stage"]]), levels(stage.exp[["stage"]]))
61 |   stage.res <- inner_join(stage.new, stage.exp,
62 |                           by = c("dataset", "sample_id"),
63 |                           suffix = c(".new", ".exp"))
64 |   expect_equal(nrow(stage.new), nrow(stage.exp))
65 |   expect_equal(nrow(stage.new), nrow(stage.res))
66 |   expect_equal(stage.res[["stage.new"]], stage.res[["stage.exp"]])
67 | })
68 | 


--------------------------------------------------------------------------------
/inst/extdata/exampleFacileDataSet/meta.yaml:
--------------------------------------------------------------------------------
 1 | name: ExampleFacileTCGADataSet
 2 | organism: Homo sapiens
 3 | default_assay: rnaseq
 4 | datasets:
 5 |   BLCA:
 6 |     url: https://portal.gdc.cancer.gov/projects/TCGA-BLCA
 7 |     description: Bladder urothelial carcinoma
 8 |   COAD:
 9 |     url: https://portal.gdc.cancer.gov/projects/TCGA-COAD
10 |     description: Colon adenocarcinoma
11 | sample_covariates:
12 |   indication:
13 |     type: tumor_classification
14 |     class: categorical
15 |     description: High level indication of patient's cancer type
16 |     label: Cancer Indication
17 |   OS:
18 |     type: response
19 |     class: right_censored
20 |     description: >
21 |       Overall Survival in days or months. The units still need to be standardized
22 |       across trials.
23 |     label: Overall survival
24 |   PFS:
25 |     type: response
26 |     class: right_censored
27 |     description: >
28 |       Progression Free Survival. The units still need to be standardized
29 |       across trials.
30 |     label: Progression free survival
31 |   sample_type:
32 |     type: clinical
33 |     class: categorical
34 |     levels: ['normal', 'tumor']
35 |     description: Whether sample comes from a tumor or adjacent normal
36 |     label: Sample type (tumor/normal)
37 |   sex:
38 |     type: clinical
39 |     class: categorical
40 |     levels: ['m', 'f']
41 |     description: In the "ratio between chrX:chrY" sense.
42 |     label: Sex
43 |   stage:
44 |     type: clinical
45 |     class: categorical
46 |     levels: ["I", "II", "III", "IV"]
47 |     description: Cancer staging classification (I-IV)
48 |     label: Cancer stage
49 |   subtype_molecular:
50 |     type: tumor_classification
51 |     class: categorical
52 |     description: >
53 |       The luminal/basal subtyping scheme in bladder, as defined by Damrauer et al.
54 |       (doi:10.1073/pnas.1318376111). For a larger umbrella review
55 |       of the diversity of bladder subtypes, you can refer to doi:10.1038/nrc3817.
56 |     label: Bladder cancer subtype (luminal/basal)
57 |   subtype_tcga:
58 |     type: tumor_classification
59 |     class: categorical
60 |     description: >
61 |       Expression based subtypes of bladder cancer (I-IV) as described in the
62 |       TCGA bladder paper (doi:10.1038/nature12965). For a larger umbrella review
63 |       of the diversity of bladder subtypes, you can refer to doi:10.1038/nrc3817.
64 |     label: Bladder cancer subtype (TCGA)
65 |   subtype_crc_cms:
66 |     type: tumor_classification
67 |     class: categorical
68 |     description: >
69 |       The consensus molecular subtypes of CRC.
70 |     label: Consensus Molecular CRC Subtypes (CMS1-4)
71 |     levels: ['CMS1', 'CMS2', 'CMS3', 'CMS4']
72 |   subtype_microsatellite_instability:
73 |     type: tumor_classification
74 |     class: categorical
75 |     description: >
76 |       Microsatellite instability (MSI) status of the tumor.
77 |     label: Microsatellite instability status (MSI-hi vs. MSI-lo/MSS)
78 |     levels: ['MSI-hi', 'MSI-lo/MSS']
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/man/flog.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/logging.R
 3 | \name{flog}
 4 | \alias{flog}
 5 | \title{Generates a logging message using glue and crayon, with some bells/whistles.}
 6 | \usage{
 7 | flog(
 8 |   ...,
 9 |   level = "info",
10 |   ns = NULL,
11 |   session = NULL,
12 |   file = stderr(),
13 |   sep = "",
14 |   fill = FALSE,
15 |   labels = NULL,
16 |   append = FALSE,
17 |   newline = !is.null(file)
18 | )
19 | }
20 | \arguments{
21 | \item{...}{the string elements to pass into \code{\link[glue:glue]{glue::glue()}}}
22 | 
23 | \item{level}{the "firing level" of this message. Defaults to "info"}
24 | 
25 | \item{ns}{(namespace) if included, then the message checks the
26 | namespace-specific logging priority}
27 | 
28 | \item{session, file, sep, fill, labels, append}{sent to \code{\link[base:cat]{base::cat()}}}
29 | 
30 | \item{newline}{If \code{TRUE}, appends a \verb{\\\\n} to the message. By default, this
31 | is \code{TRUE} when \code{file} is not \code{NULL}.}
32 | }
33 | \value{
34 | invisibly returns the text generated in the logging message.
35 | }
36 | \description{
37 | Like other logging approaches, each message created with this function is
38 | assigned a \code{level} (priority). If the current logging level, which is
39 | returned from a call to \code{flog_level} (ostensibly determined by the value
40 | of the \code{"facile.log.level(.*?)"} option) is less than or equal to level of
41 | this message, then the message will be generated and sent to \code{file}.
42 | You can include a \code{namespace} for the message to provide a namespace-specific
43 | level/priority hierarchy.
44 | }
45 | \details{
46 | Convenience wrapper functions are provided for each logging level, i.e.
47 | call \code{fwarn("message")} instead of \code{flog("message", level = "warn")}. Also, each facile* package provides its own \code{flog()} function which sets the namespace \code{ns} parameter to default to a package-specific namespace so you
48 | can control logging at the different package level.
49 | }
50 | \section{Logging Levels}{
51 | 
52 | 
53 | Logging levels are\preformatted{.flog_levels <- c("all"  = 0, "trace" = 1, "debug" = 2, "info" = 3,
54 |                   "warn" = 4, "error" = 5, "fatal" = 6)
55 | }
56 | }
57 | 
58 | \section{crayon}{
59 | 
60 | Glue lets you put crayon functions in \code{{}} to stylize output. For instance,
61 | you can make "bold and red" the color red and also bold, like so:\preformatted{flog("This is \{red\}\{bold\}bold and red\{reset\}, right?")
62 | }
63 | 
64 | Nice! It might be more convenient if we could make it a bit more terse,
65 | as shown below, but that might happen at another time.\preformatted{flog("This is rb`bold and red`, right?")
66 | }
67 | 
68 | Colors:
69 | \itemize{
70 | \item b: blue
71 | \item c: cyan
72 | \item g: green
73 | \item k: black
74 | \item m: magenta
75 | \item r: red
76 | \item y: yellow
77 | }
78 | 
79 | Styles:
80 | \itemize{
81 | \item i: italic
82 | \item s: strong (bold)
83 | \item S: strikethrough
84 | \item u: underline
85 | }
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/tests/testthat/test-EAV.R:
--------------------------------------------------------------------------------
 1 | context("EAV Manipulation")
 2 | 
 3 | efds. <- exampleFacileDataSet()
 4 | pdata. <- efds. %>%
 5 |   # sex and stage are factors
 6 |   fetch_sample_covariates(covariates = c("sex", "stage")) %>%
 7 |   spread_covariates() %>%
 8 |   # add some other data types: numeric and character
 9 |   mutate(age = sample(20:70, nrow(.)),
10 |          category = sample(letters, nrow(.), replace = TRUE))
11 | emeta. <- local({
12 |   fn <- system.file("extdata", "exampleFacileDataSet", "meta.yaml",
13 |                     package = "FacileData", mustWork = TRUE)
14 |   defined <- yaml::yaml.load_file(fn)$sample_covariates
15 |   defined <- defined[names(defined) %in% colnames(pdata.)]
16 |   c(defined, list(
17 |     age = list(type = "atype", class = "real", description = "happy bday"),
18 |     category = list(type = "atype", class = "categorical", description = "x")))
19 | })
20 | 
21 | test_that("deafult metadata creation from data.frame", {
22 |   ignore.cols <- c("dataset", "sample_id")
23 | 
24 |   covdefs <- eav_metadata_create(pdata., ignore = ignore.cols)
25 |   expected.cols <- setdiff(names(pdata.), ignore.cols)
26 |   expect_setequal(names(covdefs), expected.cols)
27 | 
28 |   # check that inferred covariate definitions have the required "slots", ie.
29 |   # arguments, label, class, type
30 |   for (cname in expected.cols) {
31 |     vals <- pdata.[[cname]]
32 |     expected <- emeta.[[cname]]
33 |     inferred <- covdefs[[cname]]
34 |     if (is.character(vals) || is.factor(vals)) {
35 |       expect_equal(inferred[["class"]], "categorical", info = cname)
36 |     } else if (is.numeric(vals)) {
37 |       expect_equal(inferred[["class"]], "real", info = cname)
38 |     }
39 |     if (is.factor(vals)) {
40 |       expect_equal(inferred[["levels"]], levels(vals), info = cname)
41 |     }
42 |   }
43 | })
44 | 
45 | test_that("custom definition supersede inferred EAV defs from data.frame", {
46 |   covariate_def <- list(
47 |     # Change order of levels and description in sex factor covariate
48 |     sex = list(
49 |       levels = rev(levels(pdata.[["sex"]])),
50 |       description = "x:y chromosome ratio"),
51 |     age = list(
52 |       description = "years of life",
53 |       type = "random"))
54 | 
55 |   defaults <- eav_metadata_create(pdata.)
56 |   custom <-  eav_metadata_create(pdata., covariate_def = covariate_def)
57 | 
58 |   # Check reversed levels of `sex` covariate
59 |   expect_character(custom$sex$levels)
60 |   expect_equal(custom$sex$levels, rev(defaults$sex$levels))
61 | 
62 |   # Ensure that specified entries were overridden, and others left alone.
63 |   for (cname in names(defaults)) {
64 |     dvals <- defaults[[cname]]
65 |     cvals <- custom[[cname]]
66 |     assert_list(dvals)
67 |     assert_list(cvals)
68 |     assert_subset(names(dvals), names(cvals))
69 |     for (attrib in names(dvals)) {
70 |       override <- covariate_def[[cname]][[attrib]]
71 |       expected <- if (!is.null(override)) override else dvals[[attrib]]
72 |       expect_equal(cvals[[attrib]], expected,
73 |                    info = paste(cname, attrib, sep = ":"))
74 |     }
75 |   }
76 | })
77 | 


--------------------------------------------------------------------------------
/tests/testthat/test-assay-data.R:
--------------------------------------------------------------------------------
 1 | context("Fetching assay level data")
 2 | 
 3 | if (!exists("FDS")) FDS <- exampleFacileDataSet()
 4 | 
 5 | samples <- FDS %>%
 6 |   filter_samples(stage == "III") %>%
 7 |   select(dataset, sample_id)
 8 | 
 9 | genes <- c(
10 |   PRF1='5551',
11 |   GZMA='3001',
12 |   CD274='29126',
13 |   TIGIT='201633')
14 | 
15 | features <- tibble(assay='rnaseq', feature_id=genes)
16 | 
17 | test_that("fetch_assay_data limits samples correctly", {
18 |   s.df <- collect(samples, n=Inf)
19 | 
20 |   e.sqlite <- fetch_assay_data(FDS, genes, samples) %>% collect(n=Inf)
21 |   e.df <- fetch_assay_data(FDS, genes, s.df) %>% collect(n=Inf)
22 | 
23 |   ## results are same from tbl_df and tbl_sqlite `samples` parameter
24 |   expect_equal(e.sqlite, e.df)
25 | 
26 |   ## samples limited correctly
27 |   expect_true(setequal(paste0(e.df$dataset, e.df$sample_id),
28 |                        paste0(s.df$dataset, s.df$sample_id)))
29 | 
30 | })
31 | 
32 | test_that("spreading data works with_assay_data", {
33 |   expected <- FDS %>%
34 |     fetch_assay_data(genes, samples, normalized = TRUE) %>%
35 |     select(dataset, sample_id, feature_name, value) %>%
36 |     tidyr::spread(feature_name, value)
37 |   result <- samples %>%
38 |     with_assay_data(genes, normalized = TRUE, .fds = FDS) %>%
39 |     collect
40 | 
41 |   expect_equal(result, expected, check.attributes = FALSE)
42 | })
43 | 
44 | test_that("fetch_assay_data(..., aggregate = TRUE) provides scores", {
45 |   scores <- FDS %>%
46 |     fetch_assay_data(features, samples, normalized = TRUE, aggregate = TRUE) %>%
47 |     arrange(sample_id, feature_name) %>%
48 |     select(dataset, sample_id, feature_id, symbol=feature_name, value) %>%
49 |     mutate(samid=paste(dataset, sample_id, sep="__"))
50 | 
51 |   dat <- FDS %>%
52 |     fetch_assay_data(features, samples, normalized = TRUE, as.matrix = TRUE)
53 |   ewm <- sparrow::eigenWeightedMean(dat)$score[scores$samid]
54 |   expect_equal(scores$value, unname(ewm))
55 | 
56 |   # test with_assay_data
57 |   with.scores <- scores %>%
58 |     distinct(dataset, sample_id) %>%
59 |     with_assay_data(features, aggregate = TRUE)
60 | 
61 |   expect_equal(with.scores$aggregated, scores$value)
62 | })
63 | 
64 | # test_that("fetch_assay_data handles missing entries for requested samples", {
65 | #   ## When we have multiple assays for an FDS, we can use a valid sample
66 | #   ## descriptor to retrieve data, but the requested assay may not have data
67 | #   ## for all requested samples, we need to handle this.
68 | #   root <- rprojroot::find_root(rprojroot::is_r_package)
69 | #   devtools::load_all(root)
70 | #   tcga <- FacileDataSet('~/workspace/data/facile/FacileDataSets/FacileTCGADataSet-2017-03-25')
71 | #
72 | #   library(reshape2)
73 | #   samples <- sample_info_tbl(tcga) %>%
74 | #     filter(dataset == 'BRCA') %>%
75 | #     collect
76 | #
77 | #   genes <- c(TIGIT='201633', CD274='29126')
78 | #   rnaseq <- tcga %>%
79 | #     fetch_assay_data(genes, samples, 'rnaseq', normalized=TRUE)
80 | #
81 | #   ## don't have agilent data for all brca samples
82 | #   agilent <- tcga %>%
83 | #     fetch_assay_data(genes, samples, 'agilent', normalized=TRUE)
84 | #
85 | # })
86 | 


--------------------------------------------------------------------------------
/tests/testthat/test-biocbox.R:
--------------------------------------------------------------------------------
 1 | context("biocbox")
 2 | 
 3 | if (!exists("FDS")) FDS <- exampleFacileDataSet()
 4 | 
 5 | samples <- sample_covariate_tbl(FDS) %>%
 6 |   filter(variable == 'stage' & value == 'III') %>%
 7 |   select(dataset, sample_id) %>%
 8 |   collect()
 9 | genes <- local({
10 |   out <- c("800", "1009", "1289", "50509", "2191", "2335", "5159")
11 |   feature_info_tbl(FDS) %>%
12 |     filter(feature_id %in% out) %>%
13 |     collect() %>%
14 |     pull(feature_id)
15 | })
16 | 
17 | # boxes and their associated packages
18 | box.info <- FacileData:::.biocboxes %>%
19 |   select(class, package) %>%
20 |   distinct()
21 | 
22 | test_that("fetch_assay_data results converted to biocboxes", {
23 |   scovs <- samples %>%
24 |     with_sample_covariates() %>%
25 |     as.data.frame()
26 |   rownames(scovs) <- paste(scovs$dataset, scovs$sample_id, sep = "__")
27 | 
28 |   e <- fetch_assay_data(FDS, genes, samples, as.matrix = TRUE)
29 |   e <- e[, rownames(scovs)]
30 | 
31 |   for (i in seq(nrow(box.info))) {
32 |     class <- box.info[["class"]][i]
33 |     package <- box.info[["package"]][i]
34 | 
35 |     rnaseq.compat <- is.element(
36 |       "rnaseq",
37 |       filter(FacileData:::.biocboxes, .data$class == .env$class)$assay_type)
38 | 
39 |     if (rnaseq.compat) {
40 |       bb <- biocbox(samples, features = genes, class = class)
41 |     } else {
42 |       bb <- expect_warning({
43 |         biocbox(samples, features = genes, class = class)
44 |       }, "not compatible.*assay_type", info = class)
45 |     }
46 | 
47 |     expect_is(bb, class, info = class)
48 |     checkmate::expect_set_equal(rownames(bb), genes, info = class)
49 |     checkmate::expect_set_equal(colnames(bb), colnames(e), info = class)
50 | 
51 |     bb <- bb[rownames(e), colnames(e)]
52 | 
53 |     # Check assay data is same
54 |     expect_equal(adata(bb), e, check.attributes = FALSE,
55 |                  info = class)
56 | 
57 |     # Check that sample covariates are the same
58 |     pdat <- as.data.frame(pdata(bb))
59 |     checkmate::expect_subset(colnames(scovs), colnames(pdat), info = class)
60 |     expect_equal(pdat[, colnames(scovs)], scovs, info = class,
61 |                  check.attributes = FALSE)
62 |   }
63 | })
64 | 
65 | test_that("biocbox appends custom covariates from input sample table", {
66 |   custom.covs <- samples %>%
67 |     mutate(var1 = rnorm(nrow(samples)), var2 = sample(letters, nrow(samples)))
68 | 
69 |   bb <- biocbox(custom.covs, features = genes)
70 | 
71 |   cmp <- inner_join(custom.covs, bb$samples, by = c("dataset", "sample_id"))
72 |   expect_equal(nrow(cmp), nrow(custom.covs))
73 |   expect_equal(cmp$var1.x, cmp$var1.y)
74 |   expect_equal(cmp$var2.x, cmp$var2.y)
75 | })
76 | 
77 | 
78 | test_that("biocbox appends custom covariates from sample_covariates param", {
79 |   custom.covs <- samples %>%
80 |     mutate(var1 = rnorm(nrow(samples)), var2 = sample(letters, nrow(samples)))
81 | 
82 |   bb <- biocbox(select(custom.covs, dataset, sample_id),
83 |                 features = genes,
84 |                 sample_covariates = custom.covs)
85 | 
86 |   cmp <- inner_join(custom.covs, bb$samples, by = c("dataset", "sample_id"))
87 |   expect_equal(nrow(cmp), nrow(custom.covs))
88 |   expect_equal(cmp$var1.x, cmp$var1.y)
89 |   expect_equal(cmp$var2.x, cmp$var2.y)
90 | })
91 | 


--------------------------------------------------------------------------------
/man/addFacileAssaySet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/construction.R
 3 | \name{addFacileAssaySet}
 4 | \alias{addFacileAssaySet}
 5 | \title{Adds new set of assay data for all samples in a FacileDataSet}
 6 | \usage{
 7 | addFacileAssaySet(
 8 |   x,
 9 |   datasets,
10 |   facile_assay_name,
11 |   facile_assay_type = .assay.types,
12 |   facile_feature_type = .feature.types,
13 |   facile_assay_description = NULL,
14 |   facile_feature_info,
15 |   storage_mode = .storage.modes,
16 |   chunk_rows = 5000,
17 |   chunk_cols = "ncol",
18 |   chunk_compression = 4,
19 |   assay_name = NULL,
20 |   warn_existing = FALSE
21 | )
22 | }
23 | \arguments{
24 | \item{x}{The \code{FacileDataSet}}
25 | 
26 | \item{datasets}{list of \code{ExpressionSet}, \code{SummarizedExperiment}, or
27 | \code{DGEList}s that have the new assay data across all of the datasets in \code{x}.}
28 | 
29 | \item{facile_assay_name}{the name of the assay in the source dataset object}
30 | 
31 | \item{facile_assay_type}{string indicating the assay_type ('rnaseq',
32 | 'affymetrix', etc.)}
33 | 
34 | \item{facile_feature_type}{a string indicating the universe the features in
35 | this assay refer to, i.e. "entrez", "ensgid", "enstid", etc.}
36 | 
37 | \item{facile_feature_info}{a \code{data.frame} with the required \code{feature_info}
38 | columns that describe the features in this assay. Please refer to the
39 | "Features" section of the \code{FacileDataSet} vignette for more complete
40 | description.}
41 | 
42 | \item{storage_mode}{either \code{"integer"} or \code{"numeric"}, maps to the
43 | \code{storage.mode} parameter in \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}}}
44 | 
45 | \item{chunk_rows}{the first entry in the \code{chunk} parameter in
46 | \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}} (\code{integer})}
47 | 
48 | \item{chunk_cols}{the second entry in the \code{chunk} parameter in
49 | \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}}. If this is \code{"ncol"}, it is set to the number
50 | of columns in each of the internal dataset matrices being added.}
51 | 
52 | \item{chunk_compression}{the \code{level} parameter in \code{\link[rhdf5:h5createDataset]{rhdf5::h5createDataset()}}}
53 | 
54 | \item{assay_name}{the assay name in the data containers provided in the
55 | \code{datasets} list.}
56 | 
57 | \item{facile_assay_description}{a string that allows the caller to provide
58 | a "freeform" description of the assay (platform, protocol, whatever).}
59 | }
60 | \value{
61 | a \code{tibble} subset of \code{facile_feature_info} that indicates the \emph{new}
62 | features that were added to the internal \code{feature_info_tbl}.
63 | }
64 | \description{
65 | Once a FacileDataSet has been created and initialized, either via a
66 | low-level call to \code{\link[=initializeFacileDataSet]{initializeFacileDataSet()}}, or a call to
67 | \code{\link[=as.FacileDataSet]{as.FacileDataSet()}} over a list of BiocAssayContainers, you can add more
68 | assays (i.e. RNA-seq, microarray, etc.) to the FacileDataSet using this
69 | function.
70 | }
71 | \details{
72 | Note that you cannot add assay data piecemeal. That is to say, you cannot call
73 | this function once to add copynumber data
74 | (addFacileAssaySet(..., facile_assay_type = "cnv")) to a subset of samples
75 | and later call this function again to add copynumber to the rest of the
76 | samples. The function will throw an error if
77 | facile_assay_type \%in\% assay_names(x) == TRUE.
78 | }
79 | 


--------------------------------------------------------------------------------
/tests/testthat/test-entity-attribute-value.R:
--------------------------------------------------------------------------------
 1 | context("Entity-Attribute-Value conversions")
 2 | 
 3 | # Asserts that the programmatically generated yaml encoding of a covariate
 4 | # matches its expected encoding from "testdata/expected-meta.yaml".
 5 | #
 6 | # @param x the recoded covariate list
 7 | # @param expected the covariate list from `expected-meta.yaml`
 8 | # @param varname name of the covariate, passed as `info` to each expectation
 9 | validate_eav_recode <- function(x, expected, varname) {
10 |   expect_is(x, "list", info = varname)
11 |   expect_is(expected, "list", info = varname)
12 |   expect_equal(x$colnames, expected$colnames, info = varname)
13 |   expect_equal(x$class, expected$class, info = varname)
14 |   if (is.null(expected$levels)) {
15 |     expect_true(is.null(x$levels), info = varname)
16 |   } else {
17 |     expect_true(is.character(x$levels), info = varname)
18 |     expect_equal(x$levels, expected$levels, info = varname)
19 |   }
20 | }
21 | 
22 | test_that("pData -> meta.yaml covariate encoding works (simple & compound)", {
23 |   # Trying to recode the survival stuff isn't included in this test
24 |   pdat <- example_sample_covariates()
25 |   elol <- example_sample_covariate_definitions()
26 | 
27 |   # define covariate_def(-inition) for the compound OS facile covariate:
28 |   covdef <- list(
29 |     OS=list(
30 |       arguments=c(time="tte_OS", event="event_OS"),
31 |       class="right_censored",
32 |       label="Overall survival",
33 |       type="clinical",
34 |       description="Overall Survival in months"
35 |     ))
36 |   # create the definitions, then round-trip them through a yaml file on disk
37 |   lol <- eav_metadata_create(pdat, covariate_def = covdef)
38 |   fn <- tempfile()
39 |   yaml::write_yaml(lol, fn)
40 |   relol <- yaml::read_yaml(fn)
41 | 
42 |   # Explicitly test that the tte_OS and event_OS columns from `pdat` were
43 |   # compounded into the OS covariate.
44 |   # Reference the "Encoding Survival Covariates" section in the
45 |   # `?eav_metadata_create` help file for the expected behavior of this
46 |   # compounded, multi-column-to-single-value mapping.
47 |   compounded <- c("tte_OS", "event_OS")
48 |   expect_true(all(compounded %in% names(pdat))) # in pData
49 |   expect_true(!any(c("tte_OS", "event_OS") %in% names(relol))) # not in yaml
50 |   expect_true(setequal(relol$OS$arguments, compounded)) # names of columns saved for posterity
51 | 
52 |   # ensure the set of variables read back from yaml matches what was written
53 |   expect_true(setequal(names(relol), names(lol)))
54 | 
55 |   # encodings read back from yaml match the expected (`elol`) definitions
56 |   for (varname in names(lol)) {
57 |     validate_eav_recode(relol[[varname]], elol[[varname]], varname)
58 |   }
59 | })
60 | 
61 | test_that("Successful EAV creation of data.frame with a Surv object column", {
62 |   # wide pData-like input that carries a Surv column (x) next to an integer (y)
63 |   pdat <- data.frame(
64 |     dataset = "foo",
65 |     sample_id = letters[1:3],
66 |     x = survival::Surv(1:3, c(0,1,0)),
67 |     y = 4:6,
68 |     stringsAsFactors = FALSE)
69 | 
70 |   # the long table we expect back, built by hand from the wide input
71 |   wide.chr <- mutate(pdat, x = as.character(x))
72 |   want <- wide.chr %>%
73 |     tidyr::gather("variable", "value", -dataset, -sample_id) %>%
74 |     mutate(class = ifelse(variable == "x", "cSurv", "real"),
75 |            type = "general") %>%
76 |     as_tibble()
77 | 
78 |   expect_equal(as.EAVtable(pdat), want, check.attributes = FALSE)
79 | })
80 | 
81 | test_that("basic encoding and decoding of EAV columns works", {
82 |   # round trip: survival::Surv -> cSurv -> encoded character -> cSurv
83 |   foo <- survival::Surv(1:3, c(0,1,0))
84 |   x <- as(foo, "cSurv")
85 |   y <- eav_encode_cSurv(x)
86 |   y1 <- c("1+","2","3+")
87 |   attr(y1, "eavclass") <- "cSurv"
88 |   expect_identical(y, y1)
89 | 
90 |   z <- eav_decode_cSurv(y)
91 |   expect_identical(x, z)
92 | })
93 | 


--------------------------------------------------------------------------------
/R/replace_na.R:
--------------------------------------------------------------------------------
 1 | defaults.freplace_na <- list(
 2 |   numeric = "error",
 3 |   categorical = "NA.")
 4 | 
 5 | #' Replaces NA's with specified values.
 6 | #'
 7 | #' Some of the downstream uses of a FacileDataStore can throw problems when NA's
 8 | #' are found in data or covariates, so we often want to fill in NA's with
 9 | #' non-NA markers of missing values. Unless specified otherwise (using the
10 | #' `replace` and `defaults` parameters), categorical NA's become `"NA."` and
11 | #' numeric NA's raise an error.
12 | #'
13 | #' A default value is assumed based on the atomic type of the data. This can be
14 | #' overridden per type via `defaults`, or per column (or list) name via
15 | #' `replace`. For data.frames, a `replace` value of `"skip"` skips that column.
16 | #'
17 | #' Missing values (NA's) come up often in FacileDataStores since we often use
18 | #' them to include data from multiple datasets, which induces "ragged" (sparse)
19 | #' covariate (pData) entries.
20 | #'
21 | #' @export
22 | #' @param data the thing that has NA's in it (a data.frame or vector)
23 | #' @param replace a named list of elements to use for custom replacement values
24 | #' @param defaults if named elements in `data` do not appear in `replace`, you
25 | #'   can provide default values per category (`"categorical"` or `"numeric"`),
26 | #'   otherwise `FacileData:::defaults.freplace_na` will be used.
27 | #' @param ignore names of data.frame columns to leave as is; unused for vectors
28 | #' @return an NA-replaced version of `data`
29 | #' @examples
30 | #' data <- data.frame(
31 | #'   a = rnorm(10),
32 | #'   b = letters[1:10],
33 | #'   c = factor(LETTERS[1:10]))
34 | #' data[3, ] <- NA
35 | #' r1 <- freplace_na(data, list(b = "bee"), ignore = "a")
36 | #' r2 <- freplace_na(data, list(b = "bee"), defaults = list(numeric = -Inf))
37 | freplace_na <- function(data, replace = list(), defaults = list(),
38 |                         ignore = character(), ...) {
39 |   UseMethod("freplace_na")
40 | }
41 | 
42 | #' @export
43 | freplace_na.default <- function(data, replace = NULL, defaults = list(),
44 |                                 ignore = character(), ...) {
45 |   # Handles atomic vectors. `ignore` is accepted for signature parity with the
46 |   # data.frame method, but is not used here.
47 |   stopifnot(is.atomic(data))
48 |   na.idx <- is.na(data)
49 |   if (!any(na.idx)) return(data)
50 | 
51 |   # caller-supplied defaults take precedence over the package-level ones
52 |   if (is.null(defaults)) defaults <- list()
53 |   assert_list(defaults, names = "unique")
54 |   defaults <- c(defaults, defaults.freplace_na)
55 |   defaults <- defaults[!duplicated(names(defaults))]
56 | 
57 |   # no explicit replacement: fall back to the default for this kind of data
58 |   if (is.null(replace)) {
59 |     kind <- if (is.numeric(data)) "numeric" else "categorical"
60 |     replace <- defaults[[kind]]
61 |   }
62 | 
63 |   stopifnot(is.atomic(replace) && length(replace) == 1L)
64 | 
65 |   if (!is.numeric(data)) {
66 |     # everything that is not numeric is filled with a string ...
67 |     replace <- as.character(replace)
68 |     if (!is.factor(data)) {
69 |       data <- as.character(data)
70 |     } else if (!is.element(replace, levels(data))) {
71 |       # ... which has to be a known level before it can go into a factor
72 |       levels(data) <- c(levels(data), replace)
73 |     }
74 |   } else if (!test_number(replace)) {
75 |     # numeric data can only be filled with a single number
76 |     stop("Can't replace numerics yet with anything but a number")
77 |   }
78 | 
79 |   # only the positions that were NA on entry are overwritten
80 |   data[na.idx] <- replace
81 |   data
82 | }
83 | 
84 | #' @export
85 | #' @method freplace_na data.frame
86 | freplace_na.data.frame <- function(data, replace = list(), defaults = list(),
87 |                                    ignore = character(), ...) {
88 |   assert_character(ignore, null.ok = TRUE)
89 |   # every column not named in `ignore` is sent back through freplace_na()
90 |   for (cname in setdiff(colnames(data), ignore)) {
91 |     rep.val <- replace[[cname]]
92 |     # a replacement value of "skip" leaves this column as it is
93 |     if (identical(rep.val, "skip")) next
94 |     data[[cname]] <- freplace_na(data[[cname]], rep.val, defaults = defaults)
95 |   }
96 |   data
97 | }
98 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | 
 4 | # FacileData
 5 | 
 6 | <!-- badges: start -->
 7 | 
 8 | [![R build
 9 | status](https://github.com/facilebio/FacileData/workflows/R-CMD-check/badge.svg)](https://github.com/facilebio/FacileData/actions)
10 | ![pkgdown](https://github.com/facilebio/FacileData/workflows/pkgdown/badge.svg)
11 | [![Project
12 | Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
13 | [![Lifecycle:
14 | Maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
15 | [![codecov](https://codecov.io/gh/facilebio/FacileData/branch/master/graph/badge.svg)](https://codecov.io/gh/facilebio/FacileData)
16 | <!-- badges: end -->
17 | 
18 | The `FacileData` package was written to facilitate easier analysis of
19 | large, multi-assay high-throughput genomics datasets. To this end, the
20 | `FacileData` package provides two things:
21 | 
22 | 1.  A *FacileData Access API* that defines a fluent interface over
23 |     multi-assay genomics datasets that fits into the
24 |     [tidyverse](https://www.tidyverse.org/). This enables analysts to
25 |     more naturally query and retrieve data for general exploratory data
26 |     analysis; and
27 | 2.  A reference implementation of a datastore that implements the
28 |     *FacileData Access API* called a *FacileDataSet*. The
29 |     `FacileDataSet` provides efficient storage and retrieval of
30 |     arbitrarily large high-throughput genomics datasets. For example, a
31 |     single `FacileDataSet` can be used to store *all* of the RNA-seq,
32 |     microarray, RPPA, etc. data from the [The Cancer Genome
33 |     Atlas](https://cancergenome.nih.gov/). This singular `FacileDataSet`
34 |     allows analysts easy access to arbitrary subsets of these data
35 |     without having to load all of it into memory.
36 | 
37 | # Installation
38 | 
39 | The FacileData suite of packages is only available from github for now.
40 | You will want to install three `FacileData*` packages to appreciate
41 | its utility:
42 | 
43 | ``` r
44 | # install.packages("devtools")
45 | devtools::install_github("facilebio/FacileData")
46 | ```
47 | 
48 | # Example Usage
49 | 
50 | As a teaser, we’ll show how to plot HER2 copy number vs expression
51 | across the TCGA bladder and breast indications (“BLCA” and “BRCA”) using
52 | a `FacileDataSet`.
53 | 
54 | ``` r
55 | library(ggplot2)
56 | library(FacileData)
57 | library(FacileTCGADataSet)
58 | tcga <- FacileTCGADataSet()
59 | 
60 | features <- filter_features(tcga, name == "ERBB2")
61 | 
62 | fdat <- tcga %>%
63 |   filter_samples(indication %in% c("BLCA", "BRCA")) %>%
64 |   with_assay_data(features, assay_name = "rnaseq", normalized = TRUE) %>%
65 |   with_assay_data(features, assay_name = "cnv_score") %>%
66 |   with_sample_covariates(c("indication", "sex"))
67 | 
68 | ggplot(fdat, aes(cnv_score_ERBB2, ERBB2, color = sex)) +
69 |   geom_point() +
70 |   facet_wrap(~ indication)
71 | ```
72 | 
73 | <img src="man/figures/her2_cnv_vs_expression.png" width="66%" />
74 | 
75 | Let’s compare how you might do the same using data stored in a
76 | `SummarizedExperiment` named `se.all` that stores RNA-seq (raw and
77 | normalized) and copy number data.
78 | 
79 | ``` r
80 | # load / get `se.all` from somewhere
81 | fidx <- which(mcols(se.all)$name == "ERBB2")
82 | se <- se.all[, se.all$indication %in% c("BLCA", "BRCA")]
83 | 
84 | sdat <- data.frame(
85 |   ERBB2 = assay(se, "rnaseq_norm")[fidx,],
86 |   cnv_score_ERBB2 = assay(se, "cnv_score")[fidx,],
87 |   sex = se$sex,
88 |   indication = se$indication)
89 | # plot the `sdat` data.frame that was just assembled from `se`
90 | ggplot(sdat, aes(cnv_score_ERBB2, ERBB2, color=sex)) +
91 |   geom_point() +
92 |   facet_wrap(~ indication)
93 | ```
94 | 
95 | TODO: Show same analysis using MultiAssayExperiment
96 | 


--------------------------------------------------------------------------------
/man/sample-covariates.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/api.R, R/sample-covariates.R
  3 | \name{fetch_sample_covariates}
  4 | \alias{fetch_sample_covariates}
  5 | \alias{with_sample_covariates}
  6 | \alias{fetch_sample_covariates.FacileDataSet}
  7 | \alias{fetch_sample_covariates.facile_frame}
  8 | \title{Appends covariate columns to a query result}
  9 | \usage{
 10 | fetch_sample_covariates(
 11 |   x,
 12 |   samples = NULL,
 13 |   covariates = NULL,
 14 |   custom_key = Sys.getenv("USER"),
 15 |   with_source = FALSE,
 16 |   ...
 17 | )
 18 | 
 19 | with_sample_covariates(
 20 |   x,
 21 |   covariates = NULL,
 22 |   na.rm = FALSE,
 23 |   custom_key = Sys.getenv("USER"),
 24 |   .fds = NULL,
 25 |   ...
 26 | )
 27 | 
 28 | \method{fetch_sample_covariates}{FacileDataSet}(
 29 |   x,
 30 |   samples = NULL,
 31 |   covariates = NULL,
 32 |   custom_key = Sys.getenv("USER"),
 33 |   with_source = FALSE,
 34 |   ...
 35 | )
 36 | 
 37 | \method{fetch_sample_covariates}{facile_frame}(
 38 |   x,
 39 |   samples = NULL,
 40 |   covariates = NULL,
 41 |   custom_key = Sys.getenv("USER"),
 42 |   with_source = FALSE,
 43 |   ...
 44 | )
 45 | }
 46 | \arguments{
 47 | \item{x}{a \code{FacileDataSet} connection}
 48 | 
 49 | \item{samples}{a samples descriptor \code{tbl_*}}
 50 | 
 51 | \item{covariates}{character vector of covariate names}
 52 | 
 53 | \item{custom_key}{The key to use to fetch more custom annotations over
 54 | the given samples}
 55 | 
 56 | \item{na.rm}{if \code{TRUE}, filters outgoing result such that only rows
 57 | with non-NA values for the \code{covariates} specified here will be
 58 | returned. Default: \code{FALSE}. Note that this will not check columns
 59 | not specified in \code{covariates} for NA-ness.}
 60 | 
 61 | \item{.fds}{A \code{FacileDataSet} object}
 62 | }
 63 | \value{
 64 | The facile \code{x} object, annotated with the specified covariates.
 65 | 
 66 | rows from the \code{sample_covariate} table
 67 | }
 68 | \description{
 69 | Note that this function will force the collection of \code{x}
 70 | }
 71 | \seealso{
 72 | Other FacileInterface: 
 73 | \code{\link{facet_frame.FacileDataSet}()},
 74 | \code{\link{fetch_assay_score}()},
 75 | \code{\link{fetch_sample_statistics}()},
 76 | \code{\link{samples}()}
 77 | 
 78 | Other API: 
 79 | \code{\link{fetch_assay_score.FacileDataSet}()},
 80 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
 81 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
 82 | \code{\link{fetch_samples.FacileDataSet}()},
 83 | \code{\link{filter_features.FacileDataSet}()},
 84 | \code{\link{filter_samples.FacileDataSet}()},
 85 | \code{\link{organism.FacileDataSet}()},
 86 | \code{\link{samples.FacileDataSet}()}
 87 | 
 88 | Other FacileInterface: 
 89 | \code{\link{facet_frame.FacileDataSet}()},
 90 | \code{\link{fetch_assay_score}()},
 91 | \code{\link{fetch_sample_statistics}()},
 92 | \code{\link{samples}()}
 93 | 
 94 | Other API: 
 95 | \code{\link{fetch_assay_score.FacileDataSet}()},
 96 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
 97 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
 98 | \code{\link{fetch_samples.FacileDataSet}()},
 99 | \code{\link{filter_features.FacileDataSet}()},
100 | \code{\link{filter_samples.FacileDataSet}()},
101 | \code{\link{organism.FacileDataSet}()},
102 | \code{\link{samples.FacileDataSet}()}
103 | 
104 | Other API: 
105 | \code{\link{fetch_assay_score.FacileDataSet}()},
106 | \code{\link{fetch_custom_sample_covariates.FacileDataSet}()},
107 | \code{\link{fetch_sample_statistics.FacileDataSet}()},
108 | \code{\link{fetch_samples.FacileDataSet}()},
109 | \code{\link{filter_features.FacileDataSet}()},
110 | \code{\link{filter_samples.FacileDataSet}()},
111 | \code{\link{organism.FacileDataSet}()},
112 | \code{\link{samples.FacileDataSet}()}
113 | }
114 | \concept{API}
115 | \concept{FacileInterface}
116 | 


--------------------------------------------------------------------------------
/man/remove_batch_effect.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/remove_batch_effect.R
 3 | \name{remove_batch_effect}
 4 | \alias{remove_batch_effect}
 5 | \title{Regress out confounding variables from a data matrix.}
 6 | \usage{
 7 | remove_batch_effect(
 8 |   x,
 9 |   sample_info,
10 |   batch = NULL,
11 |   main = NULL,
12 |   maintain.rowmeans = FALSE,
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{x}{A matrix of values that needs to be corrected}
18 | 
19 | \item{sample_info}{a data.frame of covariate information for the data in \code{x}.
20 | The rows of \code{sample_info} are assumed to match the columns of \code{x}. This
21 | data.frame should have the covariates named in \code{batch} and \code{main} to use
22 | for the correction. If \code{sample_info} is a \code{facile_frame}, we will endeavor
23 | to pull any covariate named in \code{batch} and \code{main} that do not already
24 | appear in the columns of \code{sample_info}. Unlike limma's removeBatchEffect,
25 | we do not try to fish out the covariate values from anywhere in the
26 | "ether". They \emph{must} be found in this data.frame.}
27 | 
28 | \item{batch}{The column names in \code{sample_info} that specify the batch
29 | covariates in the data that will be regressed out.}
30 | 
31 | \item{main}{The name of a covariate in \code{sample_info} that contains a known
32 | covariate that describes the "effect" of an experiment that should not
33 | be regressed out. Please refer to the Details section for more information.}
34 | }
35 | \value{
36 | a corrected version of the data matrix \code{x}.
37 | }
38 | \description{
39 | Data \code{x} is assumed to be log-like, and this function provides a simplified
40 | interface to \code{\link[limma:removeBatchEffect]{limma::removeBatchEffect()}}.  The \code{batch} parameter replaces
41 | \code{batch}, \code{batch2}, and \code{covariates}. The \code{design} parameter is replaced with
42 | \code{main}. This function is mostly for use within the
43 | \code{fetch_assay_data(..., normalized = TRUE, batch = 'something')} pipeline,
44 | but refactored out here for general re-use.
45 | }
46 | \details{
47 | The \code{batch} and \code{main} parameters must be characters that will either
48 | reference already existing columns in the \code{sample_info}, or be covariates
49 | that can be retrieved from a FacileDataStore that is attached to the
50 | sample_info facile_frame.
51 | 
52 | We'll use these parameters to build a model.matrix with main and batch
53 | effect and follow the use of \code{removeBatchEffect} as outlined in the post
54 | linked to below to pull the design matrix apart and call the function with
55 | the corresponding \code{design} and \code{covariates} parameters:
56 | 
57 | https://support.bioconductor.org/p/83286/#83287
58 | 
59 | Setting the \code{maintain.rowmeans} parameter to \code{TRUE} (the default is \code{FALSE}) ensures
60 | that the \code{rowMeans} of the returned data matrix are the same as the original
61 | dataset.
62 | }
63 | \section{Missing values in batch covariates}{
64 | 
65 | It can be that some of the values of the \code{batch} and \code{main} covariates
66 | are missing (\code{NA}). When these covariates are categorical, all missing values
67 | will be replaced with a dummy value using the logic from \code{\link[=freplace_na]{freplace_na()}}.
68 | 
69 | If numeric covariates are missing, then this will throw an error.
70 | }
71 | 
72 | \examples{
73 | # We'll materialize a data matrix and sample_info table from the
74 | # exampleFacileDataSet, then correct the data matrix.
75 | efds <- exampleFacileDataSet()
76 | sample.info <- efds \%>\%
77 |   filter_samples(indication == "CRC") \%>\%
78 |   with_sample_covariates()
79 | m <- fetch_assay_data(sample.info, normalized = TRUE, as.matrix = TRUE)
80 | m.rmsex <- remove_batch_effect(m, sample.info, "sex")
81 | 
82 | # this functionality is called internally from fetch_assay_data to make
83 | # your life easy from within the facile ecosystem itself
84 | m2 <- fetch_assay_data(sample.info, normalized = TRUE,
85 |                        batch = "sex", as.matrix = TRUE)
86 | all.equal(m.rmsex, m2)
87 | }
88 | \seealso{
89 | \code{\link[=fetch_assay_data]{fetch_assay_data()}} when \code{batch = "something"}
90 | }
91 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, echo = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   echo = TRUE,
 11 |   message = FALSE,
 12 |   error = FALSE,
 13 |   comment = "#>",
 14 |   fig.path = "man/figures/"
 15 | )
 16 | ```
 17 | 
 18 | # FacileData
 19 | 
 20 | <!-- badges: start -->
 21 | [![R build status](https://github.com/facilebio/FacileData/workflows/R-CMD-check/badge.svg)](https://github.com/facilebio/FacileData/actions)
 22 | ![pkgdown](https://github.com/facilebio/FacileData/workflows/pkgdown/badge.svg)
 23 | [![Project Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
 24 | [![Lifecycle: Maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
 25 | [![codecov](https://codecov.io/gh/facilebio/FacileData/branch/master/graph/badge.svg)](https://codecov.io/gh/facilebio/FacileData)
 26 | <!-- badges: end -->
 27 | 
 28 | The `FacileData` package was written to facilitate easier analysis of large,
 29 | multi-assay high-throughput genomics datasets. To this end, the `FacileData`
 30 | package provides two things:
 31 | 
 32 | 1. A *FacileData Access API* that defines a fluent interface over multi-assay
 33 |    genomics datasets that fits into the [tidyverse][tidyverse]. This enables
 34 |    analysts to more naturally query and retrieve data for general exploratory
 35 |    data analysis; and
 36 | 2. A reference implementation of a datastore that implements the
 37 |    *FacileData Access API* called a *FacileDataSet*. The `FacileDataSet`
 38 |    provides efficient storage and retrieval of arbitrarily large high-throughput
 39 |    genomics datasets. For example, a single `FacileDataSet` can be used to store
 40 |    *all* of the RNA-seq, microarray, RPPA, etc. data from the
 41 |    [The Cancer Genome Atlas][tcga]. This singular `FacileDataSet` allows
 42 |    analysts easy access to arbitrary subsets of these data without having to
 43 |    load all of it into memory.
 44 | 
 45 | [tcga]: https://cancergenome.nih.gov/
 46 | [tidyverse]: https://www.tidyverse.org/
 47 | 
 48 | # Installation
 49 | 
 50 | The FacileData suite of packages is only available from github for now. You
 51 | will want to install three `FacileData*` packages to appreciate its utility:
 52 | 
 53 | ```{r gh-installation, eval = FALSE}
 54 | # install.packages("devtools")
 55 | devtools::install_github("facilebio/FacileData")
 56 | ```
 57 | 
 58 | # Example Usage
 59 | 
 60 | As a teaser, we'll show how to plot HER2 copy number vs expression across the
 61 | TCGA bladder and breast indications ("BLCA" and "BRCA") using a `FacileDataSet`.
 62 | 
 63 | ```{r her2-cnv-expression, eval = FALSE}
 64 | library(ggplot2)
 65 | library(FacileData)
 66 | library(FacileTCGADataSet)
 67 | tcga <- FacileTCGADataSet()
 68 | 
 69 | features <- filter_features(tcga, name == "ERBB2")
 70 | 
 71 | fdat <- tcga %>%
 72 |   filter_samples(indication %in% c("BLCA", "BRCA")) %>%
 73 |   with_assay_data(features, assay_name = "rnaseq", normalized = TRUE) %>%
 74 |   with_assay_data(features, assay_name = "cnv_score") %>%
 75 |   with_sample_covariates(c("indication", "sex"))
 76 | 
 77 | ggplot(fdat, aes(cnv_score_ERBB2, ERBB2, color = sex)) +
 78 |   geom_point() +
 79 |   facet_wrap(~ indication)
 80 | ```
 81 | 
 82 | <img src="man/figures/her2_cnv_vs_expression.png" width="66%" />
 83 | 
 84 | Let's compare how you might do the same using data stored in a
 85 | `SummarizedExperiment` named `se.all` that stores RNA-seq (raw and normalized)
 86 | and copy number data.
 87 | 
 88 | ```{r example-sumexp, eval = FALSE}
 89 | # load / get `se.all` from somewhere
 90 | fidx <- which(mcols(se.all)$name == "ERBB2")
 91 | se <- se.all[, se.all$indication %in% c("BLCA", "BRCA")]
 92 | 
 93 | sdat <- data.frame(
 94 |   ERBB2 = assay(se, "rnaseq_norm")[fidx,],
 95 |   cnv_score_ERBB2 = assay(se, "cnv_score")[fidx,],
 96 |   sex = se$sex,
 97 |   indication = se$indication)
 98 | # plot the `sdat` data.frame that was just assembled from `se`
 99 | ggplot(sdat, aes(cnv_score_ERBB2, ERBB2, color=sex)) +
100 |   geom_point() +
101 |   facet_wrap(~ indication)
102 | ```
103 | 
104 | TODO: Show same analysis using MultiAssayExperiment
105 | 


--------------------------------------------------------------------------------
/tests/testthat/test-bioc-assay-containers.R:
--------------------------------------------------------------------------------
  1 | context("Exercise as.DGEList (deprecated in favor of biocbox())")
  2 | 
  3 | if (!exists("FDS")) FDS <- exampleFacileDataSet()
  4 | # fixtures shared by the tests below: stage III samples and a few feature ids
  5 | samples <- sample_covariate_tbl(FDS) %>%
  6 |   dplyr::filter(variable == 'stage' & value == 'III') %>%
  7 |   dplyr::select(dataset, sample_id)
  8 | genes <- local({
  9 |   out <- c("800", "1009", "1289", "50509", "2191", "2335", "5159")
 10 |   feature_info_tbl(FDS) %>%
 11 |     dplyr::filter(feature_id %in% out) %>%
 12 |     dplyr::collect() %>%
 13 |     dplyr::pull(feature_id)
 14 | })
 15 | 
 16 | test_that("fetch_assay_data results converted to DGEList", {
 17 |   e <- fetch_assay_data(FDS, genes, samples)
 18 |   y <- as.DGEList(e)
 19 |   expect_is(y, 'DGEList')
 20 | 
 21 |   ## check samples
 22 |   expect_is(y$samples, 'data.frame')
 23 |   expect_true(setequal(y$samples$sample_id, collect(samples)$sample_id))
 24 |   expect_type(y$samples$norm.factors, 'double')
 25 |   expect_type(y$samples$lib.size, 'double')
 26 |   expect_type(y$samples$dataset, 'character')
 27 |   expect_type(y$samples$sample_id, 'character')
 28 | 
 29 |   ## check genes
 30 |   expect_is(y$genes, 'data.frame')
 31 |   expect_type(y$genes$feature_id, 'character')
 32 |   expect_true(setequal(y$genes$feature_id, genes))
 33 |   expect_type(y$genes$symbol, 'character')
 34 |   ## Check that counts match up in DGEList as they would from raw matrix fetch
 35 |   ## (the argument is `normalized`, as in every other fetch_assay_data call)
 36 |   m <- fetch_assay_data(FDS, genes, samples, normalized=FALSE, as.matrix=TRUE)
 37 |   expect_true(setequal(rownames(m), rownames(y)))
 38 |   expect_true(setequal(colnames(m), colnames(y)))
 39 |   expect_equal(m[rownames(y), colnames(y)], y$counts)
 40 | })
 40 | 
 41 | test_that("as.DGEList appends custom covariate table correctly", {
 42 |   cov.names <- c("sex", "subtype_molecular")
 43 |   annotated <- with_sample_covariates(samples, cov.names)
 44 | 
 45 |   y.ref <- as.DGEList(annotated)
 46 |   y.test <- as.DGEList(annotated, covariates = annotated)
 47 | 
 48 |   expect_equal(dim(y.test), dim(y.ref))
 49 |   expect_equal(colnames(y.test), colnames(y.ref))
 50 | 
 51 |   # columns expected in y$samples, including the requested covariates
 52 |   expected.scols <- c(
 53 |     "group", "lib.size", "norm.factors", "dataset", "sample_id", "samid",
 54 |     cov.names)
 55 |   expect_set_equal(colnames(y.test$samples), expected.scols)
 56 | 
 57 |   for (cname in cov.names) {
 58 |     expect_equal(y.test$samples[[cname]], y.ref$samples[[cname]],
 59 |                  info = paste("sample <-> custom covariate match:", cname))
 60 |   }
 61 | })
 62 | 
 63 | test_that("as.DGEList with custom lib.size and norm.factors works", {
 64 |   set.seed(100)
 65 |   y.all <- as.DGEList(samples)
 66 |   y.some <- y.all[sample(nrow(y.all), 1000),, keep.lib.sizes = FALSE]
 67 |   y.some <- edgeR::calcNormFactors(y.some)
 68 | 
 69 |   assert_true(all(y.all$samples$lib.size > y.some$samples$lib.size))
 70 |   assert_false(any(y.all$samples$norm.factors == y.some$samples$norm.factors))
 71 | 
 72 |   # cpm calculation with stored libsize and normfactors
 73 |   cpms.orig <- fetch_assay_data(samples, features = rownames(y.some),
 74 |                                 as.matrix = TRUE, normalized = TRUE, log = TRUE,
 75 |                                 prior.count = 2)
 76 | 
 77 |   # Add custom lib.size and norm.factors to the sample facile_frame
 78 |   samples. <- samples %>%
 79 |     collect() %>%
 80 |     left_join(select(y.some$samples, sample_id, lib.size, norm.factors),
 81 |               by = "sample_id")
 82 | 
 83 |   cpms.f <- fetch_assay_data(samples., features = rownames(y.some),
 84 |                              as.matrix = TRUE, normalized = TRUE, log = TRUE,
 85 |                              prior.count = 2)
 86 |   expect_equal(rownames(cpms.orig), rownames(cpms.f))
 87 |   expect_equal(colnames(cpms.orig), colnames(cpms.f))
 88 | 
 89 |   # This is an explicit test to make sure that the differences in the CPMs are
 90 |   # not zero
 91 |   mean.diff.orig <- mean(abs(cpms.f - cpms.orig))
 92 |   expect_gt(mean.diff.orig, 0)
 93 | 
 94 |   cpms.e1 <- edgeR::cpm(y.some, log = TRUE, prior.count = 2)
 95 |   expect_setequal(rownames(cpms.f), rownames(cpms.e1))
 96 |   expect_setequal(colnames(cpms.f), colnames(cpms.e1))
 97 |   cpms.f <- cpms.f[rownames(cpms.e1), colnames(cpms.e1)]
 98 |   mean.diff.new <- mean(abs(cpms.f - cpms.e1))
 99 |   expect_equal(mean.diff.new, 0)
100 | })
101 | 


--------------------------------------------------------------------------------
/R/NSE-filter-samples.R:
--------------------------------------------------------------------------------
  1 | #' Filter against the sample_covariate_tbl EAV table as if it were wide.
  2 | #'
  3 | #' This allows the user to query the `FacileDataSet` as if it were a wide
  4 | #' `pData` `data.frame` of all its covariates. It is only really meant to be
  5 | #' used interactively, and with extreme caution: programmatically specifying
  6 | #' the covariates, for instance, does not work right now.
  7 | #'
  8 | #' TODO: Implement using `tidyeval`
  9 | #'
 10 | #' @export
 11 | #' @family API
 12 | #'
 13 | #' @param x A `FacileDataSet`
 14 | #' @param ... NSE clauses to use in [dplyr::filter()] expressions
 15 | #' @param samples. the sample descriptor to filter; defaults to `samples(x)`
 16 | #' @param custom_key passed along when the wide covariate table is assembled
 17 | #' @param with_covariates if `FALSE`, only `dataset` and `sample_id` are kept
 18 | #' @return a sample-descriptor `data.frame` that includes the dataset,sample_id
 19 | #'   pairs that match the virtual `filter(covariates, ...)` clause executed here.
 20 | #' @examples
 21 | #' fds <- exampleFacileDataSet()
 22 | #'
 23 | #' # To identify all samples that are of "CMS3" or "CMS4" subtype
 24 | #' # (stored in the "subtype_crc_cms" covariate):
 25 | #' crc.34 <- filter_samples(fds, subtype_crc_cms %in% c("CMS3", "CMS4"))
 26 | #' eav.query <- fds %>%
 27 | #'   fetch_sample_covariates(covariates = "subtype_crc_cms") %>%
 28 | #'   filter(value %in% c("CMS3", "CMS4")) %>%
 29 | #'   collect()
 30 | #' setequal(crc.34$sample_id, eav.query$sample_id)
 31 | #'
 32 | #' # You can keep filtering a filtered dataset
 33 | #' crc.34.male <- filter_samples(crc.34, sex == "m")
 34 | filter_samples.FacileDataSet <- function(x, ..., samples. = samples(x),
 35 |                                          custom_key = Sys.getenv("USER"),
 36 |                                          with_covariates = FALSE) {
 37 |   # Evaluate the `samples.` promise (default: `samples(x)`) up front, before it
 38 |   # is validated and used to build the covariate table that is filtered below.
 39 | 
 40 |   force(samples.)
 41 |   assert_sample_subset(samples.)
 42 | 
 43 |   cov.table <- .create_wide_covariate_table(x, samples., ...,
 44 |                                             custom_key = custom_key)
 45 |   out <- filter(cov.table, ...)
 46 |   if (!with_covariates) {
 47 |     out <- select(out, dataset, sample_id)
 48 |   }
 49 |   if (nrow(out) == 0L) {
 50 |     warning("All samples have been filtered out", immediate. = TRUE)
 51 |   }
 52 |   as_facile_frame(out, x)
 53 | }
 54 | 
 55 | #' @noRd
 56 | #' @export
 57 | filter_samples.facile_frame <- function(x, ...,
 58 |                                         custom_key = Sys.getenv("USER"),
 59 |                                         with_covariates = FALSE) {
 60 |   .fds <- assert_facile_data_store(fds(x)) # the data store attached to `x`
 61 |   assert_sample_subset(x)
 62 |   filter_samples(.fds, ..., samples. = x, custom_key = custom_key, # `x` itself
 63 |                  with_covariates = with_covariates) # is the subset to filter
 64 | }
 65 | 
 66 | #' @noRd
 67 | #' @importFrom lazyeval lazy_dots
 68 | .create_wide_covariate_table <- function(x, samples, ...,
 69 |                                          custom_key = Sys.getenv("USER")) {
 70 |   assert_facile_data_store(x)
 71 |   assert_sample_subset(samples)
 72 |   # Builds the table (one row per dataset,sample_id) that `...` is filtered on
 73 |   out <- fetch_sample_covariates(x, samples = samples, custom_key = custom_key)
 74 |   dots <- lazy_dots(...)
 75 |   qvars <- .parse_filter_vars(x, dots)
 76 | 
 77 |   # TODO: check if any of the query variables are dataset or sample_id, then
 78 |   # filter `out` on the dataset or sample_id columns, THEN play with the
 79 |   # other sample covariates (sc)
 80 |   pk.vars <- intersect(qvars, c("dataset", "sample_id"))
 81 |   # NOTE: `pk.vars` is not used yet; the intended (unimplemented) step was
 82 |   #   if (length(pk.vars)) out <- filter(out, pk.part.of.query)
 83 |   # so clauses on the key columns are only applied later by the caller.
 84 | 
 85 |   sc.vars <- setdiff(qvars, c("dataset", "sample_id"))
 86 |   if (length(sc.vars)) { # keep only the rows for the queried covariates
 87 |     out <- filter(out, variable %in% !!qvars)
 88 |   }
 89 |   out %>%
 90 |     spread_covariates() %>%
 91 |     distinct(dataset, sample_id, .keep_all = TRUE)
 92 | }
 93 | 
 94 | #' @noRd
 95 | #' @importFrom lazyeval auto_name
 96 | .parse_filter_vars <- function(x, dots) {
 97 |   # Names of known covariates (or dataset/sample_id) mentioned in `dots`.
 98 |   assert_facile_data_store(x)
 99 |   stopifnot(is(dots, 'lazy_dots'))
100 |   all.vars <- sample_covariate_tbl(x) %>%
101 |     distinct(variable) %>%
102 |     collect(n=Inf)
103 |   all.vars <- c(all.vars$variable, "dataset", "sample_id")
104 |   dot.exprs <- names(auto_name(dots))
105 |   # Match names literally: as a regex "lib.size" would also hit "libXsize"
106 |   hits <- sapply(all.vars, function(v) any(grepl(v, dot.exprs, fixed = TRUE)))
107 |   out <- names(hits)[hits]
108 |   if (length(out) == 0) {
109 |     stop("No sample covariates found in query: ",
110 |          paste(dot.exprs, collapse=';'))
111 |   }
112 |   out
113 | }
114 | 


--------------------------------------------------------------------------------
/man/FacileDataSet.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/FacileDataSet.R
  3 | \name{FacileDataSet}
  4 | \alias{FacileDataSet}
  5 | \title{Instantiates a FacileDataSet object from disk.}
  6 | \usage{
  7 | FacileDataSet(
  8 |   path,
  9 |   data.fn = NULL,
 10 |   sqlite.fn = NULL,
 11 |   hdf5.fn = NULL,
 12 |   meta.fn = NULL,
 13 |   anno.dir = NULL,
 14 |   cache_size = 80000,
 15 |   db.loc = c("reference", "temporary", "memory"),
 16 |   ...
 17 | )
 18 | }
 19 | \arguments{
 20 | \item{path}{The path to the FacileData repository}
 21 | 
 22 | \item{data.fn}{A custom path to the database (probably don't mess with this)}
 23 | 
 24 | \item{sqlite.fn}{name of SQLite data file in FacileDataSet}
 25 | 
 26 | \item{hdf5.fn}{name of HDF5 data file in FacileDataSet}
 27 | 
 28 | \item{meta.fn}{name of metadata YAML data file in FacileDataSet}
 29 | 
 30 | \item{anno.dir}{A directory to house custom annotations/sample covariates}
 31 | 
 32 | \item{cache_size}{A custom parameter for the SQLite database}
 33 | 
 34 | \item{db.loc}{single character, location for the data}
 35 | 
 36 | \item{...}{other args to pass down, not used at the moment}
 37 | 
 38 | \item{covdef.fn}{A custom path to the yaml file that has covariate mapping info}
 39 | }
 40 | \value{
 41 | a \code{FacileDataSet} object
 42 | }
 43 | \description{
 44 | The \code{FacileDataSet} is a reference data storage implementation that
 45 | implements the \strong{FacileData Access API}. It facilitates the storage and
 46 | retrieval of large amounts of data by leveraging a SQLite database to store
 47 | sample- and feature-level metadata ("\code{pData}" and "\code{fData}"), and an HDF5
 48 | file to store all of the dense assay (matrix) data (gene counts, microarray
 49 | intensities, etc.) run over the samples.
 50 | }
 51 | \details{
 52 | A \code{FacileDataSet} is materialized on disk by a well-structured directory,
 53 | which minimally includes the following items:
 54 | \enumerate{
 55 | \item A \code{data.sqlite} SQLite database that stores feature and sample metadata
 56 | \item A \code{data.h5} HDF5 file that stores a multitude of dense assay matrices that
 57 | are generated from the assays performed on the samples in the
 58 | \code{FacileDataSet}.
 59 | \item A \code{meta.yaml} file that contains information about the \code{FacileDataSet}.
 60 | To better understand the structure and contents of this file, you can
 61 | refer to the following:
 62 | a. The included \code{testdata/expected-meta.yaml} file, which is an
 63 | exemplar file for \code{\link[=exampleFacileDataSet]{exampleFacileDataSet()}}.
 64 | b. The help file provided by the \code{\link[=eav_metadata_create]{eav_metadata_create()}} function, which
 65 | describes in greater detail how we track a dataset's sample-level
 66 | covariates (aka, "pData" in the bioconductor world).
 67 | In the meantime, a short description of the entries found in the
 68 | \code{meta.yaml} file is provided here:
 69 | \itemize{
 70 | \item \code{name}: the name of the dataset (ie. \code{"FacileTCGADataSet"})
 71 | \item \code{organism}: \code{"Homo sapiens"}, \code{"Mus musculus"}, etc.
 72 | \item \code{default_assay}: the name of the assay to use by default if none is
 73 | specified in calls to \code{\link[=fetch_assay_data]{fetch_assay_data()}}, \code{\link[=with_assay_data]{with_assay_data()}}, etc.
 74 | (kind of like how \code{"exprs"} is the default assay used when working with
 75 | a \code{Biobase::ExpressionSet})
 76 | \item \code{datasets}: a section that enumerates the datasets included internally.
 77 | The datasets are further enumerated.
 78 | \item \code{sample_covariates}: a section that enumerates the covariates that
 79 | are tracked over the samples inside the \code{FacileDataSet} (ie. a mapping
 80 | of the \code{pData} for the samples). Reference \code{\link[=eav_metadata_create]{eav_metadata_create()}}
 81 | for more information.
 82 | }
 83 | \item A \code{custom-annotation} directory, which stores custom \code{sample_covariate}
 84 | (aka "pData") information that analysts can identify and describe during
 85 | the course of an analysis, or even add from external sources. Although
 86 | this directory is required in the directory structure of a valid
 87 | \code{FacileDataSet}, the \code{FacileDataSet()} constructor can be called with
 88 | a custom \code{anno.dir} parameter so that custom annotations are stored
 89 | elsewhere.
 90 | }
 91 | }
 92 | \examples{
 93 | fn <- system.file("extdata", "exampleFacileDataSet", package = "FacileData")
 94 | fds <- FacileDataSet(fn)
 95 | }
 96 | \seealso{
 97 | Other FacileDataSet: 
 98 | \code{\link{dbfn}()},
 99 | \code{\link{hdf5fn}()},
100 | \code{\link{meta_file}()}
101 | }
102 | \concept{FacileDataSet}
103 | 


--------------------------------------------------------------------------------
/man/fetch_assay_data.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/api.R, R/assay-data.R
  3 | \name{fetch_assay_data}
  4 | \alias{fetch_assay_data}
  5 | \alias{with_assay_data.facile_frame}
  6 | \title{Fetch assay data from single assay of choice}
  7 | \usage{
  8 | fetch_assay_data(
  9 |   x,
 10 |   features,
 11 |   samples = NULL,
 12 |   assay_name = ndefault_assay(x),
 13 |   normalized = FALSE,
 14 |   batch = NULL,
 15 |   main = NULL,
 16 |   as.matrix = FALSE,
 17 |   ...,
 18 |   subset.threshold = 700,
 19 |   aggregate = FALSE,
 20 |   aggregate.by = "ewm",
 21 |   verbose = FALSE
 22 | )
 23 | 
 24 | \method{with_assay_data}{facile_frame}(
 25 |   x,
 26 |   features,
 27 |   assay_name = NULL,
 28 |   normalized = TRUE,
 29 |   aggregate = FALSE,
 30 |   aggregate.by = "ewm",
 31 |   spread = TRUE,
 32 |   with_assay_name = FALSE,
 33 |   ...,
 34 |   verbose = FALSE,
 35 |   .fds = fds(x)
 36 | )
 37 | }
 38 | \arguments{
 39 | \item{x}{A \code{FacileDataStore} object, or \code{facile_frame}}
 40 | 
 41 | \item{features}{a feature descriptor (data.frame with assay and feature_id
 42 | columns)}
 43 | 
 44 | \item{samples}{a samples descriptor}
 45 | 
 46 | \item{assay_name}{the name of the assay to fetch data from. Defaults to the
 47 | value of \code{\link[=default_assay]{default_assay()}} for \code{x}. Must be a subset of \code{assay_names(x)}.}
 48 | 
 49 | \item{normalized}{return normalized or raw data values, defaults to \code{FALSE}.
 50 | This is only really "functional" for \code{assay_type = "rnaseq"} types
 51 | of assays, where the normalized data is log2(CPM). These values can
 52 | be tweaked with \code{log = (TRUE|FALSE)} and \code{prior.count} parameters, which
 53 | can be passed down internally to (eventually) \code{\link[edgeR:cpm]{edgeR::cpm()}}.}
 54 | 
 55 | \item{batch}{The column names in \code{sample_info} that specify the batch
 56 | covariates in the data that will be regressed out.}
 57 | 
 58 | \item{main}{The name of a covariate in \code{sample_info} that contains a known
 59 | covariate that describes the "effect" of an experiment that should not
 60 | be regressed out. Please refer to the Details section for more information.}
 61 | 
 62 | \item{as.matrix}{by default, the data is returned in a long-form tbl-like
 63 | result. If set to \code{TRUE}, the data is returned as a matrix.}
 64 | 
 65 | \item{...}{parameters to pass to normalization methods}
 66 | 
 67 | \item{subset.threshold}{sometimes fetching all the genes is faster than
 68 | trying to subset. We have to figure out why that is, but I've previously
 69 | tested random features of different lengths, and around 700 features was
 70 | the elbow.}
 71 | 
 72 | \item{aggregate.by}{do you want individual level results or geneset
 73 | scores? Use 'ewm' for eigenWeightedMean, and that's all.}
 74 | 
 75 | \item{.fds}{A \code{FacileDataSet} object}
 76 | 
 77 | \item{feature_ids}{character vector of feature_ids}
 78 | 
 79 | \item{with_symbols}{Do you want gene symbols returned, too?}
 80 | }
 81 | \value{
 82 | A \code{tibble} (lazy or not) with assay data.
 83 | 
 84 | a tbl-like result
 85 | }
 86 | \description{
 87 | The \verb{(fetch|with)_assay_data} functions are some of the main workhorse
 88 | functions of the facile ecosystem. These calls enable you to retrieve
 89 | raw and normalized assay data from a FacileData container.
 90 | }
 91 | \details{
 92 | \code{fetch_assay_data(x, ...)} will return the data in long form.
 93 | \code{with_assay_data(x, ...)} is most typically used when you already have
 94 | a dataset \code{x} (a \code{facile_frame}) that you want to decorate with more assay
 95 | data. The assay data asked for will be appended on to \code{x} in wide format.
 96 | Because \code{fetch} is (most often) used at a lower level of granularity,
 97 | \code{normalized} is by default set to \code{FALSE}, while it is set to \code{TRUE} in
 98 | \code{with_assay_data}.
 99 | }
100 | \section{Removing Batch Effects}{
101 | 
102 | When normalized data is returned, we assume these data are log-like, and you
103 | have the option to regress out batch effects using our
104 | \code{\link[=remove_batch_effect]{remove_batch_effect()}} wrapper to \code{\link[limma:removeBatchEffect]{limma::removeBatchEffect()}}.
105 | }
106 | 
107 | \examples{
108 | samples <- exampleFacileDataSet() \%>\%
109 |   filter_samples(indication == "BLCA", sample_type == "tumor")
110 | features <- c(PRF1='5551', GZMA='3001', CD274='29126')
111 | dat <- with_assay_data(samples, features, normalized = TRUE, batch = "sex")
112 | dat <- with_assay_data(samples, features, normalized = TRUE,
113 |                        batch = c("sex", "stage"))
114 | dat <- with_assay_data(samples, features, normalized = TRUE,
115 |                        batch = c("sex", "stage"), main = "sample_type")
116 | }
117 | 


--------------------------------------------------------------------------------
/R/assemble_example_dataset.R:
--------------------------------------------------------------------------------
  1 | #' Assembles an example facile dataset to play with
  2 | #'
  3 | #' This combines the airway and parathyroidSE RNA-seq datasets into a single
  4 | #' FacileDataSet. Requires the SummarizedExperiment, S4Vectors, airway, and
  5 | #' parathyroidSE packages.
  6 | #'
  7 | #' The code here is extracted from the `FacileDataSet-assembly` vignette. Please
  8 | #' read that for the whys and hows of the decisions made when assembling it.
  9 | #'
 10 | #' @export
 11 | #' @param directory The name of the parent directory to hold the dataset
 12 | #' @param name A subdirectory within `directory` will be created using this
 13 | #'   name. It is an error if `file.path(directory, name)` already exists.
 14 | #' @return The FacileDataSet object itself.
 15 | assemble_example_dataset <- function(directory = tempdir(),
 16 |                                      name = "ExampleRnaFacileDataSet") {
 17 |   assert_directory_exists(directory, "w")
 18 |   full.path <- file.path(directory, name)
 19 |   if (file.exists(full.path)) {
 20 |     stop("The output directory already exists, remove it to recreate the ",
 21 |          "dataset:\n  ", full.path)
 22 |   }
 23 |   message("Assembling dataset into: ", full.path)
 24 |   # Bioconductor packages are loaded, not attached; use them via `ns$`/`ns4$`.
 25 |   ns <- tryCatch(loadNamespace("SummarizedExperiment"), error = function(e) NULL)
 26 |   if (is.null(ns)) stop("SummarizedExperiment required")
 27 |   ns4 <- tryCatch(loadNamespace("S4Vectors"), error = function(e) NULL)
 28 |   if (is.null(ns4)) stop("S4Vectors required")
 29 | 
 30 |   # Load Data ..................................................................
 31 |   dat.env <- new.env()
 32 |   tryCatch({
 33 |     data("airway", package = "airway", envir = dat.env)
 34 |   }, error = function(e) stop("The airway package is required"))
 35 |   tryCatch({
 36 |     data("parathyroidGenesSE", package = "parathyroidSE", envir = dat.env)
 37 |   }, error = function(e) stop("The parathyroidSE package is required"))
 38 | 
 39 |   # Munge colData ..............................................................
 40 |   se.airway <- dat.env[["airway"]]
 41 |   cd.airway <- local({
 42 |     cd <- ns$colData(dat.env[["airway"]]) %>%
 43 |       as.data.frame() %>%
 44 |       transmute(
 45 |         sample_type = "cell_line",
 46 |         cell_line = cell,
 47 |         treatment = ifelse(dex == "untrt", "control", "dex")) %>%
 48 |       ns4$DataFrame()
 49 |     rownames(cd) <- colnames(se.airway)
 50 |     cd
 51 |   })
 52 |   se.airway <- ns$`colData<-`(se.airway, value = cd.airway)
 53 | 
 54 |   se.parathyroid <- dat.env[["parathyroidGenesSE"]]
 55 |   cd.parathyroid <- local({
 56 |     cd <- ns$colData(dat.env[["parathyroidGenesSE"]]) %>%
 57 |       as.data.frame() %>%
 58 |       transmute(
 59 |         sample_type = "primary",
 60 |         subject_id = paste0("patient_", patient),
 61 |         treatment = tolower(as.character(treatment)),
 62 |         time = paste0("hrs", sub("h$", "", time))) %>%
 63 |       ns4$DataFrame()
 64 |     rownames(cd) <- colnames(se.parathyroid)
 65 |     cd
 66 |   })
 67 |   se.parathyroid <- ns$`colData<-`(se.parathyroid, value = cd.parathyroid)
 68 | 
 69 |   # Munge rowData ..............................................................
 70 |   mart.info <- local({
 71 |     fn <- system.file("extdata", "ensembl-v75-gene-info.csv.gz",
 72 |                       package = "FacileData")
 73 |     con <- gzfile(fn, "rt")
 74 |     on.exit(close.connection(con))
 75 |     read.csv(con, stringsAsFactors = FALSE)
 76 |   })
 77 |   # Keep one annotation row per gene that is present in both datasets
 78 |   shared.ids <- intersect(rownames(se.airway), rownames(se.parathyroid))
 79 |   gene.info <- mart.info %>%
 80 |     transmute(feature_id = ensembl_gene_id,
 81 |               feature_type = "ensgid",
 82 |               name = hgnc_symbol,
 83 |               meta = gene_biotype,
 84 |               source = "Ensembl_v75") %>%
 85 |     filter(feature_id %in% shared.ids) %>%
 86 |     distinct(feature_id, .keep_all = TRUE)
 87 |     # ns4$DataFrame()
 88 |   rownames(gene.info) <- gene.info[["feature_id"]]
 89 | 
 90 |   # The data are assembled into DGELists because the author could not get
 91 |   # SummarizedExperiment subsetting to work when the package is only loaded
 92 |   # via loadNamespace() (as above) rather than attached.
 93 |   #
 94 |   # The abandoned SummarizedExperiment-based attempt is kept for reference:
 95 | 
 96 |   # se.subfn <- selectMethod("[", c("SummarizedExperiment", "ANY", "ANY"))
 97 | 
 98 |   # se.airway <- se.airway[rownames(gene.info),]
 99 |   # se.airway <- se.subfn(se.airway, rownames(gene.info))
100 |   # se.airway <- ns$`rowData<-`(se.airway, value = gene.info)
101 |   #
102 |   # # se.parathyroid <- se.parathyroid[rownames(gene.info),]
103 |   # se.parathyroid <- se.subfn(se.parathyroid, rownames(gene.info))
104 |   # se.parathyroid <- ns$`rowData<-`(se.parathyroid, value = gene.info)
105 |   #
106 |   # dat.all <- list(airway = se.airway, parathyroid = se.parathyroid)
107 | 
108 |   # gene.info <- ns4$as.data.frame(gene.info)
109 |   # Both DGELists are restricted to (and ordered by) the genes in `gene.info`
110 |   y.airway <- edgeR::DGEList(
111 |     counts = ns$assay(se.airway)[rownames(gene.info), ],
112 |     samples = ns4$as.data.frame.DataTable(ns$colData(se.airway)),
113 |     genes = gene.info)
114 | 
115 |   y.para <- edgeR::DGEList(
116 |     counts = ns$assay(se.parathyroid)[rownames(gene.info), ],
117 |     samples = ns4$as.data.frame.DataTable(ns$colData(se.parathyroid)),
118 |     genes = gene.info)
119 | 
120 |   dat.all <- list(airway = y.airway, parathyroid = y.para)
121 |   # NOTE(review): presumably this creates the dataset on disk at `full.path`
122 |   xfds <- as.FacileDataSet(
123 |     dat.all,
124 |     path = full.path,
125 |     dataset_name = name,
126 |     assay_name = "gene_counts",
127 |     assay_description = "Gene counts provided by Bioconductor data packages",
128 |     assay_type = "rnaseq",
129 |     organism = "Homo sapiens")
130 | 
131 |   xfds
132 | }
133 | 


--------------------------------------------------------------------------------
/man/as.BiocContainer.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/as.BiocAssayContainers.R
  3 | \name{as.DGEList}
  4 | \alias{as.DGEList}
  5 | \alias{as.DGEList.data.frame}
  6 | \alias{as.DGEList.tbl}
  7 | \alias{as.DGEList.facile_frame}
  8 | \alias{as.DGEList.FacileDataSet}
  9 | \alias{as.ExpressionSet}
 10 | \alias{as.ExpressionSet.data.frame}
 11 | \alias{as.ExpressionSet.FacileDataSet}
 12 | \alias{as.SummarizedExperiment}
 13 | \alias{as.SummarizedExperiment.data.frame}
 14 | \alias{as.SummarizedExperiment.FacileDataSet}
 15 | \title{Converts a "facile result" to a traditional Bioconductor assay container.}
 16 | \usage{
 17 | as.DGEList(x, ...)
 18 | 
 19 | \method{as.DGEList}{data.frame}(
 20 |   x,
 21 |   covariates = TRUE,
 22 |   feature_ids = NULL,
 23 |   assay_name = NULL,
 24 |   .fds = NULL,
 25 |   custom_key = Sys.getenv("USER"),
 26 |   ...
 27 | )
 28 | 
 29 | \method{as.DGEList}{tbl}(
 30 |   x,
 31 |   covariates = TRUE,
 32 |   feature_ids = NULL,
 33 |   assay_name = NULL,
 34 |   .fds = NULL,
 35 |   custom_key = Sys.getenv("USER"),
 36 |   ...
 37 | )
 38 | 
 39 | \method{as.DGEList}{facile_frame}(
 40 |   x,
 41 |   covariates = TRUE,
 42 |   feature_ids = NULL,
 43 |   assay_name = NULL,
 44 |   custom_key = Sys.getenv("USER"),
 45 |   ...
 46 | )
 47 | 
 48 | \method{as.DGEList}{FacileDataSet}(
 49 |   x,
 50 |   covariates = TRUE,
 51 |   feature_ids = NULL,
 52 |   assay_name = NULL,
 53 |   custom_key = Sys.getenv("USER"),
 54 |   ...
 55 | )
 56 | 
 57 | as.ExpressionSet(x, ...)
 58 | 
 59 | \method{as.ExpressionSet}{data.frame}(
 60 |   x,
 61 |   covariates = TRUE,
 62 |   feature_ids = NULL,
 63 |   assay_name = default_assay(.fds),
 64 |   .fds = fds(x),
 65 |   custom_key = Sys.getenv("USER"),
 66 |   ...
 67 | )
 68 | 
 69 | \method{as.ExpressionSet}{FacileDataSet}(
 70 |   x,
 71 |   covariates = TRUE,
 72 |   feature_ids = NULL,
 73 |   assay_name = default_assay(.fds),
 74 |   .fds = fds(x),
 75 |   custom_key = Sys.getenv("USER"),
 76 |   ...
 77 | )
 78 | 
 79 | as.SummarizedExperiment(x, ...)
 80 | 
 81 | \method{as.SummarizedExperiment}{data.frame}(
 82 |   x,
 83 |   covariates = TRUE,
 84 |   feature_ids = NULL,
 85 |   assay_name = default_assay(.fds),
 86 |   .fds = fds(x),
 87 |   custom_key = Sys.getenv("USER"),
 88 |   ...
 89 | )
 90 | 
 91 | \method{as.SummarizedExperiment}{FacileDataSet}(
 92 |   x,
 93 |   covariates = TRUE,
 94 |   feature_ids = NULL,
 95 |   assay_name = default_assay(.fds),
 96 |   .fds = fds(x),
 97 |   custom_key = Sys.getenv("USER"),
 98 |   ...
 99 | )
100 | }
101 | \arguments{
102 | \item{x}{a facile expression-like result}
103 | 
104 | \item{covariates}{The covariates the user wants to add to the $samples of
105 | the DGEList. This can take the following forms:
106 | \itemize{
107 | \item \code{TRUE}: All covariates are retrieved from the \code{FacileDataSet}
108 | \item \code{FALSE}: TODO: Better handle FALSE
109 | \item \code{character}: A vector of covariate names to fetch from the
110 | \code{FacileDataSet}. Must be elements of \code{names(sample_definitions(x))}
111 | \item \code{data.frame}: A wide covariate table (dataset, sample_id, covariates ...)
112 | This may be external covariates for samples not available within
113 | \code{x} (yet), ie. a table of covariates provided by a third party.
114 | \item \code{NULL}: do not decorate with \emph{any} covariates.
115 | }}
116 | 
117 | \item{feature_ids}{the features to get expression for (if not specified
118 | in \code{x} descriptor). These correspond to the elements found in the
119 | \code{feature_info_tbl(x)$feature_id} column.}
120 | 
121 | \item{assay_name}{the name of the assay matrix to use when populating the
122 | default assay matrix of the bioconductor container (the \verb{$counts}
123 | matrix of a \code{DGEList}, the \code{exprs()} of an \code{ExpressionSet}, etc.).
124 | The default value is the entry provided by \code{\link[=default_assay]{default_assay()}}}
125 | 
126 | \item{.fds}{The \code{FacileDataSet} that \code{x} was retrieved from}
127 | 
128 | \item{custom_key}{the custom key to use to fetch custom annotations from
129 | \code{.fds}}
130 | }
131 | \value{
132 | the appropriate bioconductor assay container, ie. an \code{edgeR::DGEList}
133 | for \code{as.DGEList}, a \code{Biobase::ExpressionSet} for \code{as.ExpressionSet}, or
134 | a \code{SummarizedExperiment::SummarizedExperiment} for
135 | \code{as.SummarizedExperiment}.
136 | 
137 | a \code{\link[Biobase]{ExpressionSet}}
138 | 
139 | a \code{\link[SummarizedExperiment]{SummarizedExperiment}}
140 | }
141 | \description{
142 | An entire \code{FacileDataSet} or a subset of it can be converted into
143 | bioconductor-standard assay containers, like a \code{SummarizedExperiment},
144 | \code{DGEList}, or \code{ExpressionSet} "at any time" using various \code{as.XXX} functions,
145 | like \code{as.DGEList(...)}.
146 | }
147 | \details{
148 | We use the term "facile object" to refer to either the entirety of a
149 | \code{FacileDataStore} or any sample-descriptor that specifies subsets of the
150 | data, eg. where \code{fds(x)} returns a \code{FacileDataStore}. See examples for
151 | specifics.
152 | 
153 | Note that the order in which the samples and features are materialized into
154 | the expression container is not guaranteed.
155 | }
156 | \examples{
157 | fds <- exampleFacileDataSet()
158 | 
159 | # Retrieve DGEList of gene expression for all samples
160 | y.all <- as.DGEList(fds) # gene expression of all samples
161 | 
162 | # Retrieve data for only 3 genes
163 | # Suppose we only wanted female samples in our DGEList
164 | y.fem <- fds \%>\%
165 |   filter_samples(sex == "f") \%>\%
166 |   as.DGEList() # or `as.ExpressionSet()`
167 | }
168 | 


--------------------------------------------------------------------------------