├── LICENSE
├── .travis.yml
├── tests
├── testthat.R
└── testthat
│ ├── test_detectionP.R
│ └── test_impute.R
├── vignettes
├── include
│ ├── logo.png
│ ├── header.html
│ ├── after_body.html
│ └── site.css
├── _output.yaml
├── _site.yml
└── QC.Rmd
├── NEWS
├── inst
├── extdata
│ └── GSE36369_NonEBV_SignalA_SignalB_3samples_1k.txt.gz
└── CITATION
├── .Rbuildignore
├── man
├── negControls.Rd
├── liftOver.Rd
├── SDFcollapseToPfx.Rd
├── prepSesameList.Rd
├── sesameQC-class.Rd
├── segmentBins.Rd
├── dataFrame2sesameQC.Rd
├── prefixMaskButC.Rd
├── prefixMaskButCG.Rd
├── MValueToBetaValue.Rd
├── binSignals.Rd
├── sesame_checkVersion.Rd
├── recommendedMaskNames.Rd
├── print.fileSet.Rd
├── getAFs.Rd
├── noMasked.Rd
├── listAvailableMasks.Rd
├── controls.Rd
├── BetaValueToMValue.Rd
├── palgen.Rd
├── dmContrasts.Rd
├── getBinCoordinates.Rd
├── normControls.Rd
├── sesameQCtoDF.Rd
├── sesamize.Rd
├── signalMU.Rd
├── sdfPlatform.Rd
├── sdf_write_table.Rd
├── resetMask.Rd
├── mapToMammal40.Rd
├── addMask.Rd
├── sesameAnno_buildAddressFile.Rd
├── SigDF.Rd
├── prefixMask.Rd
├── sesameAnno_readManifestTSV.Rd
├── setMask.Rd
├── sesameQC_plotRedGrnQQ.Rd
├── summaryExtractTest.Rd
├── print.DMLSummary.Rd
├── scrub.Rd
├── sesameQC_plotHeatSNPs.Rd
├── scrubSoft.Rd
├── calcEffectSize.Rd
├── predictAgeHorvath353.Rd
├── medianTotalIntensity.Rd
├── predictAgeSkinBlood.Rd
├── probeID_designType.Rd
├── sesameQC_getStats.Rd
├── diffRefSet.Rd
├── initFileSet.Rd
├── totalIntensities.Rd
├── dyeBiasL.Rd
├── inferEthnicity.Rd
├── sdf_read_table.Rd
├── sesameQC_plotBar.Rd
├── dyeBiasCorr.Rd
├── getAFTypeIbySumAlleles.Rd
├── checkLevels.Rd
├── updateSigDF.Rd
├── openSesameToFile.Rd
├── predictMouseAgeInMonth.Rd
├── compareMouseStrainReference.Rd
├── getMask.Rd
├── imputeBetasMatrixByMean.Rd
├── probeSuccessRate.Rd
├── matchDesign.Rd
├── readFileSet.Rd
├── detectionPnegEcdf.Rd
├── sesameQC_rankStats.Rd
├── sesameQC_plotBetaByDesign.Rd
├── sesameAnno_attachManifest.Rd
├── mapFileSet.Rd
├── meanIntensity.Rd
├── reIdentify.Rd
├── dyeBiasCorrMostBalanced.Rd
├── imputeBetas.Rd
├── imputeBetasByGenomicNeighbors.Rd
├── sesameAnno_buildManifestGRanges.Rd
├── betasCollapseToPfx.Rd
├── chipAddressToSignal.Rd
├── getRefSet.Rd
├── noob.Rd
├── getBetas.Rd
├── compareMouseTissueReference.Rd
├── qualityMask.Rd
├── parseGEOsignalMU.Rd
├── visualizeSegments.Rd
├── inferStrain.Rd
├── ELBAR.Rd
├── inferInfiniumIChannel.Rd
├── sesameQC_calcStats.Rd
├── deidentify.Rd
├── dyeBiasNL.Rd
├── twoCompsEst2.Rd
├── prepSesame.Rd
├── sliceFileSet.Rd
├── readIDATpair.Rd
├── searchIDATprefixes.Rd
├── formatVCF.Rd
├── inferSex.Rd
├── sesame-package.Rd
├── createUCSCtrack.Rd
├── pOOBAH.Rd
├── sesameAnno_download.Rd
├── sesameQC_plotIntensVsBetas.Rd
├── compareReference.Rd
├── convertProbeID.Rd
├── assemble_plots.Rd
├── estimateLeukocyte.Rd
├── visualizeGene.Rd
├── inferSpecies.Rd
├── bisConversionControl.Rd
├── visualizeProbes.Rd
├── DML.Rd
├── openSesame.Rd
├── predictAge.Rd
├── inferTissue.Rd
├── DMLpredict.Rd
├── visualizeRegion.Rd
├── cnSegmentation.Rd
├── DMR.Rd
└── mLiftOver.Rd
├── R
├── zzz.R
├── ethnicity.R
├── palgen.R
├── GEO.R
├── track.R
├── feature_selection.R
├── channel_inference.R
├── utils.R
├── vcf.R
├── age.R
├── impute.R
├── deidentify.R
├── strain.R
├── match_design.R
├── open.R
├── species.R
└── tissue.R
├── .gitignore
├── LICENSE.md
├── README.md
├── DESCRIPTION
└── NAMESPACE
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2024
2 | COPYRIGHT HOLDER: Wanding Zhou
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: r
2 |
3 | cache: packages
4 |
5 | r: bioc-devel
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(sesame)
3 |
4 | test_check("sesame")
5 |
--------------------------------------------------------------------------------
/vignettes/include/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zwdzwd/sesame/HEAD/vignettes/include/logo.png
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | CHANGES IN VERSION 1.0.0
2 | -------------------------
3 |
4 | o First submission of SeSAMe package.
--------------------------------------------------------------------------------
/inst/extdata/GSE36369_NonEBV_SignalA_SignalB_3samples_1k.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zwdzwd/sesame/HEAD/inst/extdata/GSE36369_NonEBV_SignalA_SignalB_3samples_1k.txt.gz
--------------------------------------------------------------------------------
/tests/testthat/test_detectionP.R:
--------------------------------------------------------------------------------
1 | context("detectionP")
2 | test_that("test='detectionP' gives correct errors", {
3 | sdf <- sesameDataGet("EPIC.1.SigDF")
4 | expect_is(pOOBAH(sdf), "SigDF")
5 | })
6 |
7 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^Meta$
2 | ^doc$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ## Ignore travis config file
6 | ^\.travis\.yml$
7 | ^\.git.*
8 | ^\..Rcheck$
9 | ^inst/data$
10 | .DS_Store
11 | ^.Renv-version$
12 | ^LICENSE\.md$
13 |
--------------------------------------------------------------------------------
/vignettes/_output.yaml:
--------------------------------------------------------------------------------
1 | html_document:
2 | self_contained: true
3 | number_sections: no
4 | theme: flatly
5 | highlight: zenburn
6 | mathjax: null
7 | toc: true
8 | toc_float:
9 | collapsed: false
10 | toc_depth: 3
11 | df_print: paged
12 | css: include/site.css
13 | includes:
14 | in_header: include/header.html
15 | after_body: include/after_body.html
16 |
17 |
--------------------------------------------------------------------------------
/man/negControls.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/detection.R
3 | \name{negControls}
4 | \alias{negControls}
5 | \title{get negative control signal}
6 | \usage{
7 | negControls(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{a SigDF}
11 | }
12 | \value{
13 | a data frame of negative control signals
14 | }
15 | \description{
16 | get negative control signal
17 | }
18 |
--------------------------------------------------------------------------------
/man/liftOver.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mLiftOver.R
3 | \name{liftOver}
4 | \alias{liftOver}
5 | \title{liftOver, see mLiftOver (renamed)}
6 | \usage{
7 | liftOver(...)
8 | }
9 | \arguments{
10 | \item{...}{see mLiftOver}
11 | }
12 | \value{
13 | imputed data, vector, matrix, SigDF(s)
14 | }
15 | \description{
16 | liftOver, see mLiftOver (renamed)
17 | }
18 |
--------------------------------------------------------------------------------
/man/SDFcollapseToPfx.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{SDFcollapseToPfx}
4 | \alias{SDFcollapseToPfx}
5 | \title{collapse to probe prefix}
6 | \usage{
7 | SDFcollapseToPfx(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{a SigDF object}
11 | }
12 | \value{
13 | a data frame with updated Probe_ID
14 | }
15 | \description{
16 | collapse to probe prefix
17 | }
18 |
--------------------------------------------------------------------------------
/man/prepSesameList.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/open.R
3 | \name{prepSesameList}
4 | \alias{prepSesameList}
5 | \title{List supported prepSesame functions}
6 | \usage{
7 | prepSesameList()
8 | }
9 | \value{
10 | a data frame with code, func, description
11 | }
12 | \description{
13 | List supported prepSesame functions
14 | }
15 | \examples{
16 | prepSesameList()
17 | }
18 |
--------------------------------------------------------------------------------
/man/sesameQC-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \docType{class}
4 | \name{sesameQC-class}
5 | \alias{sesameQC-class}
6 | \title{An S4 class to hold QC statistics}
7 | \value{
8 | sesameQC object
9 | }
10 | \description{
11 | An S4 class to hold QC statistics
12 | }
13 | \section{Slots}{
14 |
15 | \describe{
16 | \item{\code{stat}}{a list to store qc stats}
17 | }}
18 |
19 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | .onAttach <- function(libname, pkgname) {
2 | packageStartupMessage('
3 | ----------------------------------------------------------
4 | | SEnsible Step-wise Analysis of DNA MEthylation (SeSAMe)
5 | | --------------------------------------------------------
6 | | Please cache auxiliary data by "sesameDataCache()".
7 | | This needs to be done only once per SeSAMe installation.
8 | ----------------------------------------------------------
9 | ')
10 | }
11 |
--------------------------------------------------------------------------------
/man/segmentBins.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cnv.R
3 | \name{segmentBins}
4 | \alias{segmentBins}
5 | \title{Segment bins using DNAcopy}
6 | \usage{
7 | segmentBins(bin.signals, bin.coords)
8 | }
9 | \arguments{
10 | \item{bin.signals}{bin signals (input)}
11 |
12 | \item{bin.coords}{bin coordinates}
13 | }
14 | \value{
15 | segment signal data frame
16 | }
17 | \description{
18 | Segment bins using DNAcopy
19 | }
20 |
--------------------------------------------------------------------------------
/man/dataFrame2sesameQC.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{dataFrame2sesameQC}
4 | \alias{dataFrame2sesameQC}
5 | \title{Convert data frame to sesameQC object}
6 | \usage{
7 | dataFrame2sesameQC(df)
8 | }
9 | \arguments{
10 | \item{df}{a publicQC data frame}
11 | }
12 | \value{
13 | a list of sesameQC objects
14 | }
15 | \description{
16 | The function converts a data frame back to a list of sesameQC objects
17 | }
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Meta
2 | doc
3 | # History files
4 | .Rhistory
5 | .Rapp.history
6 |
7 | # Session Data files
8 | .RData
9 | # Example code in package build process
10 | *-Ex.R
11 | # RStudio files
12 | .Rproj.user/
13 | # produced vignettes
14 | vignettes/*.html
15 | vignettes/*.pdf
16 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
17 | .httr-oauth
18 | .Rproj.user
19 | /doc/
20 | /Meta/
21 | .DS_Store
22 | vignettes/_site
23 | vignettes/.DS_Store
24 | inst/.DS_Store
25 |
--------------------------------------------------------------------------------
/man/prefixMaskButC.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{prefixMaskButC}
4 | \alias{prefixMaskButC}
5 | \title{Mask all but C probes in SigDF}
6 | \usage{
7 | prefixMaskButC(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{SigDF}
11 | }
12 | \value{
13 | SigDF
14 | }
15 | \description{
16 | Mask all but C probes in SigDF
17 | }
18 | \examples{
19 | sdf <- resetMask(sesameDataGet("MM285.1.SigDF"))
20 | sum(prefixMaskButC(sdf)$mask)
21 | }
22 |
--------------------------------------------------------------------------------
/man/prefixMaskButCG.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{prefixMaskButCG}
4 | \alias{prefixMaskButCG}
5 | \title{Mask all but CG probes in SigDF}
6 | \usage{
7 | prefixMaskButCG(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{SigDF}
11 | }
12 | \value{
13 | SigDF
14 | }
15 | \description{
16 | Mask all but CG probes in SigDF
17 | }
18 | \examples{
19 | sdf <- resetMask(sesameDataGet("MM285.1.SigDF"))
20 | sum(prefixMaskButCG(sdf)$mask)
21 | }
22 |
--------------------------------------------------------------------------------
/tests/testthat/test_impute.R:
--------------------------------------------------------------------------------
1 | context("impute")
2 |
3 | test_that("Impute mean functions properly", {
4 | mx <- cbind(a = c(NA, 2, 3, 4), b = c(1, 6, 5, NA), c = c(3, 6, 7, 8))
5 | mx_imputed_cols <- imputeBetasMatrixByMean(mx, axis = 2)
6 | mx_imputed_rows <- imputeBetasMatrixByMean(mx, axis = 1)
7 | expect_true(mx_imputed_cols[1,1] == 3)
8 | expect_true(mx_imputed_cols[4,2] == 4)
9 | expect_true(mx_imputed_rows[1,1] == 2)
10 | expect_true((mx_imputed_rows[4,2]) == 6)
11 | })
12 |
--------------------------------------------------------------------------------
/man/MValueToBetaValue.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{MValueToBetaValue}
4 | \alias{MValueToBetaValue}
5 | \title{Convert M-value to beta-value}
6 | \usage{
7 | MValueToBetaValue(m)
8 | }
9 | \arguments{
10 | \item{m}{a vector of M values}
11 | }
12 | \value{
13 | a vector of beta values
14 | }
15 | \description{
16 | Convert M-value to beta-value (aka inverse logit transform)
17 | }
18 | \examples{
19 | MValueToBetaValue(c(-3, 0, 3))
20 | }
21 |
--------------------------------------------------------------------------------
/man/binSignals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cnv.R
3 | \name{binSignals}
4 | \alias{binSignals}
5 | \title{Bin signals from probe signals}
6 | \usage{
7 | binSignals(probe.signals, bin.coords, probeCoords)
8 | }
9 | \arguments{
10 | \item{probe.signals}{probe signals}
11 |
12 | \item{bin.coords}{bin coordinates}
13 |
14 | \item{probeCoords}{probe coordinates}
15 | }
16 | \value{
17 | bin signals
18 | }
19 | \description{
20 | Bin signals from probe signals. Requires GenomicRanges.
21 | }
22 |
--------------------------------------------------------------------------------
/man/sesame_checkVersion.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{sesame_checkVersion}
4 | \alias{sesame_checkVersion}
5 | \title{Check SeSAMe versions}
6 | \usage{
7 | sesame_checkVersion()
8 | }
9 | \value{
10 | print the version of sesame, sesameData, Bioconductor and R
11 | }
12 | \description{
13 | Print the package versions of sesame and its dependencies to help troubleshoot
14 | installation issues.
15 | }
16 | \examples{
17 | sesame_checkVersion()
18 | }
19 |
--------------------------------------------------------------------------------
/man/recommendedMaskNames.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{recommendedMaskNames}
4 | \alias{recommendedMaskNames}
5 | \title{Recommended mask names for each Infinium platform}
6 | \usage{
7 | recommendedMaskNames()
8 | }
9 | \value{
10 | a named list of mask names
11 | }
12 | \description{
13 | The returned name is the db name used in KYCG.mask
14 | }
15 | \examples{
16 | recommendedMaskNames()[["EPICv2"]]
17 | recommendedMaskNames()[["EPIC"]]
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/man/print.fileSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fileSet.R
3 | \name{print.fileSet}
4 | \alias{print.fileSet}
5 | \title{Print a fileSet}
6 | \usage{
7 | \method{print}{fileSet}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{a sesame::fileSet}
11 |
12 | \item{...}{additional arguments for print}
13 | }
14 | \value{
15 | string representation
16 | }
17 | \description{
18 | Print a fileSet
19 | }
20 | \examples{
21 |
22 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2'))
23 | fset
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/man/getAFs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{getAFs}
4 | \alias{getAFs}
5 | \title{Get allele frequency}
6 | \usage{
7 | getAFs(sdf, ...)
8 | }
9 | \arguments{
10 | \item{sdf}{\code{SigDF}}
11 |
12 | \item{...}{additional options to getBetas}
13 | }
14 | \value{
15 | allele frequency
16 | }
17 | \description{
18 | Get allele frequency
19 | }
20 | \examples{
21 | sesameDataCache() # if not done yet
22 | sdf <- sesameDataGet('EPIC.1.SigDF')
23 | af <- getAFs(sdf)
24 | }
25 |
--------------------------------------------------------------------------------
/vignettes/include/header.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
--------------------------------------------------------------------------------
/man/noMasked.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{noMasked}
4 | \alias{noMasked}
5 | \title{remove masked probes from SigDF}
6 | \usage{
7 | noMasked(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{input SigDF object}
11 | }
12 | \value{
13 | a SigDF object without masked probes
14 | }
15 | \description{
16 | remove masked probes from SigDF
17 | }
18 | \examples{
19 | sesameDataCache()
20 | sdf <- sesameDataGet("EPIC.1.SigDF")
21 | sdf <- pOOBAH(sdf)
22 |
23 | sdf_noMasked <- noMasked(sdf)
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/man/listAvailableMasks.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{listAvailableMasks}
4 | \alias{listAvailableMasks}
5 | \title{list existing quality masks for a SigDF}
6 | \usage{
7 | listAvailableMasks(platform, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{platform}{EPIC, MM285, HM450 etc}
11 |
12 | \item{verbose}{print more messages}
13 | }
14 | \value{
15 | a tibble of masks
16 | }
17 | \description{
18 | list existing quality masks for a SigDF
19 | }
20 | \examples{
21 | listAvailableMasks("EPICv2")
22 | }
23 |
--------------------------------------------------------------------------------
/man/controls.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{controls}
4 | \alias{controls}
5 | \title{get the controls attributes}
6 | \usage{
7 | controls(sdf, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{verbose}{print more messages}
13 | }
14 | \value{
15 | the controls data frame
16 | }
17 | \description{
18 | get the controls attributes
19 | }
20 | \examples{
21 | sesameDataCache() # if not done yet
22 | sdf <- sesameDataGet('EPIC.1.SigDF')
23 | head(controls(sdf))
24 | }
25 |
--------------------------------------------------------------------------------
/man/BetaValueToMValue.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{BetaValueToMValue}
4 | \alias{BetaValueToMValue}
5 | \title{Convert beta-value to M-value}
6 | \usage{
7 | BetaValueToMValue(b)
8 | }
9 | \arguments{
10 | \item{b}{vector of beta values}
11 | }
12 | \value{
13 | a vector of M values
14 | }
15 | \description{
16 | Logit transform a beta value vector to M-value vector.
17 | }
18 | \details{
19 | Convert beta-value to M-value (aka logit transform)
20 | }
21 | \examples{
22 | BetaValueToMValue(c(0.1, 0.5, 0.9))
23 | }
24 |
--------------------------------------------------------------------------------
/vignettes/include/after_body.html:
--------------------------------------------------------------------------------
1 |
11 |
--------------------------------------------------------------------------------
/man/palgen.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/palgen.R
3 | \name{palgen}
4 | \alias{palgen}
5 | \title{Generate some additional color palettes}
6 | \usage{
7 | palgen(pal, n = 150, space = "Lab")
8 | }
9 | \arguments{
10 | \item{pal}{a string for adhoc pals}
11 |
12 | \item{n}{the number of colors for interpolation}
13 |
14 | \item{space}{rgb or Lab}
15 | }
16 | \value{
17 | a palette-generating function
18 | }
19 | \description{
20 | Generate some additional color palettes
21 | }
22 | \examples{
23 | library(pals)
24 | pal.bands(palgen("whiteturbo"))
25 | }
26 |
--------------------------------------------------------------------------------
/man/dmContrasts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{dmContrasts}
4 | \alias{dmContrasts}
5 | \title{List all contrasts of a DMLSummary}
6 | \usage{
7 | dmContrasts(smry)
8 | }
9 | \arguments{
10 | \item{smry}{a DMLSummary object}
11 | }
12 | \value{
13 | a character vector of contrasts
14 | }
15 | \description{
16 | List all contrasts of a DMLSummary
17 | }
18 | \examples{
19 | data <- sesameDataGet('HM450.76.TCGA.matched')
20 | smry <- DML(data$betas[1:10,], ~type, meta=data$sampleInfo)
21 | dmContrasts(smry)
22 |
23 | sesameDataGet_resetEnv()
24 | }
25 |
--------------------------------------------------------------------------------
/man/getBinCoordinates.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cnv.R
3 | \name{getBinCoordinates}
4 | \alias{getBinCoordinates}
5 | \title{Get bin coordinates}
6 | \usage{
7 | getBinCoordinates(seqLength, gapInfo, tilewidth = 50000, probeCoords)
8 | }
9 | \arguments{
10 | \item{seqLength}{chromosome information object}
11 |
12 | \item{gapInfo}{chromosome gap information}
13 |
14 | \item{tilewidth}{tile width for smoothing}
15 |
16 | \item{probeCoords}{probe coordinates}
17 | }
18 | \value{
19 | bin.coords
20 | }
21 | \description{
22 | Get bin coordinates. Requires GenomicRanges and IRanges.
23 | }
24 |
--------------------------------------------------------------------------------
/man/normControls.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dye_bias.R
3 | \name{normControls}
4 | \alias{normControls}
5 | \title{get normalization control signal}
6 | \usage{
7 | normControls(sdf, average = FALSE, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a SigDF}
11 |
12 | \item{average}{whether to average}
13 |
14 | \item{verbose}{print more messages}
15 | }
16 | \value{
17 | a data frame of normalization control signals
18 | }
19 | \description{
20 | get normalization control signal from SigDF.
21 | The function optionally takes mean for each channel.
22 | }
23 |
--------------------------------------------------------------------------------
/man/sesameQCtoDF.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQCtoDF}
4 | \alias{sesameQCtoDF}
5 | \title{Convert a list of sesameQC to data frame}
6 | \usage{
7 | sesameQCtoDF(qcs, cols = c("frac_dt_cg", "RGdistort", "RGratio"))
8 | }
9 | \arguments{
10 | \item{qcs}{sesameQCs}
11 |
12 | \item{cols}{QC columns, use NULL to report all}
13 | }
14 | \value{
15 | a data frame
16 | }
17 | \description{
18 | Convert a list of sesameQC to data frame
19 | }
20 | \examples{
21 | sdf <- sesameDataGet("EPIC.1.SigDF")
22 | qcs <- sesameQC_calcStats(sdf, "detection")
23 | sesameQCtoDF(qcs)
24 | }
25 |
--------------------------------------------------------------------------------
/man/sesamize.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{sesamize}
4 | \alias{sesamize}
5 | \title{sesamize function is deprecated.
6 | Please check https://github.com/zwdzwd/sesamize for previous scripts}
7 | \usage{
8 | sesamize(...)
9 | }
10 | \arguments{
11 | \item{...}{arguments for sesamize}
12 | }
13 | \value{
14 | a message text for deprecated function
15 | }
16 | \description{
17 | sesamize function is deprecated.
18 | Please check https://github.com/zwdzwd/sesamize for previous scripts
19 | }
20 | \examples{
21 | cat("Deprecated. see https://github.com/zwdzwd/sesamize")
22 | }
23 |
--------------------------------------------------------------------------------
/man/signalMU.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{signalMU}
4 | \alias{signalMU}
5 | \title{report M and U for regular probes}
6 | \usage{
7 | signalMU(sdf, mask = TRUE, MU = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{mask}{whether to apply mask}
13 |
14 | \item{MU}{add a column for M+U}
15 | }
16 | \value{
17 | a data frame of M and U columns
18 | }
19 | \description{
20 | report M and U for regular probes
21 | }
22 | \examples{
23 | sesameDataCache() # if not done yet
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | head(signalMU(sdf))
26 | }
27 |
--------------------------------------------------------------------------------
/man/sdfPlatform.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{sdfPlatform}
4 | \alias{sdfPlatform}
5 | \title{Convenience function to output platform attribute of SigDF}
6 | \usage{
7 | sdfPlatform(sdf, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a SigDF object}
11 |
12 | \item{verbose}{print more messages}
13 | }
14 | \value{
15 | the platform string for the SigDF object
16 | }
17 | \description{
18 | Convenience function to output platform attribute of SigDF
19 | }
20 | \examples{
21 | sesameDataCache()
22 | sdf <- sesameDataGet('EPIC.1.SigDF')
23 | sdfPlatform(sdf)
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/man/sdf_write_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{sdf_write_table}
4 | \alias{sdf_write_table}
5 | \title{write SigDF to table file}
6 | \usage{
7 | sdf_write_table(sdf, ...)
8 | }
9 | \arguments{
10 | \item{sdf}{the \code{SigDF} to output}
11 |
12 | \item{...}{additional argument to write.table}
13 | }
14 | \value{
15 | write SigDF to table file
16 | }
17 | \description{
18 | write SigDF to table file
19 | }
20 | \examples{
21 | sesameDataCache() # if not done yet
22 | sdf <- sesameDataGet('EPIC.1.SigDF')
23 | sdf_write_table(sdf, file=sprintf("\%s/sigdf.txt", tempdir()))
24 | }
25 |
--------------------------------------------------------------------------------
/man/resetMask.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{resetMask}
4 | \alias{resetMask}
5 | \title{Reset Masking}
6 | \usage{
7 | resetMask(sdf, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{verbose}{print more messages}
13 | }
14 | \value{
15 | a new \code{SigDF} with mask reset to all FALSE
16 | }
17 | \description{
18 | Reset Masking
19 | }
20 | \examples{
21 | sesameDataCache() # if not done yet
22 | sdf <- sesameDataGet('EPIC.1.SigDF')
23 | sum(sdf$mask)
24 | sdf <- addMask(sdf, c("cg14057072", "cg22344912"))
25 | sum(sdf$mask)
26 | sum(resetMask(sdf)$mask)
27 | }
28 |
--------------------------------------------------------------------------------
/man/mapToMammal40.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/species.R
3 | \name{mapToMammal40}
4 | \alias{mapToMammal40}
5 | \title{Map the SDF (from overlap array platforms)
6 | Replicates are merged by picking the best detection}
7 | \usage{
8 | mapToMammal40(sdf)
9 | }
10 | \arguments{
11 | \item{sdf}{a \code{SigDF} object}
12 | }
13 | \value{
14 | a named numeric vector for beta values
15 | }
16 | \description{
17 | Map the SDF (from overlap array platforms).
18 | Replicates are merged by picking the best detection
19 | }
20 | \examples{
21 | sdf <- sesameDataGet("Mammal40.1.SigDF")
22 | betas <- mapToMammal40(sdf[1:10,])
23 | }
24 |
--------------------------------------------------------------------------------
/man/addMask.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{addMask}
4 | \alias{addMask}
5 | \title{Add probes to mask}
6 | \usage{
7 | addMask(sdf, probes)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{probes}{a vector of probe IDs or a logical vector with TRUE
13 | representing masked probes}
14 | }
15 | \value{
16 | a \code{SigDF} with added mask
17 | }
18 | \description{
19 | This function essentially merges existing probe masking
20 | with new probes to mask
21 | }
22 | \examples{
23 | sdf <- sesameDataGet('EPIC.1.SigDF')
24 | sum(sdf$mask)
25 | sum(addMask(sdf, c("cg14057072", "cg22344912"))$mask)
26 | }
27 |
--------------------------------------------------------------------------------
/man/sesameAnno_buildAddressFile.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesameAnno.R
3 | \name{sesameAnno_buildAddressFile}
4 | \alias{sesameAnno_buildAddressFile}
5 | \title{Build sesame ordering address file from tsv}
6 | \usage{
7 | sesameAnno_buildAddressFile(tsv)
8 | }
9 | \arguments{
10 | \item{tsv}{a platform name, a file path or a tibble/data.frame manifest file}
11 | }
12 | \value{
13 | a list of ordering and controls
14 | }
15 | \description{
16 | Build sesame ordering address file from tsv
17 | }
18 | \examples{
19 | \dontrun{
20 | tsv = sesameAnno_download("HM450.hg38.manifest.tsv.gz")
21 | addr <- sesameAnno_buildAddressFile(tsv)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/man/SigDF.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{SigDF}
4 | \alias{SigDF}
5 | \title{SigDF validation from a plain data frame}
6 | \usage{
7 | SigDF(df, platform = "EPIC", ctl = NULL)
8 | }
9 | \arguments{
10 | \item{df}{a \code{data.frame} with Probe_ID, MG, MR, UG, UR, col and mask}
11 |
12 | \item{platform}{a string to specify the array platform}
13 |
14 | \item{ctl}{optional control probe data frame}
15 | }
16 | \value{
17 | a \code{SigDF} object
18 | }
19 | \description{
20 | SigDF validation from a plain data frame
21 | }
22 | \examples{
23 | sesameDataCache() # if not done yet
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | }
26 |
--------------------------------------------------------------------------------
/man/prefixMask.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{prefixMask}
4 | \alias{prefixMask}
5 | \title{Mask SigDF by probe ID prefix}
6 | \usage{
7 | prefixMask(sdf, prefixes = NULL, invert = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{SigDF}
11 |
12 | \item{prefixes}{prefix characters}
13 |
14 | \item{invert}{use the complement set}
15 | }
16 | \value{
17 | SigDF
18 | }
19 | \description{
20 | Mask SigDF by probe ID prefix
21 | }
22 | \examples{
23 | sdf <- resetMask(sesameDataGet("MM285.1.SigDF"))
24 | sum(prefixMask(sdf, c("ctl","rs"))$mask)
25 | sum(prefixMask(sdf, c("ctl"))$mask)
26 | sum(prefixMask(sdf, c("ctl","rs","ch"))$mask)
27 | }
28 |
--------------------------------------------------------------------------------
/man/sesameAnno_readManifestTSV.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesameAnno.R
3 | \name{sesameAnno_readManifestTSV}
4 | \alias{sesameAnno_readManifestTSV}
5 | \title{Read manifest file to a tsv format}
6 | \usage{
7 | sesameAnno_readManifestTSV(tsv_fn)
8 | }
9 | \arguments{
10 | \item{tsv_fn}{tsv file path}
11 | }
12 | \value{
13 | a manifest as a tibble
14 | }
15 | \description{
16 | Read manifest file to a tsv format
17 | }
18 | \examples{
19 | \dontrun{
20 | tsv = sesameAnno_download("HM450.hg38.manifest.tsv.gz")
21 | mft <- sesameAnno_readManifestTSV(tsv)
22 | ## direct access
23 | mft <- sesameAnno_readManifestTSV("HM450.hg38.manifest")
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/man/setMask.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{setMask}
4 | \alias{setMask}
5 | \title{Set mask to only the probes specified}
6 | \usage{
7 | setMask(sdf, probes)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{probes}{a vector of probe IDs or a logical vector with TRUE
13 | representing masked probes}
14 | }
15 | \value{
16 | a \code{SigDF} with the mask set to the specified probes
17 | }
18 | \description{
19 | Set mask to only the probes specified
20 | }
21 | \examples{
22 | sdf <- sesameDataGet('EPIC.1.SigDF')
23 | sum(sdf$mask)
24 | sum(setMask(sdf, "cg14959801")$mask)
25 | sum(setMask(sdf, c("cg14057072", "cg22344912"))$mask)
26 | }
27 |
--------------------------------------------------------------------------------
/man/sesameQC_plotRedGrnQQ.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_plotRedGrnQQ}
4 | \alias{sesameQC_plotRedGrnQQ}
5 | \title{Plot red-green QQ-Plot using Infinium-I Probes}
6 | \usage{
7 | sesameQC_plotRedGrnQQ(sdf, main = "R-G QQ Plot", ...)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{main}{plot title}
13 |
14 | \item{...}{additional options to qqplot}
15 | }
16 | \value{
17 | create a qqplot
18 | }
19 | \description{
20 | Plot red-green QQ-Plot using Infinium-I Probes
21 | }
22 | \examples{
23 | sesameDataCache() # if not done yet
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | sesameQC_plotRedGrnQQ(sdf)
26 | }
27 |
--------------------------------------------------------------------------------
/vignettes/include/site.css:
--------------------------------------------------------------------------------
1 | .html-widget {
2 | margin-bottom: 1em;
3 | }
4 | h1 .header-section-number::after {
5 | content: ".";
6 | }
7 | th {
8 | background-color: #336699;
9 | color: white;
10 | }
11 | tr:nth-child(even) {background-color: #f2f2f2;}
12 | table td {
13 | padding: 3px 10px;
14 | border-top: none;
15 | border-left: none;
16 | border-bottom: none;
17 | border-right: none;
18 | }
19 | h1 {
20 | font-size: 28px;
21 | }
22 |
23 | h2 {
24 | font-size: 28px;
25 | }
26 |
27 | .section.level2 h2 {
28 | padding-top: 65px;
29 | margin-top: -40px;
30 | }
31 | .section.level1 h1 {
32 | padding-top: 65px;
33 | margin-top: -40px;
34 | }
--------------------------------------------------------------------------------
/man/summaryExtractTest.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{summaryExtractTest}
4 | \alias{summaryExtractTest}
5 | \title{Extract slope information from DMLSummary}
6 | \usage{
7 | summaryExtractTest(smry)
8 | }
9 | \arguments{
10 | \item{smry}{DMLSummary from DML command}
11 | }
12 | \value{
13 | a table of slope and p-value
14 | }
15 | \description{
16 | Extract slope information from DMLSummary
17 | }
18 | \examples{
19 | sesameDataCache() # in case not done yet
20 | data <- sesameDataGet('HM450.76.TCGA.matched')
21 | smry <- DML(data$betas[1:10,], ~type, meta=data$sampleInfo)
22 | slopes <- summaryExtractTest(smry)
23 |
24 | sesameDataGet_resetEnv()
25 | }
26 |
--------------------------------------------------------------------------------
/man/print.DMLSummary.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{print.DMLSummary}
4 | \alias{print.DMLSummary}
5 | \title{Print DMLSummary object}
6 | \usage{
7 | \method{print}{DMLSummary}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{a DMLSummary object}
11 |
12 | \item{...}{extra parameter for print}
13 | }
14 | \value{
15 | print DMLSummary result on screen
16 | }
17 | \description{
18 | Print DMLSummary object
19 | }
20 | \examples{
21 | sesameDataCache() # in case not done yet
22 | data <- sesameDataGet('HM450.76.TCGA.matched')
23 | ## test the first 10
24 | smry <- DML(data$betas[1:10,], ~type, meta=data$sampleInfo)
25 | smry
26 |
27 | sesameDataGet_resetEnv()
28 | }
29 |
--------------------------------------------------------------------------------
/man/scrub.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/background.R
3 | \name{scrub}
4 | \alias{scrub}
5 | \title{SCRUB background correction}
6 | \usage{
7 | scrub(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 | }
12 | \value{
13 | a new \code{SigDF} with residual background subtracted
14 | }
15 | \description{
16 | This function takes a \code{SigDF} and returns a modified \code{SigDF}
17 | with background subtracted. scrub subtracts residual background using
18 | background median
19 | }
20 | \details{
21 | This function is meant to be used after noob.
22 | }
23 | \examples{
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | sdf.nb <- noob(sdf)
26 | sdf.nb.scrub <- scrub(sdf.nb)
27 | }
28 |
--------------------------------------------------------------------------------
/R/ethnicity.R:
--------------------------------------------------------------------------------
1 | #' Infer Ethnicity
2 | #'
3 | #' This function used both the built-in rs probes as well as the type I
4 | #' Color-Channel-Switching probes to infer ethnicity. It is deprecated and
5 | #' now only signals a deprecation warning; use CytoMethIC::cmi_classify.
6 | #'
7 | #' The input should be background subtracted and dye bias corrected for
8 | #' best accuracy.
9 | #'
10 | #' Please note: the betas should come from SigDF *without* channel inference.
11 | #'
12 | #' @param sdf a \code{SigDF} (no longer used)
13 | #' @param verbose print more messages (no longer used)
14 | #' @return no ethnicity is returned; called for its deprecation warning
15 | #' @import sesameData
16 | #' @examples
17 | #' sdf <- sesameDataGet('EPIC.1.SigDF')
18 | #' ## inferEthnicity(sdf)
19 | #' @export
20 | inferEthnicity <- function(sdf, verbose = FALSE) {
21 |     .Deprecated("CytoMethIC::cmi_classify") # `new` is the replacement's name
22 | }
23 |
--------------------------------------------------------------------------------
/man/sesameQC_plotHeatSNPs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_plotHeatSNPs}
4 | \alias{sesameQC_plotHeatSNPs}
5 | \title{Plot SNP heatmap}
6 | \usage{
7 | sesameQC_plotHeatSNPs(sdfs, cluster = TRUE, filter.nonvariant = TRUE)
8 | }
9 | \arguments{
10 | \item{sdfs}{a list of \code{SigDF}s}
11 |
12 | \item{cluster}{show clustered heatmap}
13 |
14 | \item{filter.nonvariant}{whether to filter nonvariant (range < 0.3)}
15 | }
16 | \value{
17 | a grid graphics object
18 | }
19 | \description{
20 | Plot SNP heatmap
21 | }
22 | \examples{
23 |
24 | sdfs <- sesameDataGet("EPIC.5.SigDF.normal")[1:2]
25 | plt <- sesameQC_plotHeatSNPs(sdfs, filter.nonvariant = FALSE)
26 | }
27 |
--------------------------------------------------------------------------------
/man/scrubSoft.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/background.R
3 | \name{scrubSoft}
4 | \alias{scrubSoft}
5 | \title{SCRUB background correction}
6 | \usage{
7 | scrubSoft(sdf)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 | }
12 | \value{
13 | a new \code{SigDF} with residual background subtracted
14 | }
15 | \description{
16 | This function takes a \code{SigDF} and returns a modified \code{SigDF}
17 | with background subtracted. scrubSoft subtracts residual background using a
18 | noob-like procedure.
19 | }
20 | \details{
21 | This function is meant to be used after noob.
22 | }
23 | \examples{
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | sdf.nb <- noob(sdf)
26 | sdf.nb.scrubSoft <- scrubSoft(sdf.nb)
27 | }
28 |
--------------------------------------------------------------------------------
/man/calcEffectSize.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{calcEffectSize}
4 | \alias{calcEffectSize}
5 | \title{Compute effect size for different variables from prediction matrix}
6 | \usage{
7 | calcEffectSize(pred)
8 | }
9 | \arguments{
10 | \item{pred}{predictions}
11 | }
12 | \value{
13 | a data.frame of effect sizes. Columns are different variables.
14 | Rows are different probes.
15 | }
16 | \description{
17 | The effect size is defined by the maximum variation of a variable with all
18 | the other variables held constant.
19 | }
20 | \examples{
21 | data <- sesameDataGet('HM450.76.TCGA.matched')
22 | res <- DMLpredict(data$betas[1:10,], ~type, meta=data$sampleInfo)
23 | head(calcEffectSize(res))
24 | }
25 |
--------------------------------------------------------------------------------
/man/predictAgeHorvath353.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/age.R
3 | \name{predictAgeHorvath353}
4 | \alias{predictAgeHorvath353}
5 | \title{Horvath 353 age predictor}
6 | \usage{
7 | predictAgeHorvath353(betas)
8 | }
9 | \arguments{
10 | \item{betas}{a probeID-named vector of beta values}
11 | }
12 | \value{
13 | age in years
14 | }
15 | \description{
16 | The function takes a named numeric vector of beta values. The name attribute
17 | contains the probe ID (cg, ch or rs IDs). The function looks for overlapping
18 | probes and estimates age using the Horvath aging model (Horvath 2013
19 | Genome Biology). The function outputs a single numeric of age in years.
20 | }
21 | \examples{
22 | cat("Deprecated. See predictAge")
23 | }
24 |
--------------------------------------------------------------------------------
/man/medianTotalIntensity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{medianTotalIntensity}
4 | \alias{medianTotalIntensity}
5 | \title{Whole-dataset-wide Median Total Intensity (M+U)}
6 | \usage{
7 | medianTotalIntensity(sdf, mask = TRUE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{mask}{whether to mask probes using mask column}
13 | }
14 | \value{
15 | median of all intensities
16 | }
17 | \description{
18 | The function takes one single \code{SigDF} and computes median
19 | intensity of M+U for each probe. This function outputs a single
20 | numeric for the median.
21 | }
22 | \examples{
23 | sesameDataCache() # if not done yet
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | medianTotalIntensity(sdf)
26 | }
27 |
--------------------------------------------------------------------------------
/man/predictAgeSkinBlood.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/age.R
3 | \name{predictAgeSkinBlood}
4 | \alias{predictAgeSkinBlood}
5 | \title{Horvath Skin and Blood age predictor}
6 | \usage{
7 | predictAgeSkinBlood(betas)
8 | }
9 | \arguments{
10 | \item{betas}{a probeID-named vector of beta values}
11 | }
12 | \value{
13 | age in years
14 | }
15 | \description{
16 | The function takes a named numeric vector of beta values. The name attribute
17 | contains the probe ID (cg, ch or rs IDs). The function looks for overlapping
18 | probes and estimates age using the Horvath aging model (Horvath et al. 2018
19 | Aging, 391 probes). The function outputs a single numeric of age in years.
20 | }
21 | \examples{
22 | cat("Deprecated. See predictAge")
23 | }
24 |
--------------------------------------------------------------------------------
/man/probeID_designType.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{probeID_designType}
4 | \alias{probeID_designType}
5 | \title{Extract the probe type field from probe ID
6 | This only works with the new probe ID system.
7 | See https://github.com/zhou-lab/InfiniumAnnotation for illustration}
8 | \usage{
9 | probeID_designType(Probe_ID)
10 | }
11 | \arguments{
12 | \item{Probe_ID}{Probe ID}
13 | }
14 | \value{
15 | a vector of '1' and '2' suggesting Infinium-I and Infinium-II
16 | }
17 | \description{
18 | Extract the probe type field from probe ID
19 | This only works with the new probe ID system.
20 | See https://github.com/zhou-lab/InfiniumAnnotation for illustration
21 | }
22 | \examples{
23 | probeID_designType("cg36609548_TC21")
24 | }
25 |
--------------------------------------------------------------------------------
/man/sesameQC_getStats.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_getStats}
4 | \alias{sesameQC_getStats}
5 | \title{Get stat numbers from a sesameQC object}
6 | \usage{
7 | sesameQC_getStats(qc, stat_names = NULL, drop = TRUE)
8 | }
9 | \arguments{
10 | \item{qc}{a sesameQC object}
11 |
12 | \item{stat_names}{which stat(s) to retrieve, default to all.}
13 |
14 | \item{drop}{whether to drop to a string when stat_names has
15 | only one element.}
16 | }
17 | \value{
18 | a list of named stats to be retrieved
19 | }
20 | \description{
21 | Get stat numbers from a sesameQC object
22 | }
23 | \examples{
24 | sdf <- sesameDataGet("EPIC.1.SigDF")
25 | qc <- sesameQC_calcStats(sdf, "detection")
26 | sesameQC_getStats(qc, "frac_dt")
27 | }
28 |
--------------------------------------------------------------------------------
/man/diffRefSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cell_composition.R
3 | \name{diffRefSet}
4 | \alias{diffRefSet}
5 | \title{Restrict refset to differentially methylated probes
6 | use with care, might introduce bias}
7 | \usage{
8 | diffRefSet(g)
9 | }
10 | \arguments{
11 | \item{g}{a matrix with probes on the rows and cell types on the columns}
12 | }
13 | \value{
14 | a matrix with a subset of input probes (rows)
15 | }
16 | \description{
17 | The function takes a matrix with probes on the rows and cell types on
18 | the columns and outputs a subset matrix containing only probes that show
19 | discordant methylation levels among the cell types.
20 | }
21 | \examples{
22 |
23 | g = diffRefSet(getRefSet(platform='HM450'))
24 | sesameDataGet_resetEnv()
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/man/initFileSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fileSet.R
3 | \name{initFileSet}
4 | \alias{initFileSet}
5 | \title{initialize a fileSet class by allocating appropriate storage}
6 | \usage{
7 | initFileSet(map_path, platform, samples, probes = NULL, inc = 4)
8 | }
9 | \arguments{
10 | \item{map_path}{path of file to map}
11 |
12 | \item{platform}{EPIC, HM450 or HM27, consistent with sdfPlatform(sdf)}
13 |
14 | \item{samples}{sample names}
15 |
16 | \item{probes}{probe names}
17 |
18 | \item{inc}{bytes per unit data storage}
19 | }
20 | \value{
21 | a sesame::fileSet object
22 | }
23 | \description{
24 | initialize a fileSet class by allocating appropriate storage
25 | }
26 | \examples{
27 |
28 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2'))
29 | }
30 |
--------------------------------------------------------------------------------
/man/totalIntensities.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{totalIntensities}
4 | \alias{totalIntensities}
5 | \title{M+U Intensities Array}
6 | \usage{
7 | totalIntensities(sdf, mask = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{mask}{whether to mask probes using mask column}
13 | }
14 | \value{
15 | a vector of M+U signal for each probe
16 | }
17 | \description{
18 | The function takes one single \code{SigDF} and computes total
19 | intensity of all the in-band measurements by summing methylated and
20 | unmethylated alleles. It outputs a vector of M+U signal, one per probe.
21 | }
22 | \examples{
23 | sesameDataCache() # if not done yet
24 | sdf <- sesameDataGet('EPIC.1.SigDF')
25 | intensities <- totalIntensities(sdf)
26 | }
27 |
--------------------------------------------------------------------------------
/man/dyeBiasL.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dye_bias.R
3 | \name{dyeBiasL}
4 | \alias{dyeBiasL}
5 | \title{Correct dye bias by linear scaling.}
6 | \usage{
7 | dyeBiasL(sdf, ref = NULL)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{ref}{reference signal level}
13 | }
14 | \value{
15 | a normalized \code{SigDF}
16 | }
17 | \description{
18 | The function takes a \code{SigDF} as input and scales both the Grn and Red
19 | signal to a reference (ref) level. If the reference level is not given, it
20 | is set to the mean intensity of all the in-band signals. The function
21 | returns a \code{SigDF} with dye bias corrected.
22 | }
23 | \examples{
24 | sesameDataCache() # if not done yet
25 | sdf <- sesameDataGet('EPIC.1.SigDF')
26 | sdf.db <- dyeBiasL(sdf)
27 | }
28 |
--------------------------------------------------------------------------------
/man/inferEthnicity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ethnicity.R
3 | \name{inferEthnicity}
4 | \alias{inferEthnicity}
5 | \title{Infer Ethnicity}
6 | \usage{
7 | inferEthnicity(sdf, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{verbose}{print more messages}
13 | }
14 | \value{
15 | string of ethnicity
16 | }
17 | \description{
18 | This function uses both the built-in rsprobes as well as the type I
19 | Color-Channel-Switching probes to infer ethnicity.
20 | }
21 | \details{
22 | The input should be background subtracted and dye bias corrected for
23 | best accuracy.
24 |
25 | Please note: the betas should come from SigDF *without*
26 | channel inference.
27 | }
28 | \examples{
29 | sdf <- sesameDataGet('EPIC.1.SigDF')
30 | ## inferEthnicity(sdf)
31 | }
32 |
--------------------------------------------------------------------------------
/man/sdf_read_table.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SigDFMethods.R
3 | \name{sdf_read_table}
4 | \alias{sdf_read_table}
5 | \title{read a table file to SigDF}
6 | \usage{
7 | sdf_read_table(fname, platform = NULL, verbose = FALSE, ...)
8 | }
9 | \arguments{
10 | \item{fname}{file name}
11 |
12 | \item{platform}{array platform (will infer if not given)}
13 |
14 | \item{verbose}{print more information}
15 |
16 | \item{...}{additional argument to read.table}
17 | }
18 | \value{
19 | read table file to SigDF
20 | }
21 | \description{
22 | read a table file to SigDF
23 | }
24 | \examples{
25 | sesameDataCache() # if not done yet
26 | sdf <- sesameDataGet('EPIC.1.SigDF')
27 | fname <- sprintf("\%s/sigdf.txt", tempdir())
28 | sdf_write_table(sdf, file=fname)
29 | sdf2 <- sdf_read_table(fname)
30 | }
31 |
--------------------------------------------------------------------------------
/man/sesameQC_plotBar.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_plotBar}
4 | \alias{sesameQC_plotBar}
5 | \title{Bar plots for sesameQC}
6 | \usage{
7 | sesameQC_plotBar(qcs, keys = NULL)
8 | }
9 | \arguments{
10 | \item{qcs}{a list of sesameQC objects}
11 |
12 | \item{keys}{optional, other keys to plot instead of the default.
13 | Keys can be found in the parentheses of the print output of each
14 | sesameQC object.}
15 | }
16 | \value{
17 | a bar plot comparing different QC metrics
18 | }
19 | \description{
20 | By default, it plots median_beta_cg, median_beta_ch, RGratio,
21 | RGdistort, frac_dt
22 | }
23 | \examples{
24 | sesameDataCache() # if not done yet
25 | sdfs <- sesameDataGet("EPIC.5.SigDF.normal")[1:2]
26 | sesameQC_plotBar(lapply(sdfs, sesameQC_calcStats, "detection"))
27 | }
28 |
--------------------------------------------------------------------------------
/man/dyeBiasCorr.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dye_bias.R
3 | \name{dyeBiasCorr}
4 | \alias{dyeBiasCorr}
5 | \title{Correct dye bias by linear scaling.}
6 | \usage{
7 | dyeBiasCorr(sdf, ref = NULL)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{ref}{reference signal level}
13 | }
14 | \value{
15 | a normalized \code{SigDF}
16 | }
17 | \description{
18 | The function takes a \code{SigDF} as input and scales both the Grn and Red
19 | signal to a reference (ref) level. If the reference level is not given, it
20 | is set to the mean intensity of all the in-band signals. The function
21 | returns a \code{SigDF} with dye bias corrected.
22 | }
23 | \examples{
24 | sesameDataCache() # if not done yet
25 | sdf <- sesameDataGet('EPIC.1.SigDF')
26 | sdf.db <- dyeBiasCorr(sdf)
27 | }
28 |
--------------------------------------------------------------------------------
/vignettes/_site.yml:
--------------------------------------------------------------------------------
1 | name: "SeSAMe"
2 | navbar:
3 | title: '
'
4 | left:
5 | - text: "Basics"
6 | icon: fa-home
7 | href: sesame.html
8 | - text: "QC"
9 | icon: fa-check-circle
10 | href: QC.html
11 | - text: "Non-human Array"
12 | icon: fa-paw
13 | href: nonhuman.html
14 | - text: "Modeling"
15 | icon: fa-cogs
16 | href: modeling.html
17 | - text: "Inference"
18 | icon: fa-database
19 | href: inferences.html
20 | - text: "KnowYourCG"
21 | icon: fa-binoculars
22 | href: KYCG.html
23 | - text: "Supplemental"
24 | icon: fa-book
25 | href: https://zhou-lab.github.io/sesame/dev/supplemental.html
26 |
--------------------------------------------------------------------------------
/man/getAFTypeIbySumAlleles.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{getAFTypeIbySumAlleles}
4 | \alias{getAFTypeIbySumAlleles}
5 | \title{Get allele frequency treating type I by summing alleles}
6 | \usage{
7 | getAFTypeIbySumAlleles(sdf, known.ccs.only = TRUE)
8 | }
9 | \arguments{
10 | \item{sdf}{\code{SigDF}}
11 |
12 | \item{known.ccs.only}{consider only known CCS probes}
13 | }
14 | \value{
15 | beta values
16 | }
17 | \description{
18 | Takes a \code{SigDF} as input and returns a numeric vector containing
19 | extra allele frequencies based on Color-Channel-Switching (CCS) probes.
20 | If no CCS probes exist in the \code{SigDF}, then a numeric(0) is
21 | returned.
22 | }
23 | \examples{
24 | sesameDataCache() # if not done yet
25 | sdf <- sesameDataGet('EPIC.1.SigDF')
26 | af <- getAFTypeIbySumAlleles(sdf)
27 | }
28 |
--------------------------------------------------------------------------------
/man/checkLevels.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{checkLevels}
4 | \alias{checkLevels}
5 | \title{filter data matrix by factor completeness
6 | only works for discrete factors}
7 | \usage{
8 | checkLevels(betas, fc)
9 | }
10 | \arguments{
11 | \item{betas}{matrix data}
12 |
13 | \item{fc}{factors, or characters}
14 | }
15 | \value{
16 | a boolean vector indicating, for each probe, whether there is a non-NA
17 | value in each tested group
18 | }
19 | \description{
20 | filter data matrix by factor completeness
21 | only works for discrete factors
22 | }
23 | \examples{
24 | se0 <- sesameDataGet("MM285.10.SE.tissue")[1:100,]
25 | se_ok <- checkLevels(SummarizedExperiment::assay(se0),
26 | SummarizedExperiment::colData(se0)$tissue)
27 | sum(se_ok) # number of good probes
28 | se1 <- se0[se_ok,]
29 |
30 | sesameDataGet_resetEnv()
31 | }
32 |
--------------------------------------------------------------------------------
/man/updateSigDF.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/species.R
3 | \name{updateSigDF}
4 | \alias{updateSigDF}
5 | \title{Set color and mask using strain/species-specific manifest}
6 | \usage{
7 | updateSigDF(sdf, species = NULL, strain = NULL, addr = NULL, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{species}{the species the sample is considered to be}
13 |
14 | \item{strain}{the strain the sample is considered to be}
15 |
16 | \item{addr}{species-specific address species, optional}
17 |
18 | \item{verbose}{print more messages}
19 | }
20 | \value{
21 | a \code{SigDF} with updated color channel and mask
22 | }
23 | \description{
24 | also sets attr(,"species")
25 | }
26 | \examples{
27 | sdf <- sesameDataGet('Mammal40.1.SigDF')
28 | sdf_mouse <- updateSigDF(sdf, species="mus_musculus")
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/man/openSesameToFile.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fileSet.R
3 | \name{openSesameToFile}
4 | \alias{openSesameToFile}
5 | \title{openSesame pipeline with file-backed storage}
6 | \usage{
7 | openSesameToFile(map_path, idat_dir, BPPARAM = SerialParam(), inc = 4)
8 | }
9 | \arguments{
10 | \item{map_path}{path of file to be mapped (beta values file)}
11 |
12 | \item{idat_dir}{source IDAT directory}
13 |
14 | \item{BPPARAM}{get parallel with MulticoreParam(2)}
15 |
16 | \item{inc}{bytes per item data storage. increase to 8 if precision
17 | is important. Most cases 32-bit representation is enough.}
18 | }
19 | \value{
20 | a sesame::fileSet
21 | }
22 | \description{
23 | openSesame pipeline with file-backed storage
24 | }
25 | \examples{
26 |
27 | openSesameToFile('mybetas',
28 | system.file('extdata',package='sesameData'))
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/man/predictMouseAgeInMonth.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/age.R
3 | \name{predictMouseAgeInMonth}
4 | \alias{predictMouseAgeInMonth}
5 | \title{Mouse age predictor}
6 | \usage{
7 | predictMouseAgeInMonth(betas, na_fallback = TRUE)
8 | }
9 | \arguments{
10 | \item{betas}{a probeID-named vector of beta values}
11 |
12 | \item{na_fallback}{use the fallback default for NAs.}
13 | }
14 | \value{
15 | age in months
16 | }
17 | \description{
18 | The function takes a named numeric vector of beta values. The name attribute
19 | contains the probe ID. The function looks for overlapping
20 | probes and estimates age using an aging model built from 321 MM285 probes.
21 | The function outputs a single numeric of age in months. The clock is most
22 | accurate with the sesame preprocessing.
23 | }
24 | \examples{
25 | cat("Deprecated. See predictAge")
26 | }
27 |
--------------------------------------------------------------------------------
/man/compareMouseStrainReference.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/strain.R
3 | \name{compareMouseStrainReference}
4 | \alias{compareMouseStrainReference}
5 | \title{Compare Strain SNPs with a reference panel}
6 | \usage{
7 | compareMouseStrainReference(
8 | betas = NULL,
9 | show_sample_names = FALSE,
10 | query_width = NULL
11 | )
12 | }
13 | \arguments{
14 | \item{betas}{beta value vector or matrix (for multiple samples)}
15 |
16 | \item{show_sample_names}{whether to show sample name}
17 |
18 | \item{query_width}{optional argument for adjusting query width}
19 | }
20 | \value{
21 | grid object that contrasts the target sample with
22 | the pre-built mouse strain reference
23 | }
24 | \description{
25 | Compare Strain SNPs with a reference panel
26 | }
27 | \examples{
28 | sesameDataCache() # if not done yet
29 | compareMouseStrainReference()
30 | }
31 |
--------------------------------------------------------------------------------
/man/getMask.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{getMask}
4 | \alias{getMask}
5 | \title{get probe masking by mask names}
6 | \usage{
7 | getMask(platform = "EPICv2", mask_names = "recommended")
8 | }
9 | \arguments{
10 | \item{platform}{EPICv2, EPIC, HM450, HM27, ...}
11 |
12 | \item{mask_names}{mask names (see listAvailableMasks)
13 | by default: "recommended"
14 | see recommendedMaskNames() for detail.}
15 | }
16 | \value{
17 | a vector of probe ID
18 | }
19 | \description{
20 | get probe masking by mask names
21 | }
22 | \examples{
23 |
24 | length(getMask("MSA", "recommended"))
25 | length(getMask("EPICv2", "recommended"))
26 | length(getMask("EPICv2", c("recommended", "M_SNPcommon_1pt")))
27 | length(getMask("EPICv2", "M_mapping"))
28 | length(getMask("EPIC"))
29 | length(getMask("HM450"))
30 | length(getMask("MM285"))
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/imputeBetasMatrixByMean.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/impute.R
3 | \name{imputeBetasMatrixByMean}
4 | \alias{imputeBetasMatrixByMean}
5 | \title{Impute Missing Values with Mean
6 | This function replaces missing values (NA) in a matrix, default is row
7 | means.}
8 | \usage{
9 | imputeBetasMatrixByMean(mx, axis = 1)
10 | }
11 | \arguments{
12 | \item{mx}{A matrix}
13 |
14 | \item{axis}{A single integer. Use 1 to impute column means (default),
15 | and 2 to impute row means.}
16 | }
17 | \value{
18 | A matrix with missing values imputed.
19 | }
20 | \description{
21 | Impute Missing Values with Mean
22 | This function replaces missing values (NA) in a matrix, default is row
23 | means.
24 | }
25 | \examples{
26 | mx <- cbind(c(1, 2, NA, 4), c(NA, 2, 3, 4))
27 | imputeBetasMatrixByMean(mx, axis = 1)
28 | imputeBetasMatrixByMean(mx, axis = 2)
29 | }
30 |
--------------------------------------------------------------------------------
/man/probeSuccessRate.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{probeSuccessRate}
4 | \alias{probeSuccessRate}
5 | \title{Whole-dataset-wide Probe Success Rate}
6 | \usage{
7 | probeSuccessRate(sdf, mask = TRUE, max_pval = 0.05)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{mask}{whether or not we count the masked probes in SigDF}
13 |
14 | \item{max_pval}{the maximum p-value to consider detection success}
15 | }
16 | \value{
17 | a fraction number as probe success rate
18 | }
19 | \description{
20 | This function calculates the probe success rate using
21 | pOOBAH detection p-values. Probes that have a detection p-value
22 | higher than a specific threshold are considered failed probes.
23 | }
24 | \examples{
25 | sesameDataCache() # if not done yet
26 | sdf <- sesameDataGet('EPIC.1.SigDF')
27 | probeSuccessRate(sdf)
28 | }
29 |
--------------------------------------------------------------------------------
/man/matchDesign.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/match_design.R
3 | \name{matchDesign}
4 | \alias{matchDesign}
5 | \title{normalize Infinium I probe betas to Infinium II}
6 | \usage{
7 | matchDesign(sdf, min_dbeta = 0.3)
8 | }
9 | \arguments{
10 | \item{sdf}{SigDF}
11 |
12 | \item{min_dbeta}{the default algorithm performs 2-state
13 | quantile-normalization of the unmethylated and methylated modes
14 | separately. However, when the two modes are too close, we fall back
15 | to a one-mode normalization. The threshold defines the maximum
16 | inter-mode distance.}
17 | }
18 | \value{
19 | SigDF
20 | }
21 | \description{
22 | This is designed to counter tail inflation in Infinium I probes.
23 | }
24 | \examples{
25 |
26 | library(RPMM)
27 | sdf <- sesameDataGet("MM285.1.SigDF")
28 | sesameQC_plotBetaByDesign(sdf)
29 | sesameQC_plotBetaByDesign(matchDesign(sdf))
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/man/readFileSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fileSet.R
3 | \name{readFileSet}
4 | \alias{readFileSet}
5 | \title{Read an existing fileSet from storage}
6 | \usage{
7 | readFileSet(map_path)
8 | }
9 | \arguments{
10 | \item{map_path}{path of file to map (should contain valid _idx.rds index)}
11 | }
12 | \value{
13 | a sesame::fileSet object
14 | }
15 | \description{
16 | This function only reads the meta-data.
17 | }
18 | \examples{
19 |
20 | ## create two samples
21 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2'))
22 |
23 | ## a hypothetical numeric array (can be beta values, intensities etc)
24 | hypothetical <- setNames(runif(fset$n), fset$probes)
25 |
26 | ## map the numeric to file
27 | mapFileSet(fset, 's1', hypothetical)
28 |
29 | ## read it from file
30 | fset <- readFileSet('mybetas2')
31 |
32 | ## get data
33 | sliceFileSet(fset, 's1', 'cg00000292')
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/man/detectionPnegEcdf.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/detection.R
3 | \name{detectionPnegEcdf}
4 | \alias{detectionPnegEcdf}
5 | \title{Detection P-value based on ECDF of negative control}
6 | \usage{
7 | detectionPnegEcdf(sdf, return.pval = FALSE, pval.threshold = 0.05)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{return.pval}{whether to return p-values, instead of a
13 | masked \code{SigDF}}
14 |
15 | \item{pval.threshold}{minimum p-value to mask}
16 | }
17 | \value{
18 | a \code{SigDF}, or a p-value vector if return.pval is TRUE
19 | }
20 | \description{
21 | The function takes a \code{SigDF} as input, computes detection p-value
22 | using negative control probes' empirical distribution and returns a new
23 | \code{SigDF} with an updated mask slot.
24 | }
25 | \examples{
26 | sdf <- sesameDataGet("EPIC.1.SigDF")
27 | sum(sdf$mask)
28 | sum(detectionPnegEcdf(sdf)$mask)
29 | }
30 |
--------------------------------------------------------------------------------
/man/sesameQC_rankStats.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_rankStats}
4 | \alias{sesameQC_rankStats}
5 | \title{This function compares the input sample with public data.
6 | Only overlapping metrics will be compared.}
7 | \usage{
8 | sesameQC_rankStats(qc, publicQC = NULL, platform = "EPIC")
9 | }
10 | \arguments{
11 | \item{qc}{a sesameQC object}
12 |
13 | \item{publicQC}{public QC statistics, filtered from e.g.: EPIC.publicQC,
14 | MM285.publicQC and Mammal40.publicQC}
15 |
16 | \item{platform}{EPIC, MM285 or Mammal40, used when publicQC is not given}
17 | }
18 | \value{
19 | a sesameQC
20 | }
21 | \description{
22 | This function compares the input sample with public data.
23 | Only overlapping metrics will be compared.
24 | }
25 | \examples{
26 |
27 | sesameDataCache() # if not done yet
28 | sdf <- sesameDataGet('EPIC.1.SigDF')
29 | sesameQC_rankStats(sesameQC_calcStats(sdf, "intensity"))
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/man/sesameQC_plotBetaByDesign.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_plotBetaByDesign}
4 | \alias{sesameQC_plotBetaByDesign}
5 | \title{Plot betas distinguishing different Infinium chemistries}
6 | \usage{
7 | sesameQC_plotBetaByDesign(
8 | sdf,
9 | prep = NULL,
10 | legend_pos = "top",
11 | mar = c(3, 3, 1, 1),
12 | main = "",
13 | ...
14 | )
15 | }
16 | \arguments{
17 | \item{sdf}{SigDF}
18 |
19 | \item{prep}{prep codes to step through}
20 |
21 | \item{legend_pos}{legend position (default: top)}
22 |
23 | \item{mar}{margin of layout when showing steps of prep}
24 |
25 | \item{main}{main title in plots}
26 |
27 | \item{...}{additional options to plot}
28 | }
29 | \value{
30 | create a density plot
31 | }
32 | \description{
33 | Plot betas distinguishing different Infinium chemistries
34 | }
35 | \examples{
36 | sdf <- sesameDataGet("EPIC.1.SigDF")
37 | sesameQC_plotBetaByDesign(sdf, prep="DB")
38 | }
39 |
--------------------------------------------------------------------------------
/man/sesameAnno_attachManifest.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesameAnno.R
3 | \name{sesameAnno_attachManifest}
4 | \alias{sesameAnno_attachManifest}
5 | \title{Annotate a data.frame using manifest}
6 | \usage{
7 | sesameAnno_attachManifest(
8 | df,
9 | probe_id = "Probe_ID",
10 | platform = NULL,
11 | genome = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{df}{input data frame with Probe_ID as a column}
16 |
17 | \item{probe_id}{the Probe_ID column name, default to "Probe_ID" or
18 | rownames}
19 |
20 | \item{platform}{which array platform, guess from probe ID if not given}
21 |
22 | \item{genome}{the genome build, use default if not given}
23 | }
24 | \value{
25 | a new data.frame with manifest attached
26 | }
27 | \description{
28 | Annotate a data.frame using manifest
29 | }
30 | \examples{
31 | \dontrun{
32 | df <- data.frame(Probe_ID = c("cg00101675_BC21", "cg00116289_BC21"))
33 | sesameAnno_attachManifest(df)
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/man/mapFileSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fileSet.R
3 | \name{mapFileSet}
4 | \alias{mapFileSet}
5 | \title{Deposit data of one sample to a fileSet (and hence to file)}
6 | \usage{
7 | mapFileSet(fset, sample, named_values)
8 | }
9 | \arguments{
10 | \item{fset}{a sesame::fileSet, as obtained via readFileSet}
11 |
12 | \item{sample}{sample name as a string}
13 |
14 | \item{named_values}{value vector named by probes}
15 | }
16 | \value{
17 | a sesame::fileSet
18 | }
19 | \description{
20 | Deposit data of one sample to a fileSet (and hence to file)
21 | }
22 | \examples{
23 |
24 | ## create two samples
25 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2'))
26 |
27 | ## a hypothetical numeric array (can be beta values, intensities etc)
28 | hypothetical <- setNames(runif(fset$n), fset$probes)
29 |
30 | ## map the numeric to file
31 | mapFileSet(fset, 's1', hypothetical)
32 |
33 | ## get data
34 | sliceFileSet(fset, 's1', 'cg00000292')
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/man/meanIntensity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{meanIntensity}
4 | \alias{meanIntensity}
5 | \title{Whole-dataset-wide Mean Intensity}
6 | \usage{
7 | meanIntensity(sdf, mask = TRUE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{mask}{whether to mask probes using mask column}
13 | }
14 | \value{
15 | mean of all intensities
16 | }
17 | \description{
18 | The function takes one single \code{SigDF} and computes mean
19 | intensity of all the in-band measurements. This includes all Type-I
20 | in-band measurements and all Type-II probe measurements. Both methylated
21 | and unmethylated alleles are considered. This function outputs a single
22 | numeric for the mean.
23 | }
24 | \details{
25 | Note: mean in this case is more informative than median because
26 | methylation level is mostly bimodal.
27 | }
28 | \examples{
29 | sesameDataCache() # if not done yet
30 | sdf <- sesameDataGet('EPIC.1.SigDF')
31 | meanIntensity(sdf)
32 | }
33 |
--------------------------------------------------------------------------------
/man/reIdentify.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/deidentify.R
3 | \name{reIdentify}
4 | \alias{reIdentify}
5 | \title{Re-identify IDATs by restoring scrambled SNP intensities}
6 | \usage{
7 | reIdentify(path, out_path = NULL, snps = NULL, mft = NULL)
8 | }
9 | \arguments{
10 | \item{path}{input IDAT file}
11 |
12 | \item{out_path}{output IDAT file}
13 |
14 | \item{snps}{SNP definition, if not given, default to SNP probes}
15 |
16 | \item{mft}{sesame-compatible manifest if non-standard}
17 | }
18 | \value{
19 | NULL, changes made to the IDAT files
20 | }
21 | \description{
22 | This requires setting a seed with a secret number that was used to
23 | de-identify the IDAT (see example).
24 | This requires a secret number that was used to de-identify the IDAT.
25 | }
26 | \examples{
27 |
28 | temp_out <- tempfile("test")
29 |
30 | set.seed(123)
31 | reIdentify(system.file(
32 | "extdata", "4207113116_A_Grn.idat", package = "sesameData"), temp_out)
33 | unlink(temp_out)
34 | }
35 |
--------------------------------------------------------------------------------
/man/dyeBiasCorrMostBalanced.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dye_bias.R
3 | \name{dyeBiasCorrMostBalanced}
4 | \alias{dyeBiasCorrMostBalanced}
5 | \title{Correct dye bias using most balanced sample as the reference}
6 | \usage{
7 | dyeBiasCorrMostBalanced(sdfs)
8 | }
9 | \arguments{
10 | \item{sdfs}{a list of normalized \code{SigDF}s}
11 | }
12 | \value{
13 | a list of normalized \code{SigDF}s
14 | }
15 | \description{
16 | The function chose the reference signal level from a list of \code{SigDF}.
17 | The chosen sample has the smallest difference in Grn and Red signal
18 | intensity as measured using the normalization control probes. In practice,
19 | it doesn't matter which sample is chosen as long as the reference level
20 | does not deviate much. The function returns a list of \code{SigDF}s with
21 | dye bias corrected.
22 | }
23 | \examples{
24 | sesameDataCache() # if not done yet
25 | sdfs <- sesameDataGet('HM450.10.SigDF')[1:2]
26 | sdfs.db <- dyeBiasCorrMostBalanced(sdfs)
27 | }
28 |
--------------------------------------------------------------------------------
/man/imputeBetas.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/impute.R
3 | \name{imputeBetas}
4 | \alias{imputeBetas}
5 | \title{Impute missing data for a specific platform}
6 | \usage{
7 | imputeBetas(
8 | betas,
9 | platform = NULL,
10 | BPPARAM = SerialParam(),
11 | celltype = NULL,
12 | sd_max = 999
13 | )
14 | }
15 | \arguments{
16 | \item{betas}{named vector of beta values}
17 |
18 | \item{platform}{platform}
19 |
20 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing}
21 |
22 | \item{celltype}{celltype/tissue context of imputation, if not given, will
23 | use nearest neighbor to determine.}
24 |
25 | \item{sd_max}{maximum standard deviation in imputation confidence}
26 | }
27 | \value{
28 | imputed data, vector or matrix
29 | }
30 | \description{
31 | Impute missing data for a specific platform
32 | }
33 | \examples{
34 | betas = openSesame(sesameDataGet("EPIC.1.SigDF"))
35 | sum(is.na(betas))
36 | betas2 = imputeBetas(betas, "EPIC")
37 | sum(is.na(betas2))
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/man/imputeBetasByGenomicNeighbors.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/impute.R
3 | \name{imputeBetasByGenomicNeighbors}
4 | \alias{imputeBetasByGenomicNeighbors}
5 | \title{Impute missing data based on genomic neighbors.}
6 | \usage{
7 | imputeBetasByGenomicNeighbors(
8 | betas,
9 | platform = NULL,
10 | BPPARAM = SerialParam(),
11 | max_neighbors = 3,
12 | max_dist = 10000
13 | )
14 | }
15 | \arguments{
16 | \item{betas}{named vector of beta values}
17 |
18 | \item{platform}{platform}
19 |
20 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing}
21 |
22 | \item{max_neighbors}{maximum neighbors to use for dense regions}
23 |
24 | \item{max_dist}{maximum distance to count as neighbor}
25 | }
26 | \value{
27 | imputed data, vector or matrix
28 | }
29 | \description{
30 | Impute missing data based on genomic neighbors.
31 | }
32 | \examples{
33 | betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]])
34 | sum(is.na(betas))
35 | betas2 = imputeBetasByGenomicNeighbors(betas, "EPICv2")
36 | sum(is.na(betas2))
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/man/sesameAnno_buildManifestGRanges.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesameAnno.R
3 | \name{sesameAnno_buildManifestGRanges}
4 | \alias{sesameAnno_buildManifestGRanges}
5 | \title{Build manifest GRanges from tsv}
6 | \usage{
7 | sesameAnno_buildManifestGRanges(
8 | tsv,
9 | genome = NULL,
10 | decoy = FALSE,
11 | columns = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{tsv}{a file path, a platform (e.g., EPIC), or
16 | a tibble/data.frame object}
17 |
18 | \item{genome}{a genome string, e.g., hg38, mm10}
19 |
20 | \item{decoy}{consider decoy sequence in chromosome order}
21 |
22 | \item{columns}{the columns to include in the GRanges}
23 | }
24 | \value{
25 | GRanges
26 | }
27 | \description{
28 | manifest tsv files can be downloaded from
29 | http://zwdzwd.github.io/InfiniumAnnotation
30 | }
31 | \examples{
32 | \dontrun{
33 | tsv = sesameAnno_download("HM450.hg38.manifest.tsv.gz")
34 | gr <- sesameAnno_buildManifestGRanges(tsv)
35 | ## direct access
36 | gr <- sesameAnno_buildManifestGRanges("HM450.hg38.manifest")
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/man/betasCollapseToPfx.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{betasCollapseToPfx}
4 | \alias{betasCollapseToPfx}
5 | \title{Collapse betas by averaging probes with common probe ID prefix}
6 | \usage{
7 | betasCollapseToPfx(betas, BPPARAM = SerialParam())
8 | }
9 | \arguments{
10 | \item{betas}{either a named numeric vector or a numeric matrix
11 | (row: probes, column: samples)}
12 |
13 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing}
14 | }
15 | \value{
16 | either named numeric vector or a numeric matrix of collapsed
17 | beta value matrix
18 | }
19 | \description{
20 | Collapse betas by averaging probes with common probe ID prefix
21 | }
22 | \examples{
23 |
24 | ## input is a matrix
25 | m <- matrix(seq(0,1,length.out=9), nrow=3)
26 | rownames(m) <- c("cg00004963_TC21", "cg00004963_TC22", "cg00004747_TC21")
27 | colnames(m) <- c("A","B","C")
28 | betasCollapseToPfx(m)
29 |
30 | ## input is a vector
31 | m <- setNames(seq(0,1,length.out=3),
32 | c("cg00004963_TC21", "cg00004963_TC22", "cg00004747_TC21"))
33 | betasCollapseToPfx(m)
34 | }
35 |
--------------------------------------------------------------------------------
/man/chipAddressToSignal.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{chipAddressToSignal}
4 | \alias{chipAddressToSignal}
5 | \title{Lookup address in one sample}
6 | \usage{
7 | chipAddressToSignal(dm, mft, min_beads = NULL)
8 | }
9 | \arguments{
10 | \item{dm}{data frame in chip address, 2 columns: cy3/Grn and cy5/Red}
11 |
12 | \item{mft}{a data frame with columns Probe_ID, M, U and col}
13 |
14 | \item{min_beads}{minimum bead counts, otherwise masked}
15 | }
16 | \value{
17 | a SigDF, indexed by probe ID address
18 | }
19 | \description{
20 | Lookup address and transform address to probe
21 | }
22 | \details{
23 | Translate data in chip address to probe address.
24 | Type I probes can be separated into Red and Grn channels. The
25 | methylated allele and unmethylated allele are at different
26 | addresses. For type II probes methylated allele and unmethylated allele are
27 | at the same address. Grn channel is for methylated allele and Red channel is
28 | for unmethylated allele. The out-of-band signals are type I probes measured
29 | using the other channel.
30 | }
31 |
--------------------------------------------------------------------------------
/R/palgen.R:
--------------------------------------------------------------------------------
1 |
#' Generate some additional color palettes
#'
#' Resolves \code{pal} to a palette-generating function. A single string
#' naming one of the ad hoc palettes ("whiteturbo", "whitejet",
#' "whiteblack") is first expanded to its color vector; any other character
#' vector is used as the colors to interpolate between. A function is
#' returned unchanged.
#'
#' The optional package \code{pals} is only needed for the "whiteturbo"
#' palette; every other input works without it.
#'
#' @param pal a string for adhoc pals, a character vector of colors,
#' or a palette-generating function
#' @param n the number of colors for interpolation
#' (kept for compatibility; not used by the function body)
#' @param space rgb or Lab
#' @return a palette-generating function
#' @examples
#' library(pals)
#' pal.bands(palgen("whiteturbo"))
#' @export
palgen <- function(pal, n=150, space = "Lab") {

    ## ad hoc palettes that need nothing beyond base color names
    static_pals <- list(
        whitejet = c("white","white","lightblue",
            "blue","green","yellow","orange","red","darkred"),
        whiteblack = c("white", "black"))

    if (length(pal) == 1 && is.character(pal)) {
        ## %in% never yields NA, so an NA string simply falls through
        if (pal %in% "whiteturbo") {
            ## only this palette depends on the optional 'pals' package,
            ## so check for it here instead of on every call
            if (!requireNamespace("pals", quietly = TRUE)) {
                stop("Package 'pals' is required for the ",
                    "\"whiteturbo\" palette. Please install it.")
            }
            pal <- c("white","white",pals::turbo(10)[seq(2,10)])
        } else if (pal %in% names(static_pals)) {
            pal <- static_pals[[pal]]
        }
    }

    if (is.character(pal)) {
        ## grDevices ships with R; `::` loads its namespace on demand
        grDevices::colorRampPalette(pal, space = space)
    } else if (is.function(pal)) {
        pal
    } else {
        stop("Please provide the right pal format.")
    }
}
35 |
--------------------------------------------------------------------------------
/man/getRefSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cell_composition.R
3 | \name{getRefSet}
4 | \alias{getRefSet}
5 | \title{Retrieve reference set}
6 | \usage{
7 | getRefSet(cells = NULL, platform = c("EPIC", "HM450"))
8 | }
9 | \arguments{
10 | \item{cells}{reference cell types}
11 |
12 | \item{platform}{EPIC or HM450}
13 | }
14 | \value{
15 | g, a 0/1 matrix with probes on the rows and specified cell types
16 | on the columns.
17 | }
18 | \description{
19 | The function retrieves the curated reference DNA methylation status for
20 | a set of cell type names under the Infinium platform. Supported cell types
21 | include "CD4T", "CD19B", "CD56NK", "CD14Monocytes", "granulocytes", "scFat",
22 | "skin" etc. See package sesameData for more details. The function outputs a
23 | matrix with probes on the rows and specified cell types on the columns.
24 | 0 suggests unmethylation and 1 suggests methylation. Intermediate
25 | methylation and inconclusive calls are left with NA.
26 | }
27 | \examples{
28 |
29 | betas = getRefSet('CD4T', platform='HM450')
30 | sesameDataGet_resetEnv()
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/man/noob.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/background.R
3 | \name{noob}
4 | \alias{noob}
5 | \title{Noob background subtraction}
6 | \usage{
7 | noob(sdf, combine.neg = TRUE, offset = 15)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF}}
11 |
12 | \item{combine.neg}{whether to combine negative control probe.}
13 |
14 | \item{offset}{offset}
15 | }
16 | \value{
17 | a new \code{SigDF} with noob background correction
18 | }
19 | \description{
20 | The function takes a \code{SigDF} and returns a modified \code{SigDF}
21 | with background subtracted. Background was modelled in a normal distribution
22 | and true signal in an exponential distribution. The Norm-Exp deconvolution
23 | is parameterized using Out-Of-Band (oob) probes. For species-specific
24 | processing, one should call inferSpecies on SigDF first. Multi-mapping
25 | probes are excluded.
26 | }
27 | \details{
28 | When combine.neg = TRUE, background will be parameterized by both
29 | negative control and out-of-band probes.
30 | }
31 | \examples{
32 | sdf <- sesameDataGet('EPIC.1.SigDF')
33 | sdf.nb <- noob(sdf)
34 | }
35 |
--------------------------------------------------------------------------------
/man/getBetas.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{getBetas}
4 | \alias{getBetas}
5 | \title{Get beta Values}
6 | \usage{
7 | getBetas(
8 | sdf,
9 | mask = TRUE,
10 | sum.TypeI = FALSE,
11 | collapseToPfx = FALSE,
12 | collapseMethod = c("mean", "minPval")
13 | )
14 | }
15 | \arguments{
16 | \item{sdf}{\code{SigDF}}
17 |
18 | \item{mask}{whether to use mask}
19 |
20 | \item{sum.TypeI}{whether to sum type I channels}
21 |
22 | \item{collapseToPfx}{remove replicate to prefix (e.g., cg number) and
23 | remove the suffix}
24 |
25 | \item{collapseMethod}{mean or minPval}
26 | }
27 | \value{
28 | a numeric vector, beta values
29 | }
30 | \description{
31 | sum.TypeI is used for rescuing beta values on
32 | Color-Channel-Switching CCS probes. The function takes a \code{SigDF}
33 | and returns beta value except that Type-I in-band signal and out-of-band
34 | signal are combined. This prevents color-channel switching due to SNPs.
35 | }
36 | \examples{
37 | sesameDataCache() # if not done yet
38 | sdf <- sesameDataGet('EPIC.1.SigDF')
39 | betas <- getBetas(sdf)
40 | }
41 |
--------------------------------------------------------------------------------
/man/compareMouseTissueReference.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tissue.R
3 | \name{compareMouseTissueReference}
4 | \alias{compareMouseTissueReference}
5 | \title{Compare mouse array data with mouse tissue references}
6 | \usage{
7 | compareMouseTissueReference(
8 | betas = NULL,
9 | ref = NULL,
10 | color = "blueYellow",
11 | query_width = 0.3
12 | )
13 | }
14 | \arguments{
15 | \item{betas}{matrix of betas for the target sample
16 | This argument is optional. If not given, only the reference will be shown.}
17 |
18 | \item{ref}{the reference beta values in SummarizedExperiment.
19 | This argument is optional. If not given, the reference will be downloaded
20 | from the sesameData package.}
21 |
22 | \item{color}{either blueYellow or fullJet}
23 |
24 | \item{query_width}{the width of the query beta value matrix}
25 | }
26 | \value{
27 | grid object that contrasts the target sample with
28 | pre-built mouse tissue reference
29 | }
30 | \description{
31 | Compare mouse array data with mouse tissue references
32 | }
33 | \examples{
34 | cat("Deprecated, see compareReference")
35 | }
36 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2024 Wanding Zhou
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/man/qualityMask.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mask.R
3 | \name{qualityMask}
4 | \alias{qualityMask}
5 | \title{Mask beta values by design quality}
6 | \usage{
7 | qualityMask(sdf, mask_names = "recommended", verbose = TRUE)
8 | }
9 | \arguments{
10 | \item{sdf}{a \code{SigDF} object}
11 |
12 | \item{mask_names}{a vector of masking groups, see listAvailableMasks
13 | use "recommended" for recommended masking. One can also combine
14 | "recommended" with other masking groups by specifying a vector, e.g.,
15 | c("recommended", "M_mapping")}
16 |
17 | \item{verbose}{be verbose}
18 | }
19 | \value{
20 | a filtered \code{SigDF}
21 | }
22 | \description{
23 | Currently quality masking only supports three platforms
24 | see also listAvailableMasks(sdfPlatform(sdf))
25 | }
26 | \examples{
27 | sesameDataCache() # if not done yet
28 | sdf <- sesameDataGet('EPIC.1.SigDF')
29 | sum(sdf$mask)
30 | sum(qualityMask(sdf)$mask)
31 | sum(qualityMask(sdf, mask_names = NULL)$mask)
32 |
33 | ## list available masks, the dbname column
34 | listAvailableMasks(sdfPlatform(sdf))
35 | listAvailableMasks("EPICv2")
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/man/parseGEOsignalMU.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/GEO.R
3 | \name{parseGEOsignalMU}
4 | \alias{parseGEOsignalMU}
5 | \title{Convert signal M and U to SigDF}
6 | \usage{
7 | parseGEOsignalMU(
8 | sigM,
9 | sigU,
10 | Probe_IDs,
11 | oob.mean = 500,
12 | oob.sd = 300,
13 | platform = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{sigM}{methylated signal, a numeric vector}
18 |
19 | \item{sigU}{unmethylated signal, a numeric vector}
20 |
21 | \item{Probe_IDs}{probe ID vector}
22 |
23 | \item{oob.mean}{assumed mean for out-of-band signals}
24 |
25 | \item{oob.sd}{assumed standard deviation for out-of-band signals}
26 |
27 | \item{platform}{platform code, will infer if not given}
28 | }
29 | \value{
30 | SigDF
31 | }
32 | \description{
33 | This overcomes the issue of missing IDAT files. However,
34 | out-of-band signals will be missing or faked (sampled from a
35 | normal distribution).
36 | }
37 | \examples{
38 | sigM <- c(11436, 6068, 2864)
39 | sigU <- c(1476, 804, 393)
40 | probes <- c("cg07881041", "cg23229610", "cg03513874")
41 | sdf <- parseGEOsignalMU(sigM, sigU, probes, platform = "EPIC")
42 | }
43 |
--------------------------------------------------------------------------------
/man/visualizeSegments.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cnv.R
3 | \name{visualizeSegments}
4 | \alias{visualizeSegments}
5 | \title{Visualize segments}
6 | \usage{
7 | visualizeSegments(seg, to.plot = NULL, genes.to.label = NULL)
8 | }
9 | \arguments{
10 | \item{seg}{a \code{CNSegment} object}
11 |
12 | \item{to.plot}{chromosome to plot (by default plot all chromosomes)}
13 |
14 | \item{genes.to.label}{gene(s) to label}
15 | }
16 | \value{
17 | plot graphics
18 | }
19 | \description{
20 | The function takes a \code{CNSegment} object obtained from cnSegmentation
21 | and plots the bin signals and segments (as horizontal lines).
22 | }
23 | \details{
24 | Requires ggplot2 and scales.
25 | }
26 | \examples{
27 |
28 | sesameDataCache()
29 | \dontrun{
30 | sdfs <- sesameDataGet('EPICv2.8.SigDF')
31 | sdf <- sdfs[["K562_206909630040_R01C01"]]
32 | seg <- cnSegmentation(sdf)
33 | seg <- cnSegmentation(sdf, return.probe.signals=TRUE)
34 | visualizeSegments(seg)
35 | visualizeSegments(seg, to.plot=c("chr9","chr22"))
36 | visualizeSegments(seg, genes.to.label=c("ABL1","BCR"))
37 | }
38 |
39 | sesameDataGet_resetEnv()
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/man/inferStrain.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/strain.R
3 | \name{inferStrain}
4 | \alias{inferStrain}
5 | \title{Infer strain information for mouse array}
6 | \usage{
7 | inferStrain(
8 | sdf,
9 | return.strain = FALSE,
10 | return.probability = FALSE,
11 | return.pval = FALSE,
12 | min_frac_dt = 0.2,
13 | verbose = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{sdf}{SigDF}
18 |
19 | \item{return.strain}{return strain name}
20 |
21 | \item{return.probability}{return probability vector for all strains}
22 |
23 | \item{return.pval}{return p-value}
24 |
25 | \item{min_frac_dt}{minimum fraction of detected signal (DEFAULT: 0.2);
26 | otherwise, we give up strain inference and return NA.}
27 |
28 | \item{verbose}{print more messages}
29 | }
30 | \value{
31 | a list of best guess, p-value of the best guess
32 | and the probabilities of all strains
33 | }
34 | \description{
35 | Infer strain information for mouse array
36 | }
37 | \examples{
38 | sesameDataCache() # if not done yet
39 | sdf <- sesameDataGet('MM285.1.SigDF')
40 | inferStrain(sdf, return.strain = TRUE)
41 | sdf.strain <- inferStrain(sdf)
42 | }
43 |
--------------------------------------------------------------------------------
/man/ELBAR.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/detection.R
3 | \name{ELBAR}
4 | \alias{ELBAR}
5 | \title{ELiminate BAckground-dominated Reading (ELBAR)}
6 | \usage{
7 | ELBAR(
8 | sdf,
9 | return.pval = FALSE,
10 | pval.threshold = 0.05,
11 | margin = 0.05,
12 | capMU = 3000,
13 | delta.beta = 0.2,
14 | n.windows = 500
15 | )
16 | }
17 | \arguments{
18 | \item{sdf}{a \code{SigDF}}
19 |
20 | \item{return.pval}{whether to return p-values, instead of a SigDF}
21 |
22 | \item{pval.threshold}{minimum p-value to mask}
23 |
24 | \item{margin}{the percentile margin to define envelope, the smaller
25 | the value the more aggressive the masking.}
26 |
27 | \item{capMU}{the maximum M+U to search for intermediate betas}
28 |
29 | \item{delta.beta}{maximum beta value change from
30 | sheer background-dominated readings}
31 |
32 | \item{n.windows}{number of windows for smoothing}
33 | }
34 | \value{
35 | a \code{SigDF} with mask added
36 | }
37 | \description{
38 | ELiminate BAckground-dominated Reading (ELBAR)
39 | }
40 | \examples{
41 | sdf <- sesameDataGet("EPIC.1.SigDF")
42 | sum(sdf$mask)
43 | sum(ELBAR(sdf)$mask)
44 | }
45 |
--------------------------------------------------------------------------------
/man/inferInfiniumIChannel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/channel_inference.R
3 | \name{inferInfiniumIChannel}
4 | \alias{inferInfiniumIChannel}
5 | \title{Infer and reset color channel for Type-I probes instead of
6 | using what is specified in manifest. The results are stored to
7 | sdf@extra$IGG and sdf@extra$IRR slot.}
8 | \usage{
9 | inferInfiniumIChannel(
10 | sdf,
11 | switch_failed = FALSE,
12 | mask_failed = FALSE,
13 | verbose = FALSE,
14 | summary = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{sdf}{a \code{SigDF}}
19 |
20 | \item{switch_failed}{whether to switch failed probes (default to FALSE)}
21 |
22 | \item{mask_failed}{whether to mask failed probes (default to FALSE)}
23 |
24 | \item{verbose}{whether to print correction summary}
25 |
26 | \item{summary}{return summarized numbers only.}
27 | }
28 | \value{
29 | a \code{SigDF}, or numerics if summary == TRUE
30 | }
31 | \description{
32 | IGG => Type-I green that is inferred to be green
33 | IRR => Type-I red that is inferred to be red
34 | }
35 | \examples{
36 |
37 | sdf <- sesameDataGet('EPIC.1.SigDF')
38 | inferInfiniumIChannel(sdf)
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/man/sesameQC_calcStats.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_calcStats}
4 | \alias{sesameQC_calcStats}
5 | \title{Calculate QC statistics}
6 | \usage{
7 | sesameQC_calcStats(sdf, funs = NULL)
8 | }
9 | \arguments{
10 | \item{sdf}{a SigDF object}
11 |
12 | \item{funs}{a sesameQC_calcStats_* function or a list of them
13 | default to all functions. One can also use a string such as
14 | "detection" or c("detection", "intensity") to reduce typing}
15 | }
16 | \value{
17 | a sesameQC object
18 | }
19 | \description{
20 | It is a function to call one or multiple
21 | sesameQC_calcStats functions
22 | }
23 | \details{
24 | currently supporting: detection, intensity, numProbes, channel,
25 | dyeBias, betas
26 | }
27 | \examples{
28 | sesameDataCache() # if not done yet
29 | sdf <- sesameDataGet('EPIC.1.SigDF')
30 | sesameQC_calcStats(sdf)
31 | sesameQC_calcStats(sdf, "detection")
32 | sesameQC_calcStats(sdf, c("detection", "channel"))
33 | ## retrieve stats as a list
34 | sesameQC_getStats(sesameQC_calcStats(sdf, "detection"))
35 | ## or as data frames
36 | as.data.frame(sesameQC_calcStats(sdf, "detection"))
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/man/deidentify.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/deidentify.R
3 | \name{deIdentify}
4 | \alias{deIdentify}
5 | \title{De-identify IDATs by removing SNP probes}
6 | \usage{
7 | deIdentify(path, out_path = NULL, snps = NULL, mft = NULL, randomize = FALSE)
8 | }
9 | \arguments{
10 | \item{path}{input IDAT file}
11 |
12 | \item{out_path}{output IDAT file}
13 |
14 | \item{snps}{SNP definition, if not given, default to SNP probes}
15 |
16 | \item{mft}{sesame-compatible manifest if non-standard}
17 |
18 | \item{randomize}{whether to randomize the SNPs. If TRUE,
19 | randomize the signal intensities. One can use set.seed to
20 | reidentify the IDAT with the secret seed (see examples).
21 | If FALSE, this sets all SNP intensities to zero.}
22 | }
23 | \value{
24 | NULL, changes made to the IDAT files
25 | }
26 | \description{
27 | Mask the mean intensities of SNP probes by setting them to zero.
28 | }
29 | \examples{
30 |
31 | my_secret <- 13412084
32 | set.seed(my_secret)
33 | temp_out <- tempfile("test")
34 | deIdentify(system.file(
35 | "extdata", "4207113116_A_Grn.idat", package = "sesameData"),
36 | temp_out, randomize = TRUE)
37 | unlink(temp_out)
38 | }
39 |
--------------------------------------------------------------------------------
/man/dyeBiasNL.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dye_bias.R
3 | \name{dyeBiasNL}
4 | \alias{dyeBiasNL}
5 | \alias{dyeBiasCorrTypeINorm}
6 | \title{Dye bias correction by matching green and red to mid point}
7 | \usage{
8 | dyeBiasNL(sdf, mask = TRUE, verbose = FALSE)
9 |
10 | dyeBiasCorrTypeINorm(sdf, mask = TRUE, verbose = FALSE)
11 | }
12 | \arguments{
13 | \item{sdf}{a \code{SigDF}}
14 |
15 | \item{mask}{include masked probes in Infinium-I probes. No big difference is
16 | noted in practice. More probes are generally better.}
17 |
18 | \item{verbose}{print more messages}
19 | }
20 | \value{
21 | a \code{SigDF} after dye bias correction.
22 | }
23 | \description{
24 | This function compares the Type-I Red probes and Type-I Grn probes and
25 | generates a mapping to correct the signal of the two channels to the middle.
26 | The function takes one single \code{SigDF} and returns a \code{SigDF}
27 | with dye bias corrected.
28 | }
29 | \examples{
30 | sesameDataCache() # if not done yet
31 | sdf <- sesameDataGet('EPIC.1.SigDF')
32 | sdf.db <- dyeBiasNL(sdf)
33 | sdf <- sesameDataGet('EPIC.1.SigDF')
34 | sdf <- dyeBiasCorrTypeINorm(sdf)
35 | }
36 |
--------------------------------------------------------------------------------
/man/twoCompsEst2.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cell_composition.R
3 | \name{twoCompsEst2}
4 | \alias{twoCompsEst2}
5 | \title{Estimate the fraction of the 2nd component in a 2-component mixture}
6 | \usage{
7 | twoCompsEst2(
8 | pop1,
9 | pop2,
10 | target,
11 | use.ave = TRUE,
12 | diff_1m2u = NULL,
13 | diff_1u2m = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{pop1}{Reference methylation level matrix for population 1}
18 |
19 | \item{pop2}{Reference methylation level matrix for population 2}
20 |
21 | \item{target}{Target methylation level matrix to be analyzed}
22 |
23 | \item{use.ave}{use population average in selecting differentially
24 | methylated probes}
25 |
26 | \item{diff_1m2u}{A vector of differentially methylated probes (methylated
27 | in population 1 but unmethylated in population 2)}
28 |
29 | \item{diff_1u2m}{A vector of differentially methylated probes (unmethylated
30 | in population 1 but methylated in population 2)}
31 | }
32 | \value{
33 | Estimate of the 2nd component in the 2-component mixture
34 | }
35 | \description{
36 | Estimate the fraction of the 2nd component in a 2-component mixture
37 | }
38 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SeSAMe - SEnsible Step-wise Analysis of Methylation data
2 |
3 | [](https://github.com/zwdzwd/sesame/commits/master)
4 |
5 | SeSAMe is an R package for processing Infinium DNA methylation data. SeSAMe currently supports EPIC, HM450 and HM27 platforms and dynamically generated manifest.
6 |
7 | To install from Github,
8 | ```R
9 | BiocManager::install("zwdzwd/sesame")
10 | ```
11 |
12 | See the package [Home Page on Bioconductor](https://bioconductor.org/packages/release/bioc/html/sesame.html) and the [Developmental Branch](https://bioconductor.org/packages/devel/bioc/html/sesame.html).
13 |
14 | It also depends on a [data package](https://github.com/zwdzwd/sesameData) for annotation and example data.
15 |
16 | ## Bugs
17 |
18 | Bug reports are appreciated. Register issues at the SeSAMe [issue tracker](http://github.com/zwdzwd/sesame/issues).
19 |
20 |
21 | ## About
22 |
23 | Please cite and reference [SeSAMe: reducing artifactual detection of DNA methylation by Infinium BeadChips in genomic deletions](https://doi.org/10.1093/nar/gky691) for more details.
24 |
--------------------------------------------------------------------------------
/man/prepSesame.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/open.R
3 | \name{prepSesame}
4 | \alias{prepSesame}
5 | \title{Apply a chain of sesame preprocessing functions in an arbitrary order}
6 | \usage{
7 | prepSesame(sdf, prep = "QCDPB", prep_args = NULL)
8 | }
9 | \arguments{
10 | \item{sdf}{SigDF}
11 |
12 | \item{prep}{code that indicates preprocessing functions and their
13 | execution order (functions on the left are executed first).}
14 |
15 | \item{prep_args}{optional argument list to individual functions, e.g.,
16 | prepSesame(sdf, prep_args=list(Q=list(mask_names = "design_issue")))
17 | sets qualityMask(sdf, mask_names = "design_issue")}
18 | }
19 | \value{
20 | SigDF
21 | }
22 | \description{
23 | Notes on the order of operation:
24 | 1. qualityMask and inferSpecies should go before noob and pOOBAH,
25 | otherwise the background is too high because of Multi,
26 | uk and other probes
27 | 2. dyeBias correction needs to happen early
28 | 3. channel inference before dyebias
29 | 4. noob should happen last, pOOBAH before noob because noob modifies oob
30 | }
31 | \examples{
32 | sdf <- sesameDataGet("MM285.1.SigDF")
33 | sdf1 <- prepSesame(sdf, "QCDPB")
34 | }
35 |
--------------------------------------------------------------------------------
/man/sliceFileSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fileSet.R
3 | \name{sliceFileSet}
4 | \alias{sliceFileSet}
5 | \title{Slice a fileSet with samples and probes}
6 | \usage{
7 | sliceFileSet(fset, samples = fset$samples, probes = fset$probes, memmax = 10^5)
8 | }
9 | \arguments{
10 | \item{fset}{a sesame::fileSet, as obtained via readFileSet}
11 |
12 | \item{samples}{samples to query (default to all samples)}
13 |
14 | \item{probes}{probes to query (default to all probes)}
15 |
16 | \item{memmax}{maximum items to read from file to memory, to protect from
17 | accidental memory congestion.}
18 | }
19 | \value{
20 | a numeric matrix of length(samples) columns and length(probes) rows
21 | }
22 | \description{
23 | Slice a fileSet with samples and probes
24 | }
25 | \examples{
26 |
27 | ## create two samples
28 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2'))
29 |
30 | ## a hypothetical numeric array (can be beta values, intensities etc)
31 | hypothetical <- setNames(runif(fset$n), fset$probes)
32 |
33 | ## map the numeric to file
34 | mapFileSet(fset, 's1', hypothetical)
35 |
36 | ## get data
37 | sliceFileSet(fset, 's1', 'cg00000292')
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/man/readIDATpair.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{readIDATpair}
4 | \alias{readIDATpair}
5 | \title{Import a pair of IDATs from one sample}
6 | \usage{
7 | readIDATpair(
8 | prefix.path,
9 | manifest = NULL,
10 | platform = "",
11 | min_beads = NULL,
12 | controls = NULL,
13 | verbose = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{prefix.path}{sample prefix without _Grn.idat and _Red.idat}
18 |
19 | \item{manifest}{optional design manifest file}
20 |
21 | \item{platform}{EPIC, HM450 and HM27 etc.}
22 |
23 | \item{min_beads}{minimum bead number, probes with R or G smaller than
24 | this threshold will be masked. If NULL, no filtering based on bead
25 | count will be applied.}
26 |
27 | \item{controls}{optional control probe manifest file}
28 |
29 | \item{verbose}{be verbose? (FALSE)}
30 | }
31 | \value{
32 | a \code{SigDF}
33 | }
34 | \description{
35 | The function takes a prefix string that is shared by _Grn.idat
36 | and _Red.idat. The function returns a \code{SigDF}.
37 | }
38 | \examples{
39 | sdf <- readIDATpair(sub('_Grn.idat','',system.file(
40 | "extdata", "4207113116_A_Grn.idat", package = "sesameData")))
41 | }
42 |
--------------------------------------------------------------------------------
/man/searchIDATprefixes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{searchIDATprefixes}
4 | \alias{searchIDATprefixes}
5 | \title{Identify IDATs from a directory}
6 | \usage{
7 | searchIDATprefixes(dir.name, recursive = TRUE, use.basename = TRUE)
8 | }
9 | \arguments{
10 | \item{dir.name}{the directory containing the IDAT files.}
11 |
12 | \item{recursive}{search IDAT files recursively}
13 |
14 | \item{use.basename}{basename of each IDAT path is used as sample name.
15 | This won't work in the rare situation where there are duplicate IDAT files.}
16 | }
17 | \value{
18 | the IDAT prefixes (a vector of character strings).
19 | }
20 | \description{
21 | The input is the directory name as a string. The function identifies all
22 | the IDAT files under the directory. The function returns a vector of such
23 | IDAT prefixes under the directory.
24 | }
25 | \examples{
26 | ## only search what are directly under
27 | IDATprefixes <- searchIDATprefixes(
28 | system.file("extdata", "", package = "sesameData"))
29 |
30 | ## searching files recursively is the default
31 | IDATprefixes <- searchIDATprefixes(
32 | system.file(package = "sesameData"), recursive=TRUE)
33 | }
34 |
--------------------------------------------------------------------------------
/man/formatVCF.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/vcf.R
3 | \name{formatVCF}
4 | \alias{formatVCF}
5 | \title{Convert SNP from Infinium array to VCF file}
6 | \usage{
7 | formatVCF(sdf, anno, vcf = NULL, genome = "hg38", verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{SigDF}
11 |
12 | \item{anno}{SNP variant annotation, available at
13 | https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno/EPIC
14 | EPIC.hg38.snp.tsv.gz}
15 |
16 | \item{vcf}{output VCF file path, if NULL output to console}
17 |
18 | \item{genome}{genome}
19 |
20 | \item{verbose}{print more messages}
21 | }
22 | \value{
23 | VCF file. If vcf is NULL, a data.frame is output to
24 | console. The data.frame does not contain VCF headers.
25 | Note the output vcf is not sorted.
26 | }
27 | \description{
28 | Convert SNP from Infinium array to VCF file
29 | }
30 | \examples{
31 | sesameDataCacheAll() # if not done yet
32 | sdf <- sesameDataGet('EPIC.1.SigDF')
33 |
34 | \dontrun{
35 | ## download anno from
36 | ## http://zwdzwd.github.io/InfiniumAnnotation
37 | ## output to console
38 | anno = read_tsv(sesameAnno_download("EPICv2.hg38.snp.tsv.gz"))
39 | head(formatVCF(sdf, anno))
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/man/inferSex.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sex.R
3 | \name{inferSex}
4 | \alias{inferSex}
5 | \title{Infer sex.}
6 | \usage{
7 | inferSex(betas, platform = NULL)
8 | }
9 | \arguments{
10 | \item{betas}{DNA methylation beta}
11 |
12 | \item{platform}{EPICv2, EPIC, HM450, MM285, etc.}
13 | }
14 | \value{
15 | Inferred sex of sample
16 | }
17 | \description{
18 | We established our sex calling based on the CpGs hypermethylated in
19 | inactive X (XiH), CpGs hypomethylated in inactive X (XiL).
20 | }
21 | \details{
22 | Note genotype abnormalities such as Dnmt genotype,
23 | XXY male (Klinefelter's),
24 | 45,X female (Turner's) can confuse the model sometimes.
25 | This function works on a single sample.
26 | }
27 | \examples{
28 |
29 | ## EPICv2 input
30 | betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]])
31 | inferSex(betas)
32 |
33 | \dontrun{
34 | ## MM285 input
35 | betas = openSesame(sesameDataGet("MM285.1.SigDF"))
36 | inferSex(betas)
37 |
38 | ## EPIC input
39 | betas = openSesame(sesameDataGet('EPIC.1.SigDF'))
40 | inferSex(betas)
41 |
42 | ## HM450 input
43 | betas = openSesame(sesameDataGet("HM450.10.SigDF")[[1]])
44 | inferSex(betas)
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/man/sesame-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \docType{package}
4 | \name{sesame-package}
5 | \alias{sesame-package}
6 | \alias{sesame}
7 | \title{Analyze DNA methylation data}
8 | \value{
9 | package
10 | }
11 | \description{
12 | SEnsible and step-wise analysis of DNA methylation data
13 | }
14 | \details{
15 | This package complements array functionalities that allow
16 | processing >10,000 samples in parallel on clusters.
17 | }
18 | \examples{
19 |
20 | sdf <- readIDATpair(sub('_Grn.idat','',system.file(
21 | 'extdata','4207113116_A_Grn.idat',package='sesameData')))
22 |
23 | ## The OpenSesame pipeline
24 | betas <- openSesame(sdf)
25 |
26 | }
27 | \references{
28 | Zhou W, Triche TJ, Laird PW, Shen H (2018)
29 | }
30 | \seealso{
31 | Useful links:
32 | \itemize{
33 | \item \url{https://github.com/zwdzwd/sesame}
34 | \item Report bugs at \url{https://github.com/zwdzwd/sesame/issues}
35 | }
36 |
37 | }
38 | \author{
39 | Wanding Zhou \email{Wanding.Zhou@vai.org},
40 | Hui Shen \email{Hui.Shen@vai.org},
41 | Timothy J Triche Jr \email{Tim.Triche@vai.org}
42 | }
43 | \keyword{DNAMethylation}
44 | \keyword{Microarray}
45 | \keyword{QualityControl}
46 |
--------------------------------------------------------------------------------
/man/createUCSCtrack.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/track.R
3 | \name{createUCSCtrack}
4 | \alias{createUCSCtrack}
5 | \title{Turn beta values into a UCSC browser track}
6 | \usage{
7 | createUCSCtrack(betas, output = NULL, platform = "HM450", genome = "hg38")
8 | }
9 | \arguments{
10 | \item{betas}{a named numeric vector}
11 |
12 | \item{output}{output file name}
13 |
14 | \item{platform}{HM450, EPIC etc.}
15 |
16 | \item{genome}{hg38, mm10, ..., will infer if not given.
17 | For additional mapping, download the GRanges object from
18 | http://zwdzwd.github.io/InfiniumAnnotation
19 | and provide the following argument
20 | ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),...
21 | to this function.}
22 | }
23 | \value{
24 | when output is null, return a data.frame, otherwise NULL
25 | }
26 | \description{
27 | Turn beta values into a UCSC browser track
28 | }
29 | \examples{
30 |
31 | betas.tissue <- sesameDataGet('HM450.1.TCGA.PAAD')$betas
32 | ## add output to create an actual file
33 | df <- createUCSCtrack(betas.tissue)
34 |
35 | ## to convert to bigBed
36 | ## sort -k1,1 -k2,2n output.bed >output_sorted.bed
37 | ## bedToBigBed output_sorted.bed hg38.chrom output.bb
38 | }
39 |
--------------------------------------------------------------------------------
/man/pOOBAH.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/detection.R
3 | \name{pOOBAH}
4 | \alias{pOOBAH}
5 | \title{Detection P-value based on ECDF of out-of-band signal}
6 | \usage{
7 | pOOBAH(
8 | sdf,
9 | return.pval = FALSE,
10 | combine.neg = TRUE,
11 | pval.threshold = 0.05,
12 | verbose = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{sdf}{a \code{SigDF}}
17 |
18 | \item{return.pval}{whether to return p-values, instead of a
19 | masked \code{SigDF}}
20 |
21 | \item{combine.neg}{whether to combine negative control probes with
22 | the out-of-band probes in simulating the signal background}
23 |
24 | \item{pval.threshold}{minimum p-value to mask}
25 |
26 | \item{verbose}{print more messages}
27 | }
28 | \value{
29 | a \code{SigDF}, or a p-value vector if return.pval is TRUE
30 | }
31 | \description{
32 | aka pOOBAH (p-vals by Out-Of-Band Array Hybridization)
33 | }
34 | \details{
35 | The function takes a \code{SigDF} as input, computes detection p-value
36 | using out-of-band probes empirical distribution and returns a new
37 | \code{SigDF} with an updated mask slot.
38 | }
39 | \examples{
40 | sdf <- sesameDataGet("EPIC.1.SigDF")
41 | sum(sdf$mask)
42 | sum(pOOBAH(sdf)$mask)
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/man/sesameAnno_download.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesameAnno.R
3 | \name{sesameAnno_download}
4 | \alias{sesameAnno_download}
5 | \title{Download SeSAMe annotation files}
6 | \usage{
7 | sesameAnno_download(url, destfile = tempfile(basename(url)))
8 | }
9 | \arguments{
10 | \item{url}{url or title of the annotation file}
11 |
12 | \item{destfile}{download to this file, a temp file if unspecified}
13 | }
14 | \value{
15 | the path to downloaded file
16 | }
17 | \description{
18 | see also
19 | http://zwdzwd.github.io/InfiniumAnnotation
20 | }
21 | \details{
22 | This function acts similarly to sesameAnno_get except that it directly
23 | downloads files without invoking BiocFileCache. This is needed in some
24 | situations because BiocFileCache may change the file name and downstream
25 | programs may depend on the correct file names. It also lets you download
26 | files in a cleaner way without routing through BiocFileCache.
27 | }
28 | \examples{
29 |
30 | \dontrun{
31 | ## avoid testing as this function uses external host
32 | sesameAnno_download("Test/3999492009_R01C01_Grn.idat")
33 | sesameAnno_download("EPIC.hg38.manifest.tsv.gz")
34 | sesameAnno_download("EPIC.hg38.snp.tsv.gz")
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/man/sesameQC_plotIntensVsBetas.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/QC.R
3 | \name{sesameQC_plotIntensVsBetas}
4 | \alias{sesameQC_plotIntensVsBetas}
5 | \title{Plot Total Signal Intensities vs Beta Values
6 | This plot is helpful in revealing the extent of signal background
7 | and dye bias.}
8 | \usage{
9 | sesameQC_plotIntensVsBetas(
10 | sdf,
11 | mask = TRUE,
12 | use_max = FALSE,
13 | intens.range = c(5, 15),
14 | pal = "whiteturbo",
15 | ...
16 | )
17 | }
18 | \arguments{
19 | \item{sdf}{a \code{SigDF}}
20 |
21 | \item{mask}{whether to remove probes that are masked}
22 |
23 | \item{use_max}{to use max(M,U) or M+U}
24 |
25 | \item{intens.range}{plot range of signal intensity}
26 |
27 | \item{pal}{color palette, whiteturbo, whiteblack, whitejet}
28 |
29 | \item{...}{additional arguments to smoothScatter}
30 | }
31 | \value{
32 | create a total signal intensity vs beta value plot
33 | }
34 | \description{
35 | Plot Total Signal Intensities vs Beta Values
36 | This plot is helpful in revealing the extent of signal background
37 | and dye bias.
38 | }
39 | \examples{
40 | sesameDataCache() # if not done yet
41 | sdf <- sesameDataGet('EPIC.1.SigDF')
42 | sesameQC_plotIntensVsBetas(sdf)
43 | }
44 |
--------------------------------------------------------------------------------
/man/compareReference.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tissue.R
3 | \name{compareReference}
4 | \alias{compareReference}
5 | \title{Compare array data with references (e.g., tissue, cell types)}
6 | \usage{
7 | compareReference(
8 | ref,
9 | betas = NULL,
10 | stop.points = NULL,
11 | query_width = 0.3,
12 | show_sample_names = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{ref}{the reference beta values in SummarizedExperiment.
17 | One can download them from the sesameData package. See examples.}
18 |
19 | \item{betas}{matrix of betas for the target sample.
20 | This argument is optional. If not given, only the reference will be shown.}
21 |
22 | \item{stop.points}{stop points for the color palette.
23 | Default to blue, yellow.}
24 |
25 | \item{query_width}{the width of the query beta value matrix}
26 |
27 | \item{show_sample_names}{whether to show sample names (default: FALSE)}
28 | }
29 | \value{
30 | grid object that contrasts the target sample with
31 | references.
32 | }
33 | \description{
34 | Compare array data with references (e.g., tissue, cell types)
35 | }
36 | \examples{
37 |
38 | sesameDataCache() # if not done yet
39 | compareReference(sesameDataGet("MM285.tissueSignature"))
40 | sesameDataGet_resetEnv()
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/man/convertProbeID.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mLiftOver.R
3 | \name{convertProbeID}
4 | \alias{convertProbeID}
5 | \title{Convert Probe ID}
6 | \usage{
7 | convertProbeID(
8 | x,
9 | target_platform,
10 | source_platform = NULL,
11 | mapping = NULL,
12 | target_uniq = TRUE,
13 | include_new = FALSE,
14 | include_old = FALSE,
15 | return_mapping = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{x}{source probe IDs}
20 |
21 | \item{target_platform}{the platform to take the data to}
22 |
23 | \item{source_platform}{optional source platform}
24 |
25 | \item{mapping}{a liftOver mapping file. Typically this file
26 | contains empirical evidence whether a probe mapping is reliable.
27 | If given, probe ID-based mapping will be skipped. This is to
28 | perform more stringent probe ID mapping.}
29 |
30 | \item{target_uniq}{whether the target Probe ID should be kept unique.}
31 |
32 | \item{include_new}{if true, include mapping of added probes}
33 |
34 | \item{include_old}{if true, include mapping of deleted probes}
35 |
36 | \item{return_mapping}{return mapping table, instead of the target IDs.}
37 | }
38 | \value{
39 | mapped probe IDs, or mapping table if return_mapping = TRUE
40 | }
41 | \description{
42 | Convert Probe ID
43 | }
44 |
--------------------------------------------------------------------------------
/man/assemble_plots.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/visualizeHelper.R
3 | \name{assemble_plots}
4 | \alias{assemble_plots}
5 | \title{assemble plots}
6 | \usage{
7 | assemble_plots(
8 | betas,
9 | txns,
10 | probes,
11 | plt.txns,
12 | plt.mapLines,
13 | plt.cytoband,
14 | heat.height = NULL,
15 | mapLine.height = 0.2,
16 | show.probeNames = TRUE,
17 | show.samples.n = NULL,
18 | show.sampleNames = TRUE,
19 | sample.name.fontsize = 10,
20 | dmin = 0,
21 | dmax = 1
22 | )
23 | }
24 | \arguments{
25 | \item{betas}{beta value}
26 |
27 | \item{txns}{transcripts GRanges}
28 |
29 | \item{probes}{probe GRanges}
30 |
31 | \item{plt.txns}{transcripts plot objects}
32 |
33 | \item{plt.mapLines}{map line plot objects}
34 |
35 | \item{plt.cytoband}{cytoband plot objects}
36 |
37 | \item{heat.height}{heatmap height (auto inferred based on rows)}
38 |
39 | \item{mapLine.height}{height of the map lines}
40 |
41 | \item{show.probeNames}{whether to show probe names}
42 |
43 | \item{show.samples.n}{number of samples to show (default: all)}
44 |
45 | \item{show.sampleNames}{whether to show sample names}
46 |
47 | \item{sample.name.fontsize}{sample name font size}
48 |
49 | \item{dmin}{data min}
50 |
51 | \item{dmax}{data max}
52 | }
53 | \value{
54 | a grid object
55 | }
56 | \description{
57 | assemble plots
58 | }
59 |
--------------------------------------------------------------------------------
/man/estimateLeukocyte.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cell_composition.R
3 | \name{estimateLeukocyte}
4 | \alias{estimateLeukocyte}
5 | \title{Estimate leukocyte fraction using a two-component model}
6 | \usage{
7 | estimateLeukocyte(
8 | betas.tissue,
9 | betas.leuko = NULL,
10 | betas.tumor = NULL,
11 | platform = c("EPIC", "HM450", "HM27")
12 | )
13 | }
14 | \arguments{
15 | \item{betas.tissue}{tissue beta value matrix (#probes X #samples)}
16 |
17 | \item{betas.leuko}{leukocyte beta value matrix,
18 | if missing, use the SeSAMe default by infinium platform}
19 |
20 | \item{betas.tumor}{optional, tumor beta value matrix}
21 |
22 | \item{platform}{"HM450", "HM27" or "EPIC"}
23 | }
24 | \value{
25 | leukocyte estimate, a numeric vector
26 | }
27 | \description{
28 | The method assumes only two components in the mixture: the leukocyte
29 | component and the target tissue component. The function takes the beta
30 | values matrix of the target tissue and the beta value matrix of the
31 | leukocyte. Both matrices have probes on the row and samples on the column.
32 | Row names should have probe IDs from the platform. The function outputs
33 | a single numeric describing the fraction of leukocyte.
34 | }
35 | \examples{
36 |
37 | betas.tissue <- sesameDataGet('HM450.1.TCGA.PAAD')$betas
38 | estimateLeukocyte(betas.tissue)
39 | sesameDataGet_resetEnv()
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/man/visualizeGene.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/visualize.R
3 | \name{visualizeGene}
4 | \alias{visualizeGene}
5 | \title{Visualize Gene}
6 | \usage{
7 | visualizeGene(
8 | gene_name,
9 | betas,
10 | platform = NULL,
11 | genome = NULL,
12 | upstream = 2000,
13 | dwstream = 2000,
14 | ...
15 | )
16 | }
17 | \arguments{
18 | \item{gene_name}{gene name}
19 |
20 | \item{betas}{beta value matrix (row: probes, column: samples)}
21 |
22 | \item{platform}{HM450, EPIC, or MM285 (default)}
23 |
24 | \item{genome}{hg19, hg38, or mm10 (default)}
25 |
26 | \item{upstream}{distance to extend upstream}
27 |
28 | \item{dwstream}{distance to extend downstream}
29 |
30 | \item{...}{additional options, see visualizeRegion, assemble_plots}
31 | }
32 | \value{
33 | None
34 | }
35 | \description{
36 | Visualize the beta value in heatmaps for a given gene. The function takes
37 | a gene name which is taken from the UCSC refGene. It searches all the
38 | transcripts for the given gene and optionally extends the span by a certain
39 | number of base pairs. The function also takes a beta value matrix with
40 | sample names on the columns and probe names on the rows. The function can
41 | also work on different genome builds (default to hg38, can be hg19).
42 | }
43 | \examples{
44 | betas <- sesameDataGet('HM450.76.TCGA.matched')$betas
45 | visualizeGene('ADA', betas, 'HM450')
46 | }
47 |
--------------------------------------------------------------------------------
/man/inferSpecies.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/species.R
3 | \name{inferSpecies}
4 | \alias{inferSpecies}
5 | \title{Infer Species}
6 | \usage{
7 | inferSpecies(
8 | sdf,
9 | topN = 1000,
10 | threshold.pos = 0.01,
11 | threshold.neg = 0.1,
12 | return.auc = FALSE,
13 | return.species = FALSE,
14 | verbose = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{sdf}{a \code{SigDF}}
19 |
20 | \item{topN}{Top n positive and negative probes used to infer species.
21 | Increasing this number can sometimes improve accuracy (default: 1000).}
22 |
23 | \item{threshold.pos}{pvalue < threshold.pos are considered positive
24 | (default: 0.01).}
25 |
26 | \item{threshold.neg}{pvalue > threshold.neg are considered negative
27 | (default: 0.1).}
28 |
29 | \item{return.auc}{return AUC calculated, override return.species}
30 |
31 | \item{return.species}{return a string to represent species}
32 |
33 | \item{verbose}{print more messages}
34 | }
35 | \value{
36 | a SigDF
37 | }
38 | \description{
39 | We infer species based on probe p-values and alignment scores.
40 | An AUC is calculated for each species, where y_true is 1 or 0
41 | for pval < threshold.pos or pval > threshold.neg, respectively.
42 | }
43 | \examples{
44 | sdf <- sesameDataGet("MM285.1.SigDF")
45 | sdf <- inferSpecies(sdf)
46 |
47 | ## all available species
48 | all_species <- names(sesameDataGet(sprintf(
49 | "\%s.addressSpecies", sdfPlatform(sdf)))$species)
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/man/bisConversionControl.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sesame.R
3 | \name{bisConversionControl}
4 | \alias{bisConversionControl}
5 | \title{Compute internal bisulfite conversion control}
6 | \usage{
7 | bisConversionControl(sdf, extR = NULL, extA = NULL, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{sdf}{a SigDF}
11 |
12 | \item{extR}{a vector of probe IDs for Infinium-I probes that extend to
13 | converted A}
14 |
15 | \item{extA}{a vector of probe IDs for Infinium-I probes that extend to
16 | original A}
17 |
18 | \item{verbose}{print more messages}
19 | }
20 | \value{
21 | GCT score (the higher, the more incomplete conversion)
22 | }
23 | \description{
24 | Compute GCT score for internal bisulfite conversion control. The function
25 | takes a \code{SigDF} as input. The higher the GCT score, the more likely
26 | the conversion is incomplete.
27 | }
28 | \examples{
29 | sesameDataCache() # if not done yet
30 | sdf <- sesameDataGet('EPIC.1.SigDF')
31 | bisConversionControl(sdf)
32 |
33 | ## For more recent platforms like EPICv2, MSA:
34 | ## one needs to supply extR and extA, which can be built using sesameAnno
35 | \dontrun{
36 | mft = sesameAnno_buildManifestGRanges(sprintf(
37 | "\%s/EPICv2/EPICv2.hg38.manifest.tsv.gz",
38 | "https://github.com/zhou-lab/InfiniumAnnotationV1/raw/main/Anno/"),
39 | columns="nextBase")
40 | extR = names(mft)[!is.na(mft$nextBase) & mft$nextBase=="R"]
41 | extA = names(mft)[!is.na(mft$nextBase) & mft$nextBase=="A"]
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/man/visualizeProbes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/visualize.R
3 | \name{visualizeProbes}
4 | \alias{visualizeProbes}
5 | \title{Visualize Region that Contains the Specified Probes}
6 | \usage{
7 | visualizeProbes(
8 | probeNames,
9 | betas,
10 | platform = NULL,
11 | genome = NULL,
12 | upstream = 1000,
13 | dwstream = 1000,
14 | ...
15 | )
16 | }
17 | \arguments{
18 | \item{probeNames}{probe names}
19 |
20 | \item{betas}{beta value matrix (row: probes, column: samples)}
21 |
22 | \item{platform}{HM450, EPIC or MM285 (default)}
23 |
24 | \item{genome}{hg19, hg38 or mm10 (default)}
25 |
26 | \item{upstream}{distance to extend upstream}
27 |
28 | \item{dwstream}{distance to extend downstream}
29 |
30 | \item{...}{additional options, see visualizeRegion and assemble_plots}
31 | }
32 | \value{
33 | None
34 | }
35 | \description{
36 | Visualize the beta value in heatmaps for the genomic region containing
37 | specified probes. The function works only if specified probes can be
38 | spanned by a single genomic region. The region can cover more probes
39 | than specified. Hence the plotting heatmap may encompass more probes.
40 | The function takes as input a string vector of probe IDs (cg/ch/rs-numbers).
41 | if draw is FALSE, the function returns the subset beta value matrix
42 | otherwise it returns the grid graphics object.
43 | }
44 | \examples{
45 | betas <- sesameDataGet('HM450.76.TCGA.matched')$betas
46 | visualizeProbes(c('cg22316575', 'cg16084772', 'cg20622019'), betas, 'HM450')
47 | }
48 |
--------------------------------------------------------------------------------
/man/DML.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{DML}
4 | \alias{DML}
5 | \title{Test differential methylation on each locus}
6 | \usage{
7 | DML(betas, fm, meta = NULL, BPPARAM = SerialParam())
8 | }
9 | \arguments{
10 | \item{betas}{beta values, matrix or SummarizedExperiment
11 | rows are probes and columns are samples.}
12 |
13 | \item{fm}{formula}
14 |
15 | \item{meta}{data frame for sample information, column names
16 | are predictor variables (e.g., sex, age, treatment, tumor/normal etc)
17 | and are referenced in formula. Rows are samples.
18 | When the betas argument is a SummarizedExperiment object, this
19 | is ignored. colData(betas) will be used instead. The row order of the
20 | data frame must match the column order of the beta value matrix.}
21 |
22 | \item{BPPARAM}{number of cores for parallel processing, default to
23 | SerialParam()
24 | Use MulticoreParam(mc.cores) for parallel processing.
25 | For Windows, try DoparParam or SnowParam.}
26 | }
27 | \value{
28 | a list of test summaries, summary.lm objects
29 | }
30 | \description{
31 | The function takes a beta value matrix with probes on the rows and
32 | samples on the columns. It also takes a sample information data frame
33 | (meta) and formula for testing. The function outputs a list of
34 | coefficient tables for each factor tested.
35 | }
36 | \examples{
37 | sesameDataCache() # in case not done yet
38 | data <- sesameDataGet('HM450.76.TCGA.matched')
39 | smry <- DML(data$betas[1:1000,], ~type, meta=data$sampleInfo)
40 |
41 | sesameDataGet_resetEnv()
42 | }
43 |
--------------------------------------------------------------------------------
/man/openSesame.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/open.R
3 | \name{openSesame}
4 | \alias{openSesame}
5 | \title{The openSesame pipeline}
6 | \usage{
7 | openSesame(
8 | x,
9 | prep = "QCDPB",
10 | prep_args = NULL,
11 | manifest = NULL,
12 | func = getBetas,
13 | BPPARAM = SerialParam(),
14 | platform = "",
15 | min_beads = 1,
16 | ...
17 | )
18 | }
19 | \arguments{
20 | \item{x}{SigDF(s), IDAT prefix(es)}
21 |
22 | \item{prep}{preprocessing code, see ?prepSesame}
23 |
24 | \item{prep_args}{optional preprocessing argument list, see ?prepSesame}
25 |
26 | \item{manifest}{optional dynamic manifest}
27 |
28 | \item{func}{either getBetas or getAFs, if NULL, then return SigDF list}
29 |
30 | \item{BPPARAM}{get parallel with MulticoreParam(n)}
31 |
32 | \item{platform}{optional platform string}
33 |
34 | \item{min_beads}{minimum bead number, probes with R or G smaller than
35 | this threshold will be masked. If NULL, no filtering based on bead
36 | count will be applied. Default to 1.}
37 |
38 | \item{...}{parameters to getBetas}
39 | }
40 | \value{
41 | a numeric vector for processed beta values
42 | }
43 | \description{
44 | This function is a simple wrapper of noob + nonlinear dye bias
45 | correction + pOOBAH masking.
46 | }
47 | \details{
48 | Please use mask=FALSE to turn off masking.
49 |
50 | If the input is an IDAT prefix or a \code{SigDF}, the output is
51 | the beta value numerics.
52 | }
53 | \examples{
54 |
55 | in_dir <- system.file("extdata", "", package = "sesameData")
56 | betas <- openSesame(in_dir)
57 | ## or
58 | IDATprefixes <- searchIDATprefixes(in_dir)
59 | betas <- openSesame(IDATprefixes)
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/man/predictAge.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/age.R
3 | \name{predictAge}
4 | \alias{predictAge}
5 | \title{Predict age using linear models}
6 | \usage{
7 | predictAge(betas, model, na_fallback = FALSE, min_nonna = 10)
8 | }
9 | \arguments{
10 | \item{betas}{a probeID-named vector of beta values}
11 |
12 | \item{model}{a model object from sesameDataGet. should contain
13 | param, intercept, response2age. default to the Horvath353 model.}
14 |
15 | \item{na_fallback}{use fall back values if na}
16 |
17 | \item{min_nonna}{the minimum number of non-NA values.}
18 | }
19 | \value{
20 | age in the unit specified in the model (usually in year, but
21 | sometimes can be month, like in the mouse clocks).
22 | }
23 | \description{
24 | The function takes a named numeric vector of beta values. The name attribute
25 | contains the probe ID (cg, ch or rs IDs). The function looks for overlapping
26 | probes and estimates age using different models.
27 | }
28 | \details{
29 | You can get the models such as the Horvath aging model (Horvath 2013
30 | Genome Biology) from sesameDataGet. The function outputs a single numeric
31 | of age in years.
32 |
33 | Here are some built-in age models:
34 | Anno/HM450/Clock_Horvath353.rds
35 | Anno/HM450/Clock_Hannum.rds
36 | Anno/HM450/Clock_SkinBlood.rds
37 | Anno/EPIC/Clock_PhenoAge.rds
38 | Anno/MM285/Clock_Zhou347.rds
39 | see vignette inferences.html#Age__Epigenetic_Clock for details
40 | }
41 | \examples{
42 | betas <- sesameDataGet('HM450.1.TCGA.PAAD')$betas
43 | \dontrun{
44 | ## download age models from
45 | ## https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno
46 | ## e.g., Anno/HM450/Clock_Horvath353.rds
47 | predictAge(betas, model)
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/R/GEO.R:
--------------------------------------------------------------------------------
1 |
#' Build a SigDF from methylated and unmethylated signal vectors
#'
#' Useful when IDAT files are not available and only the M and U
#' signal intensities can be obtained. Out-of-band signals cannot be
#' recovered from such data, so they are simulated by drawing from a
#' normal distribution (floored at 50).
#'
#' @param sigM methylated signal, a numeric vector
#' @param sigU unmethylated signal, a numeric vector
#' @param Probe_IDs probe ID vector, parallel to sigM and sigU
#' @param oob.mean assumed mean for out-of-band signals
#' @param oob.sd assumed standard deviation for out-of-band signals
#' @param platform platform code, will infer if not given
#' @return SigDF
#' @examples
#' sigM <- c(11436, 6068, 2864)
#' sigU <- c(1476, 804, 393)
#' probes <- c("cg07881041", "cg23229610", "cg03513874")
#' sdf <- parseGEOsignalMU(sigM, sigU, probes, platform = "EPIC")
#' @export
parseGEOsignalMU <- function(
    sigM, sigU, Probe_IDs, oob.mean = 500, oob.sd = 300, platform = NULL) {

    if (is.null(platform)) {
        platform <- inferPlatformFromProbeIDs(Probe_IDs)
    }
    ordering <- sesameDataGet(paste0(platform, ".address"))$ordering

    ## align the supplied signals to the manifest probe order; manifest
    ## probes that are absent from Probe_IDs end up as NA
    pos <- match(ordering$Probe_ID, Probe_IDs)
    M <- sigM[pos]
    U <- sigU[pos]

    ## probes whose manifest channel is NA are labelled "2"
    col <- ifelse(is.na(ordering$col), "2", as.character(ordering$col))
    is_inf2 <- col == "2"

    ## simulated out-of-band signal, one draw per probe
    oobs <- pmax(50, rnorm(length(col), mean = oob.mean, sd = oob.sd))

    ## "2" probes take `inf2_value`; other probes take the in-band signal
    ## when their channel equals `channel`, the simulated signal otherwise
    fill <- function(channel, in_band, inf2_value) {
        ifelse(is_inf2, inf2_value, ifelse(col == channel, in_band, oobs))
    }

    sdf <- data.frame(
        Probe_ID = ordering$Probe_ID,
        MG = fill("G", M, NA),
        MR = fill("R", M, NA),
        UG = fill("G", U, M),
        UR = fill("R", U, U),
        col = factor(col, levels=c("G","R","2")),
        mask = ordering$mask)
    structure(sdf, class = c("SigDF", class(sdf)))
}
41 |
42 |
--------------------------------------------------------------------------------
/man/inferTissue.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tissue.R
3 | \name{inferTissue}
4 | \alias{inferTissue}
5 | \title{inferTissue infers the tissue of a single sample (as identified through
6 | the branchIDs in the row data of the reference) by reporting independent
7 | composition through cell type deconvolution.}
8 | \usage{
9 | inferTissue(
10 | betas,
11 | reference = NULL,
12 | platform = NULL,
13 | abs_delta_beta_min = 0.3,
14 | auc_min = 0.99,
15 | coverage_min = 0.8,
16 | topN = 15
17 | )
18 | }
19 | \arguments{
20 | \item{betas}{Named vector with probes and their corresponding beta value
21 | measurement}
22 |
23 | \item{reference}{Summarized Experiment with either hypomethylated or
24 | hypermethylated probe selection (row data), sample selection (column data),
25 | meta data, and the betas (assay)}
26 |
27 | \item{platform}{String representing the array type of the betas and
28 | reference}
29 |
30 | \item{abs_delta_beta_min}{Numerical value indicating the absolute minimum
31 | required delta beta for the probe selection criteria}
32 |
33 | \item{auc_min}{Numeric value corresponding to the minimum AUC value
34 | required for a probe to be considered}
35 |
36 | \item{coverage_min}{Numeric value corresponding to the minimum coverage
37 | requirement for a probe to be considered. Coverage is defined here as the
38 | proportion of samples without an NA value at a given probe.}
39 |
40 | \item{topN}{number of probes to at most use for each branch}
41 | }
42 | \value{
43 | inferred tissue as a string
44 | }
45 | \description{
46 | inferTissue infers the tissue of a single sample (as identified through
47 | the branchIDs in the row data of the reference) by reporting independent
48 | composition through cell type deconvolution.
49 | }
50 | \examples{
51 | sesameDataCache() # if not done yet
52 | sdf <- sesameDataGet("MM285.1.SigDF")
53 | inferTissue(getBetas(dyeBiasNL(noob(sdf))))
54 |
55 | sesameDataGet_resetEnv()
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/R/track.R:
--------------------------------------------------------------------------------
1 |
#' Turn beta values into a UCSC browser track
#'
#' Beta values are matched to the manifest probe coordinates and laid
#' out as a nine-column table (chrm, beg, end, name, score, strand,
#' thickStart, thickEnd, itemRgb). Probes are colored blue when
#' beta < 0.3, red when beta > 0.7, green otherwise and black when the
#' beta value is NA.
#'
#' @param betas a named numeric vector
#' @param output output file name
#' @param platform HM450, EPIC etc.
#' @param genome hg38 (default), mm10, ...
#' For additional mapping, download the GRanges object from
#' http://zwdzwd.github.io/InfiniumAnnotation
#' and provide the following argument
#' ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),...
#' to this function.
#' @return when output is null, return a data.frame, otherwise NULL
#' @importFrom utils write.table
#' @examples
#'
#' betas.tissue <- sesameDataGet('HM450.1.TCGA.PAAD')$betas
#' ## add output to create an actual file
#' df <- createUCSCtrack(betas.tissue)
#'
#' ## to convert to bigBed
#' ## sort -k1,1 -k2,2n output.bed >output_sorted.bed
#' ## bedToBigBed output_sorted.bed hg38.chrom output.bb
#' @export
createUCSCtrack <- function(
    betas, output=NULL, platform='HM450', genome='hg38') {

    gr <- sesameData_getManifestGRanges(platform, genome)

    ## follow the manifest probe order; probes without a beta become NA
    betas <- betas[names(gr)]
    no_beta <- is.na(betas)

    ## the start coordinate is shifted down by one
    beg0 <- GenomicRanges::start(gr)-1
    end1 <- GenomicRanges::end(gr)

    item_rgb <- ifelse(
        no_beta, '0,0,0',
        ifelse(
            betas < 0.3, '0,0,255', # blue
            ifelse(
                betas > 0.7, '255,0,0', # red
                '50,150,0'))) # green

    track <- data.frame(
        chrm = GenomicRanges::seqnames(gr),
        beg = beg0,
        end = end1,
        name = names(gr),
        score = ifelse(no_beta, 0, as.integer(betas*1000)),
        strand = GenomicRanges::strand(gr),
        thickStart = beg0,
        thickEnd = end1,
        itemRgb = item_rgb
    )

    if (!is.null(output)) {
        write.table(
            track, file=output, col.names=FALSE,
            row.names=FALSE, quote=FALSE, sep='\t')
    } else {
        track
    }
}
56 |
--------------------------------------------------------------------------------
/R/feature_selection.R:
--------------------------------------------------------------------------------
1 |
2 |
## Find, for each sample group, the probes that are unmethylated in the
## group and methylated in all remaining samples.
##
## betas       beta value matrix (rows: probes, columns: samples), with
##             row names
## grouping    vector of group labels, one per column of betas
## u_max       in-group mean beta must be below this value
## m_min       out-of-group mean beta must be above this value
## max_na_in   maximum number of NAs tolerated within the group
## max_na_out  maximum number of NAs tolerated outside the group
##
## Returns a list named by group, each element a character vector of
## probe names (row names of betas) passing all four criteria.
getSignatureU <- function(
    betas, grouping, u_max = 0.2, m_min = 0.7,
    max_na_in = 0, max_na_out = 0) {

    groups <- unique(grouping)
    is_na <- is.na(betas)
    sigs <- lapply(groups, function(g) {
        in_g <- grouping == g
        ## drop = FALSE keeps a matrix even when a group (or its
        ## complement) consists of a single sample; otherwise
        ## rowMeans/rowSums would fail on the resulting vector
        m1 <- rowMeans(betas[, in_g, drop = FALSE], na.rm = TRUE) < u_max
        m2 <- rowMeans(betas[, !in_g, drop = FALSE], na.rm = TRUE) > m_min
        ps1 <- rowSums(is_na[, in_g, drop = FALSE]) <= max_na_in
        ps2 <- rowSums(is_na[, !in_g, drop = FALSE]) <= max_na_out
        names(which(m1 & m2 & ps1 & ps2))
    })
    names(sigs) <- groups
    sigs
}
18 |
## For each sample group, pick the n probes whose in-group mean beta is
## lowest relative to the mean beta of all remaining samples.
##
## betas       beta value matrix (rows: probes, columns: samples), with
##             row names
## grouping    vector of group labels, one per column of betas
## n           maximum number of probes to return per group
## max_na_in   maximum number of NAs tolerated within the group
## max_na_out  maximum number of NAs tolerated outside the group
##
## Returns a list named by group, each element a character vector of
## probe names ordered by increasing (in-group mean - out-group mean).
getSignatureUTop <- function(
    betas, grouping, n=100,
    max_na_in = 0, max_na_out = 0) {

    groups <- unique(grouping)
    is_na <- is.na(betas)
    sigs <- lapply(groups, function(g) {
        in_g <- grouping == g
        ## drop = FALSE keeps a matrix even for single-sample groups
        mean1 <- rowMeans(betas[, in_g, drop = FALSE], na.rm=TRUE)
        mean0 <- rowMeans(betas[, !in_g, drop = FALSE], na.rm=TRUE)
        ps1 <- rowSums(is_na[, in_g, drop = FALSE]) <= max_na_in
        ## the comparison must be applied to the per-probe NA count, not
        ## to the individual cells before summing
        ps2 <- rowSums(is_na[, !in_g, drop = FALSE]) <= max_na_out
        head(names(sort((mean1 - mean0)[ps1 & ps2])), n=n)
    })
    names(sigs) <- groups
    sigs
}
35 |
## Reorder a beta value matrix so that probes are clustered within each
## signature and samples are clustered within each group.
##
## betas     beta value matrix (rows: probes, columns: samples)
## grouping  vector of group labels, one per column of betas
## sigs      list of probe name vectors, named by group
##
## Signatures with 5 or fewer probes are skipped. The ordering comes
## from the $mat element returned by row.cluster / column.cluster
## (helpers defined outside this file).
clusterWithSignature <- function(betas, grouping, sigs) {
    sig_names <- names(sigs)

    ## probe order for one signature, NULL if the signature is too small
    probe_order <- function(g) {
        members <- sigs[[g]]
        if (length(members) <= 5) {
            return(NULL)
        }
        present <- intersect(rownames(betas), members)
        rownames(row.cluster(betas[present,])$mat)
    }

    ## sample order within one group
    sample_order <- function(g) {
        colnames(column.cluster(betas[,grouping == g])$mat)
    }

    pbs <- do.call(c, lapply(sig_names, probe_order))
    spl <- do.call(c, lapply(sig_names, sample_order))
    betas[pbs, spl]
}
## Cluster the samples of each group separately and bind the resulting
## matrices side by side, in the order given by `groups`.
##
## betas     beta value matrix (rows: probes, columns: samples)
## grouping  vector of group labels, one per column of betas
## groups    groups to include (default: all, in order of appearance)
clusterWithSampleGrouping <- function(
    betas, grouping, groups=unique(grouping)) {

    per_group <- lapply(groups, function(g) {
        in_group <- grouping == g
        column.cluster(betas[,in_group])$mat
    })
    do.call(cbind, per_group)
}
57 |
## Cluster the rows of each probe set separately and stack the resulting
## matrices on top of each other, following the order of `sigs`.
##
## betas  beta value matrix (rows: probes, columns: samples)
## sigs   list of row index vectors (e.g., probe names)
clusterWithinRowGroups <- function(betas, sigs) {
    clustered <- lapply(sigs, function(probes) {
        row.cluster(betas[probes,])$mat
    })
    do.call(rbind, clustered)
}
63 |
--------------------------------------------------------------------------------
/man/DMLpredict.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{DMLpredict}
4 | \alias{DMLpredict}
5 | \title{Predict new data from DML}
6 | \usage{
7 | DMLpredict(betas, fm, pred = NULL, meta = NULL, BPPARAM = SerialParam())
8 | }
9 | \arguments{
10 | \item{betas}{beta values, matrix or SummarizedExperiment
11 | rows are probes and columns are samples.}
12 |
13 | \item{fm}{formula}
14 |
15 | \item{pred}{new data for prediction, useful for studying effect size.
16 | This argument is a data.frame to specify new data.
17 | If the argument is NULL, all combinations of all contrasts will be used
18 | as input. It might not work if there is a continuous variable input.
19 | One may need to explicitly provide the input in a data frame.}
20 |
21 | \item{meta}{data frame for sample information, column names
22 | are predictor variables (e.g., sex, age, treatment, tumor/normal etc)
23 | and are referenced in formula. Rows are samples.
24 | When the betas argument is a SummarizedExperiment object, this
25 | is ignored. colData(betas) will be used instead.}
26 |
27 | \item{BPPARAM}{number of cores for parallel processing, default to
28 | SerialParam()
29 | Use MulticoreParam(mc.cores) for parallel processing.
30 | For Windows, try DoparParam or SnowParam.}
31 | }
32 | \value{
33 | a SummarizedExperiment of predictions. The colData describes
34 | the input of the prediction.
35 | }
36 | \description{
37 | This function is also important for investigating factor interactions.
38 | }
39 | \examples{
40 | data <- sesameDataGet('HM450.76.TCGA.matched')
41 |
42 | ## use all contrasts as new input
43 | res <- DMLpredict(data$betas[1:10,], ~type, meta=data$sampleInfo)
44 |
45 | ## specify new input
46 | res <- DMLpredict(data$betas[1:10,], ~type, meta=data$sampleInfo,
47 | pred = data.frame(type=c("Normal","Tumour")))
48 |
49 | ## note that the prediction needs to be a factor of the same
50 | ## level structure as the original training data.
51 | pred = data.frame(type=factor(c("Normal"), levels=c("Normal","Tumour")))
52 | res <- DMLpredict(data$betas[1:10,], ~type,
53 | meta=data$sampleInfo, pred = pred)
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/man/visualizeRegion.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/visualize.R
3 | \name{visualizeRegion}
4 | \alias{visualizeRegion}
5 | \title{Visualize Region}
6 | \usage{
7 | visualizeRegion(
8 | chrm,
9 | beg,
10 | end,
11 | betas,
12 | platform = NULL,
13 | genome = NULL,
14 | draw = TRUE,
15 | cluster.samples = FALSE,
16 | na.rm = FALSE,
17 | nprobes.max = 1000,
18 | txn.types = "protein_coding",
19 | txn.font.size = 6,
20 | ...
21 | )
22 | }
23 | \arguments{
24 | \item{chrm}{chromosome}
25 |
26 | \item{beg}{begin of the region}
27 |
28 | \item{end}{end of the region}
29 |
30 | \item{betas}{beta value matrix (row: probes, column: samples)}
31 |
32 | \item{platform}{EPIC, HM450, or MM285}
33 |
34 | \item{genome}{hg38, mm10, ..., will infer if not given.
35 | For additional mapping, download the GRanges object from
36 | http://zwdzwd.github.io/InfiniumAnnotation
37 | and provide the following argument
38 | ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),...
39 | to this function.}
40 |
41 | \item{draw}{draw figure or return betas}
42 |
43 | \item{cluster.samples}{whether to cluster samples}
44 |
45 | \item{na.rm}{remove probes with all NA.}
46 |
47 | \item{nprobes.max}{maximum number of probes to plot}
48 |
49 | \item{txn.types}{default to protein_coding, use NULL for all}
50 |
51 | \item{txn.font.size}{transcript name font size}
52 |
53 | \item{...}{additional options, see assemble_plots}
54 | }
55 | \value{
56 | graphics or a matrix containing the captured beta values
57 | }
58 | \description{
59 | The function takes a genomic coordinate (chromosome, start and end) and a
60 | beta value matrix (probes on the row and samples on the column). It plots
61 | the beta values as a heatmap for all probes falling into the genomic region.
62 | If `draw=TRUE` the function returns the plotted grid graphics object.
63 | Otherwise, the selected beta value matrix is returned.
64 | `cluster.samples=TRUE/FALSE` controls whether hierarchical clustering is
65 | applied to the subset beta value matrix.
66 | }
67 | \examples{
68 | betas <- sesameDataGet('HM450.76.TCGA.matched')$betas
69 | visualizeRegion('chr20', 44648623, 44652152, betas, 'HM450')
70 | }
71 |
--------------------------------------------------------------------------------
/R/channel_inference.R:
--------------------------------------------------------------------------------
#' Infer and reset the color channel of Infinium-I probes
#'
#' Instead of using what is specified in the manifest, the channel of
#' each Infinium-I probe is set to whichever of red or green carries the
#' larger signal (maximum of M and U). The \code{col} column of the
#' \code{SigDF} is updated accordingly.
#'
#' A probe is treated as failed when its red or green maximum is NA, or
#' when the larger of the two is below the 95th percentile of the
#' signals observed in the channel opposite to the inferred one.
#' Failed probes keep their original channel unless
#' \code{switch_failed} is TRUE, and are masked when \code{mask_failed}
#' is TRUE.
#'
#' @param sdf a \code{SigDF}
#' @param switch_failed whether to switch failed probes (default to FALSE)
#' @param mask_failed whether to mask failed probes (default to FALSE)
#' @param verbose whether to print correction summary
#' @param summary return summarized numbers only.
#' @return a \code{SigDF}, or numerics if summary == TRUE. The numerics
#' are the counts of probes per original>new channel
#' (R2R, G2G, R2G, G2R).
#' @examples
#'
#' sdf <- sesameDataGet('EPIC.1.SigDF')
#' inferInfiniumIChannel(sdf)
#'
#' @export
inferInfiniumIChannel <- function(
    sdf, switch_failed = FALSE, mask_failed = FALSE,
    verbose = FALSE, summary = FALSE) {

    rows1 <- which(sdf$col != "2")
    inf1 <- sdf[rows1,]
    col_before <- inf1$col

    ## strongest signal per channel
    red_sig <- pmax(inf1$MR, inf1$UR)
    grn_sig <- pmax(inf1$MG, inf1$UG)
    col_after <- factor(
        ifelse(red_sig > grn_sig, "R", "G"), levels=c("G","R","2"))

    ## background: signals in the channel opposite to the inferred one
    to_red <- col_after == "R"
    to_grn <- col_after == "G"
    bg_max <- quantile(
        c(inf1$MG[to_red], inf1$UG[to_red],
            inf1$MR[to_grn], inf1$UR[to_grn]),
        0.95, na.rm=TRUE)

    ## failed probes: missing signal or no channel above background
    failed <- is.na(red_sig) | is.na(grn_sig) |
        pmax(red_sig, grn_sig) < bg_max
    if (!switch_failed) {
        ## keep the original channel for failed probes
        col_after[failed] <- col_before[failed]
    }
    if (mask_failed) {
        sdf$mask[rows1[failed]] <- TRUE
    }
    sdf$col[rows1] <- factor(col_after, levels=c("G","R","2"))

    ## number of probes going from channel `from` to channel `to`
    n_switch <- function(from, to) {
        sum(col_before == from & col_after == to, na.rm=TRUE)
    }
    smry <- c(
        R2R = n_switch("R", "R"),
        G2G = n_switch("G", "G"),
        R2G = n_switch("R", "G"),
        G2R = n_switch("G", "R"))

    if (summary) {
        return(smry)
    }

    sdfMsg(sdf, verbose, "%s: R>R:%d;G>G:%d;R>G:%d;G>R:%d",
        "Infinium-I color channel reset",
        smry["R2R"], smry["G2G"], smry["R2G"], smry["G2R"])
}
56 |
57 |
--------------------------------------------------------------------------------
/man/cnSegmentation.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cnv.R
3 | \name{cnSegmentation}
4 | \alias{cnSegmentation}
5 | \title{Perform copy number segmentation}
6 | \usage{
7 | cnSegmentation(
8 | sdf,
9 | sdfs.normal = NULL,
10 | genomeInfo = NULL,
11 | probeCoords = NULL,
12 | tilewidth = 50000,
13 | verbose = FALSE,
14 | return.probe.signals = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{sdf}{\code{SigDF}}
19 |
20 | \item{sdfs.normal}{a list of \code{SigDF}s for normalization, if not given,
21 | use the stored normal data from sesameData. However, we do recommend using
22 | a matched copy number normal dataset for normalization.
23 | assembly}
24 |
25 | \item{genomeInfo}{the genomeInfo files. The default is retrieved from
26 | sesameData. Alternative genomeInfo files can be found at
27 | https://github.com/zhou-lab/GenomeInfo}
28 |
29 | \item{probeCoords}{the probe coordinates in the corresponding genome
30 | if NULL (default), then the default genome assembly is used.
31 | Default genome is given by, e.g., sesameData_check_genome(NULL, "EPIC")
32 | For additional mapping, download the GRanges object from
33 | http://zwdzwd.github.io/InfiniumAnnotation
34 | and provide the following argument
35 | ..., probeCoords = sesameAnno_buildManifestGRanges("downloaded_file"),...
36 | to this function.}
37 |
38 | \item{tilewidth}{tile width for smoothing}
39 |
40 | \item{verbose}{print more messages}
41 |
42 | \item{return.probe.signals}{return probe-level instead of bin-level signal}
43 | }
44 | \value{
45 | an object of \code{CNSegment}
46 | }
47 | \description{
48 | Perform copy number segmentation using the signals in the signal set.
49 | The function takes a \code{SigDF} for the target sample and a set of
50 | normal \code{SigDF} for the normal samples. An optional arguments specifies
51 | the version of genome build that the inference will operate on. The function
52 | outputs an object of class \code{CNSegment} with signals for the segments (
53 | seg.signals), the bin coordinates (
54 | bin.coords) and bin signals (bin.signals).
55 | }
56 | \examples{
57 |
58 | sesameDataCache()
59 |
60 | \dontrun{
61 | sdfs <- sesameDataGet('EPICv2.8.SigDF')
62 | sdf <- sdfs[["K562_206909630040_R01C01"]]
63 | seg <- cnSegmentation(sdf)
64 | seg <- cnSegmentation(sdf, return.probe.signals=TRUE)
65 | visualizeSegments(seg)
66 | }
67 |
68 | }
69 |
--------------------------------------------------------------------------------
/man/DMR.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dm.R
3 | \name{DMR}
4 | \alias{DMR}
5 | \title{Find Differentially Methylated Region (DMR)}
6 | \usage{
7 | DMR(
8 | betas,
9 | smry,
10 | contrast,
11 | platform = NULL,
12 | probe.coords = NULL,
13 | dist.cutoff = NULL,
14 | seg.per.locus = 0.5
15 | )
16 | }
17 | \arguments{
18 | \item{betas}{beta values for distance calculation}
19 |
20 | \item{smry}{DML}
21 |
22 | \item{contrast}{the pair-wise comparison or contrast
23 | check colnames(attr(smry, "model.matrix")) if uncertain}
24 |
25 | \item{platform}{EPIC, HM450, MM285, ...}
26 |
27 | \item{probe.coords}{GRanges object that defines CG coordinates
28 | if NULL (default), then the default genome assembly is used.
29 | Default genome is given by, e.g., sesameData_check_genome(NULL, "EPIC")
30 | For additional mapping, download the GRanges object from
31 | http://zwdzwd.github.io/InfiniumAnnotation
32 | and provide the following argument
33 | ..., probe.coords = sesameAnno_buildManifestGRanges("downloaded_file"),...
34 | to this function.}
35 |
36 | \item{dist.cutoff}{cutoff of beta value differences for two neighboring CGs
37 | to be considered the same DMR (by default it's determined using the
38 | quantile function on seg.per.locus)}
39 |
40 | \item{seg.per.locus}{number of segments per locus
41 | higher value leads to more segments}
42 | }
43 | \value{
44 | coefficient table with segment ID and segment P-value
45 | each row is a locus, multiple loci may share a segment ID if
46 | they are merged to the same segment. Records are ordered by Seg_Est.
47 | }
48 | \description{
49 | This subroutine uses Euclidean distance to group CpGs and
50 | then combine p-values for each segment. The function performs DML test first
51 | if cf is NULL. It groups the probe testing results into differential
52 | methylated regions in a coefficient table with additional columns
53 | designating the segment ID and statistical significance (P-value) testing
54 | the segment.
55 | }
56 | \examples{
57 |
58 | sesameDataCache() # in case not done yet
59 |
60 | data <- sesameDataGet('HM450.76.TCGA.matched')
61 | smry <- DML(data$betas[1:1000,], ~type, meta=data$sampleInfo)
62 | colnames(attr(smry, "model.matrix")) # pick a contrast from here
63 | ## showing on a small set of 1000 CGs
64 | merged_segs <- DMR(data$betas[1:1000,], smry, "typeTumour", platform="HM450")
65 |
66 | sesameDataGet_resetEnv()
67 |
68 | }
69 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 |
#' Extract the probe type field from probe ID
#'
#' The design type is taken from the third character of the field that
#' follows the first underscore of each probe ID, hence this only works
#' with the new probe ID system.
#' See https://github.com/zhou-lab/InfiniumAnnotation for illustration
#'
#' @param Probe_ID Probe ID
#' @return a vector of '1' and '2' suggesting Infinium-I and Infinium-II
#' @import stringr
#' @examples
#' probeID_designType("cg36609548_TC21")
#' @export
probeID_designType <- function(Probe_ID) {
    ## refuse old-style IDs, which carry no underscore-separated suffix
    stopifnot(all(grepl('_', Probe_ID)))
    designChar <- function(id) {
        fields <- strsplit(id, '_')[[1]]
        substr(fields[2], 3, 3)
    }
    vapply(Probe_ID, designChar, character(1))
}
17 |
#' Convert beta-value to M-value
#'
#' Apply the base-2 logit transform to a vector of beta values, i.e.,
#' M = log2(beta / (1 - beta)).
#'
#' @param b vector of beta values
#' @return a vector of M values
#' @examples
#' BetaValueToMValue(c(0.1, 0.5, 0.9))
#' @export
BetaValueToMValue <- function(b) {
    unmethylated <- 1 - b
    odds <- b / unmethylated
    log2(odds)
}
31 |
#' Convert M-value to beta-value
#'
#' Convert M-value to beta-value (aka inverse logit transform), i.e.,
#' beta = 2^M / (1 + 2^M). Infinite M values map to the limits 0 and 1, so
#' that the result of \code{BetaValueToMValue} can be converted back.
#'
#' @param m a vector of M values
#' @return a vector of beta values
#' @examples
#' MValueToBetaValue(c(-3, 0, 3))
#' @export
MValueToBetaValue <- function(m) {
    ## 1/(1+2^-m) is algebraically equal to 2^m/(1+2^m) but never
    ## evaluates Inf/Inf, which returned NaN for m = Inf or m > 1023
    1 / (1 + 2^(-m))
}
44 |
#' Check SeSAMe versions
#'
#' Print the package versions of sesame and the packages it depends on to
#' help troubleshoot installation issues. A package that cannot be found is
#' reported as "not installed" instead of stopping the report.
#'
#' @return print the version of sesame, sesameData, Bioconductor and R
#' @importFrom utils packageVersion
#' @export
#' @examples
#' sesame_checkVersion()
sesame_checkVersion <- function() {
    ## this helper is used on possibly broken installations, so a missing
    ## package must be reported rather than abort the whole report
    pkgVersion <- function(pkg) {
        tryCatch(
            as.character(packageVersion(pkg)),
            error = function(e) "not installed")
    }
    ## BiocManager is only a suggested package and may be absent
    biocVersion <- if (requireNamespace("BiocManager", quietly = TRUE)) {
        tryCatch(
            as.character(BiocManager::version()),
            error = function(e) "unknown")
    } else {
        "unknown (BiocManager not installed)"
    }

    rv <- R.Version()
    msg <- paste0(
        "SeSAMe requires matched versions of ",
        "R, sesame, sesameData and ExperimentHub.\n",
        "Here is the current versions installed:\n",
        sprintf("R: %s.%s\n", rv$major, rv$minor),
        sprintf("Bioconductor: %s\n", biocVersion),
        sprintf("sesame: %s\n", pkgVersion("sesame")),
        sprintf("sesameData: %s\n", pkgVersion("sesameData")),
        sprintf("ExperimentHub: %s\n", pkgVersion("ExperimentHub")))
    message(msg)
}
68 |
69 |
#' Deprecated: sesamize
#'
#' The sesamize function is deprecated; calling it only raises a
#' deprecation warning.
#' Please check https://github.com/zwdzwd/sesamize for previous scripts
#'
#' @param ... arguments for sesamize (not used)
#' @return a message text for deprecated function
#' @export
#' @examples
#' cat("Deprecated. see https://github.com/zwdzwd/sesamize")
sesamize <- function(...) {
    .Deprecated(new = "https://github.com/zwdzwd/sesamize")
}
81 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: sesame
2 | Type: Package
3 | Title: SEnsible Step-wise Analysis of DNA MEthylation BeadChips
4 | Description: Tools For analyzing Illumina Infinium DNA methylation arrays. SeSAMe provides utilities to support analyses of multiple generations of Infinium DNA methylation BeadChips, including preprocessing, quality control, visualization and inference. SeSAMe features accurate detection calling, intelligent inference of ethnicity, sex and advanced quality control routines.
5 | Version: 1.25.3
6 | Authors@R: c(person("Wanding", "Zhou", role = c("aut","cre"), email = "zhouwanding@gmail.com", comment = c(ORCID = "0000-0001-9126-1932")),
7 | person("Wubin", "Ding", role = "ctb"),
8 | person("David", "Goldberg", role = "ctb"),
9 | person("Ethan", "Moyer", role = "ctb"),
10 | person("Bret", "Barnes", role = "ctb"),
11 | person("Timothy", "Triche", role = "ctb"),
12 | person("Hui", "Shen", role = c("aut"), email = "Hui.Shen@vai.org"))
13 | Depends: R (>= 4.5.0),
14 | sesameData
15 | License: MIT + file LICENSE
16 | RoxygenNote: 7.3.2
17 | Imports:
18 | graphics,
19 | BiocParallel,
20 | utils,
21 | methods,
22 | stringr,
23 | readr,
24 | tibble,
25 | MASS,
26 | wheatmap (>= 0.2.0),
27 | GenomicRanges,
28 | IRanges,
29 | grid,
30 | preprocessCore,
31 | S4Vectors,
32 | ggplot2,
33 | BiocFileCache,
34 | GenomeInfoDb,
35 | stats,
36 | SummarizedExperiment,
37 | dplyr,
38 | reshape2
39 | Suggests: scales,
40 | BiocManager,
41 | knitr,
42 | DNAcopy,
43 | e1071,
44 | randomForest,
45 | RPMM,
46 | rmarkdown,
47 | testthat,
48 | tidyr,
49 | BiocStyle,
50 | ggrepel,
51 | grDevices,
52 | KernSmooth,
53 | pals
54 | Encoding: UTF-8
55 | VignetteBuilder: knitr
56 | URL: https://github.com/zwdzwd/sesame
57 | BugReports: https://github.com/zwdzwd/sesame/issues
58 | biocViews: DNAMethylation, MethylationArray, Preprocessing, QualityControl
59 | Collate:
60 | 'readIDAT.R'
61 | 'sex.R'
62 | 'species.R'
63 | 'QC.R'
64 | 'GEO.R'
65 | 'SigDFMethods.R'
66 | 'sesame.R'
67 | 'age.R'
68 | 'background.R'
69 | 'cell_composition.R'
70 | 'channel_inference.R'
71 | 'cnv.R'
72 | 'impute.R'
73 | 'mLiftOver.R'
74 | 'ethnicity.R'
75 | 'deidentify.R'
76 | 'detection.R'
77 | 'dm.R'
78 | 'dye_bias.R'
79 | 'feature_selection.R'
80 | 'fileSet.R'
81 | 'mask.R'
82 | 'sesameAnno.R'
83 | 'open.R'
84 | 'strain.R'
85 | 'tissue.R'
86 | 'track.R'
87 | 'match_design.R'
88 | 'utils.R'
89 | 'vcf.R'
90 | 'visualize.R'
91 | 'visualizeHelper.R'
92 | 'zzz.R'
93 | 'palgen.R'
94 |
--------------------------------------------------------------------------------
/R/vcf.R:
--------------------------------------------------------------------------------
## Very simple genotyper.
##
## Calls a genotype from a single allele frequency `x` by comparing the
## binomial likelihoods of observing round(x * model_nbeads) out of
## model_nbeads beads under three success probabilities:
## model_background ('0/0'), 0.5 ('0/1') and 1 - model_background ('1/1').
##
## Returns a list with GT, the genotype of the most likely model, and GS,
## a score computed as floor(-10 * log10(1 - posterior)) where the posterior
## of the called genotype assumes equal priors. A missing `x` gives the
## no-call GT = './.' with GS = 0, so both fields are always of length one.
genotyper <- function(x, model_background=0.1, model_nbeads=40) {

    ## with a missing allele frequency every likelihood is NA and
    ## which.max() returns nothing, which left GT and GS of length zero
    if (length(x) == 1 && is.na(x)) {
        return(list(GT = './.', GS = 0))
    }

    ## dbinom is vectorised over prob: one likelihood per genotype model
    GL <- dbinom(
        round(x*model_nbeads), size = model_nbeads,
        prob = c(model_background, 0.5, 1-model_background))

    ind <- which.max(GL)
    GT <- c('0/0','0/1','1/1')[ind]
    GS <- floor(-log10(1-GL[ind] / sum(GL))*10) # assuming equal prior
    list(GT=GT, GS=GS)
}
16 |
17 | vcf_header <- function(genome) {
18 | c('##fileformat=VCFv4.0',
19 | sprintf('##fileDate=%s',format(Sys.time(),"%Y%m%d")),
20 | sprintf('##reference=%s', genome),
21 | paste0('##INFO='),
23 | paste0('##INFO='),
25 | paste0('##INFO='),
27 | paste0('##INFO='),
29 | paste0('##INFO='))
31 | }
32 |
#' Convert SNP from Infinium array to VCF file
#'
#' @param sdf SigDF
#' @param anno SNP variant annotation, available at
#' https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno/EPIC
#' EPIC.hg38.snp.tsv.gz
#' @param vcf output VCF file path, if NULL output to console
#' @param genome genome
#' @param verbose print more messages
#' @return VCF file. If vcf is NULL, a data.frame is output to
#' console. The data.frame does not contain VCF headers.
#' Records are ordered by chromosome (compared as character strings)
#' and then by position.
#'
#' @importFrom utils write.table
#' @examples
#' sesameDataCacheAll() # if not done yet
#' sdf <- sesameDataGet('EPIC.1.SigDF')
#'
#' \dontrun{
#' ## download anno from
#' ## http://zwdzwd.github.io/InfiniumAnnotation
#' ## output to console
#' anno = read_tsv(sesameAnno_download("EPICv2.hg38.snp.tsv.gz"))
#' head(formatVCF(sdf, anno))
#' }
#'
#' @export
formatVCF <- function(
    sdf, anno, vcf=NULL, genome="hg38", verbose = FALSE) {

    ## NOTE(review): platform is not used below; the call is kept in case
    ## sdfPlatform validates sdf or reports when verbose -- confirm
    platform <- sdfPlatform(sdf, verbose = verbose)
    betas <- getBetas(sdf)[anno$Probe_ID]
    af <- getAFTypeIbySumAlleles(sdf, known.ccs.only=FALSE)
    ## variant allele frequency: flip beta when the U allele is ALT, and use
    ## the summed-allele frequency for probes annotated as REF_InfI
    vafs <- ifelse(anno$U == "ALT", 1-betas, betas)
    vafs <- ifelse(anno$U == "REF_InfI", af[anno$Probe_ID], vafs)

    gts <- lapply(vafs, genotyper)
    GT <- vapply(gts, function(g) g$GT, character(1))
    GS <- vapply(gts, function(g) g$GS, numeric(1))
    ## collapse three-base alternatives to single-letter codes
    anno$REF[anno$REF == "ACT"] <- "H"
    anno$REF[anno$REF == "AGT"] <- "D"
    anno$ALT[anno$ALT == "ACT"] <- "H"
    anno$ALT[anno$ALT == "AGT"] <- "D"
    vcflines <- cbind(anno$chrm, anno$end,
        ".", anno$REF, anno$ALT, GS, ifelse(GS>20,'PASS','FAIL'),
        paste0(sprintf(
            "PVF=%1.3f;GT=%s;GS=%d;Probe_ID=%s",
            vafs, GT, GS, anno$Probe_ID),
            ifelse(is.na(anno$rs), "", paste0(";rs_ID=", anno$rs))))

    header <- vcf_header(genome)
    out <- data.frame(vcflines)
    colnames(out) <- c("#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO")
    out <- out[order(out[['#CHROM']], as.numeric(out[['POS']])),]

    if (is.null(vcf)) {
        return(out)
    }

    ## the VCF format requires the tab-delimited column header line
    ## (#CHROM POS ...) between the meta-information lines and the records;
    ## it was never written because the records use col.names = FALSE
    writeLines(c(header, paste(colnames(out), collapse = "\t")), vcf)
    write.table(out, file=vcf, append=TRUE, sep='\t',
        row.names = FALSE, col.names = FALSE, quote = FALSE)
}
94 |
--------------------------------------------------------------------------------
/R/age.R:
--------------------------------------------------------------------------------
1 |
#' Predict age using linear models
#'
#' The function takes a named numeric vector of beta values. The name attribute
#' contains the probe ID (cg, ch or rs IDs). The function looks for overlapping
#' probes and estimate age using different models.
#'
#' You can get the models such as the Horvath aging model (Horvath 2013
#' Genome Biology) from sesameDataGet. The function outputs a single numeric
#' of age in years.
#'
#' Here are some built-in age models:
#' Anno/HM450/Clock_Horvath353.rds
#' Anno/HM450/Clock_Hannum.rds
#' Anno/HM450/Clock_SkinBlood.rds
#' Anno/EPIC/Clock_PhenoAge.rds
#' Anno/MM285/Clock_Zhou347.rds
#' see vignette inferences.html#Age__Epigenetic_Clock for details
#'
#' @param betas a probeID-named vector of beta values
#' @param model a model object from sesameDataGet. should contain
#' param (with columns Probe_ID, slope and, when na_fallback is used,
#' na_fallback), intercept and response2age.
#' @param na_fallback if TRUE, replace missing beta values by the
#' na_fallback values stored in the model; if FALSE (default), drop the
#' probes with missing beta values from the model.
#' @param min_nonna the minimum number of non-NA values. The function stops
#' with an error when fewer model probes have a beta value.
#' @return age in the unit specified in the model (usually in year, but
#' sometimes can be month, like in the mouse clocks).
#' @examples
#' betas <- sesameDataGet('HM450.1.TCGA.PAAD')$betas
#' \dontrun{
#' ## download age models from
#' ## https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno
#' ## e.g., Anno/HM450/Clock_Horvath353.rds
#' predictAge(betas, model)
#' }
#' @export
predictAge <- function(betas, model, na_fallback=FALSE, min_nonna = 10) {

    ## align the beta values to the model probes; probes that are absent
    ## from betas become NA
    betas <- betas[model$param$Probe_ID]
    if (sum(!is.na(betas)) < min_nonna) {
        ## report the actual threshold rather than a hard-coded 10
        stop(sprintf(
            "Fewer than %s matching probes left. Age prediction abort.",
            min_nonna))
    }

    is_na <- is.na(betas)
    if (any(is_na)) {
        if (na_fallback) {
            betas[is_na] <- model$param$na_fallback[is_na]
        } else {
            ## restrict both the data and the model to the observed probes
            probes <- intersect(names(na.omit(betas)), model$param$Probe_ID)
            betas <- betas[probes]
            model$param <- model$param[match(probes, model$param$Probe_ID),]
        }
    }
    drop(model$response2age(betas %*% model$param$slope + model$intercept))
}
54 |
#' Mouse age predictor (deprecated)
#'
#' This function is deprecated. Calling it only raises a deprecation
#' warning that points to \code{predictAge}; no age is computed and the
#' arguments are not used.
#'
#' @param betas a probeID-named vector of beta values (not used)
#' @param na_fallback use the fallback default for NAs (not used)
#' @return no age; the function only signals that it is deprecated
#' @examples
#' cat("Deprecated. See predictAge")
#' @export
predictMouseAgeInMonth <- function(betas, na_fallback=TRUE) {
    .Deprecated(new = "predictAge")
}
72 |
#' Horvath 353 age predictor (deprecated)
#'
#' This function is deprecated. Calling it only raises a deprecation
#' warning that points to \code{predictAge}; no age is computed and the
#' argument is not used.
#'
#' @param betas a probeID-named vector of beta values (not used)
#' @return no age; the function only signals that it is deprecated
#' @examples
#' cat("Deprecated. See predictAge")
#' @export
predictAgeHorvath353 <- function(betas) {
    .Deprecated(new = "predictAge")
}
88 |
#' Horvath Skin and Blood age predictor (deprecated)
#'
#' This function is deprecated. Calling it only raises a deprecation
#' warning that points to \code{predictAge}; no age is computed and the
#' argument is not used.
#'
#' @param betas a probeID-named vector of beta values (not used)
#' @return no age; the function only signals that it is deprecated
#' @examples
#' cat("Deprecated. See predictAge")
#' @export
predictAgeSkinBlood <- function(betas) {
    .Deprecated(new = "predictAge")
}
104 |
105 |
106 | ## Hv.age2response <- function(x, adult.age=20) {
107 | ## ## trafo
108 | ## x <- (x+1)/(adult.age+1)
109 | ## ifelse(x<=1,log(x),x-1)
110 | ## }
111 |
112 | ## Hv.response2age <- function(x, adult.age=20) {
113 | ## ## anti.trafo
114 | ## ifelse(
115 | ## x<0,
116 | ## (1+adult.age)*exp(x)-1,
117 | ## (1+adult.age)*x+adult.age)
118 | ## }
119 |
--------------------------------------------------------------------------------
/R/impute.R:
--------------------------------------------------------------------------------
#' Impute of missing data of specific platform
#'
#' Missing beta values are replaced by the per-probe median of a reference
#' cell type / tissue stored in the platform's imputation default data.
#' A matrix is imputed column by column.
#'
#' @param betas named vector of beta values, or a matrix with probes in
#' rows (row names are probe IDs) and samples in columns
#' @param platform platform
#' @param celltype celltype/tissue context of imputation, if not given, will
#' use nearest neighbor to determine.
#' @param sd_max maximum standard deviation in imputation confidence;
#' probes whose reference standard deviation exceeds it stay NA
#' @param BPPARAM use MulticoreParam(n) for parallel processing
#' @return imputed data, vector or matrix
#' @examples
#' betas = openSesame(sesameDataGet("EPIC.1.SigDF"))
#' sum(is.na(betas))
#' betas2 = imputeBetas(betas, "EPIC")
#' sum(is.na(betas2))
#'
#' @export
imputeBetas <- function(betas, platform = NULL, BPPARAM = SerialParam(),
    celltype = NULL, sd_max = 999) {

    if (is.matrix(betas)) {
        if (ncol(betas) == 0) { return(betas) }
        imputed <- do.call(cbind, bplapply(seq_len(ncol(betas)), function(i) {
            ## keep probe names even when the matrix has a single row
            column <- betas[, i]
            names(column) <- rownames(betas)
            ## forward the caller's platform instead of re-inferring it
            imputeBetas(column, platform = platform,
                celltype = celltype, sd_max = sd_max)}, BPPARAM=BPPARAM))
        ## iterating over column indices loses the sample names
        dimnames(imputed) <- dimnames(betas)
        return(imputed)
    }

    platform <- sesameData_check_platform(platform, names(betas))
    df <- sesameDataGet(sprintf("%s.imputationDefault", platform))
    d2q <- match(names(betas), df$Probe_ID)

    if (is.null(celltype)) {
        ## nearest neighbor: the reference best correlated with the data
        celltype <- names(which.max(vapply(df$data, function(x) cor(
            betas, x$median[d2q], use="na.or.complete"), numeric(1))))
        if (length(celltype) == 0) { # no reference could be correlated
            celltype <- "Blood"
        }
    } else if (length(celltype) != 1 || !(celltype %in% names(df$data))) {
        stop(sprintf(
            "celltype must be one of: %s",
            paste(names(df$data), collapse = ", ")))
    }

    idx <- is.na(betas)
    mn <- df$data[[celltype]]$median[d2q][idx]
    sd <- df$data[[celltype]]$sd[d2q][idx]
    mn[sd > sd_max] <- NA # too variable in the reference to be trusted
    betas[idx] <- mn
    betas
}
43 |
#' Impute missing data based on genomic neighbors.
#'
#' Each missing probe is assigned the mean beta value of its closest
#' non-missing probes found within the search window. A matrix is imputed
#' column by column.
#'
#' @param betas named vector of beta values, or a matrix with probes in
#' rows (row names are probe IDs) and samples in columns
#' @param platform platform
#' @param max_neighbors maximum neighbors to use for dense regions
#' @param max_dist maximum distance to count as neighbor
#' @param BPPARAM use MulticoreParam(n) for parallel processing
#' @return imputed data, vector or matrix
#' @importFrom GenomicRanges resize
#' @importFrom GenomicRanges findOverlaps
#' @importFrom S4Vectors subjectHits
#' @importFrom S4Vectors queryHits
#' @importFrom dplyr summarize
#' @examples
#' betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]])
#' sum(is.na(betas))
#' betas2 = imputeBetasByGenomicNeighbors(betas, "EPICv2")
#' sum(is.na(betas2))
#'
#' @export
imputeBetasByGenomicNeighbors <- function(betas, platform = NULL,
    BPPARAM = SerialParam(), max_neighbors = 3, max_dist = 10000) {

    if (is.matrix(betas)) {
        if (ncol(betas) == 0) { return(betas) }
        imputed <- do.call(cbind, bplapply(seq_len(ncol(betas)), function(i) {
            ## keep probe names even when the matrix has a single row
            column <- betas[, i]
            names(column) <- rownames(betas)
            imputeBetasByGenomicNeighbors(column, platform = platform,
                max_neighbors = max_neighbors, max_dist = max_dist)
        }, BPPARAM = BPPARAM))
        dimnames(imputed) <- dimnames(betas)
        return(imputed)
    }

    platform <- sesameData_check_platform(platform, names(betas))
    mft <- sesameData_getManifestGRanges(platform)
    ## select with %in% on both sides so that probes absent from the
    ## manifest are skipped instead of raising a subscript error
    is_missing <- is.na(betas)
    mft_missing <- mft[names(mft) %in% names(betas)[is_missing]]
    mft_nonmiss <- mft[names(mft) %in% names(betas)[!is_missing]]
    ## NOTE(review): resize() is called with its default anchoring, so the
    ## window may extend to one side of the probe only -- confirm intended
    index <- findOverlaps(resize(mft_missing, max_dist), mft_nonmiss)
    gm <- mft_missing[queryHits(index)]
    gn <- mft_nonmiss[subjectHits(index)]
    df <- tibble(
        cg = names(gm), beg_m = start(gm), end_m = end(gm),
        cg_n = names(gn), beg_n = start(gn), end_n = end(gn))

    ## gap between the missing probe and its neighbor; only the gap on
    ## the side where the neighbor lies can be positive
    df$d1 <- df$beg_m - df$end_n - 1
    df$d2 <- df$beg_n - df$end_m - 1
    df$betas <- betas[df$cg_n]
    df$dist <- pmax(df$d1, df$d2)
    ## per missing probe, average the max_neighbors closest neighbors
    df <- summarize(slice_min(group_by(df, .data[['cg']]),
        n = max_neighbors, order_by = .data[['dist']]),
        mbetas = mean(.data[['betas']]))
    betas[df$cg] <- df$mbetas
    betas
}
88 |
#' Impute Missing Values with Mean
#'
#' This function replaces missing values (NA) in a matrix by the mean of
#' the non-missing values of the same row (default) or the same column.
#' A row (or column) without any non-missing value is filled with NaN.
#'
#' @param mx A matrix
#' @param axis A single integer. Use 1 to impute row means (default),
#' and 2 to impute column means.
#' @return A matrix with missing values imputed. The dimensions and
#' dimnames of \code{mx} are preserved.
#' @examples
#' mx <- cbind(c(1, 2, NA, 4), c(NA, 2, 3, 4))
#' imputeBetasMatrixByMean(mx, axis = 1)
#' imputeBetasMatrixByMean(mx, axis = 2)
#' @export
imputeBetasMatrixByMean <- function(mx, axis = 1) {
    stopifnot(is.matrix(mx))
    if (axis == 1) {
        means <- rowMeans(mx, na.rm = TRUE)
    } else if (axis == 2) {
        means <- colMeans(mx, na.rm = TRUE)
    } else {
        stop("Invalid axis. Use 1 for rows or 2 for columns.")
    }
    ## fill in place so that the shape is kept for single-row and
    ## single-column matrices, where apply() returns a plain vector
    holes <- which(is.na(mx), arr.ind = TRUE)
    mx[holes] <- means[holes[, axis]]
    mx
}
118 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(print,DMLSummary)
4 | S3method(print,fileSet)
5 | export(BetaValueToMValue)
6 | export(DML)
7 | export(DMLpredict)
8 | export(DMR)
9 | export(ELBAR)
10 | export(MValueToBetaValue)
11 | export(SigDF)
12 | export(addMask)
13 | export(betasCollapseToPfx)
14 | export(bisConversionControl)
15 | export(calcEffectSize)
16 | export(checkLevels)
17 | export(cnSegmentation)
18 | export(compareMouseStrainReference)
19 | export(compareMouseTissueReference)
20 | export(compareReference)
21 | export(controls)
22 | export(createUCSCtrack)
23 | export(deIdentify)
24 | export(detectionPnegEcdf)
25 | export(diffRefSet)
26 | export(dmContrasts)
27 | export(dyeBiasCorr)
28 | export(dyeBiasCorrMostBalanced)
29 | export(dyeBiasCorrTypeINorm)
30 | export(dyeBiasL)
31 | export(dyeBiasNL)
32 | export(estimateLeukocyte)
33 | export(formatVCF)
34 | export(getAFTypeIbySumAlleles)
35 | export(getAFs)
36 | export(getBetas)
37 | export(getMask)
38 | export(getRefSet)
39 | export(imputeBetas)
40 | export(imputeBetasByGenomicNeighbors)
41 | export(imputeBetasMatrixByMean)
42 | export(inferEthnicity)
43 | export(inferInfiniumIChannel)
44 | export(inferSex)
45 | export(inferSpecies)
46 | export(inferStrain)
47 | export(inferTissue)
48 | export(initFileSet)
49 | export(liftOver)
50 | export(listAvailableMasks)
51 | export(mLiftOver)
52 | export(mapFileSet)
53 | export(mapToMammal40)
54 | export(matchDesign)
55 | export(meanIntensity)
56 | export(medianTotalIntensity)
57 | export(noMasked)
58 | export(noob)
59 | export(openSesame)
60 | export(openSesameToFile)
61 | export(pOOBAH)
62 | export(palgen)
63 | export(parseGEOsignalMU)
64 | export(predictAge)
65 | export(predictAgeHorvath353)
66 | export(predictAgeSkinBlood)
67 | export(predictMouseAgeInMonth)
68 | export(prefixMask)
69 | export(prefixMaskButC)
70 | export(prefixMaskButCG)
71 | export(prepSesame)
72 | export(prepSesameList)
73 | export(probeID_designType)
74 | export(probeSuccessRate)
75 | export(qualityMask)
76 | export(reIdentify)
77 | export(readFileSet)
78 | export(readIDATpair)
79 | export(recommendedMaskNames)
80 | export(resetMask)
81 | export(scrub)
82 | export(scrubSoft)
83 | export(sdfPlatform)
84 | export(sdf_read_table)
85 | export(sdf_write_table)
86 | export(searchIDATprefixes)
87 | export(sesameAnno_attachManifest)
88 | export(sesameAnno_buildAddressFile)
89 | export(sesameAnno_buildManifestGRanges)
90 | export(sesameAnno_download)
91 | export(sesameAnno_readManifestTSV)
92 | export(sesameQC_calcStats)
93 | export(sesameQC_getStats)
94 | export(sesameQC_plotBar)
95 | export(sesameQC_plotBetaByDesign)
96 | export(sesameQC_plotHeatSNPs)
97 | export(sesameQC_plotIntensVsBetas)
98 | export(sesameQC_plotRedGrnQQ)
99 | export(sesameQC_rankStats)
100 | export(sesameQCtoDF)
101 | export(sesame_checkVersion)
102 | export(sesamize)
103 | export(setMask)
104 | export(signalMU)
105 | export(sliceFileSet)
106 | export(summaryExtractTest)
107 | export(totalIntensities)
108 | export(updateSigDF)
109 | export(visualizeGene)
110 | export(visualizeProbes)
111 | export(visualizeRegion)
112 | export(visualizeSegments)
113 | import(BiocParallel)
114 | import(ggplot2)
115 | import(graphics)
116 | import(grid)
117 | import(sesameData)
118 | import(stats)
119 | import(stringr)
120 | import(tibble)
121 | import(wheatmap)
122 | importFrom(BiocFileCache,BiocFileCache)
123 | importFrom(BiocFileCache,bfcrpath)
124 | importFrom(GenomeInfoDb,Seqinfo)
125 | importFrom(GenomicRanges,GRanges)
126 | importFrom(GenomicRanges,end)
127 | importFrom(GenomicRanges,findOverlaps)
128 | importFrom(GenomicRanges,resize)
129 | importFrom(GenomicRanges,seqinfo)
130 | importFrom(GenomicRanges,seqnames)
131 | importFrom(GenomicRanges,start)
132 | importFrom(S4Vectors,metadata)
133 | importFrom(S4Vectors,queryHits)
134 | importFrom(S4Vectors,subjectHits)
135 | importFrom(SummarizedExperiment,"metadata<-")
136 | importFrom(SummarizedExperiment,SummarizedExperiment)
137 | importFrom(SummarizedExperiment,assay)
138 | importFrom(SummarizedExperiment,colData)
139 | importFrom(SummarizedExperiment,rowData)
140 | importFrom(dplyr,bind_cols)
141 | importFrom(dplyr,bind_rows)
142 | importFrom(dplyr,distinct)
143 | importFrom(dplyr,full_join)
144 | importFrom(dplyr,group_by)
145 | importFrom(dplyr,slice_min)
146 | importFrom(dplyr,summarize)
147 | importFrom(methods,.hasSlot)
148 | importFrom(methods,is)
149 | importFrom(methods,new)
150 | importFrom(preprocessCore,normalize.quantiles.use.target)
151 | importFrom(readr,col_character)
152 | importFrom(readr,col_integer)
153 | importFrom(readr,cols)
154 | importFrom(readr,read_tsv)
155 | importFrom(reshape2,melt)
156 | importFrom(stats,approx)
157 | importFrom(stats,setNames)
158 | importFrom(utils,download.file)
159 | importFrom(utils,head)
160 | importFrom(utils,packageVersion)
161 | importFrom(utils,read.table)
162 | importFrom(utils,tail)
163 | importFrom(utils,write.table)
164 | importMethodsFrom(IRanges,subsetByOverlaps)
165 |
--------------------------------------------------------------------------------
/R/deidentify.R:
--------------------------------------------------------------------------------
1 |
#' De-identify IDATs by removing SNP probes
#'
#' Mask SNP probe intensity mean by zero, or shuffle the SNP probe
#' intensities, and write the result to a new IDAT file.
#'
#' @param path input IDAT file
#' @param out_path output IDAT file. If NULL, the output path is derived
#' from an input name ending in _Grn.idat or _Red.idat (optionally
#' followed by .gz) by inserting _noid before the channel suffix.
#' @param snps SNP definition, if not given, default to SNP probes
#' @param mft sesame-compatible manifest if non-standard
#' @param randomize whether to randomize the SNPs. if TRUE,
#' randomize the signal intensities. one can use set.seed to
#' reidentify the IDAT with the secret seed (see examples).
#' If FALSE, this sets all SNP intensities to zero.
#' @return NULL, changes made to the IDAT files
#' @examples
#'
#' my_secret <- 13412084
#' set.seed(my_secret)
#' temp_out <- tempfile("test")
#' deIdentify(system.file(
#'     "extdata", "4207113116_A_Grn.idat", package = "sesameData"),
#'     temp_out, randomize = TRUE)
#' unlink(temp_out)
#' @export
deIdentify <- function(
    path, out_path=NULL, snps=NULL, mft=NULL, randomize=FALSE) {

    res <- suppressWarnings(readIDAT(path))
    platform <- inferPlatformFromTango(res)

    if(is.null(out_path)) {
        pfx <- sub('\\.idat(\\.gz)?$','', path)
        if(grepl('_Grn$', pfx)) {
            out_path <- paste0(sub('_Grn$','',pfx), '_noid_Grn.idat')
        } else if (grepl('_Red$', pfx)) {
            out_path <- paste0(sub('_Red$','',pfx), '_noid_Red.idat')
        } else {
            stop("Cannot derive out_path from ", path,
                "; please provide out_path explicitly.")
        }
    }

    if (is.null(mft)) {
        mft <- sesameDataGet(paste0(platform, '.address'))$ordering }
    if (is.null(snps)) {
        snps <- grep("^rs", mft$Probe_ID, value=TRUE) }
    mft <- mft[mft$Probe_ID %in% snps,]

    ## locate the SNP bead addresses among the IDAT records
    snpsTango <- na.omit(c(mft$M, mft$U))
    qt <- res$Quants
    snpsIdx <- match(snpsTango, rownames(qt))
    snpsIdx <- snpsIdx[!is.na(snpsIdx)] # addresses absent from this IDAT
    dt <- qt[,'Mean']
    if (randomize) {
        dt[snpsIdx] <- sample(dt[snpsIdx])
    } else {
        dt[snpsIdx] <- 0
    }

    if(grepl("\\.gz$", path)) { con <- gzfile(path, "rb")
    } else { con <- file(path, "rb") }
    ## close the connections even when reading or writing fails
    on.exit(close(con), add = TRUE)
    con2 <- file(out_path, "wb")
    on.exit(close(con2), add = TRUE)

    ## before Mean section
    writeBin(readBin(con, "raw", n = res$fields["Mean", 'byteOffset']), con2)

    ## write new Mean section
    writeBin(as.integer(dt), con2, size=2, endian='little')

    ## after Mean section
    ## skip by reading..., seek might not work for gzfile
    readBin(con, "raw", n = res$nSNPsRead*2)
    ## copy the remainder in chunks rather than one byte at a time
    repeat {
        chunk <- readBin(con, "raw", n = 65536L)
        if (length(chunk) == 0) { break }
        writeBin(chunk, con2)
    }

    invisible(NULL)
}
71 |
#' Re-identify IDATs by restoring scrambled SNP intensities
#'
#' This requires setting a seed with the secret number that was used to
#' de-identify the IDAT (see example), so that the same permutation is
#' drawn and can be inverted.
#'
#' @param path input IDAT file
#' @param out_path output IDAT file. If NULL, the output path is derived
#' from an input name ending in _Grn.idat or _Red.idat (optionally
#' followed by .gz) by inserting _reid before the channel suffix.
#' @param snps SNP definition, if not given, default to SNP probes
#' @param mft sesame-compatible manifest if non-standard
#' @return NULL, changes made to the IDAT files
#' @examples
#'
#' temp_out <- tempfile("test")
#'
#' set.seed(123)
#' reIdentify(system.file(
#'     "extdata", "4207113116_A_Grn.idat", package = "sesameData"), temp_out)
#' unlink(temp_out)
#' @export
reIdentify <- function(path, out_path=NULL, snps=NULL, mft=NULL) {

    res <- suppressWarnings(readIDAT(path))
    platform <- inferPlatformFromTango(res)

    if(is.null(out_path)) {
        pfx <- sub('\\.idat(\\.gz)?$','', path)
        if(grepl('_Grn$', pfx)) {
            out_path <- paste0(sub('_Grn$','',pfx), '_reid_Grn.idat')
        } else if (grepl('_Red$', pfx)) {
            out_path <- paste0(sub('_Red$','',pfx), '_reid_Red.idat')
        } else {
            stop("Cannot derive out_path from ", path,
                "; please provide out_path explicitly.")
        }
    }

    if (is.null(mft)) {
        mft <- sesameDataGet(paste0(platform, '.address'))$ordering }
    if (is.null(snps)) {
        snps <- grep("^rs", mft$Probe_ID, value=TRUE) }
    mft <- mft[mft$Probe_ID %in% snps,]

    ## locate the SNP bead addresses among the IDAT records
    snpsTango <- na.omit(c(mft$M, mft$U))
    qt <- res$Quants
    snpsIdx <- match(snpsTango, rownames(qt))
    dt <- qt[,'Mean']
    snpsIdx <- snpsIdx[!is.na(snpsIdx)]
    ## draw the permutation again (same seed expected) and apply its
    ## inverse to put every intensity back in place
    idx <- seq_along(snpsIdx)
    dt[snpsIdx] <- dt[snpsIdx[match(idx, sample(idx))]]

    if(grepl("\\.gz$", path)) { con <- gzfile(path, "rb")
    } else { con <- file(path, "rb") }
    ## close the connections even when reading or writing fails
    on.exit(close(con), add = TRUE)
    con2 <- file(out_path, "wb")
    on.exit(close(con2), add = TRUE)

    ## before Mean section
    writeBin(readBin(con, "raw", n = res$fields["Mean", 'byteOffset']), con2)

    ## write new Mean section
    writeBin(as.integer(dt), con2, size=2, endian='little')

    ## after Mean section
    ## skip by reading..., seek might not work for gzfile
    readBin(con, "raw", n = res$nSNPsRead*2)
    ## copy the remainder in chunks rather than one byte at a time
    repeat {
        chunk <- readBin(con, "raw", n = 65536L)
        if (length(chunk) == 0) { break }
        writeBin(chunk, con2)
    }

    invisible(NULL)
}
137 |
--------------------------------------------------------------------------------
/vignettes/QC.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Quality Control"
3 | date: "`r BiocStyle::doc_date()`"
4 | package: sesame
5 | output: BiocStyle::html_document
6 | fig_width: 6
7 | fig_height: 5
8 | vignette: >
9 | %\VignetteEngine{knitr::rmarkdown}
10 | %\VignetteIndexEntry{1. Quality Control}
11 | %\VignetteEncoding{UTF-8}
12 | ---
13 |
14 | ```{r message=FALSE, warning=FALSE, results="hide"}
15 | library(sesame)
16 | sesameDataCache()
17 | ```
18 |
19 | # Calculate Quality Metrics
20 |
21 | The main function to calculate the quality metrics is `sesameQC_calcStats`.
22 | This function takes a SigDF, calculates the QC statistics, and returns a single
23 | S4 `sesameQC` object, which can be printed directly to the console. To calculate
24 | QC metrics on a given list of samples or all IDATs in a folder, one can use
25 | `sesameQC_calcStats` within the standard `openSesame` pipeline. When used with
26 | `openSesame`, a list of `sesameQC`s will be returned. Note that one should turn
27 | off preprocessing using `prep=""`:
28 |
29 | ```{r qc1, eval=FALSE}
30 | ## calculate metrics on all IDATs in a specific folder
31 | sesameQCtoDF(openSesame(idat_dir, prep="", func=sesameQC_calcStats))
32 | ## or a list of prefixes, with parallel processing
33 | sesameQCtoDF(openSesame(sprintf("%s/%s", idat_dir, idat_prefixes), prep="",
34 | func=sesameQC_calcStats, BPPARAM=BiocParallel::MulticoreParam(24)))
35 | ```
36 |
37 | The results display `frac_dt_cg`, `RGratio`, `RGdistort` by default. For other
38 | QC metrics, SeSAMe divides sample quality metrics into multiple groups. These
39 | groups are listed below and can be referred to by short keys. For example,
40 | "intensity" generates signal intensity-related quality metrics.
41 |
42 | ```{r echo=FALSE}
43 | library(knitr)
44 | kable(data.frame(
45 | "Short Key" = c(
46 | "detection",
47 | "numProbes",
48 | "intensity",
49 | "channel",
50 | "dyeBias",
51 | "betas"),
52 | "Description" = c(
53 | "Signal Detection",
54 | "Number of Probes",
55 | "Signal Intensity",
56 | "Color Channel",
57 | "Dye Bias",
58 | "Beta Value")))
59 | ```
60 |
61 | By default, `sesameQC_calcStats` calculates all QC groups. To save time, one
62 | can compute a specific QC group by specifying one or multiple short keys in
63 | the `funs=` argument:
64 |
65 | ```{r qc2}
66 | sdfs <- sesameDataGet("EPIC.5.SigDF.normal")[1:2] # get two examples
67 | ## only compute signal detection stats
68 | qcs = openSesame(sdfs, prep="", func=sesameQC_calcStats, funs="detection")
69 | qcs[[1]]
70 | ```
71 |
72 | > We consider signal detection the most important QC metric.
73 |
74 | One can retrieve the actual stat numbers from a `sesameQC` object using
75 | `sesameQC_getStats` (the following retrieves the fraction of probes with
76 | detection success):
77 |
78 | ```{r qc3}
79 | sesameQC_getStats(qcs[[1]], "frac_dt")
80 | ```
81 |
82 | After computing the QCs, one can optionally combine the `sesameQC` objects into
83 | a data frame for easy comparison.
84 |
85 | ```{r qc4}
86 | ## combine a list of sesameQC into a data frame
87 | head(do.call(rbind, lapply(qcs, as.data.frame)))
88 | ```
89 |
90 | Note that when the input is a `SigDF` object, calling `sesameQC_calcStats`
91 | within `openSesame` and calling it as a standalone function are equivalent.
92 |
93 | ```{r qc5, message=FALSE}
94 | sdf <- sesameDataGet('EPIC.1.SigDF')
95 | qc = openSesame(sdf, prep="", func=sesameQC_calcStats, funs=c("detection"))
96 | ## equivalent direct call
97 | qc = sesameQC_calcStats(sdf, c("detection"))
98 | qc
99 | ```
100 |
101 | # Rank Quality Metrics
102 |
103 | ```{r qc6, echo=FALSE}
104 | options(rmarkdown.html_vignette.check_title = FALSE)
105 | ```
106 |
107 | SeSAMe can compare your sample with public data sets. The
108 | `sesameQC_rankStats()` function ranks the input `sesameQC` object against
109 | `sesameQC` objects calculated from public datasets. It shows the rank
110 | percentage of the input sample as well as the number of datasets compared.
111 |
112 | ```{r qc7}
113 | sdf <- sesameDataGet('EPIC.1.SigDF')
114 | qc <- sesameQC_calcStats(sdf, "intensity")
115 | qc
116 | sesameQC_rankStats(qc, platform="EPIC")
117 | ```
118 |
119 | # Quality Control Plots
120 |
121 | SeSAMe provides functions to create QC plots. Some functions take `sesameQC`
122 | objects as input while others directly plot the SigDF objects. Here are some examples:
123 |
124 | - `sesameQC_plotBar()` takes a list of sesameQC objects and creates a bar
125 | plot for each metric calculated.
126 |
127 | - `sesameQC_plotRedGrnQQ()` graphs the dye bias between the two color channels.
128 |
129 | - `sesameQC_plotIntensVsBetas()` plots the relationship between $\beta$ values
130 | and signal intensity and can be used to diagnose artificial readout and
131 | influence of signal background.
132 |
133 | - `sesameQC_plotHeatSNPs()` plots SNP probes and can be used to detect sample
134 | swaps.
135 |
136 | More about quality control plots can be found in [Supplemental
137 | Vignette](https://zhou-lab.github.io/sesame/v1.16/supplemental.html#qc).
138 |
139 | # Session Info
140 |
141 | ```{r}
142 | sessionInfo()
143 | ```
144 |
--------------------------------------------------------------------------------
/man/mLiftOver.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mLiftOver.R
3 | \name{mLiftOver}
4 | \alias{mLiftOver}
5 | \title{Lift over beta values or SigDFs to another Infinium platform
6 | This function wraps ID conversion and provides optional
7 | imputation functionality.}
8 | \usage{
9 | mLiftOver(
10 | x,
11 | target_platform,
12 | source_platform = NULL,
13 | BPPARAM = SerialParam(),
14 | mapping = NULL,
15 | impute = FALSE,
16 | sd_max = 999,
17 | celltype = "Blood",
18 | ...
19 | )
20 | }
21 | \arguments{
22 | \item{x}{either named beta value (vector or matrix), probe IDs
23 | or SigDF(s)
24 | if input is a matrix, probe IDs should be in the row names
25 | if input is a numeric vector, probe IDs should be in the vector
26 | names.
27 | If input is a character vector, the input will be
28 | considered probe IDs.}
29 |
30 | \item{target_platform}{the platform to take the data to}
31 |
32 | \item{source_platform}{optional information of the source data
33 | platform (when there might be ambiguity).}
34 |
35 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing}
36 |
37 | \item{mapping}{a liftOver mapping file. Typically this file
38 | contains empirical evidence whether a probe mapping is reliable.
39 | If given, probe ID-based mapping will be skipped. This is to
40 | perform more stringent probe ID mapping.}
41 |
42 | \item{impute}{whether to impute or not, default is FALSE}
43 |
44 | \item{sd_max}{the maximum standard deviation for filtering low
45 | confidence imputation.}
46 |
47 | \item{celltype}{the cell type / tissue context of imputation,
48 | if not given, will use nearest neighbor to find out.}
49 |
50 | \item{...}{extra arguments, see ?convertProbeID}
51 | }
52 | \value{
53 | imputed data, vector, matrix, SigDF(s)
54 | }
55 | \description{
56 | Lift over beta values or SigDFs to another Infinium platform.
57 | This function wraps ID conversion and provides optional
58 | imputation functionality.
59 | }
60 | \examples{
61 |
62 | \dontrun{
63 | sesameDataCache()
64 |
65 | ## lift SigDF
66 |
67 | sdf = sesameDataGet("EPICv2.8.SigDF")[["GM12878_206909630042_R08C01"]]
68 | dim(mLiftOver(sdf, "EPICv2"))
69 | dim(mLiftOver(sdf, "EPIC"))
70 | dim(mLiftOver(sdf, "HM450"))
71 |
72 | sdfs = sesameDataGet("EPICv2.8.SigDF")[1:2]
73 | sdfs_hm450 = mLiftOver(sdfs, "HM450")
74 | ## parallel processing
75 | sdfs_hm450 = mLiftOver(sdfs, "HM450", BPPARAM=BiocParallel::MulticoreParam(2))
76 |
77 | sdf = sesameDataGet("EPIC.5.SigDF.normal")[[1]]
78 | dim(mLiftOver(sdf, "EPICv2"))
79 | dim(mLiftOver(sdf, "EPIC"))
80 | dim(mLiftOver(sdf, "HM450"))
81 |
82 | sdf = sesameDataGet("HM450.10.SigDF")[[1]]
83 | dim(mLiftOver(sdf, "EPICv2"))
84 | dim(mLiftOver(sdf, "EPIC"))
85 | dim(mLiftOver(sdf, "HM450"))
86 |
87 | ## lift beta values
88 |
89 | betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]])
90 | betas_hm450 = mLiftOver(betas, "HM450", impute=TRUE)
91 | length(betas_hm450)
92 | sum(is.na(betas_hm450))
93 | betas_hm450 <- mLiftOver(betas, "HM450", impute=FALSE)
94 | length(betas_hm450)
95 | sum(is.na(betas_hm450))
96 | betas_epic1 <- mLiftOver(betas, "EPIC", impute=TRUE)
97 | length(betas_epic1)
98 | sum(is.na(betas_epic1))
99 | betas_epic1 <- mLiftOver(betas, "EPIC", impute=FALSE)
100 | length(betas_epic1)
101 | sum(is.na(betas_epic1))
102 |
103 | betas_matrix = openSesame(sesameDataGet("EPICv2.8.SigDF")[1:4])
104 | dim(betas_matrix)
105 | betas_matrix_hm450 = mLiftOver(betas_matrix, "HM450", impute=T)
106 | dim(betas_matrix_hm450)
107 | ## parallel processing
108 | betas_matrix_hm450 = mLiftOver(betas_matrix, "HM450", impute=T,
109 | BPPARAM=BiocParallel::MulticoreParam(4))
110 |
111 | ## use empirical evidence in mLiftOver
112 | mapping = sesameDataGet("liftOver.EPICv2ToEPIC")
113 | betas_matrix = openSesame(sesameDataGet("EPICv2.8.SigDF")[1:4])
114 | dim(mLiftOver(betas_matrix, "EPIC", mapping = mapping))
115 | ## compare to without using empirical evidence
116 | dim(mLiftOver(betas_matrix, "EPIC"))
117 |
118 | betas <- c("cg04707299"=0.2, "cg13380562"=0.9, "cg00000103"=0.1)
119 | head(mLiftOver(betas, "HM450", impute=TRUE))
120 |
121 | betas <- c("cg00004963_TC21"=0, "cg00004963_TC22"=0.5, "cg00004747_TC21"=1.0)
122 | betas_hm450 <- mLiftOver(betas, "HM450", impute=TRUE)
123 | head(na.omit(mLiftOver(betas, "HM450", impute=FALSE)))
124 |
125 | ## lift probe IDs
126 |
127 | cg_epic2 = names(sesameData_getManifestGRanges("EPICv2"))
128 | head(mLiftOver(cg_epic2, "HM450"))
129 |
130 | cg_epic2 = grep("cg", names(sesameData_getManifestGRanges("EPICv2")), value=T)
131 | head(mLiftOver(cg_epic2, "HM450"))
132 |
133 | cg_hm450 = grep("cg", names(sesameData_getManifestGRanges("HM450")), value=T)
134 | head(mLiftOver(cg_hm450, "EPICv2"))
135 |
136 | rs_epic2 = grep("rs", names(sesameData_getManifestGRanges("EPICv2")), value=T)
137 | head(mLiftOver(rs_epic2, "HM450", source_platform="EPICv2"))
138 |
139 | probes_epic2 = names(sesameData_getManifestGRanges("EPICv2"))
140 | head(mLiftOver(probes_epic2, "EPIC"))
141 | head(mLiftOver(probes_epic2, "EPIC", target_uniq = TRUE))
142 | head(mLiftOver(probes_epic2, "EPIC", include_new = FALSE))
143 | head(mLiftOver(probes_epic2, "EPIC", include_old = FALSE))
144 | head(mLiftOver(probes_epic2, "EPIC", return_mapping=TRUE))
145 |
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
1 | c(
2 | bibentry(bibtype = "Article",
3 | key = "lee2024input",
4 | title = "Low-input and single-cell methods for Infinium DNA methylation BeadChips",
5 | author = c(
6 | person(given = "Sol Moe", family = "Lee"),
7 | person(given = "Christian", family = "Loo"),
8 | person(given = "Rexxi", family = "Prasasya"),
9 | person(given = "Marisa", family = "Bartolomei"),
10 | person(given = "Rahul", family = "Kohli"),
11 | person(given = "Wanding", family = "Zhou")),
12 | year = "2024", month = "Feb", journal = "Nucleic acids research",
13 | ## volume = "24", number = "1",
14 | doi = "10.1093/nar/gkae127",
15 | language = "eng", issn = "1362-4962",
16 | header = "The ELBAR detection calling is described in:"),
17 |
18 | bibentry(bibtype = "Article",
19 | key = "BIB",
20 | title = "Comparative epigenome analysis using Infinium DNA methylation BeadChips",
21 | author = c(
22 | person(given = "Wubin", family = "Ding"),
23 | person(given = "Diljeet", family = "Kaur"),
24 | person(given = "Steve", family = "Horvath"),
25 | person(given = "Wanding", family = "Zhou")),
26 | year = "2023", month = "Jan", journal = "Briefings in bioinformatics",
27 | volume = "24", number = "1", eprint = "36617464",
28 | doi = "10.1093/bib/bbac617",
29 | language = "eng", issn = "1477-4054",
30 | header = "Data analysis on non-standard genome is described in:"),
31 |
32 | bibentry(bibtype = "Article",
33 | key = "MM285",
34 | title = "DNA methylation dynamics and dysregulation delineated by high-throughput profiling in the mouse",
35 | author = c(
36 | person(given = "Wanding", family = "Zhou"),
37 | person(given = "Toshinori", family = "Hinoue"),
38 | person(given = "Bret", family = "Barnes"),
39 | person(given = "Owen", family = "Mitchell"),
40 | person(given = "Waleed", family = "Iqbal"),
41 | person(given = c("Sol", "Moe"), family = "Lee"),
42 | person(given = c("Kelly", "K"), family = "Foy"),
43 | person(given = "Kwang-Ho", family = "Lee"),
44 | person(given = c("Ethan", "J"), family = "Moyer"),
45 | person(given = "Alexandra", family = "VanderArk"),
46 | person(given = c("Julie", "M"), family = "Koeman"),
47 | person(given = "Wubin", family = "Ding"),
48 | person(given = "Manpreet", family = "Kalkat"),
49 | person(given = c("Nathan", "J"), family = "Spix"),
50 | person(given = "Bryn", family = "Eagleson"),
51 | person(given = c("John", "Andrew"), family = "Pospisilik"),
52 | person(given = c("Piroska", "E"), family = "Szabó"),
53 | person(given = c("Marisa", "S"), family = "Bartolomei"),
54 | person(given = c("Nicole", "A", "Vander"), family = "Schaaf"),
55 | person(given = "Liang", family = "Kang"),
56 | person(given = c("Ashley", "K"), family = "Wiseman"),
57 | person(given = c("Peter", "A"), family = "Jones"),
58 | person(given = c("Connie", "M"), family = "Krawczyk"),
59 | person(given = "Marie", family = "Adams"),
60 | person(given = "Rishi", family = "Porecha"),
61 | person(given = c("Brian", "H"), family = "Chen"),
62 | person(given = "Hui", family = "Shen"),
63 | person(given = c("Peter", "W"), family = "Laird")),
64 | year = "2022", month = "Jul", journal = "Cell genomics",
65 | volume = "2", number = "7", eprint = "35873672",
66 | doi = "10.1016/j.xgen.2022.100144", language = "eng",
67 | issn = "2666-979X", eprinttype = "pubmed",
68 | header = "Mouse (MM285) array informatics is described in:"),
69 |
70 | bibentry(bibtype = "Article",
71 | key = "sesame",
72 | title = "{SeSAMe: reducing artifactual detection of DNA methylation by Infinium BeadChips in genomic deletions}",
73 | author = c(
74 | person("Wanding", "Zhou"),
75 | person(c("Timothy", "J."), "Triche"),
76 | person(c("Peter", "W."), "Laird"),
77 | person("Hui", "Shen")
78 | ),
79 | year = "2018",
80 | journal = "Nucleic Acids Research",
81 | volume = "gky691",
82 | doi = "10.1093/nar/gky691",
83 | pubmed = "30085201",
84 | header = "pOOBAH detection calling is described in:"),
85 |
86 | bibentry(bibtype = "Article",
87 | key = "noob",
88 | author = c(person(given = c("Timothy", "J."), family = "Triche"),
89 | person(given = c("Daniel", "J."), family = "Weisenberger"),
90 | person(given = c("David"), family = "Van Den Berg"),
91 | person(given = c("Peter", "W."), family = "Laird"),
92 | person(given = c("Kimberly", "D."), family = "Siegmund")),
93 | title = "Low-level processing of {Illumina} {Infinium} {DNA} {Methylation} {BeadArrays}",
94 | journal = "Nucleic Acids Research",
95 | year = "2013",
96 | volume = "41",
97 | number = "7",
98 | pages = "e90",
99 | doi = "10.1093/nar/gkt090",
100 | pubmed = "23476028",
101 | header = "noob background correction is described in:")
102 | )
103 |
--------------------------------------------------------------------------------
/R/strain.R:
--------------------------------------------------------------------------------
## Convert beta values to allele frequencies at the strain SNP probes listed
## in the row data of the "MM285.addressStrain" strain_snps table.
##
## betas: beta values indexed by Probe_ID (presumably a named numeric vector;
##   confirm against callers).
## Returns the betas restricted to the strain SNP probes, with 1 - beta
## substituted wherever the table's flipToAF column selects the probe.
mouseBetaToAF <- function(betas) {
    snp_info <- rowData(sesameDataGet('MM285.addressStrain')$strain_snps)
    allele_freq <- betas[snp_info$Probe_ID]
    to_flip <- snp_info$flipToAF
    allele_freq[to_flip] <- 1 - allele_freq[to_flip]
    allele_freq
}
8 |
#' Infer strain information for mouse array
#'
#' Allele frequencies measured at the strain SNP probes are compared with the
#' per-strain reference values stored in the \code{MM285.addressStrain}
#' table. Each strain is scored with a normal log-likelihood (fixed sd of
#' 0.8) of the difference between reference and measurement, and the
#' highest-scoring strain is taken as the best guess.
#'
#' The \code{return.*} switches are tested in the order
#' \code{return.strain}, \code{return.probability}, \code{return.pval};
#' the first one that is TRUE decides the return value.
#'
#' @param sdf SigDF
#' @param min_frac_dt minimum fraction of detected signal (DEFAULT: 0.2)
#' otherwise, we give up strain inference and return NA.
#' @param return.probability return probability vector for all strains
#' @param return.pval return p-value
#' @param return.strain return strain name
#' @param verbose print more messages
#' @return by default, the value of \code{updateSigDF} called with the
#' inferred strain. With \code{return.strain}, the name of the best-guess
#' strain; with \code{return.probability}, the probabilities of all strains
#' (normalized to sum to one); with \code{return.pval}, one minus the
#' normalized probability of the best guess. When the fraction of detected
#' strain probes is below \code{min_frac_dt}, NA (or a vector of NAs for
#' \code{return.probability}) is returned instead, and in the default mode
#' the value of \code{sdfMsg} is returned.
#' @examples
#' sesameDataCache() # if not done yet
#' sdf <- sesameDataGet('MM285.1.SigDF')
#' inferStrain(sdf, return.strain = TRUE)
#' sdf.strain <- inferStrain(sdf)
#' @import tibble
#' @export
inferStrain <- function(
    sdf, return.strain = FALSE, return.probability = FALSE,
    return.pval = FALSE, min_frac_dt = 0.2, verbose = FALSE) {

    addr <- sesameDataGet("MM285.addressStrain")
    se <- addr$strain_snps
    rd <- SummarizedExperiment::rowData(se)

    ## C57BL_6J is the first strain in the table
    strain_snps <- rd[,which(colnames(rd)=="C57BL_6J"):ncol(rd)]

    ## give up if the success rate is low
    ## na.rm: a strain probe without a p-value counts as not detected
    ## instead of turning the whole fraction (and the `if`) into NA
    pvals <- pOOBAH(sdf, return.pval=TRUE)
    frac_dt <- sum(pvals[rd$Probe_ID] < 0.05, na.rm = TRUE) / nrow(rd)
    if (frac_dt < min_frac_dt) {
        if (return.strain) { return(NA)
        } else if (return.probability) { return(rep(NA, ncol(strain_snps)))
        } else if (return.pval) { return(NA)
        } else { return(sdfMsg(sdf, verbose,
            "Abort strain inference for low detection rate.")) }
    }

    vafs <- getBetas(dyeBiasNL(noob(sdf)), mask=FALSE)[rd$Probe_ID]
    vafs[is.na(vafs)] <- 0.5 # just in case
    vafs[rd$flipToAF] <- 1 - vafs[rd$flipToAF]

    ## only score probes whose QC is not "FAIL"
    probes <- intersect(names(vafs), rd$Probe_ID[rd$QC!="FAIL"])
    vafs <- vafs[probes]
    ## per-strain log-likelihood, computed directly on the log scale
    loglik <- vapply(strain_snps[match(probes, rd$Probe_ID),],
        function(x) sum(dnorm(x - vafs, mean=0, sd=0.8, log=TRUE)),
        numeric(1))
    ## subtract the maximum before exponentiating to avoid underflow
    probs <- setNames(exp(loglik - max(loglik)), colnames(strain_snps))

    best.index <- which.max(probs)
    strain <- names(best.index)
    if (return.strain) {
        strain # addr$strain[[strain]][c("JAX_ID","MGP_ID")]
    } else if (return.probability) {
        probs / sum(probs)
    } else if (return.pval) {
        1 - probs[best.index] / sum(probs)
    } else {
        updateSigDF(sdf, strain = strain, addr = addr, verbose = verbose) }
}
71 |
#' Compare Strain SNPs with a reference panel
#'
#' Draws the strain SNP reference panel (probes whose QC is "FAIL" are left
#' out) as a heatmap and, when \code{betas} is supplied, places the query
#' sample(s) to its right. A vertical color bar for the probe branch and a
#' horizontal color bar for the reference strains are added, plus a legend.
#'
#' @param betas beta value vector or matrix (for multiple samples)
#' @param show_sample_names whether to show sample name
#' @param query_width optional argument for adjusting query width
#' @return grid object that contrast the target sample with
#' pre-built mouse strain reference
#' @importFrom S4Vectors metadata
#' @import wheatmap
#' @examples
#' sesameDataCache() # if not done yet
#' compareMouseStrainReference()
#' @export
compareMouseStrainReference <- function(
    betas = NULL, show_sample_names = FALSE, query_width = NULL) {

    ref_se <- sesameDataGet("MM285.addressStrain")$strain_snps

    col_info <- as_tibble(SummarizedExperiment::colData(ref_se))
    row_info <- as_tibble(SummarizedExperiment::rowData(ref_se))
    meta <- metadata(ref_se)

    ## drop probes flagged as FAIL from both the assay and its annotation
    keep <- row_info$QC != "FAIL"
    ref_se <- ref_se[keep,]
    row_info <- row_info[keep,]

    has_query <- !is.null(betas)
    if (has_query && is.null(dim(betas))) { # in case a vector
        betas <- cbind(betas)
    }

    ## a row is flipped (1 - value) when exactly one of the two flags is set
    probe_idx <- seq_along(row_info$flipToAF)
    needs_flip <- function(i) {
        xor(row_info$flipToAF[i], row_info$flipForRefBias[i])
    }

    ref_mat <- assay(ref_se)
    afs <- do.call(rbind, lapply(probe_idx, function(i) {
        if (needs_flip(i)) {
            1 - ref_mat[i,]
        } else {
            ref_mat[i,]
        }
    }))
    rownames(afs) <- row_info$Probe_ID

    stops <- c("white", "black")
    g <- WHeatmap(afs, cmp=CMPar(stop.points=stops, dmin=0, dmax=1),
        xticklabels = show_sample_names, xticklabels.n=ncol(afs), name="b1")

    ## the branch color bar attaches to the right-most heatmap
    anchor <- "b1"
    if (has_query) { # query samples
        afs2 <- do.call(rbind, lapply(probe_idx, function(i) {
            query_row <- row_info$Probe_ID[i]
            if (needs_flip(i)) {
                1 - betas[query_row,]
            } else {
                betas[query_row,]
            }
        }))
        g <- g + WHeatmap(afs2, RightOf("b1", width=query_width),
            cmp=CMPar(stop.points=stops, dmin=0, dmax=1),
            name="b2", xticklabels=TRUE, xticklabels.n=ncol(betas))
        anchor <- "b2"
    }

    ## branch color bar (vertical)
    g <- g + WColorBarV(row_info$BranchLong, RightOf(anchor, width=0.03),
        cmp=CMPar(label2color=meta$strain.colors), name="bh")
    ## strain color bar (horizontal)
    g <- g + WColorBarH(col_info$strain, TopOf("b1",height=0.03),
        cmp=CMPar(label2color=meta$strain.colors), name="st")
    ## legends
    g <- g + WLegendV("st",
        TopRightOf("bh", just=c('left','top'), h.pad=0.02),
        height=0.03)
    g + WCustomize(mar.bottom=0.15, mar.right=0.06)
}
134 |
--------------------------------------------------------------------------------
/R/match_design.R:
--------------------------------------------------------------------------------
1 |
## Quantile-normalize `input` onto the distribution of `ref` and turn each
## normalized value bn into U * bn / (1 - bn), i.e. the value M for which
## M / (M + U) equals bn.
##
## input: values to normalize (passed on as a one-column matrix)
## ref:   target distribution handed to normalize.quantiles.use.target
## U:     multiplier applied element-wise to the normalized values
normalizeSetM <- function(input, ref, U) {
    normalized <- normalize.quantiles.use.target(matrix(input), ref)
    (U * normalized) / (1 - normalized)
}
6 |
## Location of the highest point of the kernel density estimate of x
## (NAs are removed before the density is estimated).
calcMode <- function(x) {
    kde <- density(na.omit(x))
    peak <- which.max(kde$y)
    kde$x[peak]
}
11 |
## Depth of the density valley between the modes of two samples.
##
## The density of the pooled sample is evaluated at every observation of x1
## and x2 lying between the two modes; the smallest of those densities is
## divided by the smaller of the densities at the two modes themselves.
valleyDescent <- function(x1, x2) {

    modes <- c(calcMode(x1), calcMode(x2))
    lo <- min(modes)
    hi <- max(modes)

    pooled <- density(na.omit(c(x1, x2)))
    dens_at <- approxfun(pooled$x, pooled$y)

    between1 <- x1[x1 >= lo & x1 <= hi]
    between2 <- x2[x2 >= lo & x2 <= hi]
    valley <- min(dens_at(c(between1, between2)), na.rm=TRUE)
    shoulder <- min(dens_at(c(lo, hi)), na.rm=TRUE)
    valley / shoulder
}
24 |
## One-state matching of Infinium-I betas to Infinium-II betas.
##
## Betas of the unmasked Infinium-I red and green probes are each
## quantile-normalized against the betas of the unmasked Infinium-II probes
## (via normalizeSetM), and the result replaces the MR / MG column. Probes
## of sdf absent from these three groups are appended unchanged, and the
## rows are returned ordered by Probe_ID.
match1To2_1state <- function(sdf) {
    red <- noMasked(InfIR(sdf))
    beta_red <- getBetas(red)
    grn <- noMasked(InfIG(sdf))
    beta_grn <- getBetas(grn)
    inf2 <- noMasked(InfII(sdf))
    target <- getBetas(inf2)

    grn$MG <- normalizeSetM(beta_grn, target, grn$UG)
    red$MR <- normalizeSetM(beta_red, target, red$UR)

    merged <- rbind(red, grn, inf2)
    untouched <- sdf[!(sdf$Probe_ID %in% merged$Probe_ID),]
    merged <- rbind(merged, untouched)
    merged[order(merged$Probe_ID),]
}
39 |
## Three-state matching of Infinium-I betas to Infinium-II betas.
##
## Each of the three probe groups (Infinium-I red, Infinium-I green,
## Infinium-II; unmasked probes only) is split into three states with
## betaMix3States. Within every state, the Infinium-I betas are
## quantile-normalized against the Infinium-II betas of the same state and
## the result replaces the corresponding MR / MG entries. Probes of sdf
## absent from the three groups are appended unchanged, and the rows are
## returned ordered by Probe_ID.
match1To2_3states <- function(sdf) {
    red <- noMasked(InfIR(sdf))
    beta_red <- getBetas(red)
    grn <- noMasked(InfIG(sdf))
    beta_grn <- getBetas(grn)
    inf2 <- noMasked(InfII(sdf))
    beta_inf2 <- getBetas(inf2)

    state_red <- as.integer(betaMix3States(beta_red))
    state_grn <- as.integer(betaMix3States(beta_grn))
    state_inf2 <- as.integer(betaMix3States(beta_inf2))

    ## red channel, one state at a time
    for (k in seq_len(3)) {
        in_k <- state_red == k
        red$MR[in_k] <- normalizeSetM(
            beta_red[in_k], beta_inf2[state_inf2 == k], red$UR[in_k])
    }
    ## green channel, one state at a time
    for (k in seq_len(3)) {
        in_k <- state_grn == k
        grn$MG[in_k] <- normalizeSetM(
            beta_grn[in_k], beta_inf2[state_inf2 == k], grn$UG[in_k])
    }

    merged <- rbind(red, grn, inf2)
    untouched <- sdf[!(sdf$Probe_ID %in% merged$Probe_ID),]
    merged <- rbind(merged, untouched)
    merged[order(merged$Probe_ID),]
}
62 |
#' Normalize Infinium I probe betas to Infinium II
#'
#' This is designed to counter tail inflation in Infinium I probes.
#'
#' The betas of the unmasked Infinium II probes are first split into two
#' states. Three-state normalization is used when both states hold more
#' than 100 probes and their modes are more than 0.7 apart. One-state
#' normalization is used when either state holds fewer than 10 probes, when
#' the density valley between the two modes is at least 0.8 of the lower
#' mode density, or when the modes are closer than \code{min_dbeta}.
#' Otherwise the two states are quantile-normalized separately.
#'
#' @param sdf SigDF
#' @param min_dbeta smallest distance between the two Infinium II modes for
#' which the two states are still normalized separately; when the modes are
#' closer than this, a one-state normalization is used instead.
#' @return SigDF
#' @examples
#'
#' library(RPMM)
#' sdf <- sesameDataGet("MM285.1.SigDF")
#' sesameQC_plotBetaByDesign(sdf)
#' sesameQC_plotBetaByDesign(matchDesign(sdf))
#'
#' @export
matchDesign <- function(sdf, min_dbeta = 0.3) {
    red <- noMasked(InfIR(sdf))
    grn <- noMasked(InfIG(sdf))
    inf2 <- noMasked(InfII(sdf))

    beta_inf2 <- getBetas(inf2)
    state_inf2 <- as.integer(betaMix2States(beta_inf2))

    n_low <- sum(state_inf2 == 1, na.rm=TRUE)
    n_high <- sum(state_inf2 == 2, na.rm=TRUE)
    ## evaluated lazily: the density estimate needs enough probes per state
    mode_gap <- function() {
        abs(calcMode(beta_inf2[state_inf2 == 1]) -
            calcMode(beta_inf2[state_inf2 == 2]))
    }

    ## well separated, well populated states: use three states
    if (n_low > 100 && n_high > 100 && mode_gap() > 0.7) {
        return(match1To2_3states(sdf))
    }

    ## too few probes in a state, or states not separable: use one state
    if (n_low < 10 || n_high < 10 ||
        valleyDescent(beta_inf2[state_inf2==1],
            beta_inf2[state_inf2==2]) >= 0.8 ||
        mode_gap() < min_dbeta) {
        return(match1To2_1state(sdf))
    }

    ## two-state normalization
    beta_red <- getBetas(red, mask = FALSE)
    state_red <- as.integer(betaMix2States(beta_red))
    beta_grn <- getBetas(grn, mask = FALSE)
    state_grn <- as.integer(betaMix2States(beta_grn))

    for (k in seq_len(2)) {
        in_k <- state_red == k
        red$MR[in_k] <- normalizeSetM(
            beta_red[in_k], beta_inf2[state_inf2 == k], red$UR[in_k])
    }
    for (k in seq_len(2)) {
        in_k <- state_grn == k
        grn$MG[in_k] <- normalizeSetM(
            beta_grn[in_k], beta_inf2[state_inf2 == k], grn$UG[in_k])
    }

    merged <- rbind(red, grn, inf2)
    untouched <- sdf[!(sdf$Probe_ID %in% merged$Probe_ID),]
    merged <- rbind(merged, untouched)
    merged[order(merged$Probe_ID),]
}
116 |
## Split beta values into two states with a two-component beta mixture.
##
## x:         beta values (NAs allowed; they stay NA in the result)
## n_samples: at most this many non-NA values are used to fit the mixture;
##            larger inputs are randomly subsampled
## th_init:   initial cut used to seed the membership matrix
##
## The mixture is fitted with RPMM::blc, every fitted value is assigned to
## the component with the largest weight, and the cut point is placed
## halfway between the largest value of state 1 and the smallest value of
## state 2. Returns a factor (from cut() with breaks 0, threshold, 1) with
## the names of x.
betaMix2States <- function(x, n_samples = 10000, th_init = 0.5) {
    if (sum(!is.na(x)) > n_samples) {
        x1 <- sample(na.omit(x), n_samples)
    } else {
        x1 <- na.omit(x)
    }
    m <- matrix(0, nrow = length(x1), ncol = 2) # membership matrix
    m[x1 <= th_init, 1] <- 1
    m[x1 > th_init, 2] <- 1

    fitres <- RPMM::blc(
        matrix(x1), m, maxiter = 5, tol = 0.001, verbose = FALSE)
    m1 <- apply(fitres$w, 1, which.max)

    ## Midpoint between two adjacent states. mean(a, b) would treat b as the
    ## `trim` argument and just return a, so the two bounds are averaged
    ## explicitly. If a state is empty (non-finite bound) fall back to the
    ## upper end of the lower state.
    boundary <- function(lower, upper) {
        lo <- max(lower)
        hi <- min(upper)
        if (is.finite(lo) && is.finite(hi)) (lo + hi) / 2 else lo
    }
    th <- boundary(x1[m1 == 1], x1[m1 == 2])

    m2 <- cut(x, breaks=c(0, th, 1), include.lowest = TRUE)
    names(m2) <- names(x)
    m2
}
135 |
## Split beta values into three states with a three-component beta mixture.
##
## x:         beta values (NAs allowed; they stay NA in the result)
## n_samples: at most this many non-NA values are used to fit the mixture;
##            larger inputs are randomly subsampled
## th_init1, th_init2: initial cuts used to seed the membership matrix
##
## The mixture is fitted with RPMM::blc, every fitted value is assigned to
## the component with the largest weight, and each cut point is placed
## halfway between the largest value of one state and the smallest value of
## the next. Returns a factor (from cut() with breaks 0, th1, th2, 1) with
## the names of x.
betaMix3States <- function(
    x, n_samples = 10000, th_init1 = 0.2, th_init2 = 0.7) {

    if (sum(!is.na(x)) > n_samples) {
        x1 <- sample(na.omit(x), n_samples)
    } else {
        x1 <- na.omit(x)
    }
    m <- matrix(0, nrow = length(x1), ncol = 3) # membership matrix
    m[x1 <= th_init1, 1] <- 1
    m[x1 > th_init1 & x1 <= th_init2, 2] <- 1
    m[x1 > th_init2, 3] <- 1

    fitres <- RPMM::blc(
        matrix(x1), m, maxiter = 5, tol = 0.001, verbose = FALSE)
    m1 <- apply(fitres$w, 1, which.max)

    ## Midpoint between two adjacent states. mean(a, b) would treat b as the
    ## `trim` argument and just return a, so the two bounds are averaged
    ## explicitly. If a state is empty (non-finite bound) fall back to the
    ## upper end of the lower state.
    boundary <- function(lower, upper) {
        lo <- max(lower)
        hi <- min(upper)
        if (is.finite(lo) && is.finite(hi)) (lo + hi) / 2 else lo
    }
    th1 <- boundary(x1[m1 == 1], x1[m1 == 2])
    th2 <- boundary(x1[m1 == 2], x1[m1 == 3])

    m2 <- cut(x, breaks=c(0, th1, th2, 1), include.lowest = TRUE)
    names(m2) <- names(x)
    m2
}
158 |
--------------------------------------------------------------------------------
/R/open.R:
--------------------------------------------------------------------------------
#' List supported prepSesame functions
#'
#' Each row pairs a one-character code with the name of the preprocessing
#' function it stands for and a short description.
#'
#' @return a data frame with code, func, description
#' @examples
#' prepSesameList()
#' @export
prepSesameList <- function() {
    ## one (code, func, description) triple per line
    entries <- c(
        "0", "resetMask", "Reset mask to all FALSE",
        "Q", "qualityMask", "Mask probes of poor design",
        "G", "prefixMaskButCG", "Mask all but cg- probes",
        "H", "prefixMaskButC", "Mask all but cg- and ch-probes",
        "C", "inferInfiniumIChannel", "Infer channel for Infinium-I probes",
        "D", "dyeBiasNL", "Dye bias correction (non-linear)",
        "E", "dyeBiasL", "Dye bias correction (linear)",
        "P", "pOOBAH", "Detection p-value masking using oob",
        "I", "ELBAR", "Mask background-dominated readings",
        "B", "noob", "Background subtraction using oob",
        "U", "scrub", "More aggressive background subtraction using scrub",
        "S", "inferSpecies", "Set species-specific mask",
        "T", "inferStrain", "Set strain-specific mask (mouse)",
        "M", "matchDesign", "Match Inf-I/II in beta distribution")
    x <- data.frame(matrix(entries, ncol = 3, byrow = TRUE))
    colnames(x) <- c("code", "func", "description")
    x
}
26 |
#' Apply a chain of sesame preprocessing functions in an arbitrary order
#'
#' Every character of \code{prep} is looked up in \code{prepSesameList()}
#' and the matching function is applied to the running result, left to
#' right. An unknown code stops with an error.
#'
#' Notes on the order of operation:
#' 1. qualityMask and inferSpecies should go before noob and pOOBAH,
#' otherwise the background is too high because of Multi,
#' uk and other probes
#' 2. dyeBias correction needs to happen early
#' 3. channel inference before dyebias
#' 4. noob should happen last, pOOBAH before noob because noob modifies oob
#'
#' @param sdf SigDF
#' @param prep code that indicates preprocessing functions and their
#' execution order (functions on the left is executed first).
#' @param prep_args optional argument list to individual functions, e.g.,
#' prepSesame(sdf, prep_args=list(Q=list(mask_names = "design_issue")))
#' sets qualityMask(sdf, mask_names = "design_issue")
#' @return SigDF
#' @examples
#' sdf <- sesameDataGet("MM285.1.SigDF")
#' sdf1 <- prepSesame(sdf, "QCDPB")
#' @export
prepSesame <- function(sdf, prep = "QCDPB", prep_args = NULL) {
    registry <- prepSesameList()

    steps <- str_split(prep,"")[[1]]
    stopifnot(all(steps %in% registry$code))

    ## fold the steps over the SigDF; extra arguments registered under the
    ## step's code in prep_args are appended to the call
    Reduce(function(current, step) {
        fname <- registry[registry$code == step, "func"]
        do.call(get(fname), c(list(current), prep_args[[step]]))
    }, steps, sdf)
}
60 |
## Post-process a single result: apply `func` to `ret` (forwarding `...`),
## or hand `ret` back untouched when `func` is NULL.
wrap_openSesame1 <- function(func, ret, ...) {
    if (!is.null(func)) {
        return(func(ret, ...))
    }
    ret
}
68 |
## Combine per-sample results.
##
## When every element of `ret` is numeric and all have the same length, the
## elements are bound column-wise into a matrix; otherwise the list is
## returned as is. In both cases, if the result carries no column / element
## names and `x` is a character vector of matching length, basename(x) is
## used as the names.
wrap_openSesame <- function(x, ret) {
    labels_fit <- function(n) is.character(x) && length(x) == n

    bindable <- all(vapply(ret, is.numeric, logical(1))) &&
        length(unique(vapply(ret, length, integer(1)))) == 1

    if (!bindable) { # others
        if (is.null(names(ret)) && labels_fit(length(ret))) {
            names(ret) <- basename(x)
        }
        return(ret)
    }

    ## getBetas, getAFs, ...
    combined <- do.call(cbind, ret)
    if (is.null(colnames(combined)) && labels_fit(ncol(combined))) {
        colnames(combined) <- basename(x)
    }
    combined
}
88 |
#' The openSesame pipeline
#'
#' This function is a simple wrapper of noob + nonlinear dye bias
#' correction + pOOBAH masking.
#'
#' Please use mask=FALSE to turn off masking.
#'
#' If the input is an IDAT prefix or a \code{SigDF}, the output is
#' the beta value numerics.
#'
#' A single directory path is expanded to the IDAT prefixes found in it.
#' Multiple IDAT prefixes, or a list of \code{SigDF}s, are processed one by
#' one through \code{bplapply} and the per-sample results are combined.
#'
#' @param x SigDF(s), IDAT prefix(es)
#' @param prep preprocessing code, see ?prepSesame
#' @param prep_args optional preprocessing argument list, see ?prepSesame
#' @param manifest optional dynamic manifest
#' @param func either getBetas or getAFs, if NULL, then return SigDF list
#' @param platform optional platform string
#' @param BPPARAM get parallel with MulticoreParam(n)
#' @param min_beads minimum bead number, probes with R or G smaller than
#' this threshold will be masked. If NULL, no filtering based on bead
#' count will be applied. Default to 1.
#' @param ... additional arguments passed on to \code{func}
#' (getBetas by default)
#' @return for a single sample, the value of \code{func} (by default a
#' numeric vector of processed beta values), or the preprocessed
#' \code{SigDF} when \code{func} is NULL. For multiple samples, a matrix
#' with one column per sample when all per-sample results are numeric and of
#' equal length, otherwise a list of the per-sample results.
#' @import BiocParallel
#' @examples
#'
#' in_dir <- system.file("extdata", "", package = "sesameData")
#' betas <- openSesame(in_dir)
#' ## or
#' IDATprefixes <- searchIDATprefixes(in_dir)
#' betas <- openSesame(IDATprefixes)
#'
#' @export
openSesame <- function(
    x, prep = "QCDPB", prep_args = NULL, manifest = NULL,
    func = getBetas, BPPARAM=SerialParam(), platform = "",
    min_beads = 1, ...) {

    ## expand if a directory
    if (length(x) == 1 && is(x, 'character') && dir.exists(x)) {
        x <- searchIDATprefixes(x)
    }

    if (is(x, "SigDF")) {
        wrap_openSesame1(func, prepSesame(x, prep, prep_args), ...)
    } else if (is(x, 'character')) {
        if (length(x) == 1) {
            wrap_openSesame1(func, prepSesame(readIDATpair(
                x, platform = platform, manifest = manifest,
                min_beads = min_beads), prep, prep_args), ...)
        } else { # multiple IDAT prefixes
            ## min_beads must be forwarded explicitly, otherwise every
            ## per-sample call silently falls back to the default
            wrap_openSesame(x, bplapply(x, openSesame,
                platform = platform, prep = prep, prep_args = prep_args,
                func = func, manifest = manifest, min_beads = min_beads,
                BPPARAM=BPPARAM, ...))
        }
    } else if (is(x, "list") && length(x) > 0 && is(x[[1]], "SigDF")) {
        ## list of SigDFs; `func` is named in full so that the call does
        ## not depend on partial argument matching
        wrap_openSesame(x, bplapply(x, openSesame,
            platform = platform, prep = prep, prep_args = prep_args,
            func = func, manifest = manifest, BPPARAM=BPPARAM, ...))
    } else {
        stop("Unsupported input")
    }
}
151 |
152 |
--------------------------------------------------------------------------------
/R/species.R:
--------------------------------------------------------------------------------
## Pick the descriptive fields (scientific name, taxon ID, common name,
## assembly) of one species out of an address object's species table.
speciesInfo <- function(addr, species) {
    fields <- c("scientificName", "taxonID", "commonName", "assembly")
    addr$species[[species]][fields]
}
5 |
6 | #' Update a SigDF with a species- or strain-specific manifest
7 | #'
8 | #' Refreshes Infinium-I color channels and the probe mask; \code{species}
9 | #' takes precedence over \code{strain}. Also sets attr(,"species").
10 | #' @param sdf a \code{SigDF}
11 | #' @param species species name, must be in \code{names(addr$species)}
12 | #' @param strain strain name, must be in \code{names(addr$strain)}
13 | #' @param addr species/strain address object, fetched when NULL
14 | #' @param verbose print more messages
15 | #' @return a \code{SigDF} with updated color channel and mask
16 | #' @examples
17 | #' sdf <- sesameDataGet('Mammal40.1.SigDF')
18 | #' sdf_mouse <- updateSigDF(sdf, species="mus_musculus")
19 | #'
20 | #' @export
21 | updateSigDF <- function(
22 |     sdf, species = NULL, strain = NULL, addr = NULL, verbose = FALSE) {
23 |
24 |     use_species <- !is.null(species)
25 |     if (!use_species && is.null(strain)) {
26 |         stop("Please specify a species or strain.")
27 |     }
28 |     if (is.null(addr)) { # fetch the address object matching the request
29 |         addr <- sesameDataGet(sprintf(
30 |             if (use_species) "%s.addressSpecies" else "%s.addressStrain",
31 |             sdfPlatform(sdf, verbose = verbose)))
32 |     }
33 |     if (use_species) {
34 |         stopifnot(species %in% names(addr$species))
35 |         addrS <- addr$species[[species]]
36 |         sdf <- sdfMsg(sdf, verbose, "Update using species: %s", species)
37 |     } else {
38 |         stopifnot(strain %in% names(addr$strain))
39 |         addrS <- addr$strain[[strain]]
40 |         sdf <- sdfMsg(sdf, verbose, "Update using strain: %s", strain)
41 |     }
42 |
43 |     ## position of each probe of sdf in the address ordering (NA if absent)
44 |     hit <- match(sdf$Probe_ID, addr$ordering$Probe_ID)
45 |     found <- !is.na(hit)
46 |     ## recolor matched Inf-I probes with non-NA value (NA: mapping issues)
47 |     recolor <- found & !is.na(addrS$col[hit]) & (sdf$col != "2")
48 |     new_col <- as.character(addrS$col[hit[recolor]])
49 |     new_col[is.na(new_col)] <- '2'
50 |     sdf$col[recolor] <- factor(new_col, levels=c("G","R","2"))
51 |
52 |     ## add mask: existing mask entries are kept, manifest masks are OR-ed in
53 |     sdf$mask <- sdf$mask | (found & addrS$mask[hit])
54 |     sdf
55 | }
56 |
57 | species_ret <- function(
58 |     return.auc, return.species, species, auc, sdf, addr, verbose) {
59 |     if (return.auc) { # takes priority over return.species
60 |         return(auc)
61 |     }
62 |     if (return.species) {
63 |         return(speciesInfo(addr, species))
64 |     }
65 |     updateSigDF(sdf, species = species, addr = addr, verbose = verbose)
66 | }
67 |
68 | #' Infer Species
69 | #'
70 | #' We infer species based on probe p-values (pOOBAH) and alignment scores.
71 | #' An AUC is calculated for each species; y_true is 1 or 0 for
72 | #' pval <= threshold.pos or pval >= threshold.neg, respectively.
73 | #'
74 | #' @param sdf a \code{SigDF}
75 | #' @param topN Top n positive and negative probes used to infer species.
76 | #' Increasing this number can sometimes improve accuracy (DEFAULT: 1000)
77 | #' @param threshold.pos pvalue <= threshold.pos is positive (default: 0.01)
78 | #' @param threshold.neg pvalue >= threshold.neg is negative (default: 0.1)
79 | #' @param return.auc return the AUC calculated, overrides return.species
80 | #' @param return.species return the species information (scientificName,
81 | #' taxonID, commonName, assembly) instead of a SigDF
82 | #' @param verbose print more messages
83 | #' @return a SigDF updated for the inferred species, unless return.auc or
84 | #' return.species requests the AUC vector or the species information
85 | #' @examples
86 | #' sdf <- sesameDataGet("MM285.1.SigDF")
87 | #' sdf <- inferSpecies(sdf)
88 | #'
89 | #' ## all available species
90 | #' all_species <- names(sesameDataGet(sprintf(
91 | #' "%s.addressSpecies", sdfPlatform(sdf)))$species)
92 | #'
93 | #' @export
94 | inferSpecies <- function(sdf, topN = 1000, threshold.pos = 0.01,
95 |     threshold.neg = 0.1, return.auc = FALSE, return.species = FALSE,
96 |     verbose = FALSE) {
97 |
98 |     addr <- sesameDataGet(sprintf(
99 |         "%s.addressSpecies", sdfPlatform(sdf, verbose = verbose)))
100 |     df_as <- do.call(cbind, lapply(addr$species, function(x) x$AS))
101 |     rownames(df_as) <- addr$ordering$Probe_ID
102 |     pvalue <- pOOBAH(sdf, return.pval=TRUE)
103 |     pvalue <- pvalue[intersect(names(pvalue),rownames(df_as))] # shared probes
104 |     pos_probes <- sort(pvalue[pvalue <= threshold.pos],decreasing = FALSE)
105 |     neg_probes <- sort(pvalue[pvalue >= threshold.neg],decreasing = TRUE)
106 |     ## count with sum(): logical indexing would count NA p-values as success
107 |     success.rate <- sum(pvalue <= 0.05, na.rm = TRUE) / length(pvalue)
108 |     ## keep the same number of positive and negative probes.
109 |     topN1 <- min(length(neg_probes),length(pos_probes), topN)
110 |     pos <- pos_probes[seq_len(topN1)]
111 |     neg <- neg_probes[seq_len(topN1)]
112 |
113 |     y_true <- structure(c( # y_true = 1 for pos and y_true = 0 for neg
114 |         rep(TRUE,length(pos)),rep(FALSE,length(neg))),
115 |         names = c(names(pos), names(neg)))
116 |     ## no usable probes: fall back to the reference species (AUC is NULL)
117 |     if (length(y_true) == 0){
118 |         warning("Lack of useful signal. Use reference.")
119 |         return(species_ret(return.auc, return.species,
120 |             addr$reference, NULL, sdf, addr, verbose)) }
121 |     ## per-species AUC from the rank sum (Mann-Whitney U) of positive probes
122 |     n1 <- as.numeric(sum(y_true))
123 |     n2 <- as.numeric(sum(!y_true))
124 |     df_as <- df_as[names(y_true),,drop = FALSE]
125 |     ## df_as[df_as < 35] <- 35 # all under 35 is qualitatively the same
126 |     auc <- vapply(colnames(df_as),function(s) {
127 |         R1 <- sum(rank(df_as[,s])[seq_along(pos)])
128 |         U1 <- R1 - n1 * (n1 + 1)/2
129 |         U1/(n1 * n2)}, numeric(1))
130 |
131 |     ## the following is an empirical ladder where one is going to call
132 |     ## reference for lack of negative probes
133 |     if (success.rate >= 0.95 || (success.rate >= 0.80 && max(auc) < 0.50)) {
134 |         sdf <- sdfMsg(sdf, verbose, "Lack of negative probes. Use reference.")
135 |         species <- addr$reference
136 |     } else { species <- names(which.max(auc)) }
137 |
138 |     species_ret(return.auc, return.species, species, auc, sdf, addr, verbose)
139 | }
140 |
141 | #' Map the SDF (from overlap array platforms) to Mammal40 probes
142 | #'
143 | #' Replicates are merged by picking the best detection.
144 | #' @param sdf a \code{SigDF} object
145 | #' @return a named numeric vector of betas, ordered by Mammal40 probe ID
146 | #' @examples
147 | #' sdf <- sesameDataGet("Mammal40.1.SigDF")
148 | #' betas <- mapToMammal40(sdf[1:10,])
149 | #' @export
150 | mapToMammal40 <- function(sdf) {
151 |     probe_ids <- sesameDataGet("Mammal40.address")$ordering$Probe_ID
152 |     mapped <- getBetas(sdf, collapseToPfx = TRUE)[probe_ids]
153 |     names(mapped) <- probe_ids # probes absent from sdf stay NA but named
154 |     mapped
155 | }
156 |
--------------------------------------------------------------------------------
/R/tissue.R:
--------------------------------------------------------------------------------
1 | #' Compare array data with references (e.g., tissue, cell types)
2 | #'
3 | #' @param ref the reference beta values in SummarizedExperiment.
4 | #' One can download them from the sesameData package. See examples.
5 | #' @param betas matrix of betas for the target sample (a plain vector is
6 | #' treated as one sample). Optional: if NULL, only the reference is shown.
7 | #' @param stop.points stop points for the color palette.
8 | #' Defaults to blue, yellow.
9 | #' @param query_width the width of the query beta value matrix
10 | #' @param show_sample_names whether to show sample names (default: FALSE)
11 | #' @return grid object that contrasts the target sample with
12 | #' references.
13 | #' @export
14 | #' @examples
15 | #'
16 | #' sesameDataCache() # if not done yet
17 | #' compareReference(sesameDataGet("MM285.tissueSignature"))
18 | #' sesameDataGet_resetEnv()
19 | #'
20 | #' @importFrom SummarizedExperiment assay
21 | #' @importFrom SummarizedExperiment colData
22 | #' @importFrom SummarizedExperiment rowData
23 | compareReference <- function(
24 |     ref, betas = NULL, stop.points = NULL, query_width=0.3,
25 |     show_sample_names = FALSE) {
26 |
27 |     if (is.null(stop.points)) { stop.points <- c("blue","yellow") }
28 |
29 |     cd <- as_tibble(colData(ref))
30 |     rd <- as_tibble(rowData(ref))
31 |     md <- metadata(ref)
32 |     if (!is.null(betas) && is.null(dim(betas))) { # in case a vector
33 |         betas <- cbind(betas)
34 |     }
35 |
36 |     ## reference
37 |     g <- WHeatmap(assay(ref), cmp=CMPar(stop.points=stop.points,
38 |         dmin=0, dmax=1), xticklabels = show_sample_names, name="b1")
39 |     right <- "b1" # panel the branch color bar is placed next to
40 |     ## query samples (optional), drawn right of the reference
41 |     if (!is.null(betas)) {
42 |         ## drop = FALSE keeps a one-sample query as a one-column matrix
43 |         g <- g + WHeatmap(betas[rd$Probe_ID, , drop = FALSE],
44 |             RightOf("b1", width=query_width), name="b2",
45 |             cmp=CMPar(stop.points=stop.points, dmin=0, dmax=1),
46 |             xticklabels = show_sample_names, xticklabels.n=ncol(betas))
47 |         right <- "b2"
48 |     }
49 |     ## branch color bar (vertical)
50 |     g <- g + WColorBarV(rd$branch, RightOf(right, width=0.03),
51 |         cmp=CMPar(label2color=md$branch_color), name="bh")
52 |     ## tissue color bar (horizontal), branch should be replaced by CellType
53 |     g <- g + WColorBarH(cd$branch, TopOf("b1",height=0.03),
54 |         cmp=CMPar(label2color=md$branch_color), name="ti")
55 |     ## legends
56 |     g <- g + WLegendV("ti", TopRightOf("bh", just=c('left','top'), h.pad=0.02),
57 |         height=0.02)
58 |     g + WCustomize(mar.bottom=0.15, mar.right=0.06)
59 | }
60 |
61 | #' Compare mouse array data with mouse tissue references (deprecated)
62 | #'
63 | #' The function body only signals deprecation in favor of
64 | #' \code{compareReference}; none of the arguments is used.
65 | #'
66 | #' @param betas matrix of betas for the target sample (unused)
67 | #' @param ref the reference beta values in SummarizedExperiment (unused)
68 | #' @param color either blueYellow or fullJet (unused)
69 | #' @param query_width the width of the query beta value matrix (unused)
70 | #' @return the result of \code{.Deprecated}, called for its warning
71 | #' @export
72 | #' @examples
73 | #' cat("Deprecated, see compareReference")
74 | #' @importFrom SummarizedExperiment assay
75 | #' @importFrom SummarizedExperiment colData
76 | #' @importFrom SummarizedExperiment rowData
77 | compareMouseTissueReference <- function(
78 |     betas=NULL, ref=NULL, color="blueYellow", query_width=0.3) {
79 |     ## the arguments are accepted but never read
80 |     .Deprecated(new = "compareReference")
81 | }
82 |
83 | #' Infer the tissue of a single sample
84 | #'
85 | #' Each branch of the reference (row data) is scored by the mean signature
86 | #' methylation of the sample; the two best-scoring branches are reported.
87 | #' @param betas Named vector with probes and their corresponding beta value
88 | #' measurement
89 | #' @param reference Summarized Experiment with either hypomethylated or
90 | #' hypermethylated probe selection (row data), sample selection (column data),
91 | #' meta data, and the betas (assay). Fetched with sesameDataGet when NULL.
92 | #' @param platform String representing the array type of the betas and
93 | #' reference; inferred from the probe IDs when NULL (only MM285 for now)
94 | #' @param abs_delta_beta_min Numerical value indicating the absolute minimum
95 | #' required delta beta for the probe selection criteria
96 | #' @param auc_min Numeric value corresponding to the minimum AUC value
97 | #' required for a probe to be considered (currently not used)
98 | #' @param coverage_min Numeric value corresponding to the minimum coverage
99 | #' requirement for a probe to be considered. Coverage is defined here as the
100 | #' proportion of samples without an NA value at a given probe (currently
101 | #' not used)
102 | #' @param topN number of probes to at most use for each branch
103 | #' @return the top two branches and their scores, formatted as one string
104 | #' @examples
105 | #' sesameDataCache() # if not done yet
106 | #' sdf <- sesameDataGet("MM285.1.SigDF")
107 | #' inferTissue(getBetas(dyeBiasNL(noob(sdf))))
108 | #'
109 | #' sesameDataGet_resetEnv()
110 | #'
111 | #' @export
112 | inferTissue <- function(betas, reference = NULL, platform = NULL,
113 |     abs_delta_beta_min = 0.3, auc_min = 0.99, coverage_min = 0.80, topN = 15) {
114 |
115 |     stopifnot(is.numeric(betas))
116 |     if (is.null(reference)) {
117 |         if (is.null(platform)) {
118 |             platform <- inferPlatformFromProbeIDs(names(betas))
119 |         }
120 |         stopifnot(platform %in% c("MM285")) # TODO: add human
121 |         reference <- sesameDataGet(sprintf("%s.tissueSignature", platform))
122 |     }
123 |
124 |     rd <- rowData(reference)
125 |     ## as.character: vapply only names its result by X when X is character,
126 |     ## so a factor branch column would otherwise lose the branch labels
127 |     branches <- as.character(unique(rd$branch))
128 |     fracs <- sort(vapply(branches, function(branch) {
129 |         rd1 <- rd[
130 |             rd$branch == branch & abs(rd$delta_beta) >= abs_delta_beta_min, ]
131 |         ## strongest signature probes first, at most topN per branch
132 |         rd1 <- head(rd1[order(-abs(rd1$delta_beta)), ], n = topN)
133 |         ## negative-delta probes contribute 1 - beta, positive-delta beta
134 |         fracs1 <- c(1 - betas[rd1[rd1$delta_beta < 0, "Probe_ID"]],
135 |             betas[rd1[rd1$delta_beta > 0, "Probe_ID"]])
136 |         mean(fracs1, na.rm = TRUE)
137 |     }, numeric(1)), decreasing = TRUE)
138 |     sprintf("[%s](%1.1f) [%s](%1.1f)",
139 |         names(fracs)[1], fracs[1], names(fracs)[2], fracs[2])
140 |     ## results <- results[!(names(results) %in% ignore_branches)]
141 |     ## cd <- meta[match(colnames(results), meta$betas),]
142 |     ## se <- SummarizedExperiment(assays=list(results=results), colData=cd)
143 |     ## metadata(se)$tissue_color <- metadata(reference)$tissue_color
144 |     ## metadata(se)$branchID_color <- metadata(reference)$branchID_color
145 |     ## se
146 | }
147 |
148 |
149 |
--------------------------------------------------------------------------------