├── LICENSE ├── .travis.yml ├── tests ├── testthat.R └── testthat │ ├── test_detectionP.R │ └── test_impute.R ├── vignettes ├── include │ ├── logo.png │ ├── header.html │ ├── after_body.html │ └── site.css ├── _output.yaml ├── _site.yml └── QC.Rmd ├── NEWS ├── inst ├── extdata │ └── GSE36369_NonEBV_SignalA_SignalB_3samples_1k.txt.gz └── CITATION ├── .Rbuildignore ├── man ├── negControls.Rd ├── liftOver.Rd ├── SDFcollapseToPfx.Rd ├── prepSesameList.Rd ├── sesameQC-class.Rd ├── segmentBins.Rd ├── dataFrame2sesameQC.Rd ├── prefixMaskButC.Rd ├── prefixMaskButCG.Rd ├── MValueToBetaValue.Rd ├── binSignals.Rd ├── sesame_checkVersion.Rd ├── recommendedMaskNames.Rd ├── print.fileSet.Rd ├── getAFs.Rd ├── noMasked.Rd ├── listAvailableMasks.Rd ├── controls.Rd ├── BetaValueToMValue.Rd ├── palgen.Rd ├── dmContrasts.Rd ├── getBinCoordinates.Rd ├── normControls.Rd ├── sesameQCtoDF.Rd ├── sesamize.Rd ├── signalMU.Rd ├── sdfPlatform.Rd ├── sdf_write_table.Rd ├── resetMask.Rd ├── mapToMammal40.Rd ├── addMask.Rd ├── sesameAnno_buildAddressFile.Rd ├── SigDF.Rd ├── prefixMask.Rd ├── sesameAnno_readManifestTSV.Rd ├── setMask.Rd ├── sesameQC_plotRedGrnQQ.Rd ├── summaryExtractTest.Rd ├── print.DMLSummary.Rd ├── scrub.Rd ├── sesameQC_plotHeatSNPs.Rd ├── scrubSoft.Rd ├── calcEffectSize.Rd ├── predictAgeHorvath353.Rd ├── medianTotalIntensity.Rd ├── predictAgeSkinBlood.Rd ├── probeID_designType.Rd ├── sesameQC_getStats.Rd ├── diffRefSet.Rd ├── initFileSet.Rd ├── totalIntensities.Rd ├── dyeBiasL.Rd ├── inferEthnicity.Rd ├── sdf_read_table.Rd ├── sesameQC_plotBar.Rd ├── dyeBiasCorr.Rd ├── getAFTypeIbySumAlleles.Rd ├── checkLevels.Rd ├── updateSigDF.Rd ├── openSesameToFile.Rd ├── predictMouseAgeInMonth.Rd ├── compareMouseStrainReference.Rd ├── getMask.Rd ├── imputeBetasMatrixByMean.Rd ├── probeSuccessRate.Rd ├── matchDesign.Rd ├── readFileSet.Rd ├── detectionPnegEcdf.Rd ├── sesameQC_rankStats.Rd ├── sesameQC_plotBetaByDesign.Rd ├── sesameAnno_attachManifest.Rd ├── mapFileSet.Rd ├── 
meanIntensity.Rd ├── reIdentify.Rd ├── dyeBiasCorrMostBalanced.Rd ├── imputeBetas.Rd ├── imputeBetasByGenomicNeighbors.Rd ├── sesameAnno_buildManifestGRanges.Rd ├── betasCollapseToPfx.Rd ├── chipAddressToSignal.Rd ├── getRefSet.Rd ├── noob.Rd ├── getBetas.Rd ├── compareMouseTissueReference.Rd ├── qualityMask.Rd ├── parseGEOsignalMU.Rd ├── visualizeSegments.Rd ├── inferStrain.Rd ├── ELBAR.Rd ├── inferInfiniumIChannel.Rd ├── sesameQC_calcStats.Rd ├── deidentify.Rd ├── dyeBiasNL.Rd ├── twoCompsEst2.Rd ├── prepSesame.Rd ├── sliceFileSet.Rd ├── readIDATpair.Rd ├── searchIDATprefixes.Rd ├── formatVCF.Rd ├── inferSex.Rd ├── sesame-package.Rd ├── createUCSCtrack.Rd ├── pOOBAH.Rd ├── sesameAnno_download.Rd ├── sesameQC_plotIntensVsBetas.Rd ├── compareReference.Rd ├── convertProbeID.Rd ├── assemble_plots.Rd ├── estimateLeukocyte.Rd ├── visualizeGene.Rd ├── inferSpecies.Rd ├── bisConversionControl.Rd ├── visualizeProbes.Rd ├── DML.Rd ├── openSesame.Rd ├── predictAge.Rd ├── inferTissue.Rd ├── DMLpredict.Rd ├── visualizeRegion.Rd ├── cnSegmentation.Rd ├── DMR.Rd └── mLiftOver.Rd ├── R ├── zzz.R ├── ethnicity.R ├── palgen.R ├── GEO.R ├── track.R ├── feature_selection.R ├── channel_inference.R ├── utils.R ├── vcf.R ├── age.R ├── impute.R ├── deidentify.R ├── strain.R ├── match_design.R ├── open.R ├── species.R └── tissue.R ├── .gitignore ├── LICENSE.md ├── README.md ├── DESCRIPTION └── NAMESPACE /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2024 2 | COPYRIGHT HOLDER: Wanding Zhou 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | 3 | cache: packages 4 | 5 | r: bioc-devel -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 
library(sesame) 3 | 4 | test_check("sesame") 5 | -------------------------------------------------------------------------------- /vignettes/include/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zwdzwd/sesame/HEAD/vignettes/include/logo.png -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | CHANGES IN VERSION 1.0.0 2 | ------------------------- 3 | 4 | o First submission of SeSAMe package. -------------------------------------------------------------------------------- /inst/extdata/GSE36369_NonEBV_SignalA_SignalB_3samples_1k.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zwdzwd/sesame/HEAD/inst/extdata/GSE36369_NonEBV_SignalA_SignalB_3samples_1k.txt.gz -------------------------------------------------------------------------------- /tests/testthat/test_detectionP.R: -------------------------------------------------------------------------------- 1 | context("detectionP") 2 | test_that("test='detectionP' gives correct errors", { 3 | sdf <- sesameDataGet("EPIC.1.SigDF") 4 | expect_is(pOOBAH(sdf), "SigDF") 5 | }) 6 | 7 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ## Ignore travis config file 6 | ^\.travis\.yml$ 7 | ^\.git.* 8 | ^\..Rcheck$ 9 | ^inst/data$ 10 | .DS_Store 11 | ^.Renv-version$ 12 | ^LICENSE\.md$ 13 | -------------------------------------------------------------------------------- /vignettes/_output.yaml: -------------------------------------------------------------------------------- 1 | html_document: 2 | self_contained: true 3 | number_sections: no 4 | theme: flatly 5 | highlight: 
zenburn 6 | mathjax: null 7 | toc: true 8 | toc_float: 9 | collapsed: false 10 | toc_depth: 3 11 | df_print: paged 12 | css: include/site.css 13 | includes: 14 | in_header: include/header.html 15 | after_body: include/after_body.html 16 | 17 | -------------------------------------------------------------------------------- /man/negControls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detection.R 3 | \name{negControls} 4 | \alias{negControls} 5 | \title{get negative control signal} 6 | \usage{ 7 | negControls(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{a SigDF} 11 | } 12 | \value{ 13 | a data frame of negative control signals 14 | } 15 | \description{ 16 | get negative control signal 17 | } 18 | -------------------------------------------------------------------------------- /man/liftOver.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mLiftOver.R 3 | \name{liftOver} 4 | \alias{liftOver} 5 | \title{liftOver, see mLiftOver (renamed)} 6 | \usage{ 7 | liftOver(...) 
8 | } 9 | \arguments{ 10 | \item{...}{see mLiftOver} 11 | } 12 | \value{ 13 | imputed data, vector, matrix, SigDF(s) 14 | } 15 | \description{ 16 | liftOver, see mLiftOver (renamed) 17 | } 18 | -------------------------------------------------------------------------------- /man/SDFcollapseToPfx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{SDFcollapseToPfx} 4 | \alias{SDFcollapseToPfx} 5 | \title{collapse to probe prefix} 6 | \usage{ 7 | SDFcollapseToPfx(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{a SigDF object} 11 | } 12 | \value{ 13 | a data frame with updated Probe_ID 14 | } 15 | \description{ 16 | collapse to probe prefix 17 | } 18 | -------------------------------------------------------------------------------- /man/prepSesameList.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/open.R 3 | \name{prepSesameList} 4 | \alias{prepSesameList} 5 | \title{List supported prepSesame functions} 6 | \usage{ 7 | prepSesameList() 8 | } 9 | \value{ 10 | a data frame with code, func, description 11 | } 12 | \description{ 13 | List supported prepSesame functions 14 | } 15 | \examples{ 16 | prepSesameList() 17 | } 18 | -------------------------------------------------------------------------------- /man/sesameQC-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \docType{class} 4 | \name{sesameQC-class} 5 | \alias{sesameQC-class} 6 | \title{An S4 class to hold QC statistics} 7 | \value{ 8 | sesameQC object 9 | } 10 | \description{ 11 | An S4 class to hold QC statistics 12 | } 13 | \section{Slots}{ 14 | 15 | \describe{ 16 | \item{\code{stat}}{a list to store qc stats} 17 | }} 18 | 
19 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(libname, pkgname) { 2 | packageStartupMessage(' 3 | ---------------------------------------------------------- 4 | | SEnsible Step-wise Analysis of DNA MEthylation (SeSAMe) 5 | | -------------------------------------------------------- 6 | | Please cache auxiliary data by "sesameDataCache()". 7 | | This needs to be done only once per SeSAMe installation. 8 | ---------------------------------------------------------- 9 | ') 10 | } 11 | -------------------------------------------------------------------------------- /man/segmentBins.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.R 3 | \name{segmentBins} 4 | \alias{segmentBins} 5 | \title{Segment bins using DNAcopy} 6 | \usage{ 7 | segmentBins(bin.signals, bin.coords) 8 | } 9 | \arguments{ 10 | \item{bin.signals}{bin signals (input)} 11 | 12 | \item{bin.coords}{bin coordinates} 13 | } 14 | \value{ 15 | segment signal data frame 16 | } 17 | \description{ 18 | Segment bins using DNAcopy 19 | } 20 | -------------------------------------------------------------------------------- /man/dataFrame2sesameQC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{dataFrame2sesameQC} 4 | \alias{dataFrame2sesameQC} 5 | \title{Convert data frame to sesameQC object} 6 | \usage{ 7 | dataFrame2sesameQC(df) 8 | } 9 | \arguments{ 10 | \item{df}{a publicQC data frame} 11 | } 12 | \value{ 13 | a list of sesameQC objects 14 | } 15 | \description{ 16 | This function converts a data frame back to a list of sesameQC objects 17 | } 18 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | # History files 4 | .Rhistory 5 | .Rapp.history 6 | 7 | # Session Data files 8 | .RData 9 | # Example code in package build process 10 | *-Ex.R 11 | # RStudio files 12 | .Rproj.user/ 13 | # produced vignettes 14 | vignettes/*.html 15 | vignettes/*.pdf 16 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 17 | .httr-oauth 18 | .Rproj.user 19 | /doc/ 20 | /Meta/ 21 | .DS_Store 22 | vignettes/_site 23 | vignettes/.DS_Store 24 | inst/.DS_Store 25 | -------------------------------------------------------------------------------- /man/prefixMaskButC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{prefixMaskButC} 4 | \alias{prefixMaskButC} 5 | \title{Mask all but C probes in SigDF} 6 | \usage{ 7 | prefixMaskButC(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{SigDF} 11 | } 12 | \value{ 13 | SigDF 14 | } 15 | \description{ 16 | Mask all but C probes in SigDF 17 | } 18 | \examples{ 19 | sdf <- resetMask(sesameDataGet("MM285.1.SigDF")) 20 | sum(prefixMaskButC(sdf)$mask) 21 | } 22 | -------------------------------------------------------------------------------- /man/prefixMaskButCG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{prefixMaskButCG} 4 | \alias{prefixMaskButCG} 5 | \title{Mask all but CG probes in SigDF} 6 | \usage{ 7 | prefixMaskButCG(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{SigDF} 11 | } 12 | \value{ 13 | SigDF 14 | } 15 | \description{ 16 | Mask all but CG probes in SigDF 17 | } 18 | \examples{ 19 | sdf <- resetMask(sesameDataGet("MM285.1.SigDF")) 20 | 
sum(prefixMaskButCG(sdf)$mask) 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/test_impute.R: -------------------------------------------------------------------------------- 1 | context("impute") 2 | 3 | test_that("Impute mean functions properly", { 4 | mx <- cbind(a = c(NA, 2, 3, 4), b = c(1, 6, 5, NA), c = c(3, 6, 7, 8)) 5 | mx_imputed_cols <- imputeBetasMatrixByMean(mx, axis = 2) 6 | mx_imputed_rows <- imputeBetasMatrixByMean(mx, axis = 1) 7 | expect_true(mx_imputed_cols[1,1] == 3) 8 | expect_true(mx_imputed_cols[4,2] == 4) 9 | expect_true(mx_imputed_rows[1,1] == 2) 10 | expect_true((mx_imputed_rows[4,2]) == 6) 11 | }) 12 | -------------------------------------------------------------------------------- /man/MValueToBetaValue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{MValueToBetaValue} 4 | \alias{MValueToBetaValue} 5 | \title{Convert M-value to beta-value} 6 | \usage{ 7 | MValueToBetaValue(m) 8 | } 9 | \arguments{ 10 | \item{m}{a vector of M values} 11 | } 12 | \value{ 13 | a vector of beta values 14 | } 15 | \description{ 16 | Convert M-value to beta-value (aka inverse logit transform) 17 | } 18 | \examples{ 19 | MValueToBetaValue(c(-3, 0, 3)) 20 | } 21 | -------------------------------------------------------------------------------- /man/binSignals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.R 3 | \name{binSignals} 4 | \alias{binSignals} 5 | \title{Bin signals from probe signals} 6 | \usage{ 7 | binSignals(probe.signals, bin.coords, probeCoords) 8 | } 9 | \arguments{ 10 | \item{probe.signals}{probe signals} 11 | 12 | \item{bin.coords}{bin coordinates} 13 | 14 | \item{probeCoords}{probe coordinates} 15 | } 16 | \value{ 17 | 
bin signals 18 | } 19 | \description{ 20 | require GenomicRanges 21 | } 22 | -------------------------------------------------------------------------------- /man/sesame_checkVersion.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{sesame_checkVersion} 4 | \alias{sesame_checkVersion} 5 | \title{Check SeSAMe versions} 6 | \usage{ 7 | sesame_checkVersion() 8 | } 9 | \value{ 10 | print the version of sesame, sesameData, Bioconductor and R 11 | } 12 | \description{ 13 | print package version of sesame and the packages it depends on to help troubleshoot 14 | installation issues. 15 | } 16 | \examples{ 17 | sesame_checkVersion() 18 | } 19 | -------------------------------------------------------------------------------- /man/recommendedMaskNames.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{recommendedMaskNames} 4 | \alias{recommendedMaskNames} 5 | \title{Recommended mask names for each Infinium platform} 6 | \usage{ 7 | recommendedMaskNames() 8 | } 9 | \value{ 10 | a named list of mask names 11 | } 12 | \description{ 13 | The returned name is the db name used in KYCG.mask 14 | } 15 | \examples{ 16 | recommendedMaskNames()[["EPICv2"]] 17 | recommendedMaskNames()[["EPIC"]] 18 | 19 | } 20 | -------------------------------------------------------------------------------- /man/print.fileSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileSet.R 3 | \name{print.fileSet} 4 | \alias{print.fileSet} 5 | \title{Print a fileSet} 6 | \usage{ 7 | \method{print}{fileSet}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{a sesame::fileSet} 11 | 12 | \item{...}{stuff for print} 13 | } 14 | \value{ 15 | string representation 16 | } 17 | \description{ 18 | Print a fileSet 19 | } 20 | \examples{ 21 | 22 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2')) 23 | fset 24 | 25 | } 26 | -------------------------------------------------------------------------------- /man/getAFs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{getAFs} 4 | \alias{getAFs} 5 | \title{Get allele frequency} 6 | \usage{ 7 | getAFs(sdf, ...) 8 | } 9 | \arguments{ 10 | \item{sdf}{\code{SigDF}} 11 | 12 | \item{...}{additional options to getBetas} 13 | } 14 | \value{ 15 | allele frequency 16 | } 17 | \description{ 18 | Get allele frequency 19 | } 20 | \examples{ 21 | sesameDataCache() # if not done yet 22 | sdf <- sesameDataGet('EPIC.1.SigDF') 23 | af <- getAFs(sdf) 24 | } 25 | -------------------------------------------------------------------------------- /vignettes/include/header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | -------------------------------------------------------------------------------- /man/noMasked.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SigDFMethods.R 3 | \name{noMasked} 4 | \alias{noMasked} 5 | \title{remove masked probes from SigDF} 6 | \usage{ 7 | noMasked(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{input SigDF object} 11 | } 12 | \value{ 13 | a SigDF object without masked probes 14 | } 15 | \description{ 16 | remove masked probes from SigDF 17 | } 18 | \examples{ 19 | sesameDataCache() 20 | sdf <- sesameDataGet("EPIC.1.SigDF") 21 | sdf <- pOOBAH(sdf) 22 | 23 | sdf_noMasked <- noMasked(sdf) 24 | 25 | } 26 | 
-------------------------------------------------------------------------------- /man/listAvailableMasks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{listAvailableMasks} 4 | \alias{listAvailableMasks} 5 | \title{list existing quality masks for a SigDF} 6 | \usage{ 7 | listAvailableMasks(platform, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{platform}{EPIC, MM285, HM450 etc} 11 | 12 | \item{verbose}{print more messages} 13 | } 14 | \value{ 15 | a tibble of masks 16 | } 17 | \description{ 18 | list existing quality masks for a SigDF 19 | } 20 | \examples{ 21 | listAvailableMasks("EPICv2") 22 | } 23 | -------------------------------------------------------------------------------- /man/controls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SigDFMethods.R 3 | \name{controls} 4 | \alias{controls} 5 | \title{get the controls attributes} 6 | \usage{ 7 | controls(sdf, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{verbose}{print more messages} 13 | } 14 | \value{ 15 | the controls data frame 16 | } 17 | \description{ 18 | get the controls attributes 19 | } 20 | \examples{ 21 | sesameDataCache() # if not done yet 22 | sdf <- sesameDataGet('EPIC.1.SigDF') 23 | head(controls(sdf)) 24 | } 25 | -------------------------------------------------------------------------------- /man/BetaValueToMValue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{BetaValueToMValue} 4 | \alias{BetaValueToMValue} 5 | \title{Convert beta-value to M-value} 6 | \usage{ 7 | BetaValueToMValue(b) 8 | } 9 | \arguments{ 10 | \item{b}{vector of beta values} 11 | 
} 12 | \value{ 13 | a vector of M values 14 | } 15 | \description{ 16 | Logit transform a beta value vector to M-value vector. 17 | } 18 | \details{ 19 | Convert beta-value to M-value (aka logit transform) 20 | } 21 | \examples{ 22 | BetaValueToMValue(c(0.1, 0.5, 0.9)) 23 | } 24 | -------------------------------------------------------------------------------- /vignettes/include/after_body.html: -------------------------------------------------------------------------------- 1 | 11 | -------------------------------------------------------------------------------- /man/palgen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/palgen.R 3 | \name{palgen} 4 | \alias{palgen} 5 | \title{Generate some additional color palettes} 6 | \usage{ 7 | palgen(pal, n = 150, space = "Lab") 8 | } 9 | \arguments{ 10 | \item{pal}{a string for adhoc pals} 11 | 12 | \item{n}{the number of colors for interpolation} 13 | 14 | \item{space}{rgb or Lab} 15 | } 16 | \value{ 17 | a palette-generating function 18 | } 19 | \description{ 20 | Generate some additional color palettes 21 | } 22 | \examples{ 23 | library(pals) 24 | pal.bands(palgen("whiteturbo")) 25 | } 26 | -------------------------------------------------------------------------------- /man/dmContrasts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{dmContrasts} 4 | \alias{dmContrasts} 5 | \title{List all contrasts of a DMLSummary} 6 | \usage{ 7 | dmContrasts(smry) 8 | } 9 | \arguments{ 10 | \item{smry}{a DMLSummary object} 11 | } 12 | \value{ 13 | a character vector of contrasts 14 | } 15 | \description{ 16 | List all contrasts of a DMLSummary 17 | } 18 | \examples{ 19 | data <- sesameDataGet('HM450.76.TCGA.matched') 20 | smry <- DML(data$betas[1:10,], ~type, 
meta=data$sampleInfo) 21 | dmContrasts(smry) 22 | 23 | sesameDataGet_resetEnv() 24 | } 25 | -------------------------------------------------------------------------------- /man/getBinCoordinates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.R 3 | \name{getBinCoordinates} 4 | \alias{getBinCoordinates} 5 | \title{Get bin coordinates} 6 | \usage{ 7 | getBinCoordinates(seqLength, gapInfo, tilewidth = 50000, probeCoords) 8 | } 9 | \arguments{ 10 | \item{seqLength}{chromosome information object} 11 | 12 | \item{gapInfo}{chromosome gap information} 13 | 14 | \item{tilewidth}{tile width for smoothing} 15 | 16 | \item{probeCoords}{probe coordinates} 17 | } 18 | \value{ 19 | bin.coords 20 | } 21 | \description{ 22 | requires GenomicRanges, IRanges 23 | } 24 | -------------------------------------------------------------------------------- /man/normControls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dye_bias.R 3 | \name{normControls} 4 | \alias{normControls} 5 | \title{get normalization control signal} 6 | \usage{ 7 | normControls(sdf, average = FALSE, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a SigDF} 11 | 12 | \item{average}{whether to average} 13 | 14 | \item{verbose}{print more messages} 15 | } 16 | \value{ 17 | a data frame of normalization control signals 18 | } 19 | \description{ 20 | get normalization control signal from SigDF. 21 | The function optionally takes mean for each channel. 
22 | } 23 | -------------------------------------------------------------------------------- /man/sesameQCtoDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQCtoDF} 4 | \alias{sesameQCtoDF} 5 | \title{Convert a list of sesameQC to data frame} 6 | \usage{ 7 | sesameQCtoDF(qcs, cols = c("frac_dt_cg", "RGdistort", "RGratio")) 8 | } 9 | \arguments{ 10 | \item{qcs}{sesameQCs} 11 | 12 | \item{cols}{QC columns, use NULL to report all} 13 | } 14 | \value{ 15 | a data frame 16 | } 17 | \description{ 18 | Convert a list of sesameQC to data frame 19 | } 20 | \examples{ 21 | sdf <- sesameDataGet("EPIC.1.SigDF") 22 | qcs <- sesameQC_calcStats(sdf, "detection") 23 | sesameQCtoDF(qcs) 24 | } 25 | -------------------------------------------------------------------------------- /man/sesamize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{sesamize} 4 | \alias{sesamize} 5 | \title{sesamize function is deprecated. 6 | Please check https://github.com/zwdzwd/sesamize for previous scripts} 7 | \usage{ 8 | sesamize(...) 9 | } 10 | \arguments{ 11 | \item{...}{arguments for sesamize} 12 | } 13 | \value{ 14 | a message text for deprecated function 15 | } 16 | \description{ 17 | sesamize function is deprecated. 18 | Please check https://github.com/zwdzwd/sesamize for previous scripts 19 | } 20 | \examples{ 21 | cat("Deprecated. 
see https://github.com/zwdzwd/sesamize") 22 | } 23 | -------------------------------------------------------------------------------- /man/signalMU.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SigDFMethods.R 3 | \name{signalMU} 4 | \alias{signalMU} 5 | \title{report M and U for regular probes} 6 | \usage{ 7 | signalMU(sdf, mask = TRUE, MU = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{mask}{whether to apply mask} 13 | 14 | \item{MU}{add a column for M+U} 15 | } 16 | \value{ 17 | a data frame of M and U columns 18 | } 19 | \description{ 20 | report M and U for regular probes 21 | } 22 | \examples{ 23 | sesameDataCache() # if not done yet 24 | sdf <- sesameDataGet('EPIC.1.SigDF') 25 | head(signalMU(sdf)) 26 | } 27 | -------------------------------------------------------------------------------- /man/sdfPlatform.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SigDFMethods.R 3 | \name{sdfPlatform} 4 | \alias{sdfPlatform} 5 | \title{Convenience function to output platform attribute of SigDF} 6 | \usage{ 7 | sdfPlatform(sdf, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a SigDF object} 11 | 12 | \item{verbose}{print more messages} 13 | } 14 | \value{ 15 | the platform string for the SigDF object 16 | } 17 | \description{ 18 | Convenience function to output platform attribute of SigDF 19 | } 20 | \examples{ 21 | sesameDataCache() 22 | sdf <- sesameDataGet('EPIC.1.SigDF') 23 | sdfPlatform(sdf) 24 | 25 | } 26 | -------------------------------------------------------------------------------- /man/sdf_write_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/SigDFMethods.R 3 | \name{sdf_write_table} 4 | \alias{sdf_write_table} 5 | \title{write SigDF to table file} 6 | \usage{ 7 | sdf_write_table(sdf, ...) 8 | } 9 | \arguments{ 10 | \item{sdf}{the \code{SigDF} to output} 11 | 12 | \item{...}{additional argument to write.table} 13 | } 14 | \value{ 15 | write SigDF to table file 16 | } 17 | \description{ 18 | write SigDF to table file 19 | } 20 | \examples{ 21 | sesameDataCache() # if not done yet 22 | sdf <- sesameDataGet('EPIC.1.SigDF') 23 | sdf_write_table(sdf, file=sprintf("\%s/sigdf.txt", tempdir())) 24 | } 25 | -------------------------------------------------------------------------------- /man/resetMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{resetMask} 4 | \alias{resetMask} 5 | \title{Reset Masking} 6 | \usage{ 7 | resetMask(sdf, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{verbose}{print more messages} 13 | } 14 | \value{ 15 | a new \code{SigDF} with mask reset to all FALSE 16 | } 17 | \description{ 18 | Reset Masking 19 | } 20 | \examples{ 21 | sesameDataCache() # if not done yet 22 | sdf <- sesameDataGet('EPIC.1.SigDF') 23 | sum(sdf$mask) 24 | sdf <- addMask(sdf, c("cg14057072", "cg22344912")) 25 | sum(sdf$mask) 26 | sum(resetMask(sdf)$mask) 27 | } 28 | -------------------------------------------------------------------------------- /man/mapToMammal40.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/species.R 3 | \name{mapToMammal40} 4 | \alias{mapToMammal40} 5 | \title{Map the SDF (from overlap array platforms) 6 | Replicates are merged by picking the best detection} 7 | \usage{ 8 | mapToMammal40(sdf) 9 | } 10 | \arguments{ 11 | \item{sdf}{a \code{SigDF} object} 12 | } 13 | \value{ 14 | a named 
numeric vector for beta values 15 | } 16 | \description{ 17 | Map the SDF (from overlap array platforms) 18 | Replicates are merged by picking the best detection 19 | } 20 | \examples{ 21 | sdf <- sesameDataGet("Mammal40.1.SigDF") 22 | betas <- mapToMammal40(sdf[1:10,]) 23 | } 24 | -------------------------------------------------------------------------------- /man/addMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{addMask} 4 | \alias{addMask} 5 | \title{Add probes to mask} 6 | \usage{ 7 | addMask(sdf, probes) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{probes}{a vector of probe IDs or a logical vector with TRUE 13 | representing masked probes} 14 | } 15 | \value{ 16 | a \code{SigDF} with added mask 17 | } 18 | \description{ 19 | This function essentially merges existing probe masking 20 | with new probes to mask 21 | } 22 | \examples{ 23 | sdf <- sesameDataGet('EPIC.1.SigDF') 24 | sum(sdf$mask) 25 | sum(addMask(sdf, c("cg14057072", "cg22344912"))$mask) 26 | } 27 | -------------------------------------------------------------------------------- /man/sesameAnno_buildAddressFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesameAnno.R 3 | \name{sesameAnno_buildAddressFile} 4 | \alias{sesameAnno_buildAddressFile} 5 | \title{Build sesame ordering address file from tsv} 6 | \usage{ 7 | sesameAnno_buildAddressFile(tsv) 8 | } 9 | \arguments{ 10 | \item{tsv}{a platform name, a file path or a tibble/data.frame manifest file} 11 | } 12 | \value{ 13 | a list of ordering and controls 14 | } 15 | \description{ 16 | Build sesame ordering address file from tsv 17 | } 18 | \examples{ 19 | \dontrun{ 20 | tsv = sesameAnno_download("HM450.hg38.manifest.tsv.gz") 21 | addr <- 
sesameAnno_buildAddressFile(tsv) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /man/SigDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SigDFMethods.R 3 | \name{SigDF} 4 | \alias{SigDF} 5 | \title{SigDF validation from a plain data frame} 6 | \usage{ 7 | SigDF(df, platform = "EPIC", ctl = NULL) 8 | } 9 | \arguments{ 10 | \item{df}{a \code{data.frame} with Probe_ID, MG, MR, UG, UR, col and mask} 11 | 12 | \item{platform}{a string to specify the array platform} 13 | 14 | \item{ctl}{optional control probe data frame} 15 | } 16 | \value{ 17 | a \code{SigDF} object 18 | } 19 | \description{ 20 | SigDF validation from a plain data frame 21 | } 22 | \examples{ 23 | sesameDataCache() # if not done yet 24 | sdf <- sesameDataGet('EPIC.1.SigDF') 25 | } 26 | -------------------------------------------------------------------------------- /man/prefixMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{prefixMask} 4 | \alias{prefixMask} 5 | \title{Mask SigDF by probe ID prefix} 6 | \usage{ 7 | prefixMask(sdf, prefixes = NULL, invert = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{SigDF} 11 | 12 | \item{prefixes}{prefix characters} 13 | 14 | \item{invert}{use the complement set} 15 | } 16 | \value{ 17 | SigDF 18 | } 19 | \description{ 20 | Mask SigDF by probe ID prefix 21 | } 22 | \examples{ 23 | sdf <- resetMask(sesameDataGet("MM285.1.SigDF")) 24 | sum(prefixMask(sdf, c("ctl","rs"))$mask) 25 | sum(prefixMask(sdf, c("ctl"))$mask) 26 | sum(prefixMask(sdf, c("ctl","rs","ch"))$mask) 27 | } 28 | -------------------------------------------------------------------------------- /man/sesameAnno_readManifestTSV.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesameAnno.R 3 | \name{sesameAnno_readManifestTSV} 4 | \alias{sesameAnno_readManifestTSV} 5 | \title{Read manifest file to a tsv format} 6 | \usage{ 7 | sesameAnno_readManifestTSV(tsv_fn) 8 | } 9 | \arguments{ 10 | \item{tsv_fn}{tsv file path} 11 | } 12 | \value{ 13 | a manifest as a tibble 14 | } 15 | \description{ 16 | Read manifest file to a tsv format 17 | } 18 | \examples{ 19 | \dontrun{ 20 | tsv = sesameAnno_download("HM450.hg38.manifest.tsv.gz") 21 | mft <- sesameAnno_readManifestTSV(tsv) 22 | ## direct access 23 | mft <- sesameAnno_readManifestTSV("HM450.hg38.manifest") 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/setMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{setMask} 4 | \alias{setMask} 5 | \title{Set mask to only the probes specified} 6 | \usage{ 7 | setMask(sdf, probes) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{probes}{a vector of probe IDs or a logical vector with TRUE 13 | representing masked probes} 14 | } 15 | \value{ 16 | a \code{SigDF} with added mask 17 | } 18 | \description{ 19 | Set mask to only the probes specified 20 | } 21 | \examples{ 22 | sdf <- sesameDataGet('EPIC.1.SigDF') 23 | sum(sdf$mask) 24 | sum(setMask(sdf, "cg14959801")$mask) 25 | sum(setMask(sdf, c("cg14057072", "cg22344912"))$mask) 26 | } 27 | -------------------------------------------------------------------------------- /man/sesameQC_plotRedGrnQQ.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_plotRedGrnQQ} 4 | 
\alias{sesameQC_plotRedGrnQQ} 5 | \title{Plot red-green QQ-Plot using Infinium-I Probes} 6 | \usage{ 7 | sesameQC_plotRedGrnQQ(sdf, main = "R-G QQ Plot", ...) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{main}{plot title} 13 | 14 | \item{...}{additional options to qqplot} 15 | } 16 | \value{ 17 | create a qqplot 18 | } 19 | \description{ 20 | Plot red-green QQ-Plot using Infinium-I Probes 21 | } 22 | \examples{ 23 | sesameDataCache() # if not done yet 24 | sdf <- sesameDataGet('EPIC.1.SigDF') 25 | sesameQC_plotRedGrnQQ(sdf) 26 | } 27 | -------------------------------------------------------------------------------- /vignettes/include/site.css: -------------------------------------------------------------------------------- 1 | .html-widget { 2 | margin-bottom: 1em; 3 | } 4 | h1 .header-section-number::after { 5 | content: "."; 6 | } 7 | th { 8 | background-color: #336699; 9 | color: white; 10 | } 11 | tr:nth-child(even) {background-color: #f2f2f2;} 12 | table td { 13 | padding: 3px 10px; 14 | border-top: none; 15 | border-left: none; 16 | border-bottom: none; 17 | border-right: none; 18 | } 19 | h1 { 20 | font-size: 28px; 21 | } 22 | 23 | h2 { 24 | font-size: 28px; 25 | } 26 | 27 | .section.level2 h2 { 28 | padding-top: 65px; 29 | margin-top: -40px; 30 | } 31 | .section.level1 h1 { 32 | padding-top: 65px; 33 | margin-top: -40px; 34 | } -------------------------------------------------------------------------------- /man/summaryExtractTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{summaryExtractTest} 4 | \alias{summaryExtractTest} 5 | \title{Extract slope information from DMLSummary} 6 | \usage{ 7 | summaryExtractTest(smry) 8 | } 9 | \arguments{ 10 | \item{smry}{DMLSummary from DML command} 11 | } 12 | \value{ 13 | a table of slope and p-value 14 | } 15 | \description{ 16 | Extract slope information 
from DMLSummary 17 | } 18 | \examples{ 19 | sesameDataCache() # in case not done yet 20 | data <- sesameDataGet('HM450.76.TCGA.matched') 21 | smry <- DML(data$betas[1:10,], ~type, meta=data$sampleInfo) 22 | slopes <- summaryExtractTest(smry) 23 | 24 | sesameDataGet_resetEnv() 25 | } 26 | -------------------------------------------------------------------------------- /man/print.DMLSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{print.DMLSummary} 4 | \alias{print.DMLSummary} 5 | \title{Print DMLSummary object} 6 | \usage{ 7 | \method{print}{DMLSummary}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a DMLSummary object} 11 | 12 | \item{...}{extra parameter for print} 13 | } 14 | \value{ 15 | print DMLSummary result on screen 16 | } 17 | \description{ 18 | Print DMLSummary object 19 | } 20 | \examples{ 21 | sesameDataCache() # in case not done yet 22 | data <- sesameDataGet('HM450.76.TCGA.matched') 23 | ## test the first 10 24 | smry <- DML(data$betas[1:10,], ~type, meta=data$sampleInfo) 25 | smry 26 | 27 | sesameDataGet_resetEnv() 28 | } 29 | -------------------------------------------------------------------------------- /man/scrub.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/background.R 3 | \name{scrub} 4 | \alias{scrub} 5 | \title{SCRUB background correction} 6 | \usage{ 7 | scrub(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | } 12 | \value{ 13 | a new \code{SigDF} with noob background correction 14 | } 15 | \description{ 16 | This function takes a \code{SigDF} and returns a modified \code{SigDF} 17 | with background subtracted. scrub subtracts residual background using 18 | background median 19 | } 20 | \details{ 21 | This function is meant to be used after noob. 
#' Infer Ethnicity (deprecated)
#'
#' This function is deprecated and no longer performs any inference: calling
#' it only signals a deprecation warning that points to
#' \code{CytoMethIC::cmi_classify}.
#'
#' Historically this function used both the built-in rs probes as well as the
#' type I Color-Channel-Switching probes to infer ethnicity. The input
#' \code{SigDF} was expected to be background subtracted and dye-bias
#' corrected for best accuracy.
#'
#' Please note: the betas should come from SigDF *without*
#' channel inference.
#'
#' @param sdf a \code{SigDF} (unused by the deprecated stub; kept so that
#' existing calls remain valid)
#' @param verbose print more messages (unused by the deprecated stub; kept so
#' that existing calls remain valid)
#' @return called for its side effect of signalling a deprecation warning;
#' no ethnicity is inferred
#' @import sesameData
#' @examples
#' sdf <- sesameDataGet('EPIC.1.SigDF')
#' ## inferEthnicity(sdf)
#' @export
inferEthnicity <- function(sdf, verbose = FALSE) {
    ## The first positional argument of .Deprecated() is `new`: the NAME of
    ## the replacement, which gets interpolated into "Use '<new>' instead.".
    ## Passing a whole sentence there produced the garbled warning
    ## "Use 'Please use CytoMethIC::cmi_classify.' instead.".
    .Deprecated(new = "CytoMethIC::cmi_classify")
}
-------------------------------------------------------------------------------- /man/scrubSoft.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/background.R 3 | \name{scrubSoft} 4 | \alias{scrubSoft} 5 | \title{SCRUB background correction} 6 | \usage{ 7 | scrubSoft(sdf) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | } 12 | \value{ 13 | a new \code{SigDF} with noob background correction 14 | } 15 | \description{ 16 | This function takes a \code{SigDF} and returns a modified \code{SigDF} 17 | with background subtracted. scrubSoft subtracts residual background using a 18 | noob-like procedure. 19 | } 20 | \details{ 21 | This function is meant to be used after noob. 22 | } 23 | \examples{ 24 | sdf <- sesameDataGet('EPIC.1.SigDF') 25 | sdf.nb <- noob(sdf) 26 | sdf.nb.scrubSoft <- scrubSoft(sdf.nb) 27 | } 28 | -------------------------------------------------------------------------------- /man/calcEffectSize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{calcEffectSize} 4 | \alias{calcEffectSize} 5 | \title{Compute effect size for different variables from prediction matrix} 6 | \usage{ 7 | calcEffectSize(pred) 8 | } 9 | \arguments{ 10 | \item{pred}{predictions} 11 | } 12 | \value{ 13 | a data.frame of effect sizes. Columns are different variables. 14 | Rows are different probes. 15 | } 16 | \description{ 17 | The effect size is defined by the maximum variation of a variable with all 18 | the other variables held constant. 
19 | } 20 | \examples{ 21 | data <- sesameDataGet('HM450.76.TCGA.matched') 22 | res <- DMLpredict(data$betas[1:10,], ~type, meta=data$sampleInfo) 23 | head(calcEffectSize(res)) 24 | } 25 | -------------------------------------------------------------------------------- /man/predictAgeHorvath353.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/age.R 3 | \name{predictAgeHorvath353} 4 | \alias{predictAgeHorvath353} 5 | \title{Horvath 353 age predictor} 6 | \usage{ 7 | predictAgeHorvath353(betas) 8 | } 9 | \arguments{ 10 | \item{betas}{a probeID-named vector of beta values} 11 | } 12 | \value{ 13 | age in years 14 | } 15 | \description{ 16 | The function takes a named numeric vector of beta values. The name attribute 17 | contains the probe ID (cg, ch or rs IDs). The function looks for overlapping 18 | probes and estimate age using Horvath aging model (Horvath 2013 19 | Genome Biology). The function outputs a single numeric of age in years. 20 | } 21 | \examples{ 22 | cat("Deprecated. See predictAge") 23 | } 24 | -------------------------------------------------------------------------------- /man/medianTotalIntensity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{medianTotalIntensity} 4 | \alias{medianTotalIntensity} 5 | \title{Whole-dataset-wide Median Total Intensity (M+U)} 6 | \usage{ 7 | medianTotalIntensity(sdf, mask = TRUE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{mask}{whether to mask probes using mask column} 13 | } 14 | \value{ 15 | median of all intensities 16 | } 17 | \description{ 18 | The function takes one single \code{SigDF} and computes median 19 | intensity of M+U for each probe. This function outputs a single 20 | numeric for the median. 
21 | } 22 | \examples{ 23 | sesameDataCache() # if not done yet 24 | sdf <- sesameDataGet('EPIC.1.SigDF') 25 | medianTotalIntensity(sdf) 26 | } 27 | -------------------------------------------------------------------------------- /man/predictAgeSkinBlood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/age.R 3 | \name{predictAgeSkinBlood} 4 | \alias{predictAgeSkinBlood} 5 | \title{Horvath Skin and Blood age predictor} 6 | \usage{ 7 | predictAgeSkinBlood(betas) 8 | } 9 | \arguments{ 10 | \item{betas}{a probeID-named vector of beta values} 11 | } 12 | \value{ 13 | age in years 14 | } 15 | \description{ 16 | The function takes a named numeric vector of beta values. The name attribute 17 | contains the probe ID (cg, ch or rs IDs). The function looks for overlapping 18 | probes and estimate age using Horvath aging model (Horvath et al. 2018 19 | Aging, 391 probes). The function outputs a single numeric of age in years. 20 | } 21 | \examples{ 22 | cat("Deprecated. See predictAge") 23 | } 24 | -------------------------------------------------------------------------------- /man/probeID_designType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{probeID_designType} 4 | \alias{probeID_designType} 5 | \title{Extract the probe type field from probe ID 6 | This only works with the new probe ID system. 7 | See https://github.com/zhou-lab/InfiniumAnnotation for illustration} 8 | \usage{ 9 | probeID_designType(Probe_ID) 10 | } 11 | \arguments{ 12 | \item{Probe_ID}{Probe ID} 13 | } 14 | \value{ 15 | a vector of '1' and '2' suggesting Infinium-I and Infinium-II 16 | } 17 | \description{ 18 | Extract the probe type field from probe ID 19 | This only works with the new probe ID system. 
20 | See https://github.com/zhou-lab/InfiniumAnnotation for illustration 21 | } 22 | \examples{ 23 | probeID_designType("cg36609548_TC21") 24 | } 25 | -------------------------------------------------------------------------------- /man/sesameQC_getStats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_getStats} 4 | \alias{sesameQC_getStats} 5 | \title{Get stat numbers from an sesameQC object} 6 | \usage{ 7 | sesameQC_getStats(qc, stat_names = NULL, drop = TRUE) 8 | } 9 | \arguments{ 10 | \item{qc}{a sesameQC object} 11 | 12 | \item{stat_names}{which stat(s) to retrieve, default to all.} 13 | 14 | \item{drop}{whether to drop to a string when stats_names has 15 | only one element.} 16 | } 17 | \value{ 18 | a list of named stats to be retrieved 19 | } 20 | \description{ 21 | Get stat numbers from an sesameQC object 22 | } 23 | \examples{ 24 | sdf <- sesameDataGet("EPIC.1.SigDF") 25 | qc <- sesameQC_calcStats(sdf, "detection") 26 | sesameQC_getStats(qc, "frac_dt") 27 | } 28 | -------------------------------------------------------------------------------- /man/diffRefSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cell_composition.R 3 | \name{diffRefSet} 4 | \alias{diffRefSet} 5 | \title{Restrict refset to differentially methylated probes 6 | use with care, might introduce bias} 7 | \usage{ 8 | diffRefSet(g) 9 | } 10 | \arguments{ 11 | \item{g}{a matrix with probes on the rows and cell types on the columns} 12 | } 13 | \value{ 14 | g a matrix with a subset of input probes (rows) 15 | } 16 | \description{ 17 | The function takes a matrix with probes on the rows and cell types on 18 | the columns and output a subset matrix and only probes that show 19 | discordant methylation levels among the cell 
types. 20 | } 21 | \examples{ 22 | 23 | g = diffRefSet(getRefSet(platform='HM450')) 24 | sesameDataGet_resetEnv() 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/initFileSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileSet.R 3 | \name{initFileSet} 4 | \alias{initFileSet} 5 | \title{initialize a fileSet class by allocating appropriate storage} 6 | \usage{ 7 | initFileSet(map_path, platform, samples, probes = NULL, inc = 4) 8 | } 9 | \arguments{ 10 | \item{map_path}{path of file to map} 11 | 12 | \item{platform}{EPIC, HM450 or HM27, consistent with sdfPlatform(sdf)} 13 | 14 | \item{samples}{sample names} 15 | 16 | \item{probes}{probe names} 17 | 18 | \item{inc}{bytes per unit data storage} 19 | } 20 | \value{ 21 | a sesame::fileSet object 22 | } 23 | \description{ 24 | initialize a fileSet class by allocating appropriate storage 25 | } 26 | \examples{ 27 | 28 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2')) 29 | } 30 | -------------------------------------------------------------------------------- /man/totalIntensities.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{totalIntensities} 4 | \alias{totalIntensities} 5 | \title{M+U Intensities Array} 6 | \usage{ 7 | totalIntensities(sdf, mask = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{mask}{whether to mask probes using mask column} 13 | } 14 | \value{ 15 | a vector of M+U signal for each probe 16 | } 17 | \description{ 18 | The function takes one single \code{SigDF} and computes total 19 | intensity of all the in-band measurements by summing methylated and 20 | unmethylated alleles. This function outputs a vector of M+U signal, one value per probe. 
21 | } 22 | \examples{ 23 | sesameDataCache() # if not done yet 24 | sdf <- sesameDataGet('EPIC.1.SigDF') 25 | intensities <- totalIntensities(sdf) 26 | } 27 | -------------------------------------------------------------------------------- /man/dyeBiasL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dye_bias.R 3 | \name{dyeBiasL} 4 | \alias{dyeBiasL} 5 | \title{Correct dye bias in by linear scaling.} 6 | \usage{ 7 | dyeBiasL(sdf, ref = NULL) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{ref}{reference signal level} 13 | } 14 | \value{ 15 | a normalized \code{SigDF} 16 | } 17 | \description{ 18 | The function takes a \code{SigDF} as input and scale both the Grn and Red 19 | signal to a reference (ref) level. If the reference level is not given, it 20 | is set to the mean intensity of all the in-band signals. The function 21 | returns a \code{SigDF} with dye bias corrected. 22 | } 23 | \examples{ 24 | sesameDataCache() # if not done yet 25 | sdf <- sesameDataGet('EPIC.1.SigDF') 26 | sdf.db <- dyeBiasL(sdf) 27 | } 28 | -------------------------------------------------------------------------------- /man/inferEthnicity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ethnicity.R 3 | \name{inferEthnicity} 4 | \alias{inferEthnicity} 5 | \title{Infer Ethnicity} 6 | \usage{ 7 | inferEthnicity(sdf, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{verbose}{print more messages} 13 | } 14 | \value{ 15 | string of ethnicity 16 | } 17 | \description{ 18 | This function uses both the built-in rsprobes as well as the type I 19 | Color-Channel-Switching probes to infer ethnicity. 
20 | } 21 | \details{ 22 | The input \code{SigDF} should be background subtracted and dyebias corrected for 23 | best accuracy 24 | 25 | Please note: the betas should come from SigDF *without* 26 | channel inference. 27 | } 28 | \examples{ 29 | sdf <- sesameDataGet('EPIC.1.SigDF') 30 | ## inferEthnicity(sdf) 31 | } 32 | -------------------------------------------------------------------------------- /man/sdf_read_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SigDFMethods.R 3 | \name{sdf_read_table} 4 | \alias{sdf_read_table} 5 | \title{read a table file to SigDF} 6 | \usage{ 7 | sdf_read_table(fname, platform = NULL, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{fname}{file name} 11 | 12 | \item{platform}{array platform (will infer if not given)} 13 | 14 | \item{verbose}{print more information} 15 | 16 | \item{...}{additional argument to read.table} 17 | } 18 | \value{ 19 | read table file to SigDF 20 | } 21 | \description{ 22 | read a table file to SigDF 23 | } 24 | \examples{ 25 | sesameDataCache() # if not done yet 26 | sdf <- sesameDataGet('EPIC.1.SigDF') 27 | fname <- sprintf("\%s/sigdf.txt", tempdir()) 28 | sdf_write_table(sdf, file=fname) 29 | sdf2 <- sdf_read_table(fname) 30 | } 31 | -------------------------------------------------------------------------------- /man/sesameQC_plotBar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_plotBar} 4 | \alias{sesameQC_plotBar} 5 | \title{Bar plots for sesameQC} 6 | \usage{ 7 | sesameQC_plotBar(qcs, keys = NULL) 8 | } 9 | \arguments{ 10 | \item{qcs}{a list of SigDFs} 11 | 12 | \item{keys}{optional, other key to plot, instead of the default 13 | keys can be found in the parenthesis of the print output of each 14 | sesameQC output.} 15 | } 16 | \value{ 17 | 
a bar plot comparing different QC metrics 18 | } 19 | \description{ 20 | By default, it plots median_beta_cg, median_beta_ch, RGratio, 21 | RGdistort, frac_dt 22 | } 23 | \examples{ 24 | sesameDataCache() # if not done yet 25 | sdfs <- sesameDataGet("EPIC.5.SigDF.normal")[1:2] 26 | sesameQC_plotBar(lapply(sdfs, sesameQC_calcStats, "detection")) 27 | } 28 | -------------------------------------------------------------------------------- /man/dyeBiasCorr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dye_bias.R 3 | \name{dyeBiasCorr} 4 | \alias{dyeBiasCorr} 5 | \title{Correct dye bias in by linear scaling.} 6 | \usage{ 7 | dyeBiasCorr(sdf, ref = NULL) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{ref}{reference signal level} 13 | } 14 | \value{ 15 | a normalized \code{SigDF} 16 | } 17 | \description{ 18 | The function takes a \code{SigDF} as input and scale both the Grn and Red 19 | signal to a reference (ref) level. If the reference level is not given, it 20 | is set to the mean intensity of all the in-band signals. The function 21 | returns a \code{SigDF} with dye bias corrected. 
22 | } 23 | \examples{ 24 | sesameDataCache() # if not done yet 25 | sdf <- sesameDataGet('EPIC.1.SigDF') 26 | sdf.db <- dyeBiasCorr(sdf) 27 | } 28 | -------------------------------------------------------------------------------- /vignettes/_site.yml: -------------------------------------------------------------------------------- 1 | name: "SeSAMe" 2 | navbar: 3 | title: '' 4 | left: 5 | - text: "Basics" 6 | icon: fa-home 7 | href: sesame.html 8 | - text: "QC" 9 | icon: fa-check-circle 10 | href: QC.html 11 | - text: "Non-human Array" 12 | icon: fa-paw 13 | href: nonhuman.html 14 | - text: "Modeling" 15 | icon: fa-cogs 16 | href: modeling.html 17 | - text: "Inference" 18 | icon: fa-database 19 | href: inferences.html 20 | - text: "KnowYourCG" 21 | icon: fa-binoculars 22 | href: KYCG.html 23 | - text: "Supplemental" 24 | icon: fa-book 25 | href: https://zhou-lab.github.io/sesame/dev/supplemental.html 26 | -------------------------------------------------------------------------------- /man/getAFTypeIbySumAlleles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{getAFTypeIbySumAlleles} 4 | \alias{getAFTypeIbySumAlleles} 5 | \title{Get allele frequency treating type I by summing alleles} 6 | \usage{ 7 | getAFTypeIbySumAlleles(sdf, known.ccs.only = TRUE) 8 | } 9 | \arguments{ 10 | \item{sdf}{\code{SigDF}} 11 | 12 | \item{known.ccs.only}{consider only known CCS probes} 13 | } 14 | \value{ 15 | beta values 16 | } 17 | \description{ 18 | Takes a \code{SigDF} as input and returns a numeric vector containing 19 | extra allele frequencies based on Color-Channel-Switching (CCS) probes. 20 | If no CCS probes exist in the \code{SigDF}, then a numeric(0) is 21 | returned. 
22 | } 23 | \examples{ 24 | sesameDataCache() # if not done yet 25 | sdf <- sesameDataGet('EPIC.1.SigDF') 26 | af <- getAFTypeIbySumAlleles(sdf) 27 | } 28 | -------------------------------------------------------------------------------- /man/checkLevels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{checkLevels} 4 | \alias{checkLevels} 5 | \title{filter data matrix by factor completeness 6 | only works for discrete factors} 7 | \usage{ 8 | checkLevels(betas, fc) 9 | } 10 | \arguments{ 11 | \item{betas}{matrix data} 12 | 13 | \item{fc}{factors, or characters} 14 | } 15 | \value{ 16 | a boolean vector whether there is non-NA value for each tested 17 | group for each probe 18 | } 19 | \description{ 20 | filter data matrix by factor completeness 21 | only works for discrete factors 22 | } 23 | \examples{ 24 | se0 <- sesameDataGet("MM285.10.SE.tissue")[1:100,] 25 | se_ok <- checkLevels(SummarizedExperiment::assay(se0), 26 | SummarizedExperiment::colData(se0)$tissue) 27 | sum(se_ok) # number of good probes 28 | se1 <- se0[se_ok,] 29 | 30 | sesameDataGet_resetEnv() 31 | } 32 | -------------------------------------------------------------------------------- /man/updateSigDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/species.R 3 | \name{updateSigDF} 4 | \alias{updateSigDF} 5 | \title{Set color and mask using strain/species-specific manifest} 6 | \usage{ 7 | updateSigDF(sdf, species = NULL, strain = NULL, addr = NULL, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{species}{the species the sample is considered to be} 13 | 14 | \item{strain}{the strain the sample is considered to be} 15 | 16 | \item{addr}{species-specific address species, optional} 17 | 18 | 
\item{verbose}{print more messages} 19 | } 20 | \value{ 21 | a \code{SigDF} with updated color channel and mask 22 | } 23 | \description{ 24 | also sets attr(,"species") 25 | } 26 | \examples{ 27 | sdf <- sesameDataGet('Mammal40.1.SigDF') 28 | sdf_mouse <- updateSigDF(sdf, species="mus_musculus") 29 | 30 | } 31 | -------------------------------------------------------------------------------- /man/openSesameToFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileSet.R 3 | \name{openSesameToFile} 4 | \alias{openSesameToFile} 5 | \title{openSesame pipeline with file-backed storage} 6 | \usage{ 7 | openSesameToFile(map_path, idat_dir, BPPARAM = SerialParam(), inc = 4) 8 | } 9 | \arguments{ 10 | \item{map_path}{path of file to be mapped (beta values file)} 11 | 12 | \item{idat_dir}{source IDAT directory} 13 | 14 | \item{BPPARAM}{get parallel with MulticoreParam(2)} 15 | 16 | \item{inc}{bytes per item data storage. increase to 8 if precision 17 | is important. 
Most cases 32-bit representation is enough.} 18 | } 19 | \value{ 20 | a sesame::fileSet 21 | } 22 | \description{ 23 | openSesame pipeline with file-backed storage 24 | } 25 | \examples{ 26 | 27 | openSesameToFile('mybetas', 28 | system.file('extdata',package='sesameData')) 29 | 30 | } 31 | -------------------------------------------------------------------------------- /man/predictMouseAgeInMonth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/age.R 3 | \name{predictMouseAgeInMonth} 4 | \alias{predictMouseAgeInMonth} 5 | \title{Mouse age predictor} 6 | \usage{ 7 | predictMouseAgeInMonth(betas, na_fallback = TRUE) 8 | } 9 | \arguments{ 10 | \item{betas}{a probeID-named vector of beta values} 11 | 12 | \item{na_fallback}{use the fallback default for NAs.} 13 | } 14 | \value{ 15 | age in month 16 | } 17 | \description{ 18 | The function takes a named numeric vector of beta values. The name attribute 19 | contains the probe ID. The function looks for overlapping 20 | probes and estimate age using an aging model built from 321 MM285 probes. 21 | The function outputs a single numeric of age in months. The clock is most 22 | accurate with the sesame preprocessing. 23 | } 24 | \examples{ 25 | cat("Deprecated. 
See predictAge") 26 | } 27 | -------------------------------------------------------------------------------- /man/compareMouseStrainReference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/strain.R 3 | \name{compareMouseStrainReference} 4 | \alias{compareMouseStrainReference} 5 | \title{Compare Strain SNPs with a reference panel} 6 | \usage{ 7 | compareMouseStrainReference( 8 | betas = NULL, 9 | show_sample_names = FALSE, 10 | query_width = NULL 11 | ) 12 | } 13 | \arguments{ 14 | \item{betas}{beta value vector or matrix (for multiple samples)} 15 | 16 | \item{show_sample_names}{whether to show sample name} 17 | 18 | \item{query_width}{optional argument for adjusting query width} 19 | } 20 | \value{ 21 | grid object that contrast the target sample with 22 | pre-built mouse strain reference 23 | } 24 | \description{ 25 | Compare Strain SNPs with a reference panel 26 | } 27 | \examples{ 28 | sesameDataCache() # if not done yet 29 | compareMouseStrainReference() 30 | } 31 | -------------------------------------------------------------------------------- /man/getMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{getMask} 4 | \alias{getMask} 5 | \title{get probe masking by mask names} 6 | \usage{ 7 | getMask(platform = "EPICv2", mask_names = "recommended") 8 | } 9 | \arguments{ 10 | \item{platform}{EPICv2, EPIC, HM450, HM27, ...} 11 | 12 | \item{mask_names}{mask names (see listAvailableMasks) 13 | by default: "recommended" 14 | see recommendedMaskNames() for detail.} 15 | } 16 | \value{ 17 | a vector of probe ID 18 | } 19 | \description{ 20 | get probe masking by mask names 21 | } 22 | \examples{ 23 | 24 | length(getMask("MSA", "recommended")) 25 | length(getMask("EPICv2", "recommended")) 26 | 
length(getMask("EPICv2", c("recommended", "M_SNPcommon_1pt"))) 27 | length(getMask("EPICv2", "M_mapping")) 28 | length(getMask("EPIC")) 29 | length(getMask("HM450")) 30 | length(getMask("MM285")) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/imputeBetasMatrixByMean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/impute.R 3 | \name{imputeBetasMatrixByMean} 4 | \alias{imputeBetasMatrixByMean} 5 | \title{Impute Missing Values with Mean 6 | This function replaces missing values (NA) in a matrix, default is row 7 | means.} 8 | \usage{ 9 | imputeBetasMatrixByMean(mx, axis = 1) 10 | } 11 | \arguments{ 12 | \item{mx}{A matrix} 13 | 14 | \item{axis}{A single integer. Use 1 to impute column means (default), 15 | and 2 to impute row means.} 16 | } 17 | \value{ 18 | A matrix with missing values imputed. 19 | } 20 | \description{ 21 | Impute Missing Values with Mean 22 | This function replaces missing values (NA) in a matrix, default is row 23 | means. 
24 | } 25 | \examples{ 26 | mx <- cbind(c(1, 2, NA, 4), c(NA, 2, 3, 4)) 27 | imputeBetasMatrixByMean(mx, axis = 1) 28 | imputeBetasMatrixByMean(mx, axis = 2) 29 | } 30 | -------------------------------------------------------------------------------- /man/probeSuccessRate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{probeSuccessRate} 4 | \alias{probeSuccessRate} 5 | \title{Whole-dataset-wide Probe Success Rate} 6 | \usage{ 7 | probeSuccessRate(sdf, mask = TRUE, max_pval = 0.05) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{mask}{whether or not we count the masked probes in SigDF} 13 | 14 | \item{max_pval}{the maximum p-value to consider detection success} 15 | } 16 | \value{ 17 | a fraction number as probe success rate 18 | } 19 | \description{ 20 | This function calculates the probe success rate using 21 | pOOBAH detection p-values. Probes that have a detection p-value 22 | higher than a specific threshold are considered failed probes. 23 | } 24 | \examples{ 25 | sesameDataCache() # if not done yet 26 | sdf <- sesameDataGet('EPIC.1.SigDF') 27 | probeSuccessRate(sdf) 28 | } 29 | -------------------------------------------------------------------------------- /man/matchDesign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/match_design.R 3 | \name{matchDesign} 4 | \alias{matchDesign} 5 | \title{normalize Infinium I probe betas to Infinium II} 6 | \usage{ 7 | matchDesign(sdf, min_dbeta = 0.3) 8 | } 9 | \arguments{ 10 | \item{sdf}{SigDF} 11 | 12 | \item{min_dbeta}{the default algorithm performs 2-state 13 | quantile-normalization of the unmethylated and methylated modes 14 | separately. However, when the two modes are too close, we fall back 15 | to a one-mode normalization.
The threshold defines the maximum 16 | inter-mode distance.} 17 | } 18 | \value{ 19 | SigDF 20 | } 21 | \description{ 22 | This is designed to counter tail inflation in Infinium I probes. 23 | } 24 | \examples{ 25 | 26 | library(RPMM) 27 | sdf <- sesameDataGet("MM285.1.SigDF") 28 | sesameQC_plotBetaByDesign(sdf) 29 | sesameQC_plotBetaByDesign(matchDesign(sdf)) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /man/readFileSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileSet.R 3 | \name{readFileSet} 4 | \alias{readFileSet} 5 | \title{Read an existing fileSet from storage} 6 | \usage{ 7 | readFileSet(map_path) 8 | } 9 | \arguments{ 10 | \item{map_path}{path of file to map (should contain valid _idx.rds index)} 11 | } 12 | \value{ 13 | a sesame::fileSet object 14 | } 15 | \description{ 16 | This function only reads the meta-data. 
17 | } 18 | \examples{ 19 | 20 | ## create two samples 21 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2')) 22 | 23 | ## a hypothetical numeric array (can be beta values, intensities etc) 24 | hypothetical <- setNames(runif(fset$n), fset$probes) 25 | 26 | ## map the numeric to file 27 | mapFileSet(fset, 's1', hypothetical) 28 | 29 | ## read it from file 30 | fset <- readFileSet('mybetas2') 31 | 32 | ## get data 33 | sliceFileSet(fset, 's1', 'cg00000292') 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/detectionPnegEcdf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detection.R 3 | \name{detectionPnegEcdf} 4 | \alias{detectionPnegEcdf} 5 | \title{Detection P-value based on ECDF of negative control} 6 | \usage{ 7 | detectionPnegEcdf(sdf, return.pval = FALSE, pval.threshold = 0.05) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{return.pval}{whether to return p-values, instead of a 13 | masked \code{SigDF}} 14 | 15 | \item{pval.threshold}{minimum p-value to mask} 16 | } 17 | \value{ 18 | a \code{SigDF}, or a p-value vector if return.pval is TRUE 19 | } 20 | \description{ 21 | The function takes a \code{SigDF} as input, computes detection p-value 22 | using negative control probes' empirical distribution and returns a new 23 | \code{SigDF} with an updated mask slot. 
24 | } 25 | \examples{ 26 | sdf <- sesameDataGet("EPIC.1.SigDF") 27 | sum(sdf$mask) 28 | sum(detectionPnegEcdf(sdf)$mask) 29 | } 30 | -------------------------------------------------------------------------------- /man/sesameQC_rankStats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_rankStats} 4 | \alias{sesameQC_rankStats} 5 | \title{This function compares the input sample with public data. 6 | Only overlapping metrics will be compared.} 7 | \usage{ 8 | sesameQC_rankStats(qc, publicQC = NULL, platform = "EPIC") 9 | } 10 | \arguments{ 11 | \item{qc}{a sesameQC object} 12 | 13 | \item{publicQC}{public QC statistics, filtered from e.g.: EPIC.publicQC, 14 | MM285.publicQC and Mammal40.publicQC} 15 | 16 | \item{platform}{EPIC, MM285 or Mammal40, used when publicQC is not given} 17 | } 18 | \value{ 19 | a sesameQC 20 | } 21 | \description{ 22 | This function compares the input sample with public data. 23 | Only overlapping metrics will be compared. 24 | } 25 | \examples{ 26 | 27 | sesameDataCache() # if not done yet 28 | sdf <- sesameDataGet('EPIC.1.SigDF') 29 | sesameQC_rankStats(sesameQC_calcStats(sdf, "intensity")) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /man/sesameQC_plotBetaByDesign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_plotBetaByDesign} 4 | \alias{sesameQC_plotBetaByDesign} 5 | \title{Plot betas distinguishing different Infinium chemistries} 6 | \usage{ 7 | sesameQC_plotBetaByDesign( 8 | sdf, 9 | prep = NULL, 10 | legend_pos = "top", 11 | mar = c(3, 3, 1, 1), 12 | main = "", 13 | ... 
14 | ) 15 | } 16 | \arguments{ 17 | \item{sdf}{SigDF} 18 | 19 | \item{prep}{prep codes to step through} 20 | 21 | \item{legend_pos}{legend position (default: top)} 22 | 23 | \item{mar}{margin of layout when showing steps of prep} 24 | 25 | \item{main}{main title in plots} 26 | 27 | \item{...}{additional options to plot} 28 | } 29 | \value{ 30 | create a density plot 31 | } 32 | \description{ 33 | Plot betas distinguishing different Infinium chemistries 34 | } 35 | \examples{ 36 | sdf <- sesameDataGet("EPIC.1.SigDF") 37 | sesameQC_plotBetaByDesign(sdf, prep="DB") 38 | } 39 | -------------------------------------------------------------------------------- /man/sesameAnno_attachManifest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesameAnno.R 3 | \name{sesameAnno_attachManifest} 4 | \alias{sesameAnno_attachManifest} 5 | \title{Annotate a data.frame using manifest} 6 | \usage{ 7 | sesameAnno_attachManifest( 8 | df, 9 | probe_id = "Probe_ID", 10 | platform = NULL, 11 | genome = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{df}{input data frame with Probe_ID as a column} 16 | 17 | \item{probe_id}{the Probe_ID column name, default to "Probe_ID" or 18 | rownames} 19 | 20 | \item{platform}{which array platform, guess from probe ID if not given} 21 | 22 | \item{genome}{the genome build, use default if not given} 23 | } 24 | \value{ 25 | a new data.frame with manifest attached 26 | } 27 | \description{ 28 | Annotate a data.frame using manifest 29 | } 30 | \examples{ 31 | \dontrun{ 32 | df <- data.frame(Probe_ID = c("cg00101675_BC21", "cg00116289_BC21")) 33 | sesameAnno_attachManifest(df) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/mapFileSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit 
documentation in R/fileSet.R 3 | \name{mapFileSet} 4 | \alias{mapFileSet} 5 | \title{Deposit data of one sample to a fileSet (and hence to file)} 6 | \usage{ 7 | mapFileSet(fset, sample, named_values) 8 | } 9 | \arguments{ 10 | \item{fset}{a sesame::fileSet, as obtained via readFileSet} 11 | 12 | \item{sample}{sample name as a string} 13 | 14 | \item{named_values}{value vector named by probes} 15 | } 16 | \value{ 17 | a sesame::fileSet 18 | } 19 | \description{ 20 | Deposit data of one sample to a fileSet (and hence to file) 21 | } 22 | \examples{ 23 | 24 | ## create two samples 25 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2')) 26 | 27 | ## a hypothetical numeric array (can be beta values, intensities etc) 28 | hypothetical <- setNames(runif(fset$n), fset$probes) 29 | 30 | ## map the numeric to file 31 | mapFileSet(fset, 's1', hypothetical) 32 | 33 | ## get data 34 | sliceFileSet(fset, 's1', 'cg00000292') 35 | 36 | } 37 | -------------------------------------------------------------------------------- /man/meanIntensity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{meanIntensity} 4 | \alias{meanIntensity} 5 | \title{Whole-dataset-wide Mean Intensity} 6 | \usage{ 7 | meanIntensity(sdf, mask = TRUE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{mask}{whether to mask probes using mask column} 13 | } 14 | \value{ 15 | mean of all intensities 16 | } 17 | \description{ 18 | The function takes one single \code{SigDF} and computes mean 19 | intensity of all the in-band measurements. This includes all Type-I 20 | in-band measurements and all Type-II probe measurements. Both methylated 21 | and unmethylated alleles are considered. This function outputs a single 22 | numeric for the mean. 
23 | } 24 | \details{ 25 | Note: mean in this case is more informative than median because 26 | methylation level is mostly bimodal. 27 | } 28 | \examples{ 29 | sesameDataCache() # if not done yet 30 | sdf <- sesameDataGet('EPIC.1.SigDF') 31 | meanIntensity(sdf) 32 | } 33 | -------------------------------------------------------------------------------- /man/reIdentify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deidentify.R 3 | \name{reIdentify} 4 | \alias{reIdentify} 5 | \title{Re-identify IDATs by restoring scrambled SNP intensities} 6 | \usage{ 7 | reIdentify(path, out_path = NULL, snps = NULL, mft = NULL) 8 | } 9 | \arguments{ 10 | \item{path}{input IDAT file} 11 | 12 | \item{out_path}{output IDAT file} 13 | 14 | \item{snps}{SNP definition, if not given, default to SNP probes} 15 | 16 | \item{mft}{sesame-compatible manifest if non-standard} 17 | } 18 | \value{ 19 | NULL, changes made to the IDAT files 20 | } 21 | \description{ 22 | This requires setting a seed with a secret number that was used to 23 | de-identify the IDAT (see example).
24 | This requires a secret number that was used to de-identify the IDAT 25 | } 26 | \examples{ 27 | 28 | temp_out <- tempfile("test") 29 | 30 | set.seed(123) 31 | reIdentify(system.file( 32 | "extdata", "4207113116_A_Grn.idat", package = "sesameData"), temp_out) 33 | unlink(temp_out) 34 | } 35 | -------------------------------------------------------------------------------- /man/dyeBiasCorrMostBalanced.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dye_bias.R 3 | \name{dyeBiasCorrMostBalanced} 4 | \alias{dyeBiasCorrMostBalanced} 5 | \title{Correct dye bias using most balanced sample as the reference} 6 | \usage{ 7 | dyeBiasCorrMostBalanced(sdfs) 8 | } 9 | \arguments{ 10 | \item{sdfs}{a list of normalized \code{SigDF}s} 11 | } 12 | \value{ 13 | a list of normalized \code{SigDF}s 14 | } 15 | \description{ 16 | The function chooses the reference signal level from a list of \code{SigDF}. 17 | The chosen sample has the smallest difference in Grn and Red signal 18 | intensity as measured using the normalization control probes. In practice, 19 | it doesn't matter which sample is chosen as long as the reference level 20 | does not deviate much. The function returns a list of \code{SigDF}s with 21 | dye bias corrected.
22 | } 23 | \examples{ 24 | sesameDataCache() # if not done yet 25 | sdfs <- sesameDataGet('HM450.10.SigDF')[1:2] 26 | sdfs.db <- dyeBiasCorrMostBalanced(sdfs) 27 | } 28 | -------------------------------------------------------------------------------- /man/imputeBetas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/impute.R 3 | \name{imputeBetas} 4 | \alias{imputeBetas} 5 | \title{Impute of missing data of specific platform} 6 | \usage{ 7 | imputeBetas( 8 | betas, 9 | platform = NULL, 10 | BPPARAM = SerialParam(), 11 | celltype = NULL, 12 | sd_max = 999 13 | ) 14 | } 15 | \arguments{ 16 | \item{betas}{named vector of beta values} 17 | 18 | \item{platform}{platform} 19 | 20 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing} 21 | 22 | \item{celltype}{celltype/tissue context of imputation, if not given, will 23 | use nearest neighbor to determine.} 24 | 25 | \item{sd_max}{maximum standard deviation in imputation confidence} 26 | } 27 | \value{ 28 | imputed data, vector or matrix 29 | } 30 | \description{ 31 | Impute of missing data of specific platform 32 | } 33 | \examples{ 34 | betas = openSesame(sesameDataGet("EPIC.1.SigDF")) 35 | sum(is.na(betas)) 36 | betas2 = imputeBetas(betas, "EPIC") 37 | sum(is.na(betas2)) 38 | 39 | } 40 | -------------------------------------------------------------------------------- /man/imputeBetasByGenomicNeighbors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/impute.R 3 | \name{imputeBetasByGenomicNeighbors} 4 | \alias{imputeBetasByGenomicNeighbors} 5 | \title{Impute missing data based on genomic neighbors.} 6 | \usage{ 7 | imputeBetasByGenomicNeighbors( 8 | betas, 9 | platform = NULL, 10 | BPPARAM = SerialParam(), 11 | max_neighbors = 3, 12 | max_dist = 10000 13 | ) 14 | } 15 | 
\arguments{ 16 | \item{betas}{named vector of beta values} 17 | 18 | \item{platform}{platform} 19 | 20 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing} 21 | 22 | \item{max_neighbors}{maximum neighbors to use for dense regions} 23 | 24 | \item{max_dist}{maximum distance to count as neighbor} 25 | } 26 | \value{ 27 | imputed data, vector or matrix 28 | } 29 | \description{ 30 | Impute missing data based on genomic neighbors. 31 | } 32 | \examples{ 33 | betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]]) 34 | sum(is.na(betas)) 35 | betas2 = imputeBetasByGenomicNeighbors(betas, "EPICv2") 36 | sum(is.na(betas2)) 37 | 38 | } 39 | -------------------------------------------------------------------------------- /man/sesameAnno_buildManifestGRanges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesameAnno.R 3 | \name{sesameAnno_buildManifestGRanges} 4 | \alias{sesameAnno_buildManifestGRanges} 5 | \title{Build manifest GRanges from tsv} 6 | \usage{ 7 | sesameAnno_buildManifestGRanges( 8 | tsv, 9 | genome = NULL, 10 | decoy = FALSE, 11 | columns = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{tsv}{a file path, a platform (e.g., EPIC), or 16 | a tibble/data.frame object} 17 | 18 | \item{genome}{a genome string, e.g., hg38, mm10} 19 | 20 | \item{decoy}{consider decoy sequence in chromosome order} 21 | 22 | \item{columns}{the columns to include in the GRanges} 23 | } 24 | \value{ 25 | GRanges 26 | } 27 | \description{ 28 | manifest tsv files can be downloaded from 29 | http://zwdzwd.github.io/InfiniumAnnotation 30 | } 31 | \examples{ 32 | \dontrun{ 33 | tsv = sesameAnno_download("HM450.hg38.manifest.tsv.gz") 34 | gr <- sesameAnno_buildManifestGRanges(tsv) 35 | ## direct access 36 | gr <- sesameAnno_buildManifestGRanges("HM450.hg38.manifest") 37 | } 38 | } 39 | 
-------------------------------------------------------------------------------- /man/betasCollapseToPfx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{betasCollapseToPfx} 4 | \alias{betasCollapseToPfx} 5 | \title{Collapse betas by averaging probes with common probe ID prefix} 6 | \usage{ 7 | betasCollapseToPfx(betas, BPPARAM = SerialParam()) 8 | } 9 | \arguments{ 10 | \item{betas}{either a named numeric vector or a numeric matrix 11 | (row: probes, column: samples)} 12 | 13 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing} 14 | } 15 | \value{ 16 | either named numeric vector or a numeric matrix of collapsed 17 | beta value matrix 18 | } 19 | \description{ 20 | Collapse betas by averaging probes with common probe ID prefix 21 | } 22 | \examples{ 23 | 24 | ## input is a matrix 25 | m <- matrix(seq(0,1,length.out=9), nrow=3) 26 | rownames(m) <- c("cg00004963_TC21", "cg00004963_TC22", "cg00004747_TC21") 27 | colnames(m) <- c("A","B","C") 28 | betasCollapseToPfx(m) 29 | 30 | ## input is a vector 31 | m <- setNames(seq(0,1,length.out=3), 32 | c("cg00004963_TC21", "cg00004963_TC22", "cg00004747_TC21")) 33 | betasCollapseToPfx(m) 34 | } 35 | -------------------------------------------------------------------------------- /man/chipAddressToSignal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{chipAddressToSignal} 4 | \alias{chipAddressToSignal} 5 | \title{Lookup address in one sample} 6 | \usage{ 7 | chipAddressToSignal(dm, mft, min_beads = NULL) 8 | } 9 | \arguments{ 10 | \item{dm}{data frame in chip address, 2 columns: cy3/Grn and cy5/Red} 11 | 12 | \item{mft}{a data frame with columns Probe_ID, M, U and col} 13 | 14 | \item{min_beads}{minimum bead counts, otherwise masked} 15 |
#' Generate some additional color palettes
#'
#' Resolves \code{pal} to a palette-generating function. A single string
#' naming one of the adhoc palettes ("whiteturbo", "whitejet", "whiteblack")
#' is first expanded to its colors. Any character vector of colors is then
#' interpolated with \code{grDevices::colorRampPalette}, and a function is
#' returned unchanged.
#'
#' @param pal a string for adhoc pals, a character vector of colors, or a
#' palette-generating function
#' @param n the number of colors for interpolation. Currently unused by this
#' function and kept for backward compatibility; pass the number of colors
#' to the returned function instead.
#' @param space rgb or Lab
#' @return a palette-generating function
#' @examples
#' library(pals)
#' pal.bands(palgen("whiteturbo"))
#' @export
palgen <- function(pal, n=150, space = "Lab") {

    adhoc_names <- c("whiteturbo", "whitejet", "whiteblack")

    ## Expand an adhoc palette name lazily: only "whiteturbo" needs the
    ## optional pals package, so every other input works without it.
    if (length(pal) == 1 && is.character(pal) && (pal %in% adhoc_names)) {
        pal <- switch(pal,
            whiteturbo = {
                if (!requireNamespace("pals", quietly = TRUE)) {
                    stop("The pals package is required for whiteturbo.")
                }
                c("white", "white", pals::turbo(10)[seq(2, 10)])
            },
            whitejet = c("white", "white", "lightblue",
                "blue", "green", "yellow", "orange", "red", "darkred"),
            whiteblack = c("white", "black"))
    }

    if (is.character(pal)) {
        ## grDevices ships with base R, no availability check is needed
        grDevices::colorRampPalette(pal, space = space)
    } else if (is.function(pal)) {
        pal
    } else {
        stop("Please provide the right pal format.")
    }
}
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cell_composition.R 3 | \name{getRefSet} 4 | \alias{getRefSet} 5 | \title{Retrieve reference set} 6 | \usage{ 7 | getRefSet(cells = NULL, platform = c("EPIC", "HM450")) 8 | } 9 | \arguments{ 10 | \item{cells}{reference cell types} 11 | 12 | \item{platform}{EPIC or HM450} 13 | } 14 | \value{ 15 | g, a 0/1 matrix with probes on the rows and specified cell types 16 | on the columns. 17 | } 18 | \description{ 19 | The function retrieves the curated reference DNA methylation status for 20 | a set of cell type names under the Infinium platform. Supported cell types 21 | include "CD4T", "CD19B", "CD56NK", "CD14Monocytes", "granulocytes", "scFat", 22 | "skin" etc. See package sesameData for more details. The function outputs a 23 | matrix with probes on the rows and specified cell types on the columns. 24 | 0 suggests unmethylation and 1 suggests methylation. Intermediate 25 | methylation and inconclusive calls are left with NA. 26 | } 27 | \examples{ 28 | 29 | betas = getRefSet('CD4T', platform='HM450') 30 | sesameDataGet_resetEnv() 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/noob.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/background.R 3 | \name{noob} 4 | \alias{noob} 5 | \title{Noob background subtraction} 6 | \usage{ 7 | noob(sdf, combine.neg = TRUE, offset = 15) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF}} 11 | 12 | \item{combine.neg}{whether to combine negative control probe.} 13 | 14 | \item{offset}{offset} 15 | } 16 | \value{ 17 | a new \code{SigDF} with noob background correction 18 | } 19 | \description{ 20 | The function takes a \code{SigDF} and returns a modified \code{SigDF} 21 | with background subtracted.
Background was modelled in a normal distribution 22 | and true signal in an exponential distribution. The Norm-Exp deconvolution 23 | is parameterized using Out-Of-Band (oob) probes. For species-specific 24 | processing, one should call inferSpecies on SigDF first. Multi-mapping 25 | probes are excluded. 26 | } 27 | \details{ 28 | When combine.neg = TRUE, background will be parameterized by both 29 | negative control and out-of-band probes. 30 | } 31 | \examples{ 32 | sdf <- sesameDataGet('EPIC.1.SigDF') 33 | sdf.nb <- noob(sdf) 34 | } 35 | -------------------------------------------------------------------------------- /man/getBetas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{getBetas} 4 | \alias{getBetas} 5 | \title{Get beta Values} 6 | \usage{ 7 | getBetas( 8 | sdf, 9 | mask = TRUE, 10 | sum.TypeI = FALSE, 11 | collapseToPfx = FALSE, 12 | collapseMethod = c("mean", "minPval") 13 | ) 14 | } 15 | \arguments{ 16 | \item{sdf}{\code{SigDF}} 17 | 18 | \item{mask}{whether to use mask} 19 | 20 | \item{sum.TypeI}{whether to sum type I channels} 21 | 22 | \item{collapseToPfx}{remove replicate to prefix (e.g., cg number) and 23 | remove the suffix} 24 | 25 | \item{collapseMethod}{mean or minPval} 26 | } 27 | \value{ 28 | a numeric vector, beta values 29 | } 30 | \description{ 31 | sum.typeI is used for rescuing beta values on 32 | Color-Channel-Switching CCS probes. The function takes a \code{SigDF} 33 | and returns beta value except that Type-I in-band signal and out-of-band 34 | signal are combined. This prevents color-channel switching due to SNPs. 
35 | } 36 | \examples{ 37 | sesameDataCache() # if not done yet 38 | sdf <- sesameDataGet('EPIC.1.SigDF') 39 | betas <- getBetas(sdf) 40 | } 41 | -------------------------------------------------------------------------------- /man/compareMouseTissueReference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tissue.R 3 | \name{compareMouseTissueReference} 4 | \alias{compareMouseTissueReference} 5 | \title{Compare mouse array data with mouse tissue references} 6 | \usage{ 7 | compareMouseTissueReference( 8 | betas = NULL, 9 | ref = NULL, 10 | color = "blueYellow", 11 | query_width = 0.3 12 | ) 13 | } 14 | \arguments{ 15 | \item{betas}{matrix of betas for the target sample 16 | This argument is optional. If not given, only the reference will be shown.} 17 | 18 | \item{ref}{the reference beta values in SummarizedExperiment. 19 | This argument is optional. If not given, the reference will be downloaded 20 | from the sesameData package.} 21 | 22 | \item{color}{either blueYellow or fullJet} 23 | 24 | \item{query_width}{the width of the query beta value matrix} 25 | } 26 | \value{ 27 | grid object that contrast the target sample with 28 | pre-built mouse tissue reference 29 | } 30 | \description{ 31 | Compare mouse array data with mouse tissue references 32 | } 33 | \examples{ 34 | cat("Deprecated, see compareReference") 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2024 Wanding Zhou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/qualityMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mask.R 3 | \name{qualityMask} 4 | \alias{qualityMask} 5 | \title{Mask beta values by design quality} 6 | \usage{ 7 | qualityMask(sdf, mask_names = "recommended", verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a \code{SigDF} object} 11 | 12 | \item{mask_names}{a vector of masking groups, see listAvailableMasks 13 | use "recommended" for recommended masking. 
One can also combine 14 | "recommended" with other masking groups by specifying a vector, e.g., 15 | c("recommended", "M_mapping")} 16 | 17 | \item{verbose}{be verbose} 18 | } 19 | \value{ 20 | a filtered \code{SigDF} 21 | } 22 | \description{ 23 | Currently quality masking only supports three platforms 24 | see also listAvailableMasks(sdfPlatform(sdf)) 25 | } 26 | \examples{ 27 | sesameDataCache() # if not done yet 28 | sdf <- sesameDataGet('EPIC.1.SigDF') 29 | sum(sdf$mask) 30 | sum(qualityMask(sdf)$mask) 31 | sum(qualityMask(sdf, mask_names = NULL)$mask) 32 | 33 | ## list available masks, the dbname column 34 | listAvailableMasks(sdfPlatform(sdf)) 35 | listAvailableMasks("EPICv2") 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/parseGEOsignalMU.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GEO.R 3 | \name{parseGEOsignalMU} 4 | \alias{parseGEOsignalMU} 5 | \title{Convert signal M and U to SigDF} 6 | \usage{ 7 | parseGEOsignalMU( 8 | sigM, 9 | sigU, 10 | Probe_IDs, 11 | oob.mean = 500, 12 | oob.sd = 300, 13 | platform = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{sigM}{methylated signal, a numeric vector} 18 | 19 | \item{sigU}{unmethylated signal, a numirc vector} 20 | 21 | \item{Probe_IDs}{probe ID vector} 22 | 23 | \item{oob.mean}{assumed mean for out-of-band signals} 24 | 25 | \item{oob.sd}{assumed standard deviation for out-of-band signals} 26 | 27 | \item{platform}{platform code, will infer if not given} 28 | } 29 | \value{ 30 | SigDF 31 | } 32 | \description{ 33 | This overcomes the issue of missing IDAT files. However, 34 | out-of-band signals will be missing or faked (sampled from a 35 | normal distribution). 
36 | } 37 | \examples{ 38 | sigM <- c(11436, 6068, 2864) 39 | sigU <- c(1476, 804, 393) 40 | probes <- c("cg07881041", "cg23229610", "cg03513874") 41 | sdf <- parseGEOsignalMU(sigM, sigU, probes, platform = "EPIC") 42 | } 43 | -------------------------------------------------------------------------------- /man/visualizeSegments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.R 3 | \name{visualizeSegments} 4 | \alias{visualizeSegments} 5 | \title{Visualize segments} 6 | \usage{ 7 | visualizeSegments(seg, to.plot = NULL, genes.to.label = NULL) 8 | } 9 | \arguments{ 10 | \item{seg}{a \code{CNSegment} object} 11 | 12 | \item{to.plot}{chromosome to plot (by default plot all chromosomes)} 13 | 14 | \item{genes.to.label}{gene(s) to label} 15 | } 16 | \value{ 17 | plot graphics 18 | } 19 | \description{ 20 | The function takes a \code{CNSegment} object obtained from cnSegmentation 21 | and plot the bin signals and segments (as horizontal lines). 
22 | } 23 | \details{ 24 | require ggplot2, scales 25 | } 26 | \examples{ 27 | 28 | sesameDataCache() 29 | \dontrun{ 30 | sdfs <- sesameDataGet('EPICv2.8.SigDF') 31 | sdf <- sdfs[["K562_206909630040_R01C01"]] 32 | seg <- cnSegmentation(sdf) 33 | seg <- cnSegmentation(sdf, return.probe.signals=TRUE) 34 | visualizeSegments(seg) 35 | visualizeSegments(seg, to.plot=c("chr9","chr22")) 36 | visualizeSegments(seg, genes.to.label=c("ABL1","BCR")) 37 | } 38 | 39 | sesameDataGet_resetEnv() 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/inferStrain.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/strain.R 3 | \name{inferStrain} 4 | \alias{inferStrain} 5 | \title{Infer strain information for mouse array} 6 | \usage{ 7 | inferStrain( 8 | sdf, 9 | return.strain = FALSE, 10 | return.probability = FALSE, 11 | return.pval = FALSE, 12 | min_frac_dt = 0.2, 13 | verbose = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{sdf}{SigDF} 18 | 19 | \item{return.strain}{return strain name} 20 | 21 | \item{return.probability}{return probability vector for all strains} 22 | 23 | \item{return.pval}{return p-value} 24 | 25 | \item{min_frac_dt}{minimum fraction of detected signal (DEFAULT: 0.2) 26 | otherwise, we give up strain inference and return NA.} 27 | 28 | \item{verbose}{print more messages} 29 | } 30 | \value{ 31 | a list of best guess, p-value of the best guess 32 | and the probabilities of all strains 33 | } 34 | \description{ 35 | Infer strain information for mouse array 36 | } 37 | \examples{ 38 | sesameDataCache() # if not done yet 39 | sdf <- sesameDataGet('MM285.1.SigDF') 40 | inferStrain(sdf, return.strain = TRUE) 41 | sdf.strain <- inferStrain(sdf) 42 | } 43 | -------------------------------------------------------------------------------- /man/ELBAR.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detection.R 3 | \name{ELBAR} 4 | \alias{ELBAR} 5 | \title{ELiminate BAckground-dominated Reading (ELBAR)} 6 | \usage{ 7 | ELBAR( 8 | sdf, 9 | return.pval = FALSE, 10 | pval.threshold = 0.05, 11 | margin = 0.05, 12 | capMU = 3000, 13 | delta.beta = 0.2, 14 | n.windows = 500 15 | ) 16 | } 17 | \arguments{ 18 | \item{sdf}{a \code{SigDF}} 19 | 20 | \item{return.pval}{whether to return p-values, instead of a SigDF} 21 | 22 | \item{pval.threshold}{minimum p-value to mask} 23 | 24 | \item{margin}{the percentile margin to define envelope, the smaller 25 | the value the more aggressive the masking.} 26 | 27 | \item{capMU}{the maximum M+U to search for intermediate betas} 28 | 29 | \item{delta.beta}{maximum beta value change from 30 | sheer background-dominated readings} 31 | 32 | \item{n.windows}{number of windows for smoothing} 33 | } 34 | \value{ 35 | a \code{SigDF} with mask added 36 | } 37 | \description{ 38 | ELiminate BAckground-dominated Reading (ELBAR) 39 | } 40 | \examples{ 41 | sdf <- sesameDataGet("EPIC.1.SigDF") 42 | sum(sdf$mask) 43 | sum(ELBAR(sdf)$mask) 44 | } 45 | -------------------------------------------------------------------------------- /man/inferInfiniumIChannel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/channel_inference.R 3 | \name{inferInfiniumIChannel} 4 | \alias{inferInfiniumIChannel} 5 | \title{Infer and reset color channel for Type-I probes instead of 6 | using what is specified in manifest. 
The results are stored to 7 | sdf@extra$IGG and sdf@extra$IRR slot.} 8 | \usage{ 9 | inferInfiniumIChannel( 10 | sdf, 11 | switch_failed = FALSE, 12 | mask_failed = FALSE, 13 | verbose = FALSE, 14 | summary = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{sdf}{a \code{SigDF}} 19 | 20 | \item{switch_failed}{whether to switch failed probes (default to FALSE)} 21 | 22 | \item{mask_failed}{whether to mask failed probes (default to FALSE)} 23 | 24 | \item{verbose}{whether to print correction summary} 25 | 26 | \item{summary}{return summarized numbers only.} 27 | } 28 | \value{ 29 | a \code{SigDF}, or numerics if summary == TRUE 30 | } 31 | \description{ 32 | IGG => Type-I green that is inferred to be green 33 | IRR => Type-I red that is inferred to be red 34 | } 35 | \examples{ 36 | 37 | sdf <- sesameDataGet('EPIC.1.SigDF') 38 | inferInfiniumIChannel(sdf) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/sesameQC_calcStats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_calcStats} 4 | \alias{sesameQC_calcStats} 5 | \title{Calculate QC statistics} 6 | \usage{ 7 | sesameQC_calcStats(sdf, funs = NULL) 8 | } 9 | \arguments{ 10 | \item{sdf}{a SigDF object} 11 | 12 | \item{funs}{a sesameQC_calcStats_* function or a list of them 13 | default to all functions. 
One can also use a string such as 14 | "detection" or c("detection", "intensity") to reduce typing} 15 | } 16 | \value{ 17 | a sesameQC object 18 | } 19 | \description{ 20 | It is a function to call one or multiple 21 | sesameQC_calcStats functions 22 | } 23 | \details{ 24 | currently supporting: detection, intensity, numProbes, channel, 25 | dyeBias, betas 26 | } 27 | \examples{ 28 | sesameDataCache() # if not done yet 29 | sdf <- sesameDataGet('EPIC.1.SigDF') 30 | sesameQC_calcStats(sdf) 31 | sesameQC_calcStats(sdf, "detection") 32 | sesameQC_calcStats(sdf, c("detection", "channel")) 33 | ## retrieve stats as a list 34 | sesameQC_getStats(sesameQC_calcStats(sdf, "detection")) 35 | ## or as data frames 36 | as.data.frame(sesameQC_calcStats(sdf, "detection")) 37 | 38 | } 39 | -------------------------------------------------------------------------------- /man/deidentify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deidentify.R 3 | \name{deIdentify} 4 | \alias{deIdentify} 5 | \title{De-identify IDATs by removing SNP probes} 6 | \usage{ 7 | deIdentify(path, out_path = NULL, snps = NULL, mft = NULL, randomize = FALSE) 8 | } 9 | \arguments{ 10 | \item{path}{input IDAT file} 11 | 12 | \item{out_path}{output IDAT file} 13 | 14 | \item{snps}{SNP definition, if not given, default to SNP probes} 15 | 16 | \item{mft}{sesame-compatible manifest if non-standard} 17 | 18 | \item{randomize}{whether to randomize the SNPs. if TRUE, 19 | randomize the signal intensities. one can use set.seed to 20 | reidentify the IDAT with the secret seed (see examples). 21 | If FALSE, this sets all SNP intensities to zero.} 22 | } 23 | \value{ 24 | NULL, changes made to the IDAT files 25 | } 26 | \description{ 27 | Mask SNP probe intensity mean by zero. 
28 | } 29 | \examples{ 30 | 31 | my_secret <- 13412084 32 | set.seed(my_secret) 33 | temp_out <- tempfile("test") 34 | deIdentify(system.file( 35 | "extdata", "4207113116_A_Grn.idat", package = "sesameData"), 36 | temp_out, randomize = TRUE) 37 | unlink(temp_out) 38 | } 39 | -------------------------------------------------------------------------------- /man/dyeBiasNL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dye_bias.R 3 | \name{dyeBiasNL} 4 | \alias{dyeBiasNL} 5 | \alias{dyeBiasCorrTypeINorm} 6 | \title{Dye bias correction by matching green and red to mid point} 7 | \usage{ 8 | dyeBiasNL(sdf, mask = TRUE, verbose = FALSE) 9 | 10 | dyeBiasCorrTypeINorm(sdf, mask = TRUE, verbose = FALSE) 11 | } 12 | \arguments{ 13 | \item{sdf}{a \code{SigDF}} 14 | 15 | \item{mask}{include masked probes in Infinium-I probes. No big difference is 16 | noted in practice. More probes are generally better.} 17 | 18 | \item{verbose}{print more messages} 19 | } 20 | \value{ 21 | a \code{SigDF} after dye bias correction. 22 | } 23 | \description{ 24 | This function compares the Type-I Red probes and Type-I Grn probes and 25 | generates a mapping to correct the signal of the two channels to the middle. 26 | The function takes one single \code{SigDF} and returns a \code{SigDF} 27 | with dye bias corrected. 
28 | } 29 | \examples{ 30 | sesameDataCache() # if not done yet 31 | sdf <- sesameDataGet('EPIC.1.SigDF') 32 | sdf.db <- dyeBiasNL(sdf) 33 | sdf <- sesameDataGet('EPIC.1.SigDF') 34 | sdf <- dyeBiasCorrTypeINorm(sdf) 35 | } 36 | -------------------------------------------------------------------------------- /man/twoCompsEst2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cell_composition.R 3 | \name{twoCompsEst2} 4 | \alias{twoCompsEst2} 5 | \title{Estimate the fraction of the 2nd component in a 2-component mixture} 6 | \usage{ 7 | twoCompsEst2( 8 | pop1, 9 | pop2, 10 | target, 11 | use.ave = TRUE, 12 | diff_1m2u = NULL, 13 | diff_1u2m = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{pop1}{Reference methylation level matrix for population 1} 18 | 19 | \item{pop2}{Reference methylation level matrix for population 2} 20 | 21 | \item{target}{Target methylation level matrix to be analyzed} 22 | 23 | \item{use.ave}{use population average in selecting differentially 24 | methylated probes} 25 | 26 | \item{diff_1m2u}{A vector of differentially methylated probes (methylated 27 | in population 1 but unmethylated in population 2)} 28 | 29 | \item{diff_1u2m}{A vector of differentially methylated probes (unmethylated 30 | in population 1 but methylated in population 2)} 31 | } 32 | \value{ 33 | Estimate of the 2nd component in the 2-component mixture 34 | } 35 | \description{ 36 | Estimate the fraction of the 2nd component in a 2-component mixture 37 | } 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SeSAMe - SEnsible Step-wise Analysis of Methylation data 2 | 3 | [![last commit](https://img.shields.io/github/last-commit/zwdzwd/sesame.svg?style=flat-square)](https://github.com/zwdzwd/sesame/commits/master) 4 | 5 | 
SeSAMe is an R package for processing Infinium DNA methylation data. SeSAMe currently supports EPIC, HM450 and HM27 platforms and dynamically generated manifests. 6 | 7 | To install from GitHub, 8 | ```R 9 | BiocManager::install("zwdzwd/sesame") 10 | ``` 11 | 12 | See the package [Home Page on Bioconductor](https://bioconductor.org/packages/release/bioc/html/sesame.html) and the [Developmental Branch](https://bioconductor.org/packages/devel/bioc/html/sesame.html). 13 | 14 | It also depends on a companion [data package](https://github.com/zwdzwd/sesameData) for annotation and example data. 15 | 16 | ## Bugs 17 | 18 | Bug reports are appreciated. Register issues at the SeSAMe [issue tracker](http://github.com/zwdzwd/sesame/issues). 19 | 20 | 21 | ## About 22 | 23 | Please cite and reference [SeSAMe: reducing artifactual detection of DNA methylation by Infinium BeadChips in genomic deletions](https://doi.org/10.1093/nar/gky691) for more details. 24 | -------------------------------------------------------------------------------- /man/prepSesame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/open.R 3 | \name{prepSesame} 4 | \alias{prepSesame} 5 | \title{Apply a chain of sesame preprocessing functions in an arbitrary order} 6 | \usage{ 7 | prepSesame(sdf, prep = "QCDPB", prep_args = NULL) 8 | } 9 | \arguments{ 10 | \item{sdf}{SigDF} 11 | 12 | \item{prep}{code that indicates preprocessing functions and their 13 | execution order (functions on the left are executed first).} 14 | 15 | \item{prep_args}{optional argument list to individual functions, e.g., 16 | prepSesame(sdf, prep_args=list(Q=list(mask_names = "design_issue"))) 17 | sets qualityMask(sdf, mask_names = "design_issue")} 18 | } 19 | \value{ 20 | SigDF 21 | } 22 | \description{ 23 | Notes on the order of operation: 24 | 1. 
qualityMask and inferSpecies should go before noob and pOOBAH, 25 | otherwise the background is too high because of Multi, 26 | uk and other probes 27 | 2. dyeBias correction needs to happen early 28 | 3. channel inference before dyebias 29 | 4. noob should happen last, pOOBAH before noob because noob modifies oob 30 | } 31 | \examples{ 32 | sdf <- sesameDataGet("MM285.1.SigDF") 33 | sdf1 <- prepSesame(sdf, "QCDPB") 34 | } 35 | -------------------------------------------------------------------------------- /man/sliceFileSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fileSet.R 3 | \name{sliceFileSet} 4 | \alias{sliceFileSet} 5 | \title{Slice a fileSet with samples and probes} 6 | \usage{ 7 | sliceFileSet(fset, samples = fset$samples, probes = fset$probes, memmax = 10^5) 8 | } 9 | \arguments{ 10 | \item{fset}{a sesame::fileSet, as obtained via readFileSet} 11 | 12 | \item{samples}{samples to query (default to all samples)} 13 | 14 | \item{probes}{probes to query (default to all probes)} 15 | 16 | \item{memmax}{maximum items to read from file to memory, to protect from 17 | accidental memory congestion.} 18 | } 19 | \value{ 20 | a numeric matrix of length(samples) columns and length(probes) rows 21 | } 22 | \description{ 23 | Slice a fileSet with samples and probes 24 | } 25 | \examples{ 26 | 27 | ## create two samples 28 | fset <- initFileSet('mybetas2', 'HM27', c('s1','s2')) 29 | 30 | ## a hypothetical numeric array (can be beta values, intensities etc) 31 | hypothetical <- setNames(runif(fset$n), fset$probes) 32 | 33 | ## map the numeric to file 34 | mapFileSet(fset, 's1', hypothetical) 35 | 36 | ## get data 37 | sliceFileSet(fset, 's1', 'cg00000292') 38 | 39 | } 40 | -------------------------------------------------------------------------------- /man/readIDATpair.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{readIDATpair} 4 | \alias{readIDATpair} 5 | \title{Import a pair of IDATs from one sample} 6 | \usage{ 7 | readIDATpair( 8 | prefix.path, 9 | manifest = NULL, 10 | platform = "", 11 | min_beads = NULL, 12 | controls = NULL, 13 | verbose = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{prefix.path}{sample prefix without _Grn.idat and _Red.idat} 18 | 19 | \item{manifest}{optional design manifest file} 20 | 21 | \item{platform}{EPIC, HM450 and HM27 etc.} 22 | 23 | \item{min_beads}{minimum bead number, probes with R or G smaller than 24 | this threshold will be masked. If NULL, no filtering based on bead 25 | count will be applied.} 26 | 27 | \item{controls}{optional control probe manifest file} 28 | 29 | \item{verbose}{be verbose? (FALSE)} 30 | } 31 | \value{ 32 | a \code{SigDF} 33 | } 34 | \description{ 35 | The function takes a prefix string that is shared by _Grn.idat 36 | and _Red.idat. The function returns a \code{SigDF}. 
37 | } 38 | \examples{ 39 | sdf <- readIDATpair(sub('_Grn.idat','',system.file( 40 | "extdata", "4207113116_A_Grn.idat", package = "sesameData"))) 41 | } 42 | -------------------------------------------------------------------------------- /man/searchIDATprefixes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{searchIDATprefixes} 4 | \alias{searchIDATprefixes} 5 | \title{Identify IDATs from a directory} 6 | \usage{ 7 | searchIDATprefixes(dir.name, recursive = TRUE, use.basename = TRUE) 8 | } 9 | \arguments{ 10 | \item{dir.name}{the directory containing the IDAT files.} 11 | 12 | \item{recursive}{search IDAT files recursively} 13 | 14 | \item{use.basename}{basename of each IDAT path is used as sample name 15 | This won't work in rare situation where there are duplicate IDAT files.} 16 | } 17 | \value{ 18 | the IDAT prefixes (a vector of character strings). 19 | } 20 | \description{ 21 | The input is the directory name as a string. The function identifies all 22 | the IDAT files under the directory. The function returns a vector of such 23 | IDAT prefixes under the directory. 
24 | } 25 | \examples{ 26 | ## only search what are directly under 27 | IDATprefixes <- searchIDATprefixes( 28 | system.file("extdata", "", package = "sesameData")) 29 | 30 | ## search files recursively is by default 31 | IDATprefixes <- searchIDATprefixes( 32 | system.file(package = "sesameData"), recursive=TRUE) 33 | } 34 | -------------------------------------------------------------------------------- /man/formatVCF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vcf.R 3 | \name{formatVCF} 4 | \alias{formatVCF} 5 | \title{Convert SNP from Infinium array to VCF file} 6 | \usage{ 7 | formatVCF(sdf, anno, vcf = NULL, genome = "hg38", verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{SigDF} 11 | 12 | \item{anno}{SNP variant annotation, available at 13 | https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno/EPIC 14 | EPIC.hg38.snp.tsv.gz} 15 | 16 | \item{vcf}{output VCF file path, if NULL output to console} 17 | 18 | \item{genome}{genome} 19 | 20 | \item{verbose}{print more messages} 21 | } 22 | \value{ 23 | VCF file. If vcf is NULL, a data.frame is output to 24 | console. The data.frame does not contain VCF headers. 25 | Note the output vcf is not sorted. 
26 | } 27 | \description{ 28 | Convert SNP from Infinium array to VCF file 29 | } 30 | \examples{ 31 | sesameDataCacheAll() # if not done yet 32 | sdf <- sesameDataGet('EPIC.1.SigDF') 33 | 34 | \dontrun{ 35 | ## download anno from 36 | ## http://zwdzwd.github.io/InfiniumAnnotation 37 | ## output to console 38 | anno = read_tsv(sesameAnno_download("EPICv2.hg38.snp.tsv.gz")) 39 | head(formatVCF(sdf, anno)) 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/inferSex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sex.R 3 | \name{inferSex} 4 | \alias{inferSex} 5 | \title{Infer sex.} 6 | \usage{ 7 | inferSex(betas, platform = NULL) 8 | } 9 | \arguments{ 10 | \item{betas}{DNA methylation beta} 11 | 12 | \item{platform}{EPICv2, EPIC, HM450, MM285, etc.} 13 | } 14 | \value{ 15 | Inferred sex of sample 16 | } 17 | \description{ 18 | We established our sex calling based on the CpGs hypermethylated in 19 | inactive X (XiH), CpGs hypomethylated in inactive X (XiL). 20 | } 21 | \details{ 22 | Note genotype abnormalities such as Dnmt genotype, 23 | XXY male (Klinefelter's), 24 | 45,X female (Turner's) can confuse the model sometimes. 25 | This function works on a single sample. 
26 | } 27 | \examples{ 28 | 29 | ## EPICv2 input 30 | betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]]) 31 | inferSex(betas) 32 | 33 | \dontrun{ 34 | ## MM285 input 35 | betas = openSesame(sesameDataGet("MM285.1.SigDF")) 36 | inferSex(betas) 37 | 38 | ## EPIC input 39 | betas = openSesame(sesameDataGet('EPIC.1.SigDF')) 40 | inferSex(betas) 41 | 42 | ## HM450 input 43 | betas = openSesame(sesameDataGet("HM450.10.SigDF")[[1]]) 44 | inferSex(betas) 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /man/sesame-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \docType{package} 4 | \name{sesame-package} 5 | \alias{sesame-package} 6 | \alias{sesame} 7 | \title{Analyze DNA methylation data} 8 | \value{ 9 | package 10 | } 11 | \description{ 12 | SEnsible and step-wise analysis of DNA methylation data 13 | } 14 | \details{ 15 | This package complements array functionalities that allow 16 | processing >10,000 samples in parallel on clusters. 
17 | } 18 | \examples{ 19 | 20 | sdf <- readIDATpair(sub('_Grn.idat','',system.file( 21 | 'extdata','4207113116_A_Grn.idat',package='sesameData'))) 22 | 23 | ## The OpenSesame pipeline 24 | betas <- openSesame(sdf) 25 | 26 | } 27 | \references{ 28 | Zhou W, Triche TJ, Laird PW, Shen H (2018) 29 | } 30 | \seealso{ 31 | Useful links: 32 | \itemize{ 33 | \item \url{https://github.com/zwdzwd/sesame} 34 | \item Report bugs at \url{https://github.com/zwdzwd/sesame/issues} 35 | } 36 | 37 | } 38 | \author{ 39 | Wanding Zhou \email{Wanding.Zhou@vai.org}, 40 | Hui Shen \email{Hui.Shen@vai.org} 41 | Timothy J Triche Jr \email{Tim.Triche@vai.org} 42 | } 43 | \keyword{DNAMethylation} 44 | \keyword{Microarray} 45 | \keyword{QualityControl} 46 | -------------------------------------------------------------------------------- /man/createUCSCtrack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/track.R 3 | \name{createUCSCtrack} 4 | \alias{createUCSCtrack} 5 | \title{Turn beta values into a UCSC browser track} 6 | \usage{ 7 | createUCSCtrack(betas, output = NULL, platform = "HM450", genome = "hg38") 8 | } 9 | \arguments{ 10 | \item{betas}{a named numeric vector} 11 | 12 | \item{output}{output file name} 13 | 14 | \item{platform}{HM450, EPIC etc.} 15 | 16 | \item{genome}{hg38, mm10, ..., will infer if not given. 17 | For additional mapping, download the GRanges object from 18 | http://zwdzwd.github.io/InfiniumAnnotation 19 | and provide the following argument 20 | ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),... 
21 | to this function.} 22 | } 23 | \value{ 24 | when output is null, return a data.frame, otherwise NULL 25 | } 26 | \description{ 27 | Turn beta values into a UCSC browser track 28 | } 29 | \examples{ 30 | 31 | betas.tissue <- sesameDataGet('HM450.1.TCGA.PAAD')$betas 32 | ## add output to create an actual file 33 | df <- createUCSCtrack(betas.tissue) 34 | 35 | ## to convert to bigBed 36 | ## sort -k1,1 -k2,2n output.bed >output_sorted.bed 37 | ## bedToBigBed output_sorted.bed hg38.chrom output.bb 38 | } 39 | -------------------------------------------------------------------------------- /man/pOOBAH.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detection.R 3 | \name{pOOBAH} 4 | \alias{pOOBAH} 5 | \title{Detection P-value based on ECDF of out-of-band signal} 6 | \usage{ 7 | pOOBAH( 8 | sdf, 9 | return.pval = FALSE, 10 | combine.neg = TRUE, 11 | pval.threshold = 0.05, 12 | verbose = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{sdf}{a \code{SigDF}} 17 | 18 | \item{return.pval}{whether to return p-values, instead of a 19 | masked \code{SigDF}} 20 | 21 | \item{combine.neg}{whether to combine negative control probes with 22 | the out-of-band probes in simulating the signal background} 23 | 24 | \item{pval.threshold}{minimum p-value to mask} 25 | 26 | \item{verbose}{print more messages} 27 | } 28 | \value{ 29 | a \code{SigDF}, or a p-value vector if return.pval is TRUE 30 | } 31 | \description{ 32 | aka pOOBAH (p-vals by Out-Of-Band Array Hybridization) 33 | } 34 | \details{ 35 | The function takes a \code{SigDF} as input, computes detection p-value 36 | using out-of-band probes empirical distribution and returns a new 37 | \code{SigDF} with an updated mask slot. 
38 | } 39 | \examples{ 40 | sdf <- sesameDataGet("EPIC.1.SigDF") 41 | sum(sdf$mask) 42 | sum(pOOBAH(sdf)$mask) 43 | 44 | } 45 | -------------------------------------------------------------------------------- /man/sesameAnno_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesameAnno.R 3 | \name{sesameAnno_download} 4 | \alias{sesameAnno_download} 5 | \title{Download SeSAMe annotation files} 6 | \usage{ 7 | sesameAnno_download(url, destfile = tempfile(basename(url))) 8 | } 9 | \arguments{ 10 | \item{url}{url or title of the annotation file} 11 | 12 | \item{destfile}{download to this file, a temp file if unspecified} 13 | } 14 | \value{ 15 | the path to downloaded file 16 | } 17 | \description{ 18 | see also 19 | http://zwdzwd.github.io/InfiniumAnnotation 20 | } 21 | \details{ 22 | This function acts similarly as sesameAnno_get except that it directly 23 | download files without invoking BiocFileCache. This is needed in some 24 | situation because BiocFileCache may change the file name and downstream 25 | program may depend on the correct file names. 
It also lets you download 26 | files in a cleaner way without routing through BiocFileCache 27 | } 28 | \examples{ 29 | 30 | \dontrun{ 31 | ## avoid testing as this function uses external host 32 | sesameAnno_download("Test/3999492009_R01C01_Grn.idat") 33 | sesameAnno_download("EPIC.hg38.manifest.tsv.gz") 34 | sesameAnno_download("EPIC.hg38.snp.tsv.gz") 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/sesameQC_plotIntensVsBetas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/QC.R 3 | \name{sesameQC_plotIntensVsBetas} 4 | \alias{sesameQC_plotIntensVsBetas} 5 | \title{Plot Total Signal Intensities vs Beta Values 6 | This plot is helpful in revealing the extent of signal background 7 | and dye bias.} 8 | \usage{ 9 | sesameQC_plotIntensVsBetas( 10 | sdf, 11 | mask = TRUE, 12 | use_max = FALSE, 13 | intens.range = c(5, 15), 14 | pal = "whiteturbo", 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{sdf}{a \code{SigDF}} 20 | 21 | \item{mask}{whether to remove probes that are masked} 22 | 23 | \item{use_max}{to use max(M,U) or M+U} 24 | 25 | \item{intens.range}{plot range of signal intensity} 26 | 27 | \item{pal}{color palette, whiteturbo, whiteblack, whitejet} 28 | 29 | \item{...}{additional arguments to smoothScatter} 30 | } 31 | \value{ 32 | create a total signal intensity vs beta value plot 33 | } 34 | \description{ 35 | Plot Total Signal Intensities vs Beta Values 36 | This plot is helpful in revealing the extent of signal background 37 | and dye bias. 
38 | } 39 | \examples{ 40 | sesameDataCache() # if not done yet 41 | sdf <- sesameDataGet('EPIC.1.SigDF') 42 | sesameQC_plotIntensVsBetas(sdf) 43 | } 44 | -------------------------------------------------------------------------------- /man/compareReference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tissue.R 3 | \name{compareReference} 4 | \alias{compareReference} 5 | \title{Compare array data with references (e.g., tissue, cell types)} 6 | \usage{ 7 | compareReference( 8 | ref, 9 | betas = NULL, 10 | stop.points = NULL, 11 | query_width = 0.3, 12 | show_sample_names = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{ref}{the reference beta values in SummarizedExperiment. 17 | One can download them from the sesameData package. See examples.} 18 | 19 | \item{betas}{matrix of betas for the target sample 20 | This argument is optional. If not given, only the reference will be shown.} 21 | 22 | \item{stop.points}{stop points for the color palette. 23 | Default to blue, yellow.} 24 | 25 | \item{query_width}{the width of the query beta value matrix} 26 | 27 | \item{show_sample_names}{whether to show sample names (default: FALSE)} 28 | } 29 | \value{ 30 | grid object that contrast the target sample with 31 | references. 
32 | } 33 | \description{ 34 | Compare array data with references (e.g., tissue, cell types) 35 | } 36 | \examples{ 37 | 38 | sesameDataCache() # if not done yet 39 | compareReference(sesameDataGet("MM285.tissueSignature")) 40 | sesameDataGet_resetEnv() 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/convertProbeID.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mLiftOver.R 3 | \name{convertProbeID} 4 | \alias{convertProbeID} 5 | \title{Convert Probe ID} 6 | \usage{ 7 | convertProbeID( 8 | x, 9 | target_platform, 10 | source_platform = NULL, 11 | mapping = NULL, 12 | target_uniq = TRUE, 13 | include_new = FALSE, 14 | include_old = FALSE, 15 | return_mapping = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{source probe IDs} 20 | 21 | \item{target_platform}{the platform to take the data to} 22 | 23 | \item{source_platform}{optional source platform} 24 | 25 | \item{mapping}{a liftOver mapping file. Typically this file 26 | contains empirical evidence whether a probe mapping is reliable. 27 | If given, probe ID-based mapping will be skipped. 
This is to 28 | perform more stringent probe ID mapping.} 29 | 30 | \item{target_uniq}{whether the target Probe ID should be kept unique.} 31 | 32 | \item{include_new}{if true, include mapping of added probes} 33 | 34 | \item{include_old}{if true, include mapping of deleted probes} 35 | 36 | \item{return_mapping}{return mapping table, instead of the target IDs.} 37 | } 38 | \value{ 39 | mapped probe IDs, or mapping table if return_mapping = T 40 | } 41 | \description{ 42 | Convert Probe ID 43 | } 44 | -------------------------------------------------------------------------------- /man/assemble_plots.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualizeHelper.R 3 | \name{assemble_plots} 4 | \alias{assemble_plots} 5 | \title{assemble plots} 6 | \usage{ 7 | assemble_plots( 8 | betas, 9 | txns, 10 | probes, 11 | plt.txns, 12 | plt.mapLines, 13 | plt.cytoband, 14 | heat.height = NULL, 15 | mapLine.height = 0.2, 16 | show.probeNames = TRUE, 17 | show.samples.n = NULL, 18 | show.sampleNames = TRUE, 19 | sample.name.fontsize = 10, 20 | dmin = 0, 21 | dmax = 1 22 | ) 23 | } 24 | \arguments{ 25 | \item{betas}{beta value} 26 | 27 | \item{txns}{transcripts GRanges} 28 | 29 | \item{probes}{probe GRanges} 30 | 31 | \item{plt.txns}{transcripts plot objects} 32 | 33 | \item{plt.mapLines}{map line plot objects} 34 | 35 | \item{plt.cytoband}{cytoband plot objects} 36 | 37 | \item{heat.height}{heatmap height (auto inferred based on rows)} 38 | 39 | \item{mapLine.height}{height of the map lines} 40 | 41 | \item{show.probeNames}{whether to show probe names} 42 | 43 | \item{show.samples.n}{number of samples to show (default: all)} 44 | 45 | \item{show.sampleNames}{whether to show sample names} 46 | 47 | \item{sample.name.fontsize}{sample name font size} 48 | 49 | \item{dmin}{data min} 50 | 51 | \item{dmax}{data max} 52 | } 53 | \value{ 54 | a grid object 55 | } 
56 | \description{ 57 | assemble plots 58 | } 59 | -------------------------------------------------------------------------------- /man/estimateLeukocyte.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cell_composition.R 3 | \name{estimateLeukocyte} 4 | \alias{estimateLeukocyte} 5 | \title{Estimate leukocyte fraction using a two-component model} 6 | \usage{ 7 | estimateLeukocyte( 8 | betas.tissue, 9 | betas.leuko = NULL, 10 | betas.tumor = NULL, 11 | platform = c("EPIC", "HM450", "HM27") 12 | ) 13 | } 14 | \arguments{ 15 | \item{betas.tissue}{tissue beta value matrix (#probes X #samples)} 16 | 17 | \item{betas.leuko}{leukocyte beta value matrix, 18 | if missing, use the SeSAMe default by infinium platform} 19 | 20 | \item{betas.tumor}{optional, tumor beta value matrix} 21 | 22 | \item{platform}{"HM450", "HM27" or "EPIC"} 23 | } 24 | \value{ 25 | leukocyte estimate, a numeric vector 26 | } 27 | \description{ 28 | The method assumes only two components in the mixture: the leukocyte 29 | component and the target tissue component. The function takes the beta 30 | values matrix of the target tissue and the beta value matrix of the 31 | leukocyte. Both matrices have probes on the row and samples on the column. 32 | Row names should have probe IDs from the platform. The function outputs 33 | a single numeric describing the fraction of leukocyte. 
34 | } 35 | \examples{ 36 | 37 | betas.tissue <- sesameDataGet('HM450.1.TCGA.PAAD')$betas 38 | estimateLeukocyte(betas.tissue) 39 | sesameDataGet_resetEnv() 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/visualizeGene.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualize.R 3 | \name{visualizeGene} 4 | \alias{visualizeGene} 5 | \title{Visualize Gene} 6 | \usage{ 7 | visualizeGene( 8 | gene_name, 9 | betas, 10 | platform = NULL, 11 | genome = NULL, 12 | upstream = 2000, 13 | dwstream = 2000, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{gene_name}{gene name} 19 | 20 | \item{betas}{beta value matrix (row: probes, column: samples)} 21 | 22 | \item{platform}{HM450, EPIC, or MM285 (default)} 23 | 24 | \item{genome}{hg19, hg38, or mm10 (default)} 25 | 26 | \item{upstream}{distance to extend upstream} 27 | 28 | \item{dwstream}{distance to extend downstream} 29 | 30 | \item{...}{additional options, see visualizeRegion, assemble_plots} 31 | } 32 | \value{ 33 | None 34 | } 35 | \description{ 36 | Visualize the beta value in heatmaps for a given gene. The function takes 37 | a gene name which is taken from the UCSC refGene. It searches all the 38 | transcripts for the given gene and optionally extend the span by certain 39 | number of base pairs. The function also takes a beta value matrix with 40 | sample names on the columns and probe names on the rows. The function can 41 | also work on different genome builds (default to hg38, can be hg19). 
42 | } 43 | \examples{ 44 | betas <- sesameDataGet('HM450.76.TCGA.matched')$betas 45 | visualizeGene('ADA', betas, 'HM450') 46 | } 47 | -------------------------------------------------------------------------------- /man/inferSpecies.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/species.R 3 | \name{inferSpecies} 4 | \alias{inferSpecies} 5 | \title{Infer Species} 6 | \usage{ 7 | inferSpecies( 8 | sdf, 9 | topN = 1000, 10 | threshold.pos = 0.01, 11 | threshold.neg = 0.1, 12 | return.auc = FALSE, 13 | return.species = FALSE, 14 | verbose = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{sdf}{a \code{SigDF}} 19 | 20 | \item{topN}{Top n positive and negative probes used to infer species. 21 | Increasing this number can sometimes improve accuracy (DEFAULT: 1000)} 22 | 23 | \item{threshold.pos}{pvalue < threshold.pos are considered positive 24 | (default: 0.01).} 25 | 26 | \item{threshold.neg}{pvalue > threshold.neg are considered negative 27 | (default: 0.1).} 28 | 29 | \item{return.auc}{return AUC calculated, overrides return.species} 30 | 31 | \item{return.species}{return a string to represent species} 32 | 33 | \item{verbose}{print more messages} 34 | } 35 | \value{ 36 | a SigDF 37 | } 38 | \description{ 39 | We infer species based on probes pvalues and alignment score.
40 | AUC was calculated for each species; y_true is 1 or 0 41 | for pval < threshold.pos or pval > threshold.neg, respectively. 42 | } 43 | \examples{ 44 | sdf <- sesameDataGet("MM285.1.SigDF") 45 | sdf <- inferSpecies(sdf) 46 | 47 | ## all available species 48 | all_species <- names(sesameDataGet(sprintf( 49 | "\%s.addressSpecies", sdfPlatform(sdf)))$species) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/bisConversionControl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sesame.R 3 | \name{bisConversionControl} 4 | \alias{bisConversionControl} 5 | \title{Compute internal bisulfite conversion control} 6 | \usage{ 7 | bisConversionControl(sdf, extR = NULL, extA = NULL, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdf}{a SigDF} 11 | 12 | \item{extR}{a vector of probe IDs for Infinium-I probes that extend to 13 | converted A} 14 | 15 | \item{extA}{a vector of probe IDs for Infinium-I probes that extend to 16 | original A} 17 | 18 | \item{verbose}{print more messages} 19 | } 20 | \value{ 21 | GCT score (the higher, the more incomplete conversion) 22 | } 23 | \description{ 24 | Compute GCT score for internal bisulfite conversion control. The function 25 | takes a \code{SigDF} as input. The higher the GCT score, the more likely 26 | the conversion is incomplete.
27 | } 28 | \examples{ 29 | sesameDataCache() # if not done yet 30 | sdf <- sesameDataGet('EPIC.1.SigDF') 31 | bisConversionControl(sdf) 32 | 33 | ## For more recent platforms like EPICv2, MSA: 34 | ## One need extR and extA of other arrays using the sesameAnno 35 | \dontrun{ 36 | mft = sesameAnno_buildManifestGRanges(sprintf( 37 | "\%s/EPICv2/EPICv2.hg38.manifest.tsv.gz", 38 | "https://github.com/zhou-lab/InfiniumAnnotationV1/raw/main/Anno/"), 39 | columns="nextBase") 40 | extR = names(mft)[!is.na(mft$nextBase) & mft$nextBase=="R"] 41 | extA = names(mft)[!is.na(mft$nextBase) & mft$nextBase=="A"] 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /man/visualizeProbes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualize.R 3 | \name{visualizeProbes} 4 | \alias{visualizeProbes} 5 | \title{Visualize Region that Contains the Specified Probes} 6 | \usage{ 7 | visualizeProbes( 8 | probeNames, 9 | betas, 10 | platform = NULL, 11 | genome = NULL, 12 | upstream = 1000, 13 | dwstream = 1000, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{probeNames}{probe names} 19 | 20 | \item{betas}{beta value matrix (row: probes, column: samples)} 21 | 22 | \item{platform}{HM450, EPIC or MM285 (default)} 23 | 24 | \item{genome}{hg19, hg38 or mm10 (default)} 25 | 26 | \item{upstream}{distance to extend upstream} 27 | 28 | \item{dwstream}{distance to extend downstream} 29 | 30 | \item{...}{additional options, see visualizeRegion and assemble_plots} 31 | } 32 | \value{ 33 | None 34 | } 35 | \description{ 36 | Visualize the beta value in heatmaps for the genomic region containing 37 | specified probes. The function works only if specified probes can be 38 | spanned by a single genomic region. The region can cover more probes 39 | than specified. Hence the plotting heatmap may encompass more probes. 
40 | The function takes as input a string vector of probe IDs (cg/ch/rs-numbers). 41 | if draw is FALSE, the function returns the subset beta value matrix 42 | otherwise it returns the grid graphics object. 43 | } 44 | \examples{ 45 | betas <- sesameDataGet('HM450.76.TCGA.matched')$betas 46 | visualizeProbes(c('cg22316575', 'cg16084772', 'cg20622019'), betas, 'HM450') 47 | } 48 | -------------------------------------------------------------------------------- /man/DML.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{DML} 4 | \alias{DML} 5 | \title{Test differential methylation on each locus} 6 | \usage{ 7 | DML(betas, fm, meta = NULL, BPPARAM = SerialParam()) 8 | } 9 | \arguments{ 10 | \item{betas}{beta values, matrix or SummarizedExperiment 11 | rows are probes and columns are samples.} 12 | 13 | \item{fm}{formula} 14 | 15 | \item{meta}{data frame for sample information, column names 16 | are predictor variables (e.g., sex, age, treatment, tumor/normal etc) 17 | and are referenced in formula. Rows are samples. 18 | When the betas argument is a SummarizedExperiment object, this 19 | is ignored. colData(betas) will be used instead. The row order of the 20 | data frame must match the column order of the beta value matrix.} 21 | 22 | \item{BPPARAM}{number of cores for parallel processing, default to 23 | SerialParam() 24 | Use MulticoreParam(mc.cores) for parallel processing. 25 | For Windows, try DoparParam or SnowParam.} 26 | } 27 | \value{ 28 | a list of test summaries, summary.lm objects 29 | } 30 | \description{ 31 | The function takes a beta value matrix with probes on the rows and 32 | samples on the columns. It also takes a sample information data frame 33 | (meta) and formula for testing. The function outputs a list of 34 | coefficient tables for each factor tested. 
35 | } 36 | \examples{ 37 | sesameDataCache() # in case not done yet 38 | data <- sesameDataGet('HM450.76.TCGA.matched') 39 | smry <- DML(data$betas[1:1000,], ~type, meta=data$sampleInfo) 40 | 41 | sesameDataGet_resetEnv() 42 | } 43 | -------------------------------------------------------------------------------- /man/openSesame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/open.R 3 | \name{openSesame} 4 | \alias{openSesame} 5 | \title{The openSesame pipeline} 6 | \usage{ 7 | openSesame( 8 | x, 9 | prep = "QCDPB", 10 | prep_args = NULL, 11 | manifest = NULL, 12 | func = getBetas, 13 | BPPARAM = SerialParam(), 14 | platform = "", 15 | min_beads = 1, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{SigDF(s), IDAT prefix(es)} 21 | 22 | \item{prep}{preprocessing code, see ?prepSesame} 23 | 24 | \item{prep_args}{optional preprocessing argument list, see ?prepSesame} 25 | 26 | \item{manifest}{optional dynamic manifest} 27 | 28 | \item{func}{either getBetas or getAFs, if NULL, then return SigDF list} 29 | 30 | \item{BPPARAM}{get parallel with MulticoreParam(n)} 31 | 32 | \item{platform}{optional platform string} 33 | 34 | \item{min_beads}{minimum bead number, probes with R or G smaller than 35 | this threshold will be masked. If NULL, no filtering based on bead 36 | count will be applied. Default to 1.} 37 | 38 | \item{...}{parameters to getBetas} 39 | } 40 | \value{ 41 | a numeric vector for processed beta values 42 | } 43 | \description{ 44 | This function is a simple wrapper of noob + nonlinear dye bias 45 | correction + pOOBAH masking. 46 | } 47 | \details{ 48 | Please use mask=FALSE to turn off masking. 49 | 50 | If the input is an IDAT prefix or a \code{SigDF}, the output is 51 | the beta value numerics. 
52 | } 53 | \examples{ 54 | 55 | in_dir <- system.file("extdata", "", package = "sesameData") 56 | betas <- openSesame(in_dir) 57 | ## or 58 | IDATprefixes <- searchIDATprefixes(in_dir) 59 | betas <- openSesame(IDATprefixes) 60 | 61 | } 62 | -------------------------------------------------------------------------------- /man/predictAge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/age.R 3 | \name{predictAge} 4 | \alias{predictAge} 5 | \title{Predict age using linear models} 6 | \usage{ 7 | predictAge(betas, model, na_fallback = FALSE, min_nonna = 10) 8 | } 9 | \arguments{ 10 | \item{betas}{a probeID-named vector of beta values} 11 | 12 | \item{model}{a model object from sesameDataGet. should contain 13 | param, intercept, response2age. default to the Horvath353 model.} 14 | 15 | \item{na_fallback}{use fall back values if na} 16 | 17 | \item{min_nonna}{the minimum number of non-NA values.} 18 | } 19 | \value{ 20 | age in the unit specified in the model (usually in year, but 21 | sometimes can be month, like in the mouse clocks). 22 | } 23 | \description{ 24 | The function takes a named numeric vector of beta values. The name attribute 25 | contains the probe ID (cg, ch or rs IDs). The function looks for overlapping 26 | probes and estimate age using different models. 27 | } 28 | \details{ 29 | You can get the models such as the Horvath aging model (Horvath 2013 30 | Genome Biology) from sesameDataGet. The function outputs a single numeric 31 | of age in years. 
32 | 33 | Here are some built-in age models: 34 | Anno/HM450/Clock_Horvath353.rds 35 | Anno/HM450/Clock_Hannum.rds 36 | Anno/HM450/Clock_SkinBlood.rds 37 | Anno/EPIC/Clock_PhenoAge.rds 38 | Anno/MM285/Clock_Zhou347.rds 39 | see vignette inferences.html#Age__Epigenetic_Clock for details 40 | } 41 | \examples{ 42 | betas <- sesameDataGet('HM450.1.TCGA.PAAD')$betas 43 | \dontrun{ 44 | ## download age models from 45 | ## https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno 46 | ## e.g., Anno/HM450/Clock_Horvath353.rds 47 | predictAge(betas, model) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /R/GEO.R: -------------------------------------------------------------------------------- 1 | 2 | #' Convert signal M and U to SigDF 3 | #' 4 | #' This overcomes the issue of missing IDAT files. However, 5 | #' out-of-band signals will be missing or faked (sampled from a 6 | #' normal distribution). 7 | #' 8 | #' @param sigM methylated signal, a numeric vector 9 | #' @param sigU unmethylated signal, a numeric vector 10 | #' @param Probe_IDs probe ID vector 11 | #' @param oob.mean assumed mean for out-of-band signals 12 | #' @param oob.sd assumed standard deviation for out-of-band signals 13 | #' @param platform platform code, will infer if not given 14 | #' @return SigDF 15 | #' @examples 16 | #' sigM <- c(11436, 6068, 2864) 17 | #' sigU <- c(1476, 804, 393) 18 | #' probes <- c("cg07881041", "cg23229610", "cg03513874") 19 | #' sdf <- parseGEOsignalMU(sigM, sigU, probes, platform = "EPIC") 20 | #' @export 21 | parseGEOsignalMU <- function( 22 | sigM, sigU, Probe_IDs, oob.mean = 500, oob.sd = 300, platform = NULL) { 23 | 24 | if (is.null(platform)) { 25 | platform <- inferPlatformFromProbeIDs(Probe_IDs) } 26 | addr <- sesameDataGet(paste0(platform, ".address"))$ordering 27 | M <- sigM[match(addr$Probe_ID, Probe_IDs)] 28 | U <- sigU[match(addr$Probe_ID, Probe_IDs)] 29 | col <- ifelse(is.na(addr$col), "2",
as.character(addr$col)) 30 | oobs <- pmax(50,rnorm(length(col), mean = oob.mean, sd = oob.sd)) 31 | MG <- ifelse(col == "2", NA, ifelse(col == "G", M, oobs)) 32 | MR <- ifelse(col == "2", NA, ifelse(col == "R", M, oobs)) 33 | UG <- ifelse(col == "2", M, ifelse(col == "G", U, oobs)) 34 | UR <- ifelse(col == "2", U, ifelse(col == "R", U, oobs)) 35 | sdf <- data.frame(Probe_ID = addr$Probe_ID, 36 | MG = MG, MR = MR, UG = UG, UR = UR, 37 | col = factor(col, levels=c("G","R","2")), mask = addr$mask) 38 | class(sdf) <- c("SigDF", class(sdf)) 39 | sdf 40 | } 41 | 42 | -------------------------------------------------------------------------------- /man/inferTissue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tissue.R 3 | \name{inferTissue} 4 | \alias{inferTissue} 5 | \title{inferTissue infers the tissue of a single sample (as identified through 6 | the branchIDs in the row data of the reference) by reporting independent 7 | composition through cell type deconvolution.} 8 | \usage{ 9 | inferTissue( 10 | betas, 11 | reference = NULL, 12 | platform = NULL, 13 | abs_delta_beta_min = 0.3, 14 | auc_min = 0.99, 15 | coverage_min = 0.8, 16 | topN = 15 17 | ) 18 | } 19 | \arguments{ 20 | \item{betas}{Named vector with probes and their corresponding beta value 21 | measurement} 22 | 23 | \item{reference}{Summarized Experiment with either hypomethylated or 24 | hypermethylated probe selection (row data), sample selection (column data), 25 | meta data, and the betas (assay)} 26 | 27 | \item{platform}{String representing the array type of the betas and 28 | reference} 29 | 30 | \item{abs_delta_beta_min}{Numerical value indicating the absolute minimum 31 | required delta beta for the probe selection criteria} 32 | 33 | \item{auc_min}{Numeric value corresponding to the minimum AUC value 34 | required for a probe to be considered} 35 | 36 | 
\item{coverage_min}{Numeric value corresponding to the minimum coverage 37 | requirement for a probe to be considered. Coverage is defined here as the 38 | proportion of samples without an NA value at a given probe.} 39 | 40 | \item{topN}{number of probes to at most use for each branch} 41 | } 42 | \value{ 43 | inferred tissue as a string 44 | } 45 | \description{ 46 | inferTissue infers the tissue of a single sample (as identified through 47 | the branchIDs in the row data of the reference) by reporting independent 48 | composition through cell type deconvolution. 49 | } 50 | \examples{ 51 | sesameDataCache() # if not done yet 52 | sdf <- sesameDataGet("MM285.1.SigDF") 53 | inferTissue(getBetas(dyeBiasNL(noob(sdf)))) 54 | 55 | sesameDataGet_resetEnv() 56 | 57 | } 58 | -------------------------------------------------------------------------------- /R/track.R: -------------------------------------------------------------------------------- 1 | 2 | #' Turn beta values into a UCSC browser track 3 | #' 4 | #' @param betas a named numeric vector 5 | #' @param output output file name 6 | #' @param platform HM450, EPIC etc. 7 | #' @param genome hg38, mm10, ..., will infer if not given. 8 | #' For additional mapping, download the GRanges object from 9 | #' http://zwdzwd.github.io/InfiniumAnnotation 10 | #' and provide the following argument 11 | #' ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),... 12 | #' to this function. 
13 | #' @return when output is null, return a data.frame, otherwise NULL 14 | #' @importFrom utils write.table 15 | #' @examples 16 | #' 17 | #' betas.tissue <- sesameDataGet('HM450.1.TCGA.PAAD')$betas 18 | #' ## add output to create an actual file 19 | #' df <- createUCSCtrack(betas.tissue) 20 | #' 21 | #' ## to convert to bigBed 22 | #' ## sort -k1,1 -k2,2n output.bed >output_sorted.bed 23 | #' ## bedToBigBed output_sorted.bed hg38.chrom output.bb 24 | #' @export 25 | createUCSCtrack <- function( 26 | betas, output=NULL, platform='HM450', genome='hg38') { 27 | 28 | probeInfo <- sesameData_getManifestGRanges(platform, genome) 29 | 30 | betas <- betas[names(probeInfo)] 31 | df <- data.frame( 32 | chrm = GenomicRanges::seqnames(probeInfo), 33 | beg = GenomicRanges::start(probeInfo)-1, 34 | end = GenomicRanges::end(probeInfo), 35 | name = names(probeInfo), 36 | score = ifelse(is.na(betas), 0, as.integer(betas*1000)), 37 | strand = GenomicRanges::strand(probeInfo), 38 | thickStart = GenomicRanges::start(probeInfo)-1, 39 | thickEnd = GenomicRanges::end(probeInfo), 40 | itemRgb = ifelse( 41 | is.na(betas), '0,0,0', 42 | ifelse( 43 | betas < 0.3, '0,0,255', # blue 44 | ifelse( 45 | betas > 0.7, '255,0,0', # red 46 | '50,150,0'))) # green 47 | ) 48 | 49 | if (is.null(output)) 50 | df 51 | else 52 | write.table( 53 | df, file=output, col.names=FALSE, 54 | row.names=FALSE, quote=FALSE, sep='\t') 55 | } 56 | -------------------------------------------------------------------------------- /R/feature_selection.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | getSignatureU <- function( 4 | betas, grouping, u_max = 0.2, m_min = 0.7, 5 | max_na_in = 0, max_na_out = 0) { 6 | 7 | groups <- unique(grouping) 8 | is_na <- is.na(betas) 9 | sigs <- lapply(groups, function(g) { 10 | m1 <- rowMeans(betas[,grouping==g], na.rm=TRUE) < u_max 11 | m2 <- rowMeans(betas[,grouping!=g], na.rm=TRUE) > m_min 12 | ps1 <- rowSums(is_na[,grouping==g]) <= 
max_na_in 13 | ps2 <- rowSums(is_na[,grouping!=g]) <= max_na_out 14 | names(which(m1 & m2 & ps1 & ps2)) }) 15 | names(sigs) <- groups 16 | sigs 17 | } 18 | ## Per group, the n probes with the lowest (in-group - out-group) mean beta. 19 | getSignatureUTop <- function( 20 | betas, grouping, n=100, 21 | max_na_in = 0, max_na_out = 0) { 22 | ## max_na_in/max_na_out cap the NA counts allowed inside/outside the group 23 | groups <- unique(grouping) 24 | is_na <- is.na(betas) 25 | sigs <- lapply(groups, function(g) { 26 | mean1 <- rowMeans(betas[,grouping == g], na.rm=TRUE) 27 | mean0 <- rowMeans(betas[,grouping != g], na.rm=TRUE) 28 | ps1 <- rowSums(is_na[,grouping == g]) <= max_na_in 29 | ps2 <- rowSums(is_na[,grouping != g]) <= max_na_out # count NAs first, then compare 30 | head(names(sort((mean1 - mean0)[ps1 & ps2])), n=n) 31 | }) 32 | names(sigs) <- groups 33 | sigs 34 | } 35 | 36 | clusterWithSignature <- function(betas, grouping, sigs) { 37 | pbs <- do.call(c, lapply(names(sigs), function(g) { 38 | if (length(sigs[[g]]) > 5) 39 | rownames(row.cluster(betas[intersect( 40 | rownames(betas), sigs[[g]]),])$mat) 41 | else 42 | NULL 43 | })) 44 | spl <- do.call(c, lapply(names(sigs), function(g) { 45 | colnames(column.cluster(betas[,grouping == g])$mat) 46 | })) 47 | betas[pbs, spl] 48 | } 49 | 50 | clusterWithSampleGrouping <- function( 51 | betas, grouping, groups=unique(grouping)) { 52 | 53 | do.call(cbind, lapply(groups, function(g) { 54 | column.cluster(betas[,grouping == g])$mat 55 | })) 56 | } 57 | 58 | clusterWithinRowGroups <- function(betas, sigs) { 59 | do.call(rbind, lapply(sigs, function(x) { 60 | row.cluster(betas[x,])$mat 61 | })) 62 | } 63 | -------------------------------------------------------------------------------- /man/DMLpredict.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{DMLpredict} 4 | \alias{DMLpredict} 5 | \title{Predict new data from DML} 6 | \usage{ 7 | DMLpredict(betas, fm, pred = NULL, meta = NULL, BPPARAM = SerialParam()) 8 | } 9 | \arguments{ 10 | \item{betas}{beta values,
matrix or SummarizedExperiment 11 | rows are probes and columns are samples.} 12 | 13 | \item{fm}{formula} 14 | 15 | \item{pred}{new data for prediction, useful for studying effect size. 16 | This argument is a data.frame to specify new data. 17 | If the argument is NULL, all combinations of all contrasts will be used 18 | as input. It might not work if there is a continuous variable input. 19 | One may need to explicitly provide the input in a data frame.} 20 | 21 | \item{meta}{data frame for sample information, column names 22 | are predictor variables (e.g., sex, age, treatment, tumor/normal etc) 23 | and are referenced in formula. Rows are samples. 24 | When the betas argument is a SummarizedExperiment object, this 25 | is ignored. colData(betas) will be used instead.} 26 | 27 | \item{BPPARAM}{number of cores for parallel processing, default to 28 | SerialParam() 29 | Use MulticoreParam(mc.cores) for parallel processing. 30 | For Windows, try DoparParam or SnowParam.} 31 | } 32 | \value{ 33 | a SummarizedExperiment of predictions. The colData describes 34 | the input of the prediction. 35 | } 36 | \description{ 37 | This function is also important for investigating factor interactions. 38 | } 39 | \examples{ 40 | data <- sesameDataGet('HM450.76.TCGA.matched') 41 | 42 | ## use all contrasts as new input 43 | res <- DMLpredict(data$betas[1:10,], ~type, meta=data$sampleInfo) 44 | 45 | ## specify new input 46 | res <- DMLpredict(data$betas[1:10,], ~type, meta=data$sampleInfo, 47 | pred = data.frame(type=c("Normal","Tumour"))) 48 | 49 | ## note that the prediction needs to be a factor of the same 50 | ## level structure as the original training data. 
51 | pred = data.frame(type=factor(c("Normal"), levels=c("Normal","Tumour"))) 52 | res <- DMLpredict(data$betas[1:10,], ~type, 53 | meta=data$sampleInfo, pred = pred) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/visualizeRegion.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/visualize.R 3 | \name{visualizeRegion} 4 | \alias{visualizeRegion} 5 | \title{Visualize Region} 6 | \usage{ 7 | visualizeRegion( 8 | chrm, 9 | beg, 10 | end, 11 | betas, 12 | platform = NULL, 13 | genome = NULL, 14 | draw = TRUE, 15 | cluster.samples = FALSE, 16 | na.rm = FALSE, 17 | nprobes.max = 1000, 18 | txn.types = "protein_coding", 19 | txn.font.size = 6, 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{chrm}{chromosome} 25 | 26 | \item{beg}{begin of the region} 27 | 28 | \item{end}{end of the region} 29 | 30 | \item{betas}{beta value matrix (row: probes, column: samples)} 31 | 32 | \item{platform}{EPIC, HM450, or MM285} 33 | 34 | \item{genome}{hg38, mm10, ..., will infer if not given. 35 | For additional mapping, download the GRanges object from 36 | http://zwdzwd.github.io/InfiniumAnnotation 37 | and provide the following argument 38 | ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),... 
39 | to this function.} 40 | 41 | \item{draw}{draw figure or return betas} 42 | 43 | \item{cluster.samples}{whether to cluster samples} 44 | 45 | \item{na.rm}{remove probes with all NA.} 46 | 47 | \item{nprobes.max}{maximum number of probes to plot} 48 | 49 | \item{txn.types}{default to protein_coding, use NULL for all} 50 | 51 | \item{txn.font.size}{transcript name font size} 52 | 53 | \item{...}{additional options, see assemble_plots} 54 | } 55 | \value{ 56 | graphics or a matrix containing the captured beta values 57 | } 58 | \description{ 59 | The function takes a genomic coordinate (chromosome, start and end) and a 60 | beta value matrix (probes on the row and samples on the column). It plots 61 | the beta values as a heatmap for all probes falling into the genomic region. 62 | If `draw=TRUE` the function returns the plotted grid graphics object. 63 | Otherwise, the selected beta value matrix is returned. 64 | `cluster.samples=TRUE/FALSE` controls whether hierarchical clustering is 65 | applied to the subset beta value matrix. 66 | } 67 | \examples{ 68 | betas <- sesameDataGet('HM450.76.TCGA.matched')$betas 69 | visualizeRegion('chr20', 44648623, 44652152, betas, 'HM450') 70 | } 71 | -------------------------------------------------------------------------------- /R/channel_inference.R: -------------------------------------------------------------------------------- 1 | #' Infer and reset color channel for Type-I probes instead of 2 | #' using what is specified in manifest. The results are stored to 3 | #' sdf@extra$IGG and sdf@extra$IRR slot. 4 | #' 5 | #' IGG => Type-I green that is inferred to be green 6 | #' IRR => Type-I red that is inferred to be red 7 | #' 8 | #' @param sdf a \code{SigDF} 9 | #' @param verbose whether to print correction summary 10 | #' @param switch_failed whether to switch failed probes (default to FALSE) 11 | #' @param mask_failed whether to mask failed probes (default to FALSE) 12 | #' @param summary return summarized numbers only. 
13 | #' @return a \code{SigDF}, or numerics if summary == TRUE 14 | #' @examples 15 | #' 16 | #' sdf <- sesameDataGet('EPIC.1.SigDF') 17 | #' inferInfiniumIChannel(sdf) 18 | #' 19 | #' @export 20 | inferInfiniumIChannel <- function( 21 | sdf, switch_failed = FALSE, mask_failed = FALSE, 22 | verbose = FALSE, summary = FALSE) { 23 | 24 | inf1_idx <- which(sdf$col != "2") 25 | sdf1 <- sdf[inf1_idx,] 26 | red_max <- pmax(sdf1$MR, sdf1$UR) 27 | grn_max <- pmax(sdf1$MG, sdf1$UG) 28 | new_col <- factor(ifelse( 29 | red_max > grn_max, "R", "G"), levels=c("G","R","2")) 30 | d1R <- sdf1[new_col == "R",] 31 | d1G <- sdf1[new_col == "G",] 32 | bg_max <- quantile(c(d1R$MG,d1R$UG,d1G$MR,d1G$UR), 0.95, na.rm=TRUE) 33 | 34 | ## revert to the original for failed probes if so desire 35 | idx <- (is.na(red_max) | is.na(grn_max) | pmax(red_max, grn_max) < bg_max) 36 | if (!switch_failed) { 37 | new_col[idx] <- sdf1$col[idx] 38 | } 39 | if (mask_failed) { 40 | sdf$mask[inf1_idx[idx]] <- TRUE 41 | } 42 | sdf$col[inf1_idx] <- factor(new_col, levels=c("G","R","2")) 43 | 44 | smry <- c( 45 | R2R = sum(sdf1$col == "R" & new_col == "R", na.rm=TRUE), 46 | G2G = sum(sdf1$col == "G" & new_col == "G", na.rm=TRUE), 47 | R2G = sum(sdf1$col == "R" & new_col == "G", na.rm=TRUE), 48 | G2R = sum(sdf1$col == "G" & new_col == "R", na.rm=TRUE)) 49 | 50 | if (summary) { return(smry) } 51 | 52 | sdfMsg(sdf, verbose, "%s: R>R:%d;G>G:%d;R>G:%d;G>R:%d", 53 | "Infinium-I color channel reset", 54 | smry["R2R"], smry["G2G"], smry["R2G"], smry["G2R"]) 55 | } 56 | 57 | -------------------------------------------------------------------------------- /man/cnSegmentation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.R 3 | \name{cnSegmentation} 4 | \alias{cnSegmentation} 5 | \title{Perform copy number segmentation} 6 | \usage{ 7 | cnSegmentation( 8 | sdf, 9 | sdfs.normal = NULL, 10 | 
genomeInfo = NULL, 11 | probeCoords = NULL, 12 | tilewidth = 50000, 13 | verbose = FALSE, 14 | return.probe.signals = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{sdf}{\code{SigDF}} 19 | 20 | \item{sdfs.normal}{a list of \code{SigDF}s for normalization, if not given, 21 | use the stored normal data from sesameData. However, we do recommend using 22 | a matched copy number normal dataset for normalization. 23 | assembly} 24 | 25 | \item{genomeInfo}{the genomeInfo files. The default is retrieved from 26 | sesameData. Alternative genomeInfo files can be found at 27 | https://github.com/zhou-lab/GenomeInfo} 28 | 29 | \item{probeCoords}{the probe coordinates in the corresponding genome 30 | if NULL (default), then the default genome assembly is used. 31 | Default genome is given by, e.g., sesameData_check_genome(NULL, "EPIC") 32 | For additional mapping, download the GRanges object from 33 | http://zwdzwd.github.io/InfiniumAnnotation 34 | and provide the following argument 35 | ..., probeCoords = sesameAnno_buildManifestGRanges("downloaded_file"),... 36 | to this function.} 37 | 38 | \item{tilewidth}{tile width for smoothing} 39 | 40 | \item{verbose}{print more messages} 41 | 42 | \item{return.probe.signals}{return probe-level instead of bin-level signal} 43 | } 44 | \value{ 45 | an object of \code{CNSegment} 46 | } 47 | \description{ 48 | Perform copy number segmentation using the signals in the signal set. 49 | The function takes a \code{SigDF} for the target sample and a set of 50 | normal \code{SigDF} for the normal samples. An optional arguments specifies 51 | the version of genome build that the inference will operate on. The function 52 | outputs an object of class \code{CNSegment} with signals for the segments ( 53 | seg.signals), the bin coordinates ( 54 | bin.coords) and bin signals (bin.signals). 
55 | } 56 | \examples{ 57 | 58 | sesameDataCache() 59 | 60 | \dontrun{ 61 | sdfs <- sesameDataGet('EPICv2.8.SigDF') 62 | sdf <- sdfs[["K562_206909630040_R01C01"]] 63 | seg <- cnSegmentation(sdf) 64 | seg <- cnSegmentation(sdf, return.probe.signals=TRUE) 65 | visualizeSegments(seg) 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /man/DMR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dm.R 3 | \name{DMR} 4 | \alias{DMR} 5 | \title{Find Differentially Methylated Region (DMR)} 6 | \usage{ 7 | DMR( 8 | betas, 9 | smry, 10 | contrast, 11 | platform = NULL, 12 | probe.coords = NULL, 13 | dist.cutoff = NULL, 14 | seg.per.locus = 0.5 15 | ) 16 | } 17 | \arguments{ 18 | \item{betas}{beta values for distance calculation} 19 | 20 | \item{smry}{DML} 21 | 22 | \item{contrast}{the pair-wise comparison or contrast 23 | check colnames(attr(smry, "model.matrix")) if uncertain} 24 | 25 | \item{platform}{EPIC, HM450, MM285, ...} 26 | 27 | \item{probe.coords}{GRanges object that defines CG coordinates 28 | if NULL (default), then the default genome assembly is used. 29 | Default genome is given by, e.g., sesameData_check_genome(NULL, "EPIC") 30 | For additional mapping, download the GRanges object from 31 | http://zwdzwd.github.io/InfiniumAnnotation 32 | and provide the following argument 33 | ..., probe.coords = sesameAnno_buildManifestGRanges("downloaded_file"),... 
34 | to this function.} 35 | 36 | \item{dist.cutoff}{cutoff of beta value differences for two neighboring CGs 37 | to be considered the same DMR (by default it's determined using the 38 | quantile function on seg.per.locus)} 39 | 40 | \item{seg.per.locus}{number of segments per locus 41 | higher value leads to more segments} 42 | } 43 | \value{ 44 | coefficient table with segment ID and segment P-value 45 | each row is a locus, multiple loci may share a segment ID if 46 | they are merged to the same segment. Records are ordered by Seg_Est. 47 | } 48 | \description{ 49 | This subroutine uses Euclidean distance to group CpGs and 50 | then combine p-values for each segment. The function performs DML test first 51 | if cf is NULL. It groups the probe testing results into differential 52 | methylated regions in a coefficient table with additional columns 53 | designating the segment ID and statistical significance (P-value) testing 54 | the segment. 55 | } 56 | \examples{ 57 | 58 | sesameDataCache() # in case not done yet 59 | 60 | data <- sesameDataGet('HM450.76.TCGA.matched') 61 | smry <- DML(data$betas[1:1000,], ~type, meta=data$sampleInfo) 62 | colnames(attr(smry, "model.matrix")) # pick a contrast from here 63 | ## showing on a small set of 100 CGs 64 | merged_segs <- DMR(data$betas[1:1000,], smry, "typeTumour", platform="HM450") 65 | 66 | sesameDataGet_resetEnv() 67 | 68 | } 69 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | #' Extract the probe type field from probe ID 3 | #' This only works with the new probe ID system. 
4 | #' See https://github.com/zhou-lab/InfiniumAnnotation for illustration 5 | #' 6 | #' @param Probe_ID Probe ID 7 | #' @return a vector of '1' and '2' suggesting Infinium-I and Infinium-II 8 | #' @import stringr 9 | #' @examples 10 | #' probeID_designType("cg36609548_TC21") 11 | #' @export 12 | probeID_designType <- function(Probe_ID) { 13 | stopifnot(all(grepl('_', Probe_ID))) # make sure it's the new ID system 14 | vapply(Probe_ID, function(x) substr( 15 | strsplit(x,'_')[[1]][2],3,3), character(1)) 16 | } 17 | 18 | #' Convert beta-value to M-value 19 | #' 20 | #' Logit transform a beta value vector to M-value vector. 21 | #' 22 | #' Convert beta-value to M-value (aka logit transform) 23 | #' @param b vector of beta values 24 | #' @return a vector of M values 25 | #' @examples 26 | #' BetaValueToMValue(c(0.1, 0.5, 0.9)) 27 | #' @export 28 | BetaValueToMValue <- function(b) { 29 | log2(b/(1-b)) 30 | } 31 | 32 | #' Convert M-value to beta-value 33 | #' 34 | #' Convert M-value to beta-value (aka inverse logit transform) 35 | #' 36 | #' @param m a vector of M values 37 | #' @return a vector of beta values 38 | #' @examples 39 | #' MValueToBetaValue(c(-3, 0, 3)) 40 | #' @export 41 | MValueToBetaValue <- function(m) { 42 | 1/(1+2^(-m)) # same as 2^m/(1+2^m) but Inf/-Inf give 1/0 instead of NaN 43 | } 44 | 45 | #' Check SeSAMe versions 46 | #' 47 | #' Print package versions of sesame and its dependencies to help 48 | #' troubleshoot installation issues.
#' Check SeSAMe versions
#'
#' Print the package versions of sesame and the packages it depends on, to
#' help troubleshoot installation issues. Packages that cannot be found are
#' reported as "not installed" instead of aborting the report.
#'
#' @return NULL (invisibly); the versions of R, Bioconductor, sesame,
#' sesameData and ExperimentHub are emitted through message()
#' @importFrom utils packageVersion
#' @export
#' @examples
#' sesame_checkVersion()
sesame_checkVersion <- function() {
    rv <- R.Version()

    ## a troubleshooting helper must survive the very problem it diagnoses
    pkg_version <- function(pkg) {
        tryCatch(
            as.character(packageVersion(pkg)),
            error = function(e) "not installed")
    }

    ## BiocManager is only a suggested package, so it may be absent
    bioc_version <- "unknown (BiocManager not installed)"
    if (requireNamespace("BiocManager", quietly = TRUE)) {
        bioc_version <- tryCatch(
            as.character(BiocManager::version()),
            error = function(e) "unknown")
    }

    msg <- paste0(
        "SeSAMe requires matched versions of ",
        "R, sesame, sesameData and ExperimentHub.\n",
        "Here is the current versions installed:\n",
        sprintf("R: %s.%s\n", rv$major, rv$minor),
        sprintf("Bioconductor: %s\n", bioc_version),
        sprintf("sesame: %s\n", pkg_version("sesame")),
        sprintf("sesameData: %s\n", pkg_version("sesameData")),
        sprintf("ExperimentHub: %s\n", pkg_version("ExperimentHub")))
    message(msg)
}


#' sesamize function is deprecated.
#'
#' Please check https://github.com/zwdzwd/sesamize for previous scripts
#'
#' @param ... arguments for sesamize (ignored)
#' @return nothing useful; a deprecation warning is raised
#' @export
#' @examples
#' cat("Deprecated. see https://github.com/zwdzwd/sesamize")
sesamize <- function(...) {
    .Deprecated("https://github.com/zwdzwd/sesamize")
}
5 | Version: 1.25.3 6 | Authors@R: c(person("Wanding", "Zhou", role = c("aut","cre"), email = "zhouwanding@gmail.com", comment = c(ORCID = "0000-0001-9126-1932")), 7 | person("Wubin", "Ding", role = "ctb"), 8 | person("David", "Goldberg", role = "ctb"), 9 | person("Ethan", "Moyer", role = "ctb"), 10 | person("Bret", "Barnes", role = "ctb"), 11 | person("Timothy", "Triche", role = "ctb"), 12 | person("Hui", "Shen", role = c("aut"), email = "Hui.Shen@vai.org")) 13 | Depends: R (>= 4.5.0), 14 | sesameData 15 | License: MIT + file LICENSE 16 | RoxygenNote: 7.3.2 17 | Imports: 18 | graphics, 19 | BiocParallel, 20 | utils, 21 | methods, 22 | stringr, 23 | readr, 24 | tibble, 25 | MASS, 26 | wheatmap (>= 0.2.0), 27 | GenomicRanges, 28 | IRanges, 29 | grid, 30 | preprocessCore, 31 | S4Vectors, 32 | ggplot2, 33 | BiocFileCache, 34 | GenomeInfoDb, 35 | stats, 36 | SummarizedExperiment, 37 | dplyr, 38 | reshape2 39 | Suggests: scales, 40 | BiocManager, 41 | knitr, 42 | DNAcopy, 43 | e1071, 44 | randomForest, 45 | RPMM, 46 | rmarkdown, 47 | testthat, 48 | tidyr, 49 | BiocStyle, 50 | ggrepel, 51 | grDevices, 52 | KernSmooth, 53 | pals 54 | Encoding: UTF-8 55 | VignetteBuilder: knitr 56 | URL: https://github.com/zwdzwd/sesame 57 | BugReports: https://github.com/zwdzwd/sesame/issues 58 | biocViews: DNAMethylation, MethylationArray, Preprocessing, QualityControl 59 | Collate: 60 | 'readIDAT.R' 61 | 'sex.R' 62 | 'species.R' 63 | 'QC.R' 64 | 'GEO.R' 65 | 'SigDFMethods.R' 66 | 'sesame.R' 67 | 'age.R' 68 | 'background.R' 69 | 'cell_composition.R' 70 | 'channel_inference.R' 71 | 'cnv.R' 72 | 'impute.R' 73 | 'mLiftOver.R' 74 | 'ethnicity.R' 75 | 'deidentify.R' 76 | 'detection.R' 77 | 'dm.R' 78 | 'dye_bias.R' 79 | 'feature_selection.R' 80 | 'fileSet.R' 81 | 'mask.R' 82 | 'sesameAnno.R' 83 | 'open.R' 84 | 'strain.R' 85 | 'tissue.R' 86 | 'track.R' 87 | 'match_design.R' 88 | 'utils.R' 89 | 'vcf.R' 90 | 'visualize.R' 91 | 'visualizeHelper.R' 92 | 'zzz.R' 93 | 'palgen.R' 94 | 
## very simple genotyper
##
## Calls one genotype from a single allele frequency `x` by comparing the
## binomial likelihood of round(x * model_nbeads) successes out of
## model_nbeads trials under three allele-frequency models
## (model_background, 0.5, 1 - model_background).
##
## Returns list(GT=, GS=): GT is '0/0', '0/1' or '1/1'; GS is a phred-like
## score of the best call assuming equal priors. A missing `x`, or an `x`
## for which no likelihood can be computed, yields the no-call
## list(GT = "./.", GS = 0) so callers can safely vapply() over results.
genotyper <- function(x, model_background=0.1, model_nbeads=40) {

    no_call <- list(GT = "./.", GS = 0)
    if (length(x) == 1L && is.na(x)) {
        return(no_call)
    }

    GL <- vapply(
        c(model_background, 0.5, 1-model_background),
        function(af) {
            dbinom(
                round(x*model_nbeads),
                size=model_nbeads, prob=af)}, numeric(1))

    ## which.max() returns integer(0) when every likelihood is NA, and a
    ## zero total would turn the score into NaN
    total <- sum(GL)
    if (is.na(total) || total <= 0) {
        return(no_call)
    }

    ind <- which.max(GL)
    GT <- c('0/0','0/1','1/1')[ind]
    GS <- floor(-log10(1-GL[ind] / total)*10) # assuming equal prior
    list(GT=GT, GS=GS)
}

## Build the VCF header lines for reference `genome`.
## NOTE(review): the five INFO entries are reproduced exactly as they appear
## in this view; their field definitions look truncated — confirm against
## the repository copy before relying on them.
vcf_header <- function(genome) {
    c('##fileformat=VCFv4.0',
        sprintf('##fileDate=%s',format(Sys.time(),"%Y%m%d")),
        sprintf('##reference=%s', genome),
        paste0('##INFO='),
        paste0('##INFO='),
        paste0('##INFO='),
        paste0('##INFO='),
        paste0('##INFO='))
}
#' Convert SNP from Infinium array to VCF file
#'
#' @param sdf SigDF
#' @param anno SNP variant annotation, available at
#' https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno/EPIC
#' EPIC.hg38.snp.tsv.gz
#' @param vcf output VCF file path, if NULL output to console
#' @param genome genome
#' @param verbose print more messages
#' @return VCF file. If vcf is NULL, a data.frame is output to
#' console. The data.frame does not contain VCF headers.
#' Records are sorted by chromosome name and then by position.
#' Probes without a usable allele frequency are reported as
#' no-calls (GT=./., QUAL 0, FILTER FAIL).
#'
#' @importFrom utils write.table
#' @examples
#' sesameDataCacheAll() # if not done yet
#' sdf <- sesameDataGet('EPIC.1.SigDF')
#'
#' \dontrun{
#' ## download anno from
#' ## http://zwdzwd.github.io/InfiniumAnnotation
#' ## output to console
#' anno = read_tsv(sesameAnno_download("EPICv2.hg38.snp.tsv.gz"))
#' head(formatVCF(sdf, anno))
#' }
#'
#' @export
formatVCF <- function(
    sdf, anno, vcf=NULL, genome="hg38", verbose = FALSE) {

    ## NOTE(review): the result is not used below; the call is kept in case
    ## callers rely on its validation or verbose output — confirm
    platform <- sdfPlatform(sdf, verbose = verbose)
    betas <- getBetas(sdf)[anno$Probe_ID]
    af <- getAFTypeIbySumAlleles(sdf, known.ccs.only=FALSE)
    vafs <- ifelse(anno$U == "ALT", 1-betas, betas)
    vafs <- ifelse(anno$U == "REF_InfI", af[anno$Probe_ID], vafs)

    gts <- lapply(vafs, genotyper)
    ## a missing allele frequency can yield an empty or non-finite call;
    ## map those to an explicit no-call so the vapply() contract holds and
    ## the "%d" format below never sees NaN/Inf
    GT <- vapply(gts, function(g) {
        if (length(g$GT) == 1L && !is.na(g$GT)) g$GT else "./."
    }, character(1))
    GS <- vapply(gts, function(g) {
        if (length(g$GS) == 1L && is.finite(g$GS)) g$GS else 0
    }, numeric(1))

    ## collapse the three-base ambiguity sets into IUPAC codes
    anno$REF[anno$REF == "ACT"] <- "H"
    anno$REF[anno$REF == "AGT"] <- "D"
    anno$ALT[anno$ALT == "ACT"] <- "H"
    anno$ALT[anno$ALT == "AGT"] <- "D"
    vcflines <- cbind(anno$chrm, anno$end,
        ".", anno$REF, anno$ALT, GS, ifelse(GS>20,'PASS','FAIL'),
        paste0(sprintf(
            "PVF=%1.3f;GT=%s;GS=%d;Probe_ID=%s",
            vafs, GT, GS, anno$Probe_ID),
            ifelse(is.na(anno$rs), "", paste0(";rs_ID=", anno$rs))))

    header <- vcf_header(genome)
    out <- data.frame(vcflines)
    colnames(out) <- c("#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO")
    out <- out[order(out[['#CHROM']], as.numeric(out[['POS']])),]

    if (is.null(vcf)) {
        return(out)
    }
    writeLines(header, vcf)
    write.table(out, file=vcf, append=TRUE, sep='\t',
        row.names = FALSE, col.names = FALSE, quote = FALSE)
}
#' Predict age using linear models
#'
#' The function takes a named numeric vector of beta values. The name attribute
#' contains the probe ID (cg, ch or rs IDs). The function looks for overlapping
#' probes and estimate age using different models.
#'
#' You can get the models such as the Horvath aging model (Horvath 2013
#' Genome Biology) from sesameDataGet. The function outputs a single numeric
#' of age in years.
#'
#' Here are some built-in age models:
#' Anno/HM450/Clock_Horvath353.rds
#' Anno/HM450/Clock_Hannum.rds
#' Anno/HM450/Clock_SkinBlood.rds
#' Anno/EPIC/Clock_PhenoAge.rds
#' Anno/MM285/Clock_Zhou347.rds
#' see vignette inferences.html#Age__Epigenetic_Clock for details
#'
#' @param betas a probeID-named vector of beta values
#' @param model a model object from sesameDataGet. should contain
#' param (with columns Probe_ID, slope and, when na_fallback is used,
#' na_fallback), intercept, response2age.
#' @param na_fallback use fall back values if na. When FALSE, probes with
#' missing beta values are dropped from the model instead.
#' @param min_nonna the minimum number of non-NA values.
#' @return age in the unit specified in the model (usually in year, but
#' sometimes can be month, like in the mouse clocks).
#' @examples
#' betas <- sesameDataGet('HM450.1.TCGA.PAAD')$betas
#' \dontrun{
#' ## download age models from
#' ## https://github.com/zhou-lab/InfiniumAnnotationV1/tree/main/Anno
#' ## e.g., Anno/HM450/Clock_Horvath353.rds
#' predictAge(betas, model)
#' }
#' @export
predictAge <- function(betas, model, na_fallback=FALSE, min_nonna = 10) {

    ## align the measurements with the model rows; absent probes become NA
    betas <- betas[model$param$Probe_ID]
    if (sum(!is.na(betas)) < min_nonna) {
        ## report the threshold actually in force, not a fixed number
        stop(sprintf(
            "Fewer than %s matching probes left. Age prediction abort.",
            min_nonna))
    }
    if (anyNA(betas)) {
        if (na_fallback) {
            if (is.null(model$param$na_fallback)) {
                stop("The model does not provide na_fallback values.")
            }
            k <- is.na(betas)
            betas[k] <- model$param$na_fallback[k]
        } else {
            ## shrink the model to the probes that were measured
            probes <- intersect(names(na.omit(betas)), model$param$Probe_ID)
            betas <- betas[probes]
            model$param <- model$param[match(probes, model$param$Probe_ID),]
        }
    }
    drop(model$response2age(betas %*% model$param$slope + model$intercept))
}

#' Mouse age predictor
#'
#' The function takes a named numeric vector of beta values. The name attribute
#' contains the probe ID. The function looks for overlapping
#' probes and estimate age using an aging model built from 321 MM285 probes.
#' The function outputs a single numeric of age in months. The clock is most
#' accurate with the sesame preprocessing.
#'
#' Deprecated: the function only raises a deprecation warning pointing to
#' predictAge.
#'
#' @param betas a probeID-named vector of beta values
#' @param na_fallback use the fallback default for NAs.
#' @return age in month
#' @examples
#' cat("Deprecated. See predictAge")
#' @export
predictMouseAgeInMonth <- function(betas, na_fallback=TRUE) {
    .Deprecated("predictAge")
}

#' Horvath 353 age predictor
#'
#' The function takes a named numeric vector of beta values. The name attribute
#' contains the probe ID (cg, ch or rs IDs). The function looks for overlapping
#' probes and estimate age using Horvath aging model (Horvath 2013
#' Genome Biology). The function outputs a single numeric of age in years.
#'
#' Deprecated: the function only raises a deprecation warning pointing to
#' predictAge.
#'
#' @param betas a probeID-named vector of beta values
#' @return age in years
#' @examples
#' cat("Deprecated. See predictAge")
#' @export
predictAgeHorvath353 <- function(betas) {
    .Deprecated("predictAge")
}
#' Horvath Skin and Blood age predictor
#'
#' Historical entry point for the Horvath skin and blood clock (Horvath et
#' al. 2018 Aging, 391 probes). It no longer computes anything: calling it
#' only raises a deprecation warning that points to predictAge.
#'
#' @param betas a probeID-named vector of beta values
#' @return age in years
#' @examples
#' cat("Deprecated. See predictAge")
#' @export
predictAgeSkinBlood <- function(betas) {
    .Deprecated(new = "predictAge")
}


## Hv.age2response <- function(x, adult.age=20) {
##     ## trafo
##     x <- (x+1)/(adult.age+1)
##     ifelse(x<=1,log(x),x-1)
## }

## Hv.response2age <- function(x, adult.age=20) {
##     ## anti.trafo
##     ifelse(
##         x<0,
##         (1+adult.age)*exp(x)-1,
##         (1+adult.age)*x+adult.age)
## }
#' Impute missing data of a specific platform
#'
#' Missing beta values are replaced by the per-probe median of a reference
#' cell type / tissue taken from the platform's imputation defaults.
#'
#' @param betas named vector of beta values, or a matrix with probe IDs as
#' row names (each column is then imputed independently)
#' @param platform platform
#' @param celltype celltype/tissue context of imputation, if not given, will
#' use nearest neighbor to determine.
#' @param sd_max maximum standard deviation in imputation confidence;
#' probes whose reference standard deviation exceeds it are left as NA
#' @param BPPARAM use MulticoreParam(n) for parallel processing
#' @return imputed data, vector or matrix
#' @examples
#' betas = openSesame(sesameDataGet("EPIC.1.SigDF"))
#' sum(is.na(betas))
#' betas2 = imputeBetas(betas, "EPIC")
#' sum(is.na(betas2))
#'
#' @export
imputeBetas <- function(betas, platform = NULL, BPPARAM = SerialParam(),
    celltype = NULL, sd_max = 999) {

    if (is.matrix(betas)) {
        if (ncol(betas) == 0) {
            return(betas)
        }
        ## remember the sample names before the matrix is rebuilt
        sample_names <- colnames(betas)
        probe_names <- rownames(betas)
        imputed <- do.call(cbind, bplapply(seq_len(ncol(betas)), function(i) {
            imputeBetas(setNames(betas[,i], probe_names),
                platform = platform,
                celltype = celltype, sd_max = sd_max)}, BPPARAM=BPPARAM))
        colnames(imputed) <- sample_names
        return(imputed)
    }

    platform <- sesameData_check_platform(platform, names(betas))
    df <- sesameDataGet(sprintf("%s.imputationDefault", platform))
    d2q <- match(names(betas), df$Probe_ID)

    if (is.null(celltype)) {
        ## pick the reference whose medians correlate best with the sample
        celltype <- names(which.max(vapply(df$data, function(x) cor(
            betas, x$median[d2q], use="na.or.complete"), numeric(1))))
        if (length(celltype) == 0) {
            ## no correlation could be computed at all
            celltype <- "Blood"
        }
    }
    if (!(celltype %in% names(df$data))) {
        stop(sprintf(
            "Cell type '%s' is not available for platform %s.",
            celltype, platform))
    }

    idx <- is.na(betas)
    mn <- df$data[[celltype]]$median[d2q][idx]
    sd <- df$data[[celltype]]$sd[d2q][idx]
    ## which() drops NA comparisons (probes without reference data)
    mn[which(sd > sd_max)] <- NA
    betas[idx] <- mn
    betas
}
#' Impute missing data based on genomic neighbors.
#'
#' Each missing beta value that has measured probes nearby is replaced by
#' the mean beta value of its closest neighbors. Missing probes without
#' any neighbor in range stay NA.
#'
#' @param betas named vector of beta values, or a matrix with probe IDs as
#' row names (each column is then imputed independently)
#' @param platform platform
#' @param max_neighbors maximum neighbors to use for dense regions
#' @param max_dist maximum distance to count as neighbor
#' @param BPPARAM use MulticoreParam(n) for parallel processing
#' (used when betas is a matrix)
#' @return imputed data, vector or matrix
#' @importFrom GenomicRanges resize
#' @importFrom GenomicRanges findOverlaps
#' @importFrom S4Vectors subjectHits
#' @importFrom S4Vectors queryHits
#' @importFrom dplyr summarize
#' @examples
#' betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]])
#' sum(is.na(betas))
#' betas2 = imputeBetasByGenomicNeighbors(betas, "EPICv2")
#' sum(is.na(betas2))
#'
#' @export
imputeBetasByGenomicNeighbors <- function(betas, platform = NULL,
    BPPARAM = SerialParam(), max_neighbors = 3, max_dist = 10000) {

    if (is.matrix(betas)) {
        if (ncol(betas) == 0) {
            return(betas)
        }
        sample_names <- colnames(betas)
        probe_names <- rownames(betas)
        platform <- sesameData_check_platform(platform, probe_names)
        imputed <- do.call(cbind, bplapply(seq_len(ncol(betas)), function(i) {
            imputeBetasByGenomicNeighbors(
                setNames(betas[,i], probe_names), platform = platform,
                max_neighbors = max_neighbors, max_dist = max_dist)
        }, BPPARAM = BPPARAM))
        colnames(imputed) <- sample_names
        return(imputed)
    }

    platform <- sesameData_check_platform(platform, names(betas))
    mft <- sesameData_getManifestGRanges(platform)
    missing_ids <- names(betas)[is.na(betas)]
    ## measured probes without manifest coordinates cannot act as
    ## neighbors; subsetting by an unknown name would fail
    measured_ids <- intersect(names(betas)[!is.na(betas)], names(mft))
    mft_missing <- mft[names(mft) %in% missing_ids]
    mft_nonmiss <- mft[measured_ids]
    ## NOTE(review): resize() is called with its default anchoring, so the
    ## search window may not be centred on the probe — confirm this matches
    ## the intended meaning of max_dist
    index <- findOverlaps(resize(mft_missing, max_dist), mft_nonmiss)
    gm <- mft_missing[queryHits(index)]
    gn <- mft_nonmiss[subjectHits(index)]
    df <- tibble(
        cg = names(gm), beg_m = start(gm), end_m = end(gm),
        cg_n = names(gn), beg_n = start(gn), end_n = end(gn))

    ## gap between the two probes, whichever side the neighbor is on
    df$d1 <- df$beg_m - df$end_n - 1
    df$d2 <- df$beg_n - df$end_m - 1
    df$betas <- betas[df$cg_n]
    df$dist <- pmax(df$d1, df$d2)
    df <- summarize(slice_min(group_by(df, .data[['cg']]),
        n = max_neighbors, order_by = .data[['dist']]),
        mbetas = mean(.data[['betas']]))
    betas[df$cg] <- df$mbetas
    betas
}
#' Impute Missing Values with Mean
#'
#' This function replaces missing values (NA) in a matrix with the mean of
#' the non-missing values of the same row (default) or the same column.
#' A row/column without any observed value is filled with NaN.
#'
#' @param mx A matrix
#' @param axis A single integer. Use 1 to impute row means (default),
#' and 2 to impute column means.
#' @return A matrix of the same dimensions and dimnames as mx with missing
#' values imputed.
#' @examples
#' mx <- cbind(c(1, 2, NA, 4), c(NA, 2, 3, 4))
#' imputeBetasMatrixByMean(mx, axis = 1)
#' imputeBetasMatrixByMean(mx, axis = 2)
#' @export
imputeBetasMatrixByMean <- function(mx, axis = 1) {
    stopifnot(is.matrix(mx))
    if (length(axis) != 1 || !(axis %in% c(1, 2))) {
        stop("Invalid axis. Use 1 for row means or 2 for column means.")
    }

    ## (row, column) coordinates of every missing cell
    na_pos <- which(is.na(mx), arr.ind = TRUE)
    if (nrow(na_pos) == 0) {
        return(mx)
    }

    ## filling in place keeps shape and dimnames, including for
    ## single-row and single-column matrices
    if (axis == 1) {
        mx[na_pos] <- rowMeans(mx, na.rm = TRUE)[na_pos[, 1]]
    } else {
        mx[na_pos] <- colMeans(mx, na.rm = TRUE)[na_pos[, 2]]
    }
    mx
}
export(getRefSet) 39 | export(imputeBetas) 40 | export(imputeBetasByGenomicNeighbors) 41 | export(imputeBetasMatrixByMean) 42 | export(inferEthnicity) 43 | export(inferInfiniumIChannel) 44 | export(inferSex) 45 | export(inferSpecies) 46 | export(inferStrain) 47 | export(inferTissue) 48 | export(initFileSet) 49 | export(liftOver) 50 | export(listAvailableMasks) 51 | export(mLiftOver) 52 | export(mapFileSet) 53 | export(mapToMammal40) 54 | export(matchDesign) 55 | export(meanIntensity) 56 | export(medianTotalIntensity) 57 | export(noMasked) 58 | export(noob) 59 | export(openSesame) 60 | export(openSesameToFile) 61 | export(pOOBAH) 62 | export(palgen) 63 | export(parseGEOsignalMU) 64 | export(predictAge) 65 | export(predictAgeHorvath353) 66 | export(predictAgeSkinBlood) 67 | export(predictMouseAgeInMonth) 68 | export(prefixMask) 69 | export(prefixMaskButC) 70 | export(prefixMaskButCG) 71 | export(prepSesame) 72 | export(prepSesameList) 73 | export(probeID_designType) 74 | export(probeSuccessRate) 75 | export(qualityMask) 76 | export(reIdentify) 77 | export(readFileSet) 78 | export(readIDATpair) 79 | export(recommendedMaskNames) 80 | export(resetMask) 81 | export(scrub) 82 | export(scrubSoft) 83 | export(sdfPlatform) 84 | export(sdf_read_table) 85 | export(sdf_write_table) 86 | export(searchIDATprefixes) 87 | export(sesameAnno_attachManifest) 88 | export(sesameAnno_buildAddressFile) 89 | export(sesameAnno_buildManifestGRanges) 90 | export(sesameAnno_download) 91 | export(sesameAnno_readManifestTSV) 92 | export(sesameQC_calcStats) 93 | export(sesameQC_getStats) 94 | export(sesameQC_plotBar) 95 | export(sesameQC_plotBetaByDesign) 96 | export(sesameQC_plotHeatSNPs) 97 | export(sesameQC_plotIntensVsBetas) 98 | export(sesameQC_plotRedGrnQQ) 99 | export(sesameQC_rankStats) 100 | export(sesameQCtoDF) 101 | export(sesame_checkVersion) 102 | export(sesamize) 103 | export(setMask) 104 | export(signalMU) 105 | export(sliceFileSet) 106 | export(summaryExtractTest) 107 | 
export(totalIntensities) 108 | export(updateSigDF) 109 | export(visualizeGene) 110 | export(visualizeProbes) 111 | export(visualizeRegion) 112 | export(visualizeSegments) 113 | import(BiocParallel) 114 | import(ggplot2) 115 | import(graphics) 116 | import(grid) 117 | import(sesameData) 118 | import(stats) 119 | import(stringr) 120 | import(tibble) 121 | import(wheatmap) 122 | importFrom(BiocFileCache,BiocFileCache) 123 | importFrom(BiocFileCache,bfcrpath) 124 | importFrom(GenomeInfoDb,Seqinfo) 125 | importFrom(GenomicRanges,GRanges) 126 | importFrom(GenomicRanges,end) 127 | importFrom(GenomicRanges,findOverlaps) 128 | importFrom(GenomicRanges,resize) 129 | importFrom(GenomicRanges,seqinfo) 130 | importFrom(GenomicRanges,seqnames) 131 | importFrom(GenomicRanges,start) 132 | importFrom(S4Vectors,metadata) 133 | importFrom(S4Vectors,queryHits) 134 | importFrom(S4Vectors,subjectHits) 135 | importFrom(SummarizedExperiment,"metadata<-") 136 | importFrom(SummarizedExperiment,SummarizedExperiment) 137 | importFrom(SummarizedExperiment,assay) 138 | importFrom(SummarizedExperiment,colData) 139 | importFrom(SummarizedExperiment,rowData) 140 | importFrom(dplyr,bind_cols) 141 | importFrom(dplyr,bind_rows) 142 | importFrom(dplyr,distinct) 143 | importFrom(dplyr,full_join) 144 | importFrom(dplyr,group_by) 145 | importFrom(dplyr,slice_min) 146 | importFrom(dplyr,summarize) 147 | importFrom(methods,.hasSlot) 148 | importFrom(methods,is) 149 | importFrom(methods,new) 150 | importFrom(preprocessCore,normalize.quantiles.use.target) 151 | importFrom(readr,col_character) 152 | importFrom(readr,col_integer) 153 | importFrom(readr,cols) 154 | importFrom(readr,read_tsv) 155 | importFrom(reshape2,melt) 156 | importFrom(stats,approx) 157 | importFrom(stats,setNames) 158 | importFrom(utils,download.file) 159 | importFrom(utils,head) 160 | importFrom(utils,packageVersion) 161 | importFrom(utils,read.table) 162 | importFrom(utils,tail) 163 | importFrom(utils,write.table) 164 | 
#' De-identify IDATs by removing SNP probes
#'
#' Mask SNP probe intensity mean by zero.
#'
#' @param path input IDAT file
#' @param out_path output IDAT file. If NULL, it is derived from path by
#' inserting "_noid" before the "_Grn"/"_Red" channel suffix.
#' @param snps SNP definition, if not given, default to SNP probes
#' @param mft sesame-compatible manifest if non-standard
#' @param randomize whether to randomize the SNPs. if TRUE,
#' randomize the signal intensities. one can use set.seed to
#' reidentify the IDAT with the secret seed (see examples).
#' If FALSE, this sets all SNP intensities to zero.
#' @return NULL, changes made to the IDAT files
#' @examples
#'
#' my_secret <- 13412084
#' set.seed(my_secret)
#' temp_out <- tempfile("test")
#' deIdentify(system.file(
#'     "extdata", "4207113116_A_Grn.idat", package = "sesameData"),
#'     temp_out, randomize = TRUE)
#' unlink(temp_out)
#' @export
deIdentify <- function(
    path, out_path=NULL, snps=NULL, mft=NULL, randomize=FALSE) {

    res <- suppressWarnings(readIDAT(path))
    platform <- inferPlatformFromTango(res)

    if (is.null(out_path)) {
        pfx <- sub('\\.idat(\\.gz)?$','', path)
        if (grepl('_Grn$', pfx)) {
            out_path <- paste0(sub('_Grn$','',pfx), '_noid_Grn.idat')
        } else if (grepl('_Red$', pfx)) {
            out_path <- paste0(sub('_Red$','',pfx), '_noid_Red.idat')
        } else {
            stop("Cannot derive out_path from path (expecting a _Grn.idat ",
                "or _Red.idat file name). Please provide out_path.")
        }
    }

    if (is.null(mft)) {
        mft <- sesameDataGet(paste0(platform, '.address'))$ordering }
    if (is.null(snps)) {
        snps <- grep("^rs", mft$Probe_ID, value=TRUE) }
    mft <- mft[mft$Probe_ID %in% snps,]

    snpsTango <- na.omit(c(mft$M, mft$U))
    qt <- res$Quants
    snpsIdx <- match(snpsTango, rownames(qt))
    dt <- qt[,'Mean']
    if (randomize) {
        ## the permutation drawn here is what reIdentify() inverts, so the
        ## random draw must stay exactly as is
        snpsIdx <- snpsIdx[!is.na(snpsIdx)]
        dt[snpsIdx] <- sample(dt[snpsIdx])
    } else { dt[snpsIdx] <- 0 }

    if (grepl("\\.gz$", path)) { con <- gzfile(path, "rb")
    } else { con <- file(path, "rb") }
    ## release both connections even if reading or writing fails
    on.exit(close(con), add = TRUE)
    con2 <- file(out_path, "wb")
    on.exit(close(con2), add = TRUE)

    ## before Mean section
    writeBin(readBin(con, "raw", n = res$fields["Mean", 'byteOffset']), con2)

    ## write new Mean section
    writeBin(as.integer(dt), con2, size=2, endian='little')

    ## after Mean section
    ## skip by reading..., seek might not work for gzfile
    readBin(con, "raw", n = res$nSNPsRead*2)
    ## copy the remainder in chunks rather than one byte per call
    repeat {
        chunk <- readBin(con, "raw", n = 65536L)
        if (length(chunk) == 0) {
            break
        }
        writeBin(chunk, con2)
    }

    invisible(NULL)
}
#' Re-identify IDATs by restoring scrambled SNP intensities
#'
#' This requires setting a seed with the secret number that was used to
#' de-identify the IDAT (see example).
#'
#' @param path input IDAT file
#' @param out_path output IDAT file. If NULL, it is derived from path by
#' inserting "_reid" before the "_Grn"/"_Red" channel suffix.
#' @param snps SNP definition, if not given, default to SNP probes
#' @param mft sesame-compatible manifest if non-standard
#' @return NULL, changes made to the IDAT files
#' @examples
#'
#' temp_out <- tempfile("test")
#'
#' set.seed(123)
#' reIdentify(system.file(
#'     "extdata", "4207113116_A_Grn.idat", package = "sesameData"), temp_out)
#' unlink(temp_out)
#' @export
reIdentify <- function(path, out_path=NULL, snps=NULL, mft=NULL) {

    res <- suppressWarnings(readIDAT(path))
    platform <- inferPlatformFromTango(res)

    if (is.null(out_path)) {
        pfx <- sub('\\.idat(\\.gz)?$','', path)
        if (grepl('_Grn$', pfx)) {
            out_path <- paste0(sub('_Grn$','',pfx), '_reid_Grn.idat')
        } else if (grepl('_Red$', pfx)) {
            out_path <- paste0(sub('_Red$','',pfx), '_reid_Red.idat')
        } else {
            stop("Cannot derive out_path from path (expecting a _Grn.idat ",
                "or _Red.idat file name). Please provide out_path.")
        }
    }

    if (is.null(mft)) {
        mft <- sesameDataGet(paste0(platform, '.address'))$ordering }
    if (is.null(snps)) {
        snps <- grep("^rs", mft$Probe_ID, value=TRUE) }
    mft <- mft[mft$Probe_ID %in% snps,]

    snpsTango <- na.omit(c(mft$M, mft$U))
    qt <- res$Quants
    snpsIdx <- match(snpsTango, rownames(qt))
    dt <- qt[,'Mean']
    snpsIdx <- snpsIdx[!is.na(snpsIdx)]
    idx <- seq_along(snpsIdx)
    ## sample(idx) redraws the permutation used for scrambling (same seed);
    ## match() inverts it so every intensity returns to its original slot
    dt[snpsIdx] <- dt[snpsIdx[match(idx, sample(idx))]]

    if (grepl("\\.gz$", path)) { con <- gzfile(path, "rb")
    } else { con <- file(path, "rb") }
    ## release both connections even if reading or writing fails
    on.exit(close(con), add = TRUE)
    con2 <- file(out_path, "wb")
    on.exit(close(con2), add = TRUE)

    ## before Mean section
    writeBin(readBin(con, "raw", n = res$fields["Mean", 'byteOffset']), con2)

    ## write new Mean section
    writeBin(as.integer(dt), con2, size=2, endian='little')

    ## after Mean section
    ## skip by reading..., seek might not work for gzfile
    readBin(con, "raw", n = res$nSNPsRead*2)
    ## copy the remainder in chunks rather than one byte per call
    repeat {
        chunk <- readBin(con, "raw", n = 65536L)
        if (length(chunk) == 0) {
            break
        }
        writeBin(chunk, con2)
    }

    invisible(NULL)
}
To calculate 24 | QC metrics on a given list of samples or all IDATs in a folder, one can use 25 | `sesameQC_calcStats` within the standard `openSesame` pipeline. When used with 26 | `openSesame`, a list of `sesameQC`s will be returned. Note that one should turn 27 | off preprocessing using `prep=""`: 28 | 29 | ```{r qc1, eval=FALSE} 30 | ## calculate metrics on all IDATs in a specific folder 31 | sesameQCtoDF(openSesame(idat_dir, prep="", func=sesameQC_calcStats)) 32 | ## or a list of prefixes, with parallel processing 33 | sesameQCtoDF(openSesame(sprintf("%s/%s", idat_dir, idat_prefixes), prep="", 34 | func=sesameQC_calcStats, BPPARAM=BiocParallel::MulticoreParam(24))) 35 | ``` 36 | 37 | The results display `frac_dt_cg`, `RGratio`, `RGdistort` by default. For other 38 | QC metrics, SeSAMe divides sample quality metrics into multiple groups. These 39 | groups are listed below and can be referred to by short keys. For example, 40 | "intensity" generates signal intensity-related quality metrics. 41 | 42 | ```{r echo=FALSE} 43 | library(knitr) 44 | kable(data.frame( 45 | "Short Key" = c( 46 | "detection", 47 | "numProbes", 48 | "intensity", 49 | "channel", 50 | "dyeBias", 51 | "betas"), 52 | "Description" = c( 53 | "Signal Detection", 54 | "Number of Probes", 55 | "Signal Intensity", 56 | "Color Channel", 57 | "Dye Bias", 58 | "Beta Value"))) 59 | ``` 60 | 61 | By default, `sesameQC_calcStats` calculates all QC groups. To save time, one 62 | can compute a specific QC group by specifying one or multiple short keys in 63 | the `funs=` argument: 64 | 65 | ```{r qc2} 66 | sdfs <- sesameDataGet("EPIC.5.SigDF.normal")[1:2] # get two examples 67 | ## only compute signal detection stats 68 | qcs = openSesame(sdfs, prep="", func=sesameQC_calcStats, funs="detection") 69 | qcs[[1]] 70 | ``` 71 | 72 | > We consider signal detection the most important QC metric. 
73 | 74 | One can retrieve the actual stat numbers from `sesameQC` using 75 | `sesameQC_getStats()` (the following generates the fraction of probes with 76 | detection success): 77 | 78 | ```{r qc3} 79 | sesameQC_getStats(qcs[[1]], "frac_dt") 80 | ``` 81 | 82 | After computing the QCs, one can optionally combine the `sesameQC` objects into 83 | a data frame for easy comparison. 84 | 85 | ```{r qc4} 86 | ## combine a list of sesameQC into a data frame 87 | head(do.call(rbind, lapply(qcs, as.data.frame))) 88 | ``` 89 | 90 | Note that when the input is a `SigDF` object, calling `sesameQC_calcStats` 91 | within `openSesame` and as a standalone function are equivalent. 92 | 93 | ```{r qc5, message=FALSE} 94 | sdf <- sesameDataGet('EPIC.1.SigDF') 95 | qc = openSesame(sdf, prep="", func=sesameQC_calcStats, funs=c("detection")) 96 | ## equivalent direct call 97 | qc = sesameQC_calcStats(sdf, c("detection")) 98 | qc 99 | ``` 100 | 101 | # Rank Quality Metrics 102 | 103 | ```{r qc6, echo=FALSE} 104 | options(rmarkdown.html_vignette.check_title = FALSE) 105 | ``` 106 | 107 | SeSAMe features comparison of your sample with public data sets. The 108 | `sesameQC_rankStats()` function ranks the input `sesameQC` object with 109 | `sesameQC` calculated from public datasets. It shows the rank percentage of the 110 | input sample as well as the number of datasets compared. 111 | 112 | ```{r qc7} 113 | sdf <- sesameDataGet('EPIC.1.SigDF') 114 | qc <- sesameQC_calcStats(sdf, "intensity") 115 | qc 116 | sesameQC_rankStats(qc, platform="EPIC") 117 | ``` 118 | 119 | # Quality Control Plots 120 | 121 | SeSAMe provides functions to create QC plots. Some functions take sesameQC as 122 | input while others directly plot the SigDF objects. Here are some examples: 123 | 124 | - `sesameQC_plotBar()` takes a list of sesameQC objects and creates a bar 125 | plot for each metric calculated. 126 | 127 | - `sesameQC_plotRedGrnQQ()` graphs the dye bias between the two color channels.
128 | 129 | - `sesameQC_plotIntensVsBetas()` plots the relationship between $\beta$ values 130 | and signal intensity and can be used to diagnose artificial readout and 131 | influence of signal background. 132 | 133 | - `sesameQC_plotHeatSNPs()` plots SNP probes and can be used to detect sample 134 | swaps. 135 | 136 | More about quality control plots can be found in [Supplemental 137 | Vignette](https://zhou-lab.github.io/sesame/v1.16/supplemental.html#qc). 138 | 139 | # Session Info 140 | 141 | ```{r} 142 | sessionInfo() 143 | ``` 144 | -------------------------------------------------------------------------------- /man/mLiftOver.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mLiftOver.R 3 | \name{mLiftOver} 4 | \alias{mLiftOver} 5 | \title{Lift over beta values or SigDFs to another Infinium platform 6 | This function wraps ID conversion and provide optional 7 | imputation functionality.} 8 | \usage{ 9 | mLiftOver( 10 | x, 11 | target_platform, 12 | source_platform = NULL, 13 | BPPARAM = SerialParam(), 14 | mapping = NULL, 15 | impute = FALSE, 16 | sd_max = 999, 17 | celltype = "Blood", 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{either named beta value (vector or matrix), probe IDs 23 | or SigDF(s) 24 | if input is a matrix, probe IDs should be in the row names 25 | if input is a numeric vector, probe IDs should be in the vector 26 | names. 27 | If input is a character vector, the input will be 28 | considered probe IDs.} 29 | 30 | \item{target_platform}{the platform to take the data to} 31 | 32 | \item{source_platform}{optional information of the source data 33 | platform (when there might be ambiguity).} 34 | 35 | \item{BPPARAM}{use MulticoreParam(n) for parallel processing} 36 | 37 | \item{mapping}{a liftOver mapping file. Typically this file 38 | contains empirical evidence whether a probe mapping is reliable. 
39 | If given, probe ID-based mapping will be skipped. This is to 40 | perform more stringent probe ID mapping.} 41 | 42 | \item{impute}{whether to impute or not, default is FALSE} 43 | 44 | \item{sd_max}{the maximum standard deviation for filtering low 45 | confidence imputation.} 46 | 47 | \item{celltype}{the cell type / tissue context of imputation, 48 | if not given, will use nearest neighbor to find out.} 49 | 50 | \item{...}{extra arguments, see ?convertProbeID} 51 | } 52 | \value{ 53 | imputed data, vector, matrix, SigDF(s) 54 | } 55 | \description{ 56 | Lift over beta values or SigDFs to another Infinium platform 57 | This function wraps ID conversion and provide optional 58 | imputation functionality. 59 | } 60 | \examples{ 61 | 62 | \dontrun{ 63 | sesameDataCache() 64 | 65 | ## lift SigDF 66 | 67 | sdf = sesameDataGet("EPICv2.8.SigDF")[["GM12878_206909630042_R08C01"]] 68 | dim(mLiftOver(sdf, "EPICv2")) 69 | dim(mLiftOver(sdf, "EPIC")) 70 | dim(mLiftOver(sdf, "HM450")) 71 | 72 | sdfs = sesameDataGet("EPICv2.8.SigDF")[1:2] 73 | sdfs_hm450 = mLiftOver(sdfs, "HM450") 74 | ## parallel processing 75 | sdfs_hm450 = mLiftOver(sdfs, "HM450", BPPARAM=BiocParallel::MulticoreParam(2)) 76 | 77 | sdf = sesameDataGet("EPIC.5.SigDF.normal")[[1]] 78 | dim(mLiftOver(sdf, "EPICv2")) 79 | dim(mLiftOver(sdf, "EPIC")) 80 | dim(mLiftOver(sdf, "HM450")) 81 | 82 | sdf = sesameDataGet("HM450.10.SigDF")[[1]] 83 | dim(mLiftOver(sdf, "EPICv2")) 84 | dim(mLiftOver(sdf, "EPIC")) 85 | dim(mLiftOver(sdf, "HM450")) 86 | 87 | ## lift beta values 88 | 89 | betas = openSesame(sesameDataGet("EPICv2.8.SigDF")[[1]]) 90 | betas_hm450 = mLiftOver(betas, "HM450", impute=TRUE) 91 | length(betas_hm450) 92 | sum(is.na(betas_hm450)) 93 | betas_hm450 <- mLiftOver(betas, "HM450", impute=FALSE) 94 | length(betas_hm450) 95 | sum(is.na(betas_hm450)) 96 | betas_epic1 <- mLiftOver(betas, "EPIC", impute=TRUE) 97 | length(betas_epic1) 98 | sum(is.na(betas_epic1)) 99 | betas_epic1 <- mLiftOver(betas, "EPIC", 
impute=FALSE) 100 | length(betas_epic1) 101 | sum(is.na(betas_epic1)) 102 | 103 | betas_matrix = openSesame(sesameDataGet("EPICv2.8.SigDF")[1:4]) 104 | dim(betas_matrix) 105 | betas_matrix_hm450 = mLiftOver(betas_matrix, "HM450", impute=T) 106 | dim(betas_matrix_hm450) 107 | ## parallel processing 108 | betas_matrix_hm450 = mLiftOver(betas_matrix, "HM450", impute=T, 109 | BPPARAM=BiocParallel::MulticoreParam(4)) 110 | 111 | ## use empirical evidence in mLiftOver 112 | mapping = sesameDataGet("liftOver.EPICv2ToEPIC") 113 | betas_matrix = openSesame(sesameDataGet("EPICv2.8.SigDF")[1:4]) 114 | dim(mLiftOver(betas_matrix, "EPIC", mapping = mapping)) 115 | ## compare to without using empirical evidence 116 | dim(mLiftOver(betas_matrix, "EPIC")) 117 | 118 | betas <- c("cg04707299"=0.2, "cg13380562"=0.9, "cg00000103"=0.1) 119 | head(mLiftOver(betas, "HM450", impute=TRUE)) 120 | 121 | betas <- c("cg00004963_TC21"=0, "cg00004963_TC22"=0.5, "cg00004747_TC21"=1.0) 122 | betas_hm450 <- mLiftOver(betas, "HM450", impute=TRUE) 123 | head(na.omit(mLiftOver(betas, "HM450", impute=FALSE))) 124 | 125 | ## lift probe IDs 126 | 127 | cg_epic2 = names(sesameData_getManifestGRanges("EPICv2")) 128 | head(mLiftOver(cg_epic2, "HM450")) 129 | 130 | cg_epic2 = grep("cg", names(sesameData_getManifestGRanges("EPICv2")), value=T) 131 | head(mLiftOver(cg_epic2, "HM450")) 132 | 133 | cg_hm450 = grep("cg", names(sesameData_getManifestGRanges("HM450")), value=T) 134 | head(mLiftOver(cg_hm450, "EPICv2")) 135 | 136 | rs_epic2 = grep("rs", names(sesameData_getManifestGRanges("EPICv2")), value=T) 137 | head(mLiftOver(rs_epic2, "HM450", source_platform="EPICv2")) 138 | 139 | probes_epic2 = names(sesameData_getManifestGRanges("EPICv2")) 140 | head(mLiftOver(probes_epic2, "EPIC")) 141 | head(mLiftOver(probes_epic2, "EPIC", target_uniq = TRUE)) 142 | head(mLiftOver(probes_epic2, "EPIC", include_new = FALSE)) 143 | head(mLiftOver(probes_epic2, "EPIC", include_old = FALSE)) 144 | head(mLiftOver(probes_epic2, 
"EPIC", return_mapping=TRUE)) 145 | 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | c( 2 | bibentry(bibtype = "Article", 3 | key = "lee2024input", 4 | title = "Low-input and single-cell methods for Infinium DNA methylation BeadChips", 5 | author = c( 6 | person(given = "Sol Moe", family = "Lee"), 7 | person(given = "Christian", family = "Loo"), 8 | person(given = "Rexxi", family = "Prasasya"), 9 | person(given = "Marisa", family = "Bartolomei"), 10 | person(given = "Rahul", family = "Kohli"), 11 | person(given = "Wanding", family = "Zhou")), 12 | year = "2024", month = "Feb", journal = "Nucleic acids research", 13 | ## volume = "24", number = "1", 14 | doi = "10.1093/nar/gkae127", 15 | language = "eng", issn = "1362-4962", 16 | header = "The ELBAR detection calling is described in:"), 17 | 18 | bibentry(bibtype = "Article", 19 | key = "BIB", 20 | title = "Comparative epigenome analysis using Infinium DNA methylation BeadChips", 21 | author = c( 22 | person(given = "Wubin", family = "Ding"), 23 | person(given = "Diljeet", family = "Kaur"), 24 | person(given = "Steve", family = "Horvath"), 25 | person(given = "Wanding", family = "Zhou")), 26 | year = "2023", month = "Jan", journal = "Briefings in bioinformatics", 27 | volume = "24", number = "1", eprint = "36617464", 28 | doi = "10.1093/bib/bbac617", 29 | language = "eng", issn = "1477-4054", 30 | header = "Data analysis on non-standard genome is described in:"), 31 | 32 | bibentry(bibtype = "Article", 33 | key = "MM285", 34 | title = "DNA methylation dynamics and dysregulation delineated by high-throughput profiling in the mouse", 35 | author = c( 36 | person(given = "Wanding", family = "Zhou"), 37 | person(given = "Toshinori", family = "Hinoue"), 38 | person(given = "Bret", family = "Barnes"), 39 | person(given = "Owen", family = "Mitchell"), 40 | person(given = 
"Waleed", family = "Iqbal"), 41 | person(given = c("Sol", "Moe"), family = "Lee"), 42 | person(given = c("Kelly", "K"), family = "Foy"), 43 | person(given = "Kwang-Ho", family = "Lee"), 44 | person(given = c("Ethan", "J"), family = "Moyer"), 45 | person(given = "Alexandra", family = "VanderArk"), 46 | person(given = c("Julie", "M"), family = "Koeman"), 47 | person(given = "Wubin", family = "Ding"), 48 | person(given = "Manpreet", family = "Kalkat"), 49 | person(given = c("Nathan", "J"), family = "Spix"), 50 | person(given = "Bryn", family = "Eagleson"), 51 | person(given = c("John", "Andrew"), family = "Pospisilik"), 52 | person(given = c("Piroska", "E"), family = "Szabó"), 53 | person(given = c("Marisa", "S"), family = "Bartolomei"), 54 | person(given = c("Nicole", "A", "Vander"), family = "Schaaf"), 55 | person(given = "Liang", family = "Kang"), 56 | person(given = c("Ashley", "K"), family = "Wiseman"), 57 | person(given = c("Peter", "A"), family = "Jones"), 58 | person(given = c("Connie", "M"), family = "Krawczyk"), 59 | person(given = "Marie", family = "Adams"), 60 | person(given = "Rishi", family = "Porecha"), 61 | person(given = c("Brian", "H"), family = "Chen"), 62 | person(given = "Hui", family = "Shen"), 63 | person(given = c("Peter", "W"), family = "Laird")), 64 | year = "2022", month = "Jul", journal = "Cell genomics", 65 | volume = "2", number = "7", eprint = "35873672", 66 | doi = "10.1016/j.xgen.2022.100144", language = "eng", 67 | issn = "2666-979X", eprinttype = "pubmed", 68 | header = "Mouse (MM285) array informatics is described in:"), 69 | 70 | bibentry(bibtype = "Article", 71 | key = "sesame", 72 | title = "{SeSAMe: reducing artifactual detection of DNA methylation by Infinium BeadChips in genomic deletions}", 73 | author = c( 74 | person("Wanding", "Zhou"), 75 | person(c("Timothy", "J."), "Triche"), 76 | person(c("Peter", "W."), "Laird"), 77 | person("Hui", "Shen") 78 | ), 79 | year = "2018", 80 | journal = "Nucleic Acids Research", 81 | volume 
= "gky691",
        doi = "10.1093/nar/gky691",
        pubmed = "30085201",
        header = "pOOBAH detection calling is described in:"),

    bibentry(bibtype = "Article",
        key = "noob",
        author = c(person(given = c("Timothy", "J."), family = "Triche"),
            person(given = c("Daniel", "J."), family = "Weisenberger"),
            person(given = c("David"), family = "Van Den Berg"),
            person(given = c("Peter", "W."), family = "Laird"),
            person(given = c("Kimberly", "D."), family = "Siegmund")),
        title = "Low-level processing of {Illumina} {Infinium} {DNA} {Methylation} {BeadArrays}",
        journal = "Nucleic Acids Research",
        year = "2013",
        volume = "41",
        number = "7",
        pages = "e90",
        doi = "10.1093/nar/gkt090",
        pubmed = "23476028",
        header = "noob background correction is described in:")
)

--------------------------------------------------------------------------------
/R/strain.R:
--------------------------------------------------------------------------------
## Convert beta values at the MM285 strain SNP probes to allele frequencies.
##
## The strain SNP table is loaded from sesameData ('MM285.addressStrain').
## `betas` is subset by the table's Probe_ID column, and entries selected by
## the table's flipToAF column are replaced by 1 - value.
## NOTE(review): `betas` is presumably a numeric vector named by probe ID,
## since it is indexed by Probe_ID -- confirm against callers.
mouseBetaToAF <- function(betas) {
    se <- sesameDataGet('MM285.addressStrain')$strain_snps
    rd <- rowData(se)
    ## keep only the strain SNP probes, in the order of the SNP table
    af <- betas[rd$Probe_ID]
    ## flip the probes flagged by flipToAF
    af[rd$flipToAF] <- 1 - af[rd$flipToAF]
    af
}

#' Infer strain information for mouse array
#'
#' @param sdf SigDF
#' @param min_frac_dt minimum fraction of detected signal (DEFAULT: 0.2)
#' otherwise, we give up strain inference and return NA.
#' @param return.probability return probability vector for all strains
#' @param return.pval return p-value
#' @param return.strain return strain name
#' @param verbose print more messages
#' @return by default, the input SigDF updated (through updateSigDF) with
#' the best-guess strain. If return.strain = TRUE, the name of the best-guess
#' strain; if return.probability = TRUE, the normalized probabilities of all
#' strains; if return.pval = TRUE, one minus the normalized probability of
#' the best guess. The flags are checked in that order. When the fraction of
#' detected strain SNP probes is below min_frac_dt, NA (or a vector of NA for
#' return.probability) is returned, or the result of sdfMsg() in the default
#' mode.
#' @examples
#' sesameDataCache() # if not done yet
#' sdf <- sesameDataGet('MM285.1.SigDF')
#' inferStrain(sdf, return.strain = TRUE)
#' sdf.strain <- inferStrain(sdf)
#' @import tibble
#' @export
inferStrain <- function(
    sdf, return.strain = FALSE, return.probability = FALSE,
    return.pval = FALSE, min_frac_dt = 0.2, verbose = FALSE) {

    addr <- sesameDataGet("MM285.addressStrain")
    se <- addr$strain_snps
    rd <- SummarizedExperiment::rowData(se)

    ## C57BL_6J is the first strain in the table
    strain_snps <- rd[,which(colnames(rd)=="C57BL_6J"):ncol(rd)]

    ## give up if the success rate is low; probes without a p-value
    ## (e.g., absent from sdf) count as not detected instead of turning
    ## the whole fraction into NA
    pvals <- pOOBAH(sdf, return.pval=TRUE)
    n_detected <- sum(pvals[rd$Probe_ID] < 0.05, na.rm = TRUE)
    if (n_detected / nrow(rd) < min_frac_dt) {
        if (return.strain) {
            return(NA)
        } else if (return.probability) {
            return(rep(NA, ncol(strain_snps)))
        } else if (return.pval) {
            return(NA)
        } else {
            return(sdfMsg(sdf, verbose,
                "Abort strain inference for low detection rate."))
        }
    }

    vafs <- getBetas(dyeBiasNL(noob(sdf)), mask=FALSE)[rd$Probe_ID]
    vafs[is.na(vafs)] <- 0.5 # just in case
    vafs[rd$flipToAF] <- 1 - vafs[rd$flipToAF]

    probes <- intersect(names(vafs), rd$Probe_ID[rd$QC!="FAIL"])
    vafs <- vafs[probes]
    ## per-strain log-likelihood of the observed allele frequencies under a
    ## normal error model; log=TRUE computes the log-density directly
    bbloglik <- vapply(strain_snps[match(probes, rd$Probe_ID),],
        function(x) sum(dnorm(x - vafs, mean=0, sd=0.8, log=TRUE)),
        numeric(1))
    ## subtract the maximum before exponentiating to avoid underflow
    probs <- setNames(exp(bbloglik - max(bbloglik)), colnames(strain_snps))

    best.index <- which.max(probs)
    strain <- names(best.index)
    if (return.strain) {
        strain # addr$strain[[strain]][c("JAX_ID","MGP_ID")]
    } else if (return.probability) {
        probs / sum(probs)
    } else if (return.pval) {
        1 - probs[best.index] / sum(probs)
    } else {
        updateSigDF(sdf, strain = strain, addr = addr, verbose = verbose)
    }
}

#' Compare Strain SNPs with a reference panel
#'
#' @param betas beta value vector or matrix (for multiple samples)
#' @param show_sample_names whether to show sample name
#' @param query_width optional argument for adjusting query width
#' @return grid object that contrast the target sample with
#' pre-built mouse strain reference
#' @importFrom S4Vectors metadata
#' @import wheatmap
#' @examples
#' sesameDataCache() # if not done yet
#' compareMouseStrainReference()
#' @export
compareMouseStrainReference <- function(
    betas = NULL, show_sample_names = FALSE, query_width = NULL) {

    ## betas = NULL; show_sample_names = FALSE;
    se <- sesameDataGet("MM285.addressStrain")$strain_snps

    cd <- as_tibble(SummarizedExperiment::colData(se))
    rd <- as_tibble(SummarizedExperiment::rowData(se))
    md <- metadata(se)
    ## drop probes that failed QC from both the data and its annotation
    se <- se[rd$QC != "FAIL",]; rd <- rd[rd$QC != "FAIL",]

    if (!is.null(betas) && is.null(dim(betas))) { # in case a vector
        betas <- cbind(betas) }

    ## a probe is flipped when exactly one of the two flags is set
    flip <- xor(rd$flipToAF, rd$flipForRefBias)

    ## extract the reference matrix once instead of once per probe
    ref <- assay(se)
    afs <- do.call(rbind, lapply(seq_along(flip), function(i) {
        if (flip[i]) { 1 - ref[i,] } else { ref[i,] }}))
    rownames(afs) <- rd$Probe_ID

    stops <- c("white", "black")
    g <- WHeatmap(afs, cmp=CMPar(stop.points=stops, dmin=0, dmax=1),
        xticklabels = show_sample_names, xticklabels.n=ncol(afs), name="b1")
    if (!is.null(betas)) { # query samples
        afs2 <- do.call(rbind, lapply(seq_along(flip), function(i) {
            if (flip[i]) {
                1 - betas[rd$Probe_ID[i],]
            } else { betas[rd$Probe_ID[i],] }}))
        g <- g + WHeatmap(afs2, RightOf("b1", width=query_width),
            cmp=CMPar(stop.points=stops, dmin=0, dmax=1),
            name="b2", xticklabels=TRUE, xticklabels.n=ncol(betas))
        right <- "b2"
    } else { # in case target is not given, plot just the reference
        right <- "b1"
    }

    ## branch color bar (vertical)
    g <- g + WColorBarV(rd$BranchLong, RightOf(right, width=0.03),
        cmp=CMPar(label2color=md$strain.colors), name="bh")
    ## strain color bar (horizontal)
    g <- g + WColorBarH(cd$strain, TopOf("b1",height=0.03),
        cmp=CMPar(label2color=md$strain.colors), name="st")
    ## legends
    g <- g + WLegendV("st",
        TopRightOf("bh", just=c('left','top'), h.pad=0.02),
        height=0.03)
    ## g <- g + WLegendV('bh', Beneath(pad=0.06))
    g + WCustomize(mar.bottom=0.15, mar.right=0.06)
}

--------------------------------------------------------------------------------
/R/match_design.R:
--------------------------------------------------------------------------------

## Quantile-normalize the betas in `input` to the target distribution `ref`
## and convert the normalized betas (bn) back to a methylated signal given
## the unmethylated signal U, i.e., M = U * bn / (1 - bn).
normalizeSetM <- function(input, ref, U) {
    bn <- normalize.quantiles.use.target(matrix(input), ref)
    U * bn / (1-bn)
}

## Location of the highest point of the kernel density estimate of x
## (NAs are removed first).
calcMode <- function(x) {
    dd <- density(na.omit(x))
    dd$x[which.max(dd$y)]
}

## Ratio between the lowest density found at the data points lying between
## the modes of x1 and x2 and the lower of the densities at the two modes.
## The density is estimated on the pooled data.
valleyDescent <- function(x1, x2) {

    m1 <- calcMode(x1)
    m2 <- calcMode(x2)
    dd <- density(na.omit(c(x1, x2)))
    dfunc <- approxfun(dd$x, dd$y)
    lo <- min(m1, m2)
    hi <- max(m1, m2)
    va <- min(dfunc(c(x1[x1 >= lo & x1 <= hi], x2[x2 >= lo & x2 <= hi])),
        na.rm=TRUE)
    va / min(dfunc(c(lo, hi)), na.rm=TRUE)
}

## One-state matching: normalize all unmasked Infinium-I betas (Red and Grn
## separately) to the distribution of the unmasked Infinium-II betas.
match1To2_1state <- function(sdf) {
    dR <- noMasked(InfIR(sdf))
    bR <- getBetas(dR)
    dG <- noMasked(InfIG(sdf))
    bG <- getBetas(dG)
    d2 <- noMasked(InfII(sdf))
    b2 <- getBetas(d2)

    dG$MG <- normalizeSetM(bG, b2, dG$UG)
    dR$MR <- normalizeSetM(bR, b2, dR$UR)
    sdf2 <- rbind(dR, dG, d2)
    ## add back the probes that were left out (e.g., masked ones)
    sdf2 <- rbind(sdf2, sdf[!(sdf$Probe_ID %in% sdf2$Probe_ID),])
    sdf2[order(sdf2$Probe_ID),]
}

## Three-state matching: betas are split into three states by
## betaMix3States and each Infinium-I state is normalized to the
## Infinium-II betas of the same state.
match1To2_3states <- function(sdf) {
    dR <- noMasked(InfIR(sdf))
    bR <- getBetas(dR)
    dG <- noMasked(InfIG(sdf))
    bG <- getBetas(dG)
    d2 <- noMasked(InfII(sdf))
    b2 <- getBetas(d2)

    mR <- as.integer(betaMix3States(bR))
    mG <- as.integer(betaMix3States(bG))
    m2 <- as.integer(betaMix3States(b2))

    ## the per-state updates are independent of each other: they read
    ## bR/bG/b2 and the U signals, and write disjoint parts of MR/MG
    for (k in seq_len(3)) {
        dR$MR[mR==k] <- normalizeSetM(bR[mR==k], b2[m2==k], dR$UR[mR==k])
        dG$MG[mG==k] <- normalizeSetM(bG[mG==k], b2[m2==k], dG$UG[mG==k])
    }
    sdf2 <- rbind(dR, dG, d2)
    sdf2 <- rbind(sdf2, sdf[!(sdf$Probe_ID %in% sdf2$Probe_ID),])
    sdf2[order(sdf2$Probe_ID),]
}

#' normalize Infinium I probe betas to Infinium II
#'
#' This is designed to counter tail inflation in Infinium I probes.
#'
#' @param sdf SigDF
#' @param min_dbeta the default algorithm performs 2-state
#' quantile-normalization of the unmethylated and methylated modes
#' separately. However, when the two modes are too close, we fall back
#' to a one-mode normalization. This threshold defines the minimum
#' inter-mode distance required for the 2-state normalization.
#' @return SigDF
#' @examples
#'
#' library(RPMM)
#' sdf <- sesameDataGet("MM285.1.SigDF")
#' sesameQC_plotBetaByDesign(sdf)
#' sesameQC_plotBetaByDesign(matchDesign(sdf))
#'
#' @export
matchDesign <- function(sdf, min_dbeta = 0.3) {
    dR <- noMasked(InfIR(sdf))
    dG <- noMasked(InfIG(sdf))
    d2 <- noMasked(InfII(sdf))

    b2 <- getBetas(d2)
    m2 <- as.integer(betaMix2States(b2))

    ## message(calcMode(b2[m2 == 1]), " ", calcMode(b2[m2 == 2]))
    ## message(valleyDescent(b2[m2 == 1], b2[m2 == 2]))
    ## both states well populated and their modes far apart:
    ## use the three-state matching
    if (sum(m2==1, na.rm=TRUE) > 100 &&
        sum(m2==2, na.rm=TRUE) > 100 &&
        abs(calcMode(b2[m2 == 1]) - calcMode(b2[m2 == 2])) > 0.7) {
        return(match1To2_3states(sdf)) }

    ## a state is nearly empty, the valley between the modes is shallow,
    ## or the modes are closer than min_dbeta: use the one-state matching
    if (sum(m2==1, na.rm=TRUE) < 10 ||
        sum(m2==2, na.rm=TRUE) < 10 ||
        valleyDescent(b2[m2==1], b2[m2==2]) >= 0.8 ||
        abs(calcMode(b2[m2 == 1]) - calcMode(b2[m2 == 2])) < min_dbeta) {
        return(match1To2_1state(sdf)) }

    ## otherwise match each of the two states separately
    bR <- getBetas(dR, mask = FALSE)
    mR <- as.integer(betaMix2States(bR))
    bG <- getBetas(dG, mask = FALSE)
    mG <- as.integer(betaMix2States(bG))

    dR$MR[mR==1] <- normalizeSetM(bR[mR==1], b2[m2==1], dR$UR[mR==1])
    dR$MR[mR==2] <- normalizeSetM(bR[mR==2], b2[m2==2], dR$UR[mR==2])
    dG$MG[mG==1] <- normalizeSetM(bG[mG==1], b2[m2==1], dG$UG[mG==1])
    dG$MG[mG==2] <- normalizeSetM(bG[mG==2], b2[m2==2], dG$UG[mG==2])
    sdf2 <- rbind(dR, dG, d2)
    ## add back the probes that were left out (e.g., masked ones)
    sdf2 <- rbind(sdf2, sdf[!(sdf$Probe_ID %in% sdf2$Probe_ID),])
    sdf2[order(sdf2$Probe_ID),]
}

## Split beta values into two states with a two-class beta mixture.
##
## x         numeric vector of beta values (NAs allowed)
## n_samples at most this many non-NA values are used to fit the mixture
## th_init   threshold used to initialize the class memberships
##
## Returns a factor with the same length and names as x; its two levels are
## the intervals below and above the fitted threshold.
betaMix2States <- function(x, n_samples = 10000, th_init = 0.5) {
    if (sum(!is.na(x)) > n_samples) {
        x1 <- sample(na.omit(x), n_samples)
    } else {
        x1 <- na.omit(x)
    }
    m <- matrix(0, nrow = length(x1), ncol = 2) # membership matrix
    m[x1 <= th_init, 1] <- 1
    m[x1 > th_init, 2] <- 1

    fitres <- RPMM::blc(
        matrix(x1), m, maxiter = 5, tol = 0.001, verbose = FALSE)
    m1 <- apply(fitres$w, 1, which.max)
    ## threshold = midpoint between the two classes. The two values must be
    ## combined with c(): mean(a, b) would treat b as the `trim` argument
    ## and simply return a.
    th <- mean(c(max(x1[m1 == 1]), min(x1[m1 == 2])))
    m2 <- cut(x, breaks=c(0, th, 1), include.lowest = TRUE)
    names(m2) <- names(x)
    m2
}

## Split beta values into three states with a three-class beta mixture.
##
## x          numeric vector of beta values (NAs allowed)
## n_samples  at most this many non-NA values are used to fit the mixture
## th_init1, th_init2  thresholds used to initialize the class memberships
##
## Returns a factor with the same length and names as x; its three levels
## are the intervals delimited by the two fitted thresholds.
betaMix3States <- function(
    x, n_samples = 10000, th_init1 = 0.2, th_init2 = 0.7) {

    if (sum(!is.na(x)) > n_samples) {
        x1 <- sample(na.omit(x), n_samples)
    } else {
        x1 <- na.omit(x)
    }
    m <- matrix(0, nrow = length(x1), ncol = 3) # membership matrix
    m[x1 <= th_init1, 1] <- 1
    m[x1 > th_init1 & x1 <= th_init2, 2] <- 1
    m[x1 > th_init2, 3] <- 1

    fitres <- RPMM::blc(
        matrix(x1), m, maxiter = 5, tol = 0.001, verbose = FALSE)
    m1 <- apply(fitres$w, 1, which.max)
    ## midpoints between neighboring classes; see betaMix2States for why
    ## the values are wrapped in c()
    th1 <- mean(c(max(x1[m1 == 1]), min(x1[m1 == 2])))
    th2 <- mean(c(max(x1[m1 == 2]), min(x1[m1 == 3])))
    m2 <- cut(x, breaks=c(0, th1, th2, 1), include.lowest = TRUE)
    names(m2) <- names(x)
    m2
}

--------------------------------------------------------------------------------
/R/open.R:
--------------------------------------------------------------------------------
#' List supported prepSesame functions
#'
#' @return a data frame with code, func, description
#' @examples
#' prepSesameList()
#' @export
prepSesameList <- function() {
    x <- data.frame(rbind(
        c("0", "resetMask", "Reset mask to all FALSE"),
        c("Q", "qualityMask", "Mask probes of poor design"),
        c("G", "prefixMaskButCG", "Mask all but cg- probes"),
        c("H", "prefixMaskButC", "Mask all but cg- and ch-probes"),
        c("C", "inferInfiniumIChannel", "Infer channel for Infinium-I probes"),
        c("D", "dyeBiasNL", "Dye bias correction (non-linear)"),
        c("E", "dyeBiasL", "Dye bias correction (linear)"),
        c("P", "pOOBAH", "Detection p-value masking using oob"),
        c("I", "ELBAR", "Mask background-dominated readings"),
        c("B", "noob", "Background subtraction using oob"),
        c("U", "scrub", "More aggressive background subtraction using scrub"),
        c("S", "inferSpecies", "Set species-specific mask"),
        c("T", "inferStrain", "Set strain-specific mask (mouse)"),
        c("M", "matchDesign", "Match Inf-I/II in beta distribution")))
    colnames(x) <- c("code", "func", "description")
    x
}

#' Apply a chain of sesame preprocessing functions in an arbitrary order
#'
#' Notes on the order of operation:
#' 1. qualityMask and inferSpecies should go before noob and pOOBAH,
#' otherwise the background is too high because of Multi,
#' uk and other probes
#' 2. dyeBias correction needs to happen early
#' 3. channel inference before dyebias
#' 4. noob should happen last, pOOBAH before noob because noob modifies oob
#'
#' @param sdf SigDF
#' @param prep code that indicates preprocessing functions and their
#' execution order (functions on the left is executed first).
#' @param prep_args optional argument list to individual functions, e.g.,
#' prepSesame(sdf, prep_args=list(Q=list(mask_names = "design_issue")))
#' sets qualityMask(sdf, mask_names = "design_issue")
#' @return SigDF
#' @examples
#' sdf <- sesameDataGet("MM285.1.SigDF")
#' sdf1 <- prepSesame(sdf, "QCDPB")
#' @export
prepSesame <- function(sdf, prep = "QCDPB", prep_args = NULL) {
    cfuns <- prepSesameList()

    ## one single-letter code per preprocessing step
    codes <- str_split(prep,"")[[1]]
    stopifnot(all(codes %in% cfuns$code))
    x <- sdf
    for(c1 in codes) {
        ## look up the function by its code and call it on the running
        ## result together with the extra arguments registered for the code
        x <- do.call(get(cfuns[cfuns$code == c1, "func"]),
            c(list(x), prep_args[[c1]]))
    }
    x
}

## Apply func to ret (passing ... on); return ret unchanged if func is NULL.
wrap_openSesame1 <- function(func, ret, ...) {
    if (is.null(func)) {
        ret
    } else {
        func(ret, ...)
    }
}

## Combine the per-sample results in ret. If all results are numeric and of
## the same length they are column-bound into a matrix, otherwise the list
## is returned. When the result carries no names and x is a character vector
## of matching length, basename(x) is used to name the columns/elements.
wrap_openSesame <- function(x, ret) {
    if (all(vapply(ret, is.numeric, logical(1))) &&
        length(unique(vapply(ret, length, integer(1)))) == 1) {
        ## getBetas, getAFs, ...
        ret <- do.call(cbind, ret)
        if (is.null(colnames(ret)) &&
            is.character(x) && length(x) == ncol(ret)) {
            colnames(ret) <- basename(x)
        }
        ret
    } else { # others
        if (is.null(names(ret)) &&
            is.character(x) && length(x) == length(ret)) {
            names(ret) <- basename(x)
        }
        ret

    }
}

#' The openSesame pipeline
#'
#' This function is a simple wrapper of noob + nonlinear dye bias
#' correction + pOOBAH masking.
#'
#' Please use mask=FALSE to turn off masking.
#'
#' If the input is an IDAT prefix or a \code{SigDF}, the output is
#' the beta value numerics.
#'
#' @param x SigDF(s), IDAT prefix(es)
#' @param prep preprocessing code, see ?prepSesame
#' @param prep_args optional preprocessing argument list, see ?prepSesame
#' @param manifest optional dynamic manifest
#' @param func either getBetas or getAFs, if NULL, then return SigDF list
#' @param platform optional platform string
#' @param BPPARAM get parallel with MulticoreParam(n)
#' @param min_beads minimum bead number, probes with R or G smaller than
#' this threshold will be masked. If NULL, no filtering based on bead
#' count will be applied. Default to 1.
#' @param ...
#' parameters passed on to \code{func} (getBetas by default)
#' @return the output of \code{func} (beta values by default): for a single
#' sample, whatever \code{func} returns (a numeric vector for getBetas); for
#' multiple samples, a matrix with one column per sample if all per-sample
#' results are numeric and of the same length, otherwise a list.
#' @import BiocParallel
#' @examples
#'
#' in_dir <- system.file("extdata", "", package = "sesameData")
#' betas <- openSesame(in_dir)
#' ## or
#' IDATprefixes <- searchIDATprefixes(in_dir)
#' betas <- openSesame(IDATprefixes)
#'
#' @export
openSesame <- function(
    x, prep = "QCDPB", prep_args = NULL, manifest = NULL,
    func = getBetas, BPPARAM=SerialParam(), platform = "",
    min_beads = 1, ...) {

    ## expand if a directory
    if (length(x) == 1 && is(x, 'character') && dir.exists(x)) {
        x <- searchIDATprefixes(x)
    }

    if (is(x, "SigDF")) {
        wrap_openSesame1(func, prepSesame(x, prep, prep_args), ...)
    } else if (is(x, 'character')) {
        if (length(x) == 1) {
            wrap_openSesame1(func, prepSesame(readIDATpair(
                x, platform = platform, manifest = manifest,
                min_beads = min_beads), prep, prep_args), ...)
        } else { # multiple IDAT prefixes / SigDFs
            ## min_beads must be forwarded explicitly, otherwise the
            ## per-sample calls silently fall back to the default
            wrap_openSesame(x, bplapply(x, openSesame,
                platform = platform, prep = prep, prep_args = prep_args,
                func = func, manifest = manifest, min_beads = min_beads,
                BPPARAM=BPPARAM, ...))
        }
    } else if (is(x, "list") && length(x) > 0 && is(x[[1]], "SigDF")) {
        ## pass `func` by its full name rather than relying on partial
        ## argument matching
        wrap_openSesame(x, bplapply(x, openSesame,
            platform = platform, prep = prep, prep_args = prep_args,
            func = func, manifest = manifest, BPPARAM=BPPARAM, ...))
    } else {
        stop("Unsupported input")
    }
}


--------------------------------------------------------------------------------
/R/species.R:
--------------------------------------------------------------------------------
## Extract the scientific name, taxon ID, common name and assembly of
## `species` from the species address object `addr`.
speciesInfo <- function(addr, species) {
    res <- addr$species[[species]]
    res[c("scientificName", "taxonID", "commonName", "assembly")]
}

#' Set color and mask using strain/species-specific manifest
#'
#' also sets attr(,"species")
#'
#' @param sdf a \code{SigDF}
#' @param species the species the sample is considered to be
#' @param strain the strain the sample is considered to be
#' @param addr species-specific address species, optional
#' @param verbose print more messages
#' @return a \code{SigDF} with updated color channel and mask
#' @examples
#' sdf <- sesameDataGet('Mammal40.1.SigDF')
#' sdf_mouse <- updateSigDF(sdf, species="mus_musculus")
#'
#' @export
updateSigDF <- function(
    sdf, species = NULL, strain = NULL, addr = NULL, verbose = FALSE) {

    ## species takes precedence over strain when both are given
    if (!is.null(species)) {
        if (is.null(addr)) {
            addr <- sesameDataGet(sprintf(
                "%s.addressSpecies", sdfPlatform(sdf, verbose = verbose)))
        }
        stopifnot(species %in% names(addr$species))
        addrS <- addr$species[[species]]
        sdf <- sdfMsg(sdf, verbose, "Update using species: %s", species)
    } else if (!is.null(strain)) {
        if (is.null(addr)) {
            addr <- sesameDataGet(sprintf(
                "%s.addressStrain", sdfPlatform(sdf, verbose = verbose))) }
        stopifnot(strain %in% names(addr$strain))
        addrS <- addr$strain[[strain]]
        sdf <- sdfMsg(sdf, verbose, "Update using strain: %s", strain)
    } else {
        stop("Please specify a species or strain.")
    }

    ## set color
    m <- match(sdf$Probe_ID, addr$ordering$Probe_ID)
    ## matched Inf-I probes with non-NA value
    ## (NA can be mapping issues)
    m_idx <- (!is.na(m)) & !is.na(addrS$col[m]) & (sdf$col != "2")
    nc <- as.character(addrS$col[m[m_idx]])
    nc[is.na(nc)] <- '2'
    sdf$col[m_idx] <- factor(nc, levels=c("G","R","2"))

    ## add mask
    sdf$mask <- sdf$mask | (!is.na(m) & addrS$mask[m])
    sdf
}

## Shape the return value of inferSpecies: the AUC vector if return.auc,
## else the species information if return.species, else the SigDF updated
## for the species.
species_ret <- function(
    return.auc, return.species, species, auc, sdf, addr, verbose) {
    if (return.auc){
        auc
    } else if (return.species) {
        speciesInfo(addr, species)
    } else {
        updateSigDF(sdf, species=species, addr=addr, verbose=verbose)
    }
}

#' Infer Species
#'
#' We infer species based on probes pvalues and alignment score.
#' AUC was calculated for each species, y_true is 1 or 0
#' for pval <= threshold.pos or pval >= threshold.neg, respectively.
#'
#' @param sdf a \code{SigDF}
#' @param topN Top n positive and negative probes used to infer species.
#' increase this number can sometimes improve accuracy (DEFAULT: 1000)
#' @param threshold.pos pvalue <= threshold.pos are considered positive
#' (default: 0.01).
#' @param threshold.neg pvalue >= threshold.neg are considered negative
#' (default: 0.1).
#' @param return.auc return AUC calculated, override return.species
#' @param return.species return the information of the inferred species
#' (scientific name, taxon ID, common name and assembly)
#' @param verbose print more messages
#' @return by default, a SigDF updated for the inferred species. If
#' return.auc = TRUE, the AUC of each species (NULL when there is no useful
#' signal); if return.species = TRUE, the information of the inferred
#' species.
#' @examples
#' sdf <- sesameDataGet("MM285.1.SigDF")
#' sdf <- inferSpecies(sdf)
#'
#' ## all available species
#' all_species <- names(sesameDataGet(sprintf(
#' "%s.addressSpecies", sdfPlatform(sdf)))$species)
#'
#' @export
inferSpecies <- function(sdf, topN = 1000, threshold.pos = 0.01,
    threshold.neg = 0.1, return.auc = FALSE, return.species = FALSE,
    verbose = FALSE) {

    addr <- sesameDataGet(sprintf(
        "%s.addressSpecies", sdfPlatform(sdf, verbose = verbose)))
    ## probe x species matrix of alignment scores
    df_as <- do.call(cbind, lapply(addr$species, function(x) x$AS))
    rownames(df_as) <- addr$ordering$Probe_ID
    pvalue <- pOOBAH(sdf, return.pval=TRUE)
    pvalue <- pvalue[intersect(names(pvalue),rownames(df_as))] # shared probes
    pos_probes <- sort(pvalue[pvalue <= threshold.pos],decreasing = FALSE)
    neg_probes <- sort(pvalue[pvalue >= threshold.neg],decreasing = TRUE)
    ## count with sum(): subsetting by the comparison would keep NA
    ## p-values and count them as detected
    success.rate <- sum(pvalue <= 0.05, na.rm = TRUE) / length(pvalue)

    ## keep the same number of positive and negative probes.
    topN1 <- min(length(neg_probes),length(pos_probes), topN)
    pos <- pos_probes[seq_len(topN1)]
    neg <- neg_probes[seq_len(topN1)]

    y_true <- structure(c( # y_true = 1 for pos and y_true = 0 for neg
        rep(TRUE,length(pos)),rep(FALSE,length(neg))),
        names = c(names(pos), names(neg)))

    if (length(y_true) == 0){
        warning("Lack of useful signal. Use reference.")
        return(species_ret(return.auc, return.species,
            addr$reference, NULL, sdf, addr, verbose)) }

    n1 <- as.numeric(sum(y_true))
    n2 <- as.numeric(sum(!y_true))
    df_as <- df_as[names(y_true),,drop = FALSE]
    ## df_as[df_as < 35] <- 35 # all under 35 is qualitatively the same
    ## AUC from the rank sum of the positive probes (which come first in
    ## y_true): U1 / (n1 * n2)
    auc <- vapply(colnames(df_as),function(s) {
        R1 <- sum(rank(df_as[,s])[seq_along(pos)])
        U1 <- R1 - n1 * (n1 + 1)/2
        U1/(n1 * n2)}, numeric(1))

    ## the following is a empirical ladder where one is going to call
    ## reference for lack of negative probes
    if (success.rate >= 0.95 || (success.rate >= 0.80 && max(auc) < 0.50)) {
        sdf <- sdfMsg(sdf, verbose, "Lack of negative probes. Use reference.")
        species <- addr$reference
    } else { species <- names(which.max(auc)) }

    species_ret(return.auc, return.species, species, auc, sdf, addr, verbose)
}

#' Map the SDF (from overlap array platforms)
#' Replicates are merged by picking the best detection
#'
#' @param sdf a \code{SigDF} object
#' @return a named numeric vector for beta values
#' @examples
#' sdf <- sesameDataGet("Mammal40.1.SigDF")
#' betas <- mapToMammal40(sdf[1:10,])
#' @export
mapToMammal40 <- function(sdf) {
    addr <- sesameDataGet("Mammal40.address")
    ## order the betas by the Mammal40 probe list and name them by it, so
    ## that probes missing from sdf keep their ID
    betas <- getBetas(sdf, collapseToPfx = TRUE)[addr$ordering$Probe_ID]
    names(betas) <- addr$ordering$Probe_ID
    betas
}

--------------------------------------------------------------------------------
/R/tissue.R:
--------------------------------------------------------------------------------
#' Compare array data with references (e.g., tissue, cell types)
#'
#' @param ref the reference beta values in SummarizedExperiment.
#' One can download them from the sesameData package. See examples.
5 | #' @param betas matrix of betas for the target sample 6 | #' This argument is optional. If not given, only the reference will be shown. 7 | #' @param stop.points stop points for the color palette. 8 | #' Default to blue, yellow. 9 | #' @param query_width the width of the query beta value matrix 10 | #' @param show_sample_names whether to show sample names (default: FALSE) 11 | #' @return grid object that contrast the target sample with 12 | #' references. 13 | #' @export 14 | #' @examples 15 | #' 16 | #' sesameDataCache() # if not done yet 17 | #' compareReference(sesameDataGet("MM285.tissueSignature")) 18 | #' sesameDataGet_resetEnv() 19 | #' 20 | #' @importFrom SummarizedExperiment assay 21 | #' @importFrom SummarizedExperiment colData 22 | #' @importFrom SummarizedExperiment rowData 23 | compareReference <- function( 24 | ref, betas = NULL, stop.points = NULL, query_width=0.3, 25 | show_sample_names = FALSE) { 26 | 27 | if (is.null(stop.points)) { stop.points <- c("blue","yellow") } 28 | 29 | cd <- as_tibble(colData(ref)) 30 | rd <- as_tibble(rowData(ref)) 31 | md <- metadata(ref) 32 | if (!is.null(betas) && is.null(dim(betas))) { # in case a vector 33 | betas <- cbind(betas) 34 | } 35 | 36 | ## reference 37 | g <- WHeatmap(assay(ref), cmp=CMPar(stop.points=stop.points, 38 | dmin=0, dmax=1), xticklabels = show_sample_names, name="b1") 39 | ## query samples 40 | if (!is.null(betas)) { 41 | g <- g + WHeatmap(betas[rd$Probe_ID,], RightOf("b1", width=query_width), 42 | cmp=CMPar(stop.points=stop.points, dmin=0, dmax=1), 43 | name="b2", xticklabels = show_sample_names, 44 | xticklabels.n=ncol(betas)) 45 | right <- "b2" 46 | } else { # in case target is not given, plot just the reference 47 | right <- "b1" 48 | } 49 | ## branch color bar (vertical) 50 | g <- g + WColorBarV(rd$branch, RightOf(right, width=0.03), 51 | cmp=CMPar(label2color=md$branch_color), name="bh") 52 | ## tissue color bar (horizontal), branch should be replaced by CellType 53 | g <- g + 
WColorBarH(cd$branch, TopOf("b1",height=0.03), 54 | cmp=CMPar(label2color=md$branch_color), name="ti") 55 | ## legends 56 | g <- g + WLegendV("ti", TopRightOf("bh", just=c('left','top'), h.pad=0.02), 57 | height=0.02) 58 | g + WCustomize(mar.bottom=0.15, mar.right=0.06) 59 | } 60 | 61 | #' Compare mouse array data with mouse tissue references 62 | #' 63 | #' @param betas matrix of betas for the target sample 64 | #' This argument is optional. If not given, only the reference will be shown. 65 | #' @param ref the reference beta values in SummarizedExperiment. 66 | #' This argument is optional. If not given, the reference will be downloaded 67 | #' from the sesameData package. 68 | #' @param color either blueYellow or fullJet 69 | #' @param query_width the width of the query beta value matrix 70 | #' @return grid object that contrast the target sample with 71 | #' pre-built mouse tissue reference 72 | #' @export 73 | #' @examples 74 | #' cat("Deprecated, see compareReference") 75 | #' @importFrom SummarizedExperiment assay 76 | #' @importFrom SummarizedExperiment colData 77 | #' @importFrom SummarizedExperiment rowData 78 | compareMouseTissueReference <- function( 79 | betas=NULL, ref=NULL, color="blueYellow", query_width=0.3) { 80 | .Deprecated("compareReference") 81 | } 82 | 83 | #' inferTissue infers the tissue of a single sample (as identified through 84 | #' the branchIDs in the row data of the reference) by reporting independent 85 | #' composition through cell type deconvolution. 
86 | #' 87 | #' @param betas Named vector with probes and their corresponding beta value 88 | #' measurement 89 | #' @param reference Summarized Experiment with either hypomethylated or 90 | #' hypermethylated probe selection (row data), sample selection (column data), 91 | #' meta data, and the betas (assay) 92 | #' @param platform String representing the array type of the betas and 93 | #' reference 94 | #' @param abs_delta_beta_min Numerical value indicating the absolute minimum 95 | #' required delta beta for the probe selection criteria 96 | #' @param auc_min Numeric value corresponding to the minimum AUC value 97 | #' required for a probe to be considered 98 | #' @param coverage_min Numeric value corresponding to the minimum coverage 99 | #' requirement for a probe to be considered. Coverage is defined here as the 100 | #' proportion of samples without an NA value at a given probe. 101 | #' @param topN number of probes to at most use for each branch 102 | #' 103 | #' @return inferred tissue as a string 104 | #' @examples 105 | #' sesameDataCache() # if not done yet 106 | #' sdf <- sesameDataGet("MM285.1.SigDF") 107 | #' inferTissue(getBetas(dyeBiasNL(noob(sdf)))) 108 | #' 109 | #' sesameDataGet_resetEnv() 110 | #' 111 | #' @export 112 | inferTissue <- function(betas, reference = NULL, platform = NULL, 113 | abs_delta_beta_min = 0.3, auc_min = 0.99, coverage_min = 0.80, topN = 15) { 114 | 115 | stopifnot(is.numeric(betas)) 116 | 117 | if (is.null(reference)) { 118 | if (is.null(platform)) { 119 | platform <- inferPlatformFromProbeIDs(names(betas)) 120 | } 121 | stopifnot(platform %in% c("MM285")) # TODO: add human 122 | reference <- sesameDataGet(sprintf("%s.tissueSignature", platform)) 123 | } 124 | 125 | rd <- rowData(reference) 126 | fracs <- sort(vapply(unique(rd$branch), function(branch) { 127 | rd1 <- rd[ 128 | rd$branch == branch & abs(rd$delta_beta) >= abs_delta_beta_min, ] 129 | 130 | rd1 <- head(rd1[order(-abs(rd1$delta_beta)), ], n = topN) 131 | 132 
| fracs1 <- c(1 - betas[rd1[rd1$delta_beta < 0, "Probe_ID"]], 133 | betas[rd1[rd1$delta_beta > 0, "Probe_ID"]]) 134 | 135 | mean(fracs1, na.rm = TRUE) 136 | }, numeric(1)), decreasing = TRUE) 137 | sprintf("[%s](%1.1f) [%s](%1.1f)", 138 | names(fracs)[1], fracs[1], names(fracs)[2], fracs[2]) 139 | 140 | ## results <- results[!(names(results) %in% ignore_branches)] 141 | ## cd <- meta[match(colnames(results), meta$betas),] 142 | ## se <- SummarizedExperiment(assays=list(results=results), colData=cd) 143 | ## metadata(se)$tissue_color <- metadata(reference)$tissue_color 144 | ## metadata(se)$branchID_color <- metadata(reference)$branchID_color 145 | ## se 146 | } 147 | 148 | 149 | --------------------------------------------------------------------------------